├── .circleci
└── config.yml
├── .conda_build.sh
├── .flake8
├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
└── workflows
│ ├── release.yaml
│ └── test.yaml
├── .gitignore
├── .nojekyll
├── LICENSE
├── MANIFEST.in
├── README.md
├── ci_scripts
└── circleci
│ ├── install.sh
│ ├── push_doc.sh
│ └── pypi_deploy.sh
├── doc
├── .special.rst
├── Makefile
├── _figures
│ ├── datasets.svg
│ ├── example_graph.pdf
│ ├── example_graph.svg
│ ├── example_graph_attributed.pdf
│ ├── example_graph_attributed.svg
│ ├── example_graph_directed.pdf
│ ├── example_graph_directed.svg
│ ├── example_graph_edge_attributed.pdf
│ ├── example_graph_edge_attributed.svg
│ ├── example_graph_edge_labeled.pdf
│ ├── example_graph_edge_labeled.svg
│ ├── example_graph_labeled.pdf
│ ├── example_graph_labeled.svg
│ ├── example_graph_weighted.pdf
│ ├── example_graph_weighted.svg
│ ├── grakel_schema.svg
│ ├── graph_schema.svg
│ ├── kernel_schema.svg
│ ├── logo.svg
│ ├── number_of_graphs.svg
│ ├── number_of_nodes.svg
│ ├── optimal_assignment_example.png
│ ├── optimal_assignment_histograms.png
│ └── wl_optimal_assignment.png
├── _static
│ ├── css
│ │ └── supplementary.css
│ ├── kataoka1.png
│ ├── marion1.png
│ ├── marion2.png
│ ├── marion3.png
│ ├── marion4.png
│ ├── odd_sth_1.png
│ ├── odd_sth_2.png
│ ├── odd_sth_3.png
│ └── odd_sth_4.png
├── _templates
│ ├── class.rst
│ ├── function.rst
│ ├── function_bib.rst
│ └── kernel.rst
├── api.rst
├── benchmarks.rst
├── benchmarks
│ ├── benchmarks.bib
│ ├── comparison.rst
│ └── evaluation.rst
├── biblio.bib
├── classes.rst
├── conf.py
├── datasets.rst
├── documentation.rst
├── documentation
│ ├── code_for_examples
│ │ ├── vertex_kernel.py
│ │ └── vertex_kernel_advanced.py
│ ├── contributing.rst
│ ├── core_concepts.rst
│ ├── creating_kernels.rst
│ ├── installation.rst
│ └── introduction.rst
├── graph.rst
├── graph_kernel.rst
├── index.rst
├── kernels.rst
├── kernels
│ ├── core_framework.rst
│ ├── edge_histogram.rst
│ ├── graph_hopper.rst
│ ├── graph_kernels.bib
│ ├── graphlet_sampling.rst
│ ├── hadamard_code.rst
│ ├── kernel.rst
│ ├── lovasz_theta.rst
│ ├── multiscale_laplacian.rst
│ ├── neighborhood_hash.rst
│ ├── neighborhood_subgraph_pairwise_distance.rst
│ ├── odd_sth.rst
│ ├── propagation.rst
│ ├── pyramid_match.rst
│ ├── random_walk.rst
│ ├── shortest_path.rst
│ ├── subgraph_matching.rst
│ ├── svm_theta.rst
│ ├── vertex_histogram.rst
│ ├── weisfeiler_lehman.rst
│ └── weisfeiler_lehman_optimal_assignment.rst
├── make.bat
├── sphinxext
│ ├── MANIFEST.in
│ ├── github_link.py
│ ├── sphinx_issues.py
│ └── xref.py
└── tutorials.rst
├── examples
├── README.txt
├── document_retrieval_example.py
├── erdos_renyi.py
├── node_attributed_dataset.py
├── nx_to_grakel.py
├── optimizing_hyperparameters.py
├── plot_pipeline_example.py
├── shortest_path.py
└── weisfeiler_lehman_subtree.py
├── git
├── grakel
├── __init__.py
├── datasets
│ ├── __init__.py
│ ├── base.py
│ └── testing.py
├── graph.py
├── graph_kernels.py
├── kernels
│ ├── __init__.py
│ ├── _c_functions
│ │ ├── __init__.pyx
│ │ ├── functions.pyx
│ │ ├── header.pxd
│ │ ├── include
│ │ │ └── functions.hpp
│ │ └── src
│ │ │ ├── ArashPartov.cpp
│ │ │ └── sm_core.cpp
│ ├── _isomorphism
│ │ ├── __init__.py
│ │ ├── bliss-0.50
│ │ │ ├── bignum.hh
│ │ │ ├── bliss.cc
│ │ │ ├── bliss_C.cc
│ │ │ ├── bliss_C.h
│ │ │ ├── defs.hh
│ │ │ ├── graph.cc
│ │ │ ├── graph.hh
│ │ │ ├── heap.cc
│ │ │ ├── heap.hh
│ │ │ ├── kqueue.hh
│ │ │ ├── kstack.hh
│ │ │ ├── orbit.cc
│ │ │ ├── orbit.hh
│ │ │ ├── partition.cc
│ │ │ ├── partition.hh
│ │ │ ├── uintseqhash.cc
│ │ │ ├── uintseqhash.hh
│ │ │ ├── utils.cc
│ │ │ └── utils.hh
│ │ ├── bliss.pyx
│ │ ├── intpyblissmodule_2.cc
│ │ └── intpyblissmodule_3.cc
│ ├── core_framework.py
│ ├── edge_histogram.py
│ ├── graph_hopper.py
│ ├── graphlet_sampling.py
│ ├── hadamard_code.py
│ ├── kernel.py
│ ├── lovasz_theta.py
│ ├── multiscale_laplacian.py
│ ├── neighborhood_hash.py
│ ├── neighborhood_subgraph_pairwise_distance.py
│ ├── odd_sth.py
│ ├── propagation.py
│ ├── pyramid_match.py
│ ├── random_walk.py
│ ├── shortest_path.py
│ ├── subgraph_matching.py
│ ├── svm_theta.py
│ ├── vertex_histogram.py
│ ├── weisfeiler_lehman.py
│ └── weisfeiler_lehman_optimal_assignment.py
├── tests
│ ├── __main__.py
│ ├── data
│ │ ├── Cuneiform
│ │ │ ├── Cuneiform_A.txt
│ │ │ ├── Cuneiform_edge_attributes.txt
│ │ │ ├── Cuneiform_edge_labels.txt
│ │ │ ├── Cuneiform_graph_indicator.txt
│ │ │ ├── Cuneiform_graph_labels.txt
│ │ │ ├── Cuneiform_node_attributes.txt
│ │ │ ├── Cuneiform_node_labels.txt
│ │ │ └── README.txt
│ │ └── MUTAG
│ │ │ ├── MUTAG_A.txt
│ │ │ ├── MUTAG_edge_labels.txt
│ │ │ ├── MUTAG_graph_indicator.txt
│ │ │ ├── MUTAG_graph_labels.txt
│ │ │ ├── MUTAG_node_labels.txt
│ │ │ └── README.txt
│ ├── test_Kernel.py
│ ├── test_common.py
│ ├── test_graph.py
│ ├── test_graph_kernels.py
│ ├── test_kernels.py
│ ├── test_utils.py
│ └── test_windows_sdp_issue.py
├── tools.py
└── utils.py
├── meta.yaml
├── misc
├── implement_list
└── install_pynauty.py
├── pyproject.toml
├── requirements.txt
├── setup.py
└── tutorials
├── digit_classification
└── digit_classification.ipynb
└── text_categorization
├── data
├── TREC_10_coarse.label
└── train_5500_coarse.label
└── text_categorization.ipynb
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | jobs:
4 | python3:
5 | docker:
6 | - image: circleci/python:3.6
7 | steps:
8 | - checkout
9 | - run: python -m venv ./venv
10 | - run: bash ./ci_scripts/circleci/install.sh
11 | - store_artifacts:
12 | path: doc/_build/
13 | destination: doc
14 | - store_artifacts:
15 | path: ~/log.txt
16 | destination: log.txt
17 | - persist_to_workspace:
18 | root: .
19 | paths: .
20 |
21 | deploy:
22 | docker:
23 | - image: circleci/python:3.6
24 | environment:
25 | # The github organization or username of the repository which hosts the
26 | # project and documentation.
27 | - USERNAME: ysig
28 |
29 | # The repository where the documentation will be hosted
30 | - DOC_REPO: GraKeL
31 |
32 | # The base URL for the Github page where the documentation will be hosted
33 | - DOC_URL: 0.1a10
34 |
35 | # The email is to be used for commits in the Github Page
36 | - EMAIL: y.siglidis@gmail.com
37 |
38 | # Deploy docs on pypi
39 | - DEPLOY_PYPI: false
40 |
41 | - PUSH_DOCS: false
42 |
43 | steps:
44 | - checkout
45 | - attach_workspace:
46 | at: ~/project
47 | - run: bash ./ci_scripts/circleci/pypi_deploy.sh
48 | - run: bash ./ci_scripts/circleci/push_doc.sh
49 |
50 | workflows:
51 | version: 2
52 | build-doc-and-deploy:
53 | jobs:
54 | - python3
55 | - deploy:
56 | requires:
57 | - python3
58 | # filters:
59 | # branches:
60 | # only: develop
61 |
62 |
--------------------------------------------------------------------------------
/.conda_build.sh:
--------------------------------------------------------------------------------
1 | set -e
2 | set -x
3 | rm -rf conda_build
4 | mkdir -p conda_build
5 | conda build purge
6 | conda-build . --output-folder conda_build/ --python 2.7
7 | conda-build . --output-folder conda_build/ --python 3.5
8 | conda-build . --output-folder conda_build/ --python 3.6
9 | conda-build . --output-folder conda_build/ --python 3.7
10 | conda-build . --output-folder conda_build/ --python 3.8
11 | conda-build . --output-folder conda_build/ --python 3.9
12 | conda-build . --output-folder conda_build/ --python 3.10
13 | conda-build . --output-folder conda_build/ --python 3.11
14 | conda-build . --output-folder conda_build/ --python 3.12
15 | conda convert -f --platform all conda_build/linux-64/*.tar.bz2 -o conda_build
16 |
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | # Default flake8 3.5 ignored flags
3 | max-line-length = 130
4 | ignore=H306,E121,E123,E126,E741,E226,E24,E704,W503,W504
5 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | X = ...
16 | y = ...
17 | import grakel as gk
18 | y = gk.fit(..)
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Stack Trace**
24 | If applicable, provide the stack trace related to your error.
25 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | # Workflow to build and test wheels
2 | # =================================
3 | # This github action gets triggered whenever there is a push to the master branch or a release is created.
4 | # It generates both wheels and distribution files, making sure their contents are correct via unit testing.
5 | #
6 | # Please note that creating a git tag and pushing it (git tag <>; git push --tags) is not
7 | # sufficient to append the wheels and distribution files to your release.
8 | # You need to generate a new release using github, not git.
9 | #
10 | # Guides
11 | # ------
12 | # cibuildwheel docs:
13 | # * https://cibuildwheel.readthedocs.io/en/stable/options/
14 | # github actions:
15 | # * https://docs.github.com/en/actions
16 |
17 | name: Wheel builder
18 |
19 | on:
20 | # Manual trigger on github
21 | workflow_dispatch:
22 | inputs:
23 | deploy:
24 | description: "Deploy to PyPI"
25 | required: false
26 | type: boolean
27 |
28 | # Push to master or a release branch
29 | push:
30 | branches:
31 | - master
32 | # Release branches
33 | - "[0-9]+.[0-9]+.X"
34 |
35 | env:
36 | package-name: GraKeL
37 |
38 | test-cmd: pytest
39 | extra-requires: "wheel"
40 | sdist-version: "3.7"
41 |
42 | jobs:
43 |
44 | build_wheels:
45 | name: Wheels ${{ matrix.os }}-${{ matrix.py }}
46 | runs-on: ${{ matrix.os }}
47 |
48 | # Parallelize as much as possible across github action workers
49 | strategy:
50 | # 1 runner per combination of (os, py)
51 | matrix:
52 | os: [ubuntu-latest, windows-latest, macos-latest]
53 | py: ["cp37-*", "cp38-*", "cp39-*", "cp310-*", "cp311-*"] #, "cp312-*"] not yet supported
54 | # All workers independent, don't cancel all if one fails
55 | fail-fast: false
56 |
57 | steps:
58 | - uses: actions/checkout@v3
59 |
60 | # Lets us build aarch64 on Linux
61 | - name: Set up QEMU
62 | if: runner.os == 'Linux'
63 | uses: docker/setup-qemu-action@v2
64 | with:
65 | platforms: all
66 |
67 | # https://cibuildwheel.readthedocs.io/en/1.x/options
68 | - name: Build wheels
69 | uses: pypa/cibuildwheel@v2.9.0
70 | env:
71 | CIBW_BUILD_FRONTEND: "build"
72 | CIBW_BUILD: ${{ matrix.py }}
73 | CIBW_ARCHS_MACOS: x86_64 universal2
74 | CIBW_ARCHS_LINUX: x86_64 aarch64
75 | # No win32 ("x86") for Windows as scipy declared it has stopped releasing wheels
76 | # from 1.8.0 onwards, officially from 1.9.3
77 | CIBW_ARCHS_WINDOWS: AMD64
78 | # Install test requirements and run the test-cmd
79 | CIBW_TEST_EXTRAS: ${{ env.extra-requires }}
80 | # {project} is a special string recognized by CIBW and replaced with the project dir
81 | CIBW_TEST_COMMAND: ${{ env.test-cmd }} {project}
82 | # * Scipy has no wheels released for musllinux and will not build because OpenBLAS is not found
83 | CIBW_SKIP: "*-musllinux*"
84 | # https://cibuildwheel.readthedocs.io/en/stable/options/#test-skip
85 | # * Will avoid testing on emulated architectures (aarch64)
86 | # * Skip trying to test arm64 builds on Intel Macs
87 | CIBW_TEST_SKIP: "*-*linux_aarch64 *-macosx_universal2:arm64 "
88 |
89 | - uses: actions/upload-artifact@v3
90 | with:
91 | path: ./wheelhouse/*.whl
92 |
93 | build_sdist:
94 | name: sdist
95 | runs-on: ubuntu-latest
96 |
97 | steps:
98 | - name: Checkout ${{ env.package-name }}
99 | uses: actions/checkout@v3
100 |
101 | - name: Setup Python
102 | uses: actions/setup-python@v4
103 | with:
104 | python-version: ${{ env.sdist-version }}
105 |
106 | - name: Build source distribution
107 | run: |
108 | python -m pip install --upgrade pip setuptools wheel build
109 | python -m build -s
110 | echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> $GITHUB_ENV
111 |
112 | - name: Twine check ${{ env.package-name }}
113 | run: |
114 | python -m pip install twine
115 | twine_output=`twine check ${{ env.sdist_name }}`
116 | twine check ${{env.sdist_name}} --strict
117 |
118 | - name: Install dist
119 | run: |
120 | python -m pip install ${{ env.sdist_name }}[${{ env.extra-requires }}]
121 |
122 | - name: Tests
123 | run: |
124 | ${{ env.test-cmd }}
125 |
126 | - name: Store artifacts
127 | uses: actions/upload-artifact@v2
128 | with:
129 | path: dist/*.tar.gz
130 |
131 | release_assets:
132 | # Only when manually specified
133 | if: ${{ inputs.deploy }}
134 |
135 | name: Upload Release
136 | runs-on: ubuntu-latest
137 | needs: [build_wheels, build_sdist]
138 | steps:
139 | - name: Download artifacts
140 | uses: actions/download-artifact@v2
141 | with:
142 | path: dist
143 |
144 | - name: Display structure of downloaded files
145 | run: |
146 | ls -R
147 | mv dist/artifact/* dist/
148 | rm -rf dist/artifact
149 | ls -R
150 |
151 | - name: Publish a Python distribution to PyPI
152 | uses: pypa/gh-action-pypi-publish@release/v1
153 | with:
154 | password: ${{ secrets.PYPI_API_TOKEN }}
155 | packages_dir: dist/
156 |
--------------------------------------------------------------------------------
/.github/workflows/test.yaml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on:
4 | # Allow to manually trigger through github API
5 | workflow_dispatch:
6 |
7 | # Triggers with push to master
8 | push:
9 | branches:
10 | - master
11 |
12 | # Triggers with push to a pr aimed at master
13 | pull_request:
14 | branches:
15 | - master
16 |
17 | schedule:
18 | # https://crontab.guru/#42_2_3_*_*
19 | # "At 02:42 on day-of-month 3"
20 | # Put it at this odd time to avoid competing with load spikes on github action servers
21 | - cron: '42 2 3 * *'
22 |
23 | env:
24 |
25 | import-package-name: grakel
26 | extra-requires: "[dev]" # "" for no extra_requires
27 | extra-requires-soft: "[test]" # "" for no extra_requires
28 | test-dir: grakel/tests
29 |
30 | # https://github.com/eddiebergman/GraKeL/blob/63a2723fc9488257a7c880fa9b5e5cc95ada9f42/ci_scripts/travis/install.sh#L8-L11
31 | coverage-reqs: "networkx pandas"
32 | codecov-py: "3.7"
33 | codecov-args: >-
34 | --cov=grakel
35 | --cov-report=xml
36 |
37 | jobs:
38 |
39 | # General unit tests
40 | source-test:
41 | name: ${{ matrix.py }}-${{ matrix.os }}
42 |
43 | runs-on: ${{ matrix.os }}
44 | defaults:
45 | run:
46 | shell: bash # Default to using bash on all
47 |
48 | strategy:
49 | fail-fast: false
50 | matrix:
51 | py: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
52 | os: ["ubuntu-latest", "macos-latest", "windows-latest"]
53 |
54 | steps:
55 | - name: Checkout
56 | uses: actions/checkout@v3
57 |
58 | - name: Setup Python
59 | uses: actions/setup-python@v4
60 | with:
61 | python-version: ${{ matrix.py }}
62 |
63 | - name: Install ${{ env.import-package-name }}
64 | run: |
65 | python -m pip install --upgrade pip setuptools wheel
66 | # escape hatch: fall back to the lighter extras where cvxopt cannot be installed
67 | if ( [[ "${{ matrix.os }}" == "windows-latest" ]] && [[ "${{ matrix.py }}" == "3.7" ]] ) || [[ "${{ matrix.py }}" == "3.12" ]];then
68 | python -m pip install -e ".${{ env.extra-requires-soft }}"
69 | else
70 | python -m pip install -e ".${{ env.extra-requires }}"
71 | fi
72 |
73 | - name: Tests
74 | run: |
75 | python -m pytest # ${{ env.test-dir }}
76 |
77 | # Testing with codecov coverage uploaded
78 | codecov-test:
79 | name: codecov-test
80 | runs-on: ubuntu-latest
81 |
82 | steps:
83 | - name: Checkout
84 | uses: actions/checkout@v3
85 |
86 | - name: Setup Python
87 | uses: actions/setup-python@v4
88 | with:
89 | python-version: ${{ env.codecov-py }}
90 |
91 | - name: Install ${{ env.import-package-name }}
92 | run: |
93 | python -m pip install --upgrade pip setuptools
94 | python -m pip install -e ".${{ env.extra-requires }}"
95 |
96 | - name: Tests
97 | run: |
98 | python -m pytest ${{ env.codecov-args }} ${{ env.test-dir }}
99 |
100 | - name: Upload coverage
101 | uses: codecov/codecov-action@v3
102 | # Only upload coverage when it's **not** a scheduled test run
103 | if: ${{ ! github.event.schedule }}
104 | with:
105 | fail_ci_if_error: true
106 | verbose: true
107 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # scikit-learn specific
10 | doc/_build/
11 | doc/auto_examples/
12 | doc/generated/
13 | doc/modules/generated
14 |
15 | # Distribution / packaging
16 |
17 | .Python
18 | env/
19 | build/
20 | develop-eggs/
21 | dist/
22 | downloads/
23 | eggs/
24 | .eggs/
25 | lib/
26 | lib64/
27 | parts/
28 | sdist/
29 | var/
30 | *.egg-info/
31 | .installed.cfg
32 | *.egg
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *,cover
53 | .hypothesis/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 |
62 | # Sphinx documentation
63 | docs/_build/
64 |
65 | # PyBuilder
66 | target/
67 |
68 | # Pycharm
69 | .idea
70 |
71 | # PyPi
72 | .pypirc
73 |
74 | # Cython Generated files
75 | grakel/kernels/_c_functions.cpython-35m-x86_64-linux-gnu.so
76 | grakel/kernels/_c_functions/functions.cpp
77 | grakel/kernels/_isomorphism/bliss.cpp
78 |
--------------------------------------------------------------------------------
/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/.nojekyll
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | New BSD License
2 |
3 | Copyright (c) 2018- The grakel developers.
4 | All rights reserved.
5 |
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are met:
9 |
10 | a. Redistributions of source code must retain the above copyright notice,
11 | this list of conditions and the following disclaimer.
12 | b. Redistributions in binary form must reproduce the above copyright
13 | notice, this list of conditions and the following disclaimer in the
14 | documentation and/or other materials provided with the distribution.
15 | c. Neither the name of the Scikit-learn Developers nor the names of
16 | its contributors may be used to endorse or promote products
17 | derived from this software without specific prior written
18 | permission.
19 |
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 | DAMAGE.
32 |
33 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include requirements.txt
3 | include LICENSE
4 |
5 | include grakel/kernels/_c_functions/*.pxd
6 | include grakel/kernels/_c_functions/*.pyx
7 | recursive-include grakel/kernels/_c_functions/include *
8 | recursive-include grakel/kernels/_c_functions/src *
9 |
10 | recursive-include grakel/kernels/_isomorphism/bliss-0.50 *
11 | recursive-include grakel/kernels/_isomorphism *
12 |
13 | recursive-include grakel/tests/data/Cuneiform *
14 | recursive-include grakel/tests/data/MUTAG *
15 |
16 | include doc/*.rst
17 | include doc/conf.py
18 | include doc/Makefile
19 | include doc/make.bat
20 | recursive-include doc/documentation *
21 | recursive-include doc/benchmarks *
22 | recursive-include doc/_static *
23 | recursive-include doc/_templates *
24 | recursive-include doc/kernels *
25 | recursive-include doc/sphinxext *
26 | recursive-include examples *
27 |
--------------------------------------------------------------------------------
/ci_scripts/circleci/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This script is meant to be called in the build job defined in
3 | # .circleci/config.yml. See https://circleci.com/docs/ for more details.
4 | # The behavior of the script is controlled by environment variables defined
5 | # in .circleci/config.yml in the top level folder of the project.
6 |
7 | # System dependencies
8 | sudo -E apt-get -yq remove texlive-binaries --purge > /dev/null
9 | sudo apt-get install software-properties-common
10 | sudo add-apt-repository universe > /dev/null
11 | sudo add-apt-repository main > /dev/null
12 | sudo apt-get update > /dev/null
13 | sudo apt-get install libatlas-dev libatlas3gf-base > /dev/null
14 | sudo apt-get install build-essential python-dev python-setuptools > /dev/null
15 |
16 | # Setup a python venv and install basics
17 | source ./venv/bin/activate
18 | pip install --upgrade pip
19 |
20 | pip install --upgrade pandas networkx matplotlib setuptools nose coverage "Sphinx<5" pillow sphinx-gallery sphinx_rtd_theme "sphinxcontrib-bibtex==1.0" nb2plots numpydoc tqdm > /dev/null
21 | pip install -r requirements.txt > /dev/null
22 | pip install "cvxopt==1.2.0" > /dev/null
23 |
24 |
25 | # More dependencies
26 | sudo -E apt-get -yq update > /dev/null
27 | sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install dvipng texlive-latex-base texlive-latex-extra > /dev/null
28 |
29 | # Install project
30 | python setup.py clean
31 | pip install -e .
32 |
33 | set -o pipefail && cd doc && make clean html doctest 2>&1 | tee ~/log.txt && cd ..
34 | cat ~/log.txt && if tail -n 1 ~/log.txt | grep -q "Error "; then false; else true; fi
35 |
--------------------------------------------------------------------------------
/ci_scripts/circleci/push_doc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This script is meant to be called in the "deploy" step defined in
3 | # .circleci/config.yml. See https://circleci.com/docs/ for more details.
4 | # The behavior of the script is controlled by environment variables defined
5 | # in .circleci/config.yml in the top level folder of the project.
6 |
7 | if [[ $PUSH_DOCS == "true" ]]; then
8 | MSG="Pushing the docs for branch: $CIRCLE_BRANCH, commit $CIRCLE_SHA1"
9 |
10 | cd $HOME
11 | # Copy the build docs to a temporary folder
12 |
13 | # rename the project folder to the doc repo folder
14 | if [ -d project ];
15 | then mv project $DOC_REPO;
16 | fi
17 |
18 | rm -rf tmp
19 | mkdir tmp
20 | cp -R $HOME/$DOC_REPO/doc/_build/html/* ./tmp/
21 |
22 | # Clone the docs repo if it isn't already there
23 | if [ ! -d $DOC_REPO ];
24 | then git clone "git@github.com:$USERNAME/"$DOC_REPO".git";
25 | fi
26 |
27 | cd $DOC_REPO
28 | git branch gh-pages
29 | git checkout -f gh-pages
30 | git reset --hard origin/gh-pages
31 | git clean -dfx
32 |
33 | # Copy the new build docs
34 | git rm -rf $DOC_URL
35 | mkdir $DOC_URL
36 | rm -f .nojekyll
37 | touch .nojekyll
38 | cp -R $HOME/tmp/* ./$DOC_URL/
39 |
40 | git config --global user.email $EMAIL
41 | git config --global user.name $USERNAME
42 | git add -f ./$DOC_URL/ index.html .nojekyll
43 | git commit -m "$MSG [ci skip]"
44 | git push -f origin gh-pages
45 | if [ $? -ne 0 ]; then
46 | echo "Pushing docs failed"
47 | echo
48 | exit 1
49 | fi
50 |
51 | echo $MSG
52 | fi
--------------------------------------------------------------------------------
/ci_scripts/circleci/pypi_deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # This script is meant to be called in the "deploy" step defined in
3 | # .circleci/config.yml. See https://circleci.com/docs/ for more details.
4 | # The behavior of the script is controlled by environment variables defined
5 | # in .circleci/config.yml in the top level folder of the project.
6 |
7 | # Deploy on PyPi: Only works with python2
8 | if [[ $DEPLOY_PYPI == "true" ]]; then
9 | # Build & Upload sphinx-docs
10 | # Initialise .pypirc
11 | echo "[distutils]" > ~/.pypirc
12 | echo "index-servers = pypi" >> ~/.pypirc
13 | echo >> ~/.pypirc
14 | echo "[pypi]" >> ~/.pypirc
15 | echo "username=$USERNAME" >> ~/.pypirc
16 | echo "password=$PYPI_PASSWORD" >> ~/.pypirc
17 |
18 | # Upload sphinx docs
19 | cd ~/project
20 | sudo apt-get install tree
21 | source ~/project/venv/bin/activate
22 | pip install sphinx-pypi-upload
23 | ls ./doc/_build/html
24 | mkdir upload_dir && cp -r ./doc/_build/html ./upload_dir
25 | tree -d ~/project/ || true
26 | ls ./upload_dir/html
27 | sudo apt-get install realpath
28 | ls $(realpath ./upload_dir/html)
29 | python setup.py upload_sphinx --upload-dir=$(realpath ./upload_dir/html) --show-response || true
30 | fi
31 |
--------------------------------------------------------------------------------
/doc/.special.rst:
--------------------------------------------------------------------------------
1 | .. Color profiles for Sphinx.
2 | .. Has to be used with hacks.css (bitbucket.org/lbesson/web-sphinx/src/master/.static/hacks.css)
3 | .. role:: black
4 | .. role:: gray
5 | .. role:: grey
6 | .. role:: silver
7 | .. role:: white
8 | .. role:: maroon
9 | .. role:: red
10 | .. role:: magenta
11 | .. role:: fuchsia
12 | .. role:: pink
13 | .. role:: orange
14 | .. role:: yellow
15 | .. role:: lime
16 | .. role:: green
17 | .. role:: olive
18 | .. role:: teal
19 | .. role:: cyan
20 | .. role:: aqua
21 | .. role:: blue
22 | .. role:: navy
23 | .. role:: purple
24 |
25 | .. role:: under
26 | .. role:: over
27 | .. role:: blink
28 | .. role:: line
29 | .. role:: strike
30 |
31 | .. role:: it
32 | .. role:: ob
33 |
34 | .. role:: small
35 | .. role:: large
36 |
37 | .. role:: center
38 | .. role:: left
39 | .. role:: right
40 |
41 |
42 | .. (c) Lilian Besson, 2011-2016, https://bitbucket.org/lbesson/web-sphinx/
43 |
--------------------------------------------------------------------------------
/doc/_figures/example_graph.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph.pdf
--------------------------------------------------------------------------------
/doc/_figures/example_graph.svg:
--------------------------------------------------------------------------------
(SVG markup omitted from this dump)
--------------------------------------------------------------------------------
/doc/_figures/example_graph_attributed.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_attributed.pdf
--------------------------------------------------------------------------------
/doc/_figures/example_graph_directed.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_directed.pdf
--------------------------------------------------------------------------------
/doc/_figures/example_graph_edge_attributed.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_edge_attributed.pdf
--------------------------------------------------------------------------------
/doc/_figures/example_graph_edge_labeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_edge_labeled.pdf
--------------------------------------------------------------------------------
/doc/_figures/example_graph_labeled.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_labeled.pdf
--------------------------------------------------------------------------------
/doc/_figures/example_graph_weighted.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/example_graph_weighted.pdf
--------------------------------------------------------------------------------
/doc/_figures/optimal_assignment_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/optimal_assignment_example.png
--------------------------------------------------------------------------------
/doc/_figures/optimal_assignment_histograms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/optimal_assignment_histograms.png
--------------------------------------------------------------------------------
/doc/_figures/wl_optimal_assignment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_figures/wl_optimal_assignment.png
--------------------------------------------------------------------------------
/doc/_static/kataoka1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/kataoka1.png
--------------------------------------------------------------------------------
/doc/_static/marion1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/marion1.png
--------------------------------------------------------------------------------
/doc/_static/marion2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/marion2.png
--------------------------------------------------------------------------------
/doc/_static/marion3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/marion3.png
--------------------------------------------------------------------------------
/doc/_static/marion4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/marion4.png
--------------------------------------------------------------------------------
/doc/_static/odd_sth_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/odd_sth_1.png
--------------------------------------------------------------------------------
/doc/_static/odd_sth_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/odd_sth_2.png
--------------------------------------------------------------------------------
/doc/_static/odd_sth_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/odd_sth_3.png
--------------------------------------------------------------------------------
/doc/_static/odd_sth_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/doc/_static/odd_sth_4.png
--------------------------------------------------------------------------------
/doc/_templates/class.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}==============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | {% block methods %}
9 | .. automethod:: __init__
10 | {% endblock %}
11 |
12 | .. include:: {{module}}.{{objname}}.examples
13 |
14 | .. raw:: html
15 |
16 |
17 |
--------------------------------------------------------------------------------
/doc/_templates/function.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}====================
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autofunction:: {{ objname }}
7 |
8 | .. include:: {{module}}.{{objname}}.examples
9 |
10 | .. raw:: html
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/doc/_templates/function_bib.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}====================
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autofunction:: {{ objname }}
7 |
8 | Bibliography
9 | ------------
10 | .. bibliography:: ../kernels/graph_kernels.bib
11 | :filter: docname in docnames
12 |
13 | .. include:: {{module}}.{{objname}}.examples
14 |
15 | .. raw:: html
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/doc/_templates/kernel.rst:
--------------------------------------------------------------------------------
1 | :mod:`{{module}}`.{{objname}}
2 | {{ underline }}==============
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 | {% block methods %}
9 | .. automethod:: __init__
10 | {% endblock %}
11 |
12 |
13 | Bibliography
14 | ------------
15 | .. bibliography:: ../kernels/graph_kernels.bib
16 | :filter: docname in docnames
17 |
18 | .. include:: {{module}}.{{objname}}.examples
19 |
20 | .. raw:: html
21 |
22 |
23 |
--------------------------------------------------------------------------------
/doc/api.rst:
--------------------------------------------------------------------------------
1 | ======
2 | GraKeL
3 | ======
4 | .. module:: grakel
5 |
6 | The :code:`grakel` module is structured as follows:
7 |
8 | .. toctree::
9 | :maxdepth: 1
10 |
11 | kernels
12 | graph_kernel
13 | graph
14 | datasets
15 |
--------------------------------------------------------------------------------
/doc/benchmarks.rst:
--------------------------------------------------------------------------------
1 | .. _benchmarks:
2 |
3 | ==========
4 | Benchmarks
5 | ==========
6 |
7 | In this section, we measure the running times of several graph kernel implementations from *GraKeL*, comparing them both to each other and to implementations from other packages.
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 |
12 | benchmarks/comparison
13 | benchmarks/evaluation
--------------------------------------------------------------------------------
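
A minimal timing sketch of the kind of measurement reported in these pages, assuming the MUTAG dataset is fetched through `grakel.datasets.fetch_dataset` (the exact benchmark scripts are not part of this section):

    import time

    from grakel.datasets import fetch_dataset
    from grakel.kernels import ShortestPath, VertexHistogram, WeisfeilerLehman

    # Download MUTAG (a small standard benchmark) and keep only the graphs.
    MUTAG = fetch_dataset("MUTAG", verbose=False)
    G = MUTAG.data

    # Time the computation of the full kernel matrix for two kernels.
    for kernel in (ShortestPath(normalize=True),
                   WeisfeilerLehman(n_iter=3, base_graph_kernel=VertexHistogram, normalize=True)):
        start = time.perf_counter()
        kernel.fit_transform(G)
        print("%s: %.2f s" % (type(kernel).__name__, time.perf_counter() - start))
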
/doc/benchmarks/benchmarks.bib:
--------------------------------------------------------------------------------
1 | @article{sugiyama2017graphkernels,
2 | title={graphkernels: R and Python packages for graph comparison},
3 | author={Sugiyama, Mahito and Ghisu, M Elisabetta and Llinares-L{\'o}pez, Felipe and Borgwardt, Karsten},
4 | journal={Bioinformatics},
5 | volume={34},
6 | number={3},
7 | pages={530--532},
8 | year={2017}
9 | }
10 |
11 |
--------------------------------------------------------------------------------
/doc/classes.rst:
--------------------------------------------------------------------------------
1 | .. _api_ref:
2 |
3 | =============
4 | API Reference
5 | =============
6 |
7 | This is the class and function reference of *GraKeL*. To understand how to use the package, we suggest reading the :ref:`documentation` section first.
8 |
9 | :mod:`grakel.graph`: Graph class with its utility functions
10 | ===========================================================
11 |
12 | Base Class
13 | ----------
14 | .. currentmodule:: grakel
15 |
16 | .. autosummary::
17 | :toctree: generated/
18 | :template: class.rst
19 |
20 | Graph
21 |
22 |
23 | Utility Functions
24 | -----------------
25 | .. currentmodule:: grakel
26 |
27 | .. autosummary::
28 | :toctree: generated/
29 | :template: function.rst
30 |
31 | graph.is_adjacency
32 | graph.is_edge_dictionary
33 | graph.laplacian
34 | graph.floyd_warshall
35 |
36 | **User guide:** See the :ref:`graph` section for further details.
37 |
38 | :mod:`grakel.graph_kernels`: A kernel decorator
39 | ===============================================
40 | .. currentmodule:: grakel
41 |
42 | Graph Kernel (decorator)
43 | ------------------------
44 | .. autosummary::
45 | :toctree: generated/
46 | :template: class.rst
47 |
48 | grakel.GraphKernel
49 |
50 | **User guide:** See the :ref:`graph_kernel` section for further details.
51 |
52 | :mod:`grakel.kernels`: A collection of graph kernels
53 | ====================================================
54 |
55 | Kernels
56 | -------
57 |
58 | .. currentmodule:: grakel
59 |
60 | .. autosummary::
61 | :toctree: generated/
62 | :template: kernel.rst
63 |
64 | Kernel
65 | RandomWalk
66 | RandomWalkLabeled
67 | PyramidMatch
68 | NeighborhoodHash
69 | ShortestPath
70 | ShortestPathAttr
71 | GraphletSampling
72 | SubgraphMatching
73 | WeisfeilerLehman
74 | HadamardCode
75 | NeighborhoodSubgraphPairwiseDistance
76 | LovaszTheta
77 | SvmTheta
78 | Propagation
79 | PropagationAttr
80 | OddSth
81 | MultiscaleLaplacian
82 | MultiscaleLaplacianFast
83 | HadamardCode
84 | VertexHistogram
85 | EdgeHistogram
86 | GraphHopper
87 | CoreFramework
88 | WeisfeilerLehmanOptimalAssignment
89 |
90 | **User guide:** See the :ref:`kernels` section for further details.
91 |
92 | :mod:`grakel.datasets`: Datasets
93 | =================================
94 |
95 | Fetch
96 | -----
97 |
98 | .. currentmodule:: grakel.datasets
99 |
100 | .. autosummary::
101 | :toctree: generated/
102 | :template: function_bib.rst
103 |
104 | fetch_dataset
105 |
106 | .. autosummary::
107 | :toctree: generated/
108 | :template: function.rst
109 |
110 | get_dataset_info
111 |
112 |
113 | **User guide:** See the :ref:`datasets` section for further details.
114 |
115 |
116 | :mod:`grakel`: Utils
117 | =================================
118 |
119 | .. currentmodule:: grakel
120 |
121 | Use a kernel matrix as a transformer
122 | ------------------------------------
123 |
124 | .. autosummary::
125 | :toctree: generated/
126 | :template: class.rst
127 |
128 | KMTransformer
129 |
130 | Cross Validation
131 | ----------------
132 |
133 | .. autosummary::
134 | :toctree: generated/
135 | :template: function.rst
136 |
137 | cross_validate_Kfold_SVM
138 |
139 | Load from other file formats
140 | ----------------------------
141 |
142 | .. autosummary::
143 | :toctree: generated/
144 | :template: function.rst
145 |
146 | graph_from_networkx
147 | graph_from_pandas
148 | graph_from_csv
149 |
150 | **User guide:** Useful functions for applying kernels to existing datasets stored in other formats.
151 |
152 | .. _gd: https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
153 |
--------------------------------------------------------------------------------
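
All kernels listed in this reference follow the scikit-learn fit/transform pattern. A minimal usage sketch, assuming two toy graphs given as edge dictionaries with node-label dictionaries:

    from grakel import Graph
    from grakel.kernels import VertexHistogram, WeisfeilerLehman

    # Two toy graphs: edge dictionaries plus node-label dictionaries.
    G1 = Graph({0: [1, 2], 1: [0, 2], 2: [0, 1]}, node_labels={0: 'A', 1: 'B', 2: 'A'})
    G2 = Graph({0: [1], 1: [0, 2], 2: [1]}, node_labels={0: 'A', 1: 'A', 2: 'B'})

    wl = WeisfeilerLehman(n_iter=3, base_graph_kernel=VertexHistogram, normalize=True)
    K_train = wl.fit_transform([G1, G2])  # 2x2 kernel matrix between the fitted graphs
    K_test = wl.transform([G2])           # 1x2 matrix of G2 against the fitted graphs
    print(K_train)
    print(K_test)
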
/doc/datasets.rst:
--------------------------------------------------------------------------------
1 | .. _datasets:
2 |
3 | =========================
4 | Dataset loading utilities
5 | =========================
6 | .. module:: grakel.datasets
7 |
8 | A module for loading and fetching datasets related to graph kernels.
9 |
10 | .. autosummary::
11 | :toctree: generated/
12 |
13 | grakel.datasets.fetch_dataset
14 | grakel.datasets.get_dataset_info
15 |
16 | .. _gd: https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
--------------------------------------------------------------------------------
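
A minimal sketch of the fetching workflow documented above, assuming network access to the dataset collection linked in the file:

    from grakel.datasets import fetch_dataset

    # Download MUTAG from the dataset collection and unpack graphs and class labels.
    MUTAG = fetch_dataset("MUTAG", verbose=False)
    G, y = MUTAG.data, MUTAG.target
    print(len(G), "graphs,", len(set(y)), "classes")
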
/doc/documentation.rst:
--------------------------------------------------------------------------------
1 | .. _documentation:
2 |
3 | =============
4 | Documentation
5 | =============
6 |
7 | In this section, we cover the core concepts in *GraKeL* and show how to use it.
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 |
12 | documentation/installation
13 | documentation/introduction
14 | documentation/core_concepts
15 | documentation/creating_kernels
16 | documentation/contributing
--------------------------------------------------------------------------------
/doc/documentation/code_for_examples/vertex_kernel.py:
--------------------------------------------------------------------------------
1 | from warnings import warn
2 | from collections import Counter
3 | from grakel import Kernel, Graph
4 |
5 | # For python2/3 compatibility
6 | from six.moves.collections_abc import Iterable
7 |
8 |
9 | class VertexHistogram(Kernel):
10 | """Vertex Histogram kernel as found in :cite:`Sugiyama2015NIPS`
11 |
12 | Parameters
13 | ----------
14 | None.
15 |
16 | Attributes
17 | ----------
18 | None.
19 |
20 | """
21 |
22 | # Define the graph format that this kernel needs (if needed)
23 | # _graph_format = "auto" (default: "auto")
24 |
25 | def __init__(self,
26 | n_jobs=None,
27 | verbose=False,
28 | normalize=False,
29 | # kernel_param_1=kernel_param_1_default,
30 | # ...
31 | # kernel_param_n=kernel_param_n_default,
32 | ):
33 | """Initialise an `odd_sth` kernel."""
34 |
35 | # Add any new parameters to the set of valid ones (template placeholder):
36 | # self._valid_parameters |= new_parameters
37 |
38 | super(VertexHistogram, self).__init__(n_jobs=n_jobs, verbose=verbose, normalize=normalize)
39 |
40 | # Get parameters and check the new ones
41 | # @for i=1 to num_new_parameters
42 | # self.kernel_param_i = kernel_param_i
43 |
44 | # self.initialized_.update({
45 | # param_needing_initialization_1 : False
46 | # ...
47 | # param_needing_initialization_m : False
48 | # })
49 |
50 | def initialize_(self):
51 | """Initialize all transformer arguments, needing initialization."""
52 | # If you want to implement your own parallelization, here is your chance.
53 | # If the kernel is computed through a pairwise operation, parallelization is already
54 | # implemented in the parent class: calling its initialize_ sets up a joblib Parallel (if n_jobs is not None).
55 | super(VertexHistogram, self).initialize_()
56 |
57 | # for i=1 .. m
58 | # if not self.initialized_["param_needing_initialization_i"]:
59 | # # Apply checks (raise ValueError or TypeError accordingly)
60 | # # calculate derived fields stored on self._derived_field_ia .. z
61 | # self.initialized_["param_needing_initialization_i"] = True
62 | pass
63 |
64 | def parse_input(self, X):
65 | """Parse and check the given input for vertex kernel.
66 |
67 | Parameters
68 | ----------
69 | X : iterable
70 | For the input to pass the test, we must have:
71 | Each element must be an iterable with at most three features and at
72 | least one. The first, which is obligatory, is a valid graph structure
73 | (adjacency matrix or edge_dictionary), while the second is
74 | node_labels and the third edge_labels (both fitting the given graph
75 | format).
76 |
77 | Returns
78 | -------
79 | out : list
80 | List of label-frequency histograms, one for each graph.
81 |
82 | """
83 | if not isinstance(X, Iterable):
84 | raise TypeError('input must be an iterable\n')
85 | else:
86 | out = list()
87 | for (i, x) in enumerate(iter(X)):
88 | is_iter = isinstance(x, Iterable)
89 | if is_iter:
90 | x = list(x)
91 | if is_iter and len(x) in [0, 2, 3]:
92 | if len(x) == 0:
93 | warn('Ignoring empty element on index: '+str(i))
94 | continue
95 | else:
96 | # Our element is an iterable of at least 2 elements
97 | labels = x[1]
98 | elif type(x) is Graph:
99 | # get labels in any existing format
100 | labels = x.get_labels(purpose="any")
101 | else:
102 | raise TypeError('each element of X must be either a ' +
103 | 'graph object or a list with at least ' +
104 | 'a graph like object and node labels ' +
105 | 'dict \n')
106 |
107 | # Append frequencies for the current Graph
108 | out.append(Counter(labels.values()))
109 |
110 | if len(out) == 0:
111 | raise ValueError('parsed input is empty')
112 | return out
113 |
114 | def pairwise_operation(self, x, y):
115 | """Calculate sum of frequency products.
116 |
117 | Parameters
118 | ----------
119 | x, y : Counter
120 | Label-Frequency Counters as occur from `parse_input`.
121 |
122 | Returns
123 | -------
124 | kernel : number
125 | The kernel value.
126 |
127 | """
128 | return sum(x[k]*y[k] for k in x.keys())
129 |
--------------------------------------------------------------------------------
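
A short sketch of how the template above could be exercised, assuming it is saved as a module named `vertex_kernel.py` (a hypothetical file name) with its placeholders left commented out:

    from grakel import Graph
    from vertex_kernel import VertexHistogram  # hypothetical module holding the template above

    # Two small labeled graphs in edge-dictionary form.
    G1 = Graph({0: [1, 2], 1: [0], 2: [0]}, node_labels={0: 'A', 1: 'B', 2: 'B'})
    G2 = Graph({0: [1], 1: [0, 2], 2: [1]}, node_labels={0: 'A', 1: 'A', 2: 'B'})

    vh = VertexHistogram(normalize=False)
    print(vh.fit_transform([G1, G2]))  # 2x2 matrix of label-frequency dot products
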
/doc/documentation/contributing.rst:
--------------------------------------------------------------------------------
1 | .. _contributing:
2 |
3 | ============
4 | Contributing
5 | ============
6 |
7 | All contributions are welcome! If you are not sure about how you can contribute, please contact the authors of the library.
8 |
9 | Areas you can contribute
10 | ------------------------
11 | Curious about how you can contribute to *GraKeL*? Here are a few ideas!
12 |
13 | * **Implementing a kernel**: The number of graph kernels that have been proposed over the past years is very large. *GraKeL* contains implementations of several of these kernels, but still, many kernels are not contained in the library. You can help make *GraKeL* more complete by implementing new graph kernels.
14 |
15 | * **Optimizing kernel computation**: We have done our best to write maintainable and efficient Python code. However, this does not mean that the code cannot be further optimized. For even higher efficiency, some graph kernels can be re-implemented using wrapped C++ packages. Furthermore, most kernels solve combinatorial problems for which more efficient algorithms (than the employed ones) may exist.
16 |
17 | * **Improving the** :class:`grakel.Graph` **class**: As discussed in the :ref:`core_concepts` section, the :class:`grakel.Graph` class supports both adjacency matrix and edgelist representations of graphs. There are also methods that allow *GraKeL* to read graphs in various formats (e.g., NetworkX graphs). Furthermore, there are methods that implement graph algorithms (e.g., shortest path distances). These operations have to be efficient, both in terms of time and space complexity. Therefore, the :class:`grakel.Graph` class needs to be optimized. The project may benefit a lot from a *Cython* implementation of the class.
18 |
19 | * **Redesigning the** :class:`grakel.Kernel` **class**: The :class:`grakel.Kernel` class was designed to satisfy some constraints (e.g., compatibility with *scikit-learn*) and to be as simple as possible. This class can be extended to support families of kernels or frameworks that are not currently supported, such as *deep graph kernels*.
20 |
21 | * **Unit-Testing**: As far as the kernel module is concerned, we have not managed to come up with any methodology for testing if the kernels are correctly implemented. We could use some "reference" code to check if our kernels produce identical results on some datasets, however, in most cases, this is not practical. Our tests check if the kernel matrices produced by the kernels are positive semidefinite, however, this can be true even if a kernel is not correctly implemented. We would like to design new tests that can verify the validity of implemented kernels.
22 |
23 | * **Parallel/Concurrent execution**: The :class:`grakel.GraphKernel` class supports a parallel computation scheme (i.e., using the :code:`n_jobs` attribute), but this has not been implemented for all the kernels, and where it has, the current implementation is not always optimal. Implementations that allow parallel computation of the kernels are of high importance since they can lead to significant speed-ups of kernel computations.
24 |
25 | * **Examples and tutorials**: Have you created an example or tutorial that makes use of the *GraKeL* library? Please let us know. We would be more than happy to include it in our list of examples or tutorials.
26 |
27 | .. _master: https://github.com/ysig/GraKeL
28 | .. _develop: https://github.com/ysig/GraKeL/tree/develop
29 |
30 |
31 | Who to Blame for the GraKeL Project
32 | -----------------------------------
33 | The *GraKeL* project started in 2018 as part of a one year project funded by `Labex DigiCosme`_. The main contributor to *GraKeL*'s development is Ioannis Siglidis. Ioannis is also responsible for its maintenance. Giannis Nikolentzos is also an active contributor. The project was carried out under the supervision of Professor `Michalis Vazirgiannis`_ at the LIX laboratory of École Polytechnique. The following people have also contributed to the project: Christos Giatsidis, Stratis Limnios and Konstantinos Skianis.
34 |
35 | License
36 | -------
37 | GraKeL is distributed under the **BSD 3-clause** license. The library makes use of the C++ source code of BLISS_ (a tool for computing automorphism groups and canonical labelings of graphs) which is **LGPL** licensed. Furthermore, the cvxopt_ package (a software package for convex optimization), which is an optional dependency of GraKeL, is **GPL** licensed.
38 |
39 | .. _Labex DigiCosme: https://digicosme.lri.fr/tiki-index.php
40 | .. _Michalis Vazirgiannis: http://www.lix.polytechnique.fr/~mvazirg/
41 | .. _BLISS: http://www.tcs.hut.fi/Software/bliss
42 | .. _cvxopt: https://cvxopt.org/
43 |
--------------------------------------------------------------------------------
/doc/documentation/installation.rst:
--------------------------------------------------------------------------------
1 | .. _installation:
2 |
3 | =================
4 | Installing GraKeL
5 | =================
6 | The GraKeL library requires the following packages to be installed:
7 |
8 | * Python (>=2.7, >=3.5)
9 | * NumPy (>=1.8.2)
10 | * SciPy (>=0.13.3)
11 | * Cython (>=0.27.3)
12 | * cvxopt (>=1.2.0) [optional]
13 | * future (>=0.16.0) (for python 2.7)
14 |
15 | *GraKeL* is available via `PyPI`_ . You can install the latest release of *GraKeL* using the following command:
16 |
17 | .. code-block:: bash
18 |
19 | $ pip install grakel
20 |
21 | To also install the cvxopt package, which is a requirement of the Lovász-:math:`\vartheta` kernel, you can use the following command:
22 |
23 | .. code-block:: bash
24 |
25 | $ pip install grakel[lovasz]
26 |
27 | .. *GraKeL* is also available via `anaconda`_.
28 |
29 | Building GraKeL
30 | ---------------
31 |
32 | In order to build your own version of *GraKeL*, you need a C++ compiler, since the package contains some C++ extensions. To build and install a local version of `GraKeL`, execute :code:`pip install .` or :code:`python setup.py install` in the root folder. Furthermore, in case you want to build the extensions locally, execute :code:`python setup.py build_ext`.
33 |
34 | In order to compile the C++ extensions, a system-specific build environment should be configured. What you generally need is a C++ compiler and the Python header files.
35 |
36 | Unix Environment
37 | ^^^^^^^^^^^^^^^^^
38 |
39 | In the case of Unix environments, you need to have installed:
40 |
41 | - A C++ compiler like `g++`
42 | - The package that contains the `Python.h` file such as `python-dev`
43 |
44 | Windows Environment
45 | ^^^^^^^^^^^^^^^^^^^
46 |
47 | In the case of a Windows environment, you need to install the Visual Studio build tools for C++ (for more details, please have a look here_).
48 |
49 | .. note::
50 |
51 | If you have trouble building `GraKeL`, please raise an issue_ so that we can enrich our installation instructions, as well as address the problem.
52 |
53 | Why so Many Packages?
54 | ---------------------
55 |
56 | Graph kernels deal with the problem of graph comparison, a very challenging problem which has been studied for decades. Due to the complex nature of the problem, different types of approaches have been developed so far. Some approaches employ combinatorial algorithms, others formulate graph comparison as a continuous optimization problem, while others apply heuristics. The field of graph kernels is characterized by the same diversity of methods. For instance, the *graphlet kernel* solves the graph isomorphism problem to determine the identity of each graphlet, while the *Lovász*-:math:`\vartheta` kernel solves a semidefinite programming problem to compute the Lovász number of each graph and the associated orthonormal representations. To solve such problems, *GraKeL* relies on well-established external libraries that provide optimized software for these tasks. For example, *GraKeL* uses [bliss]_ to test graph isomorphism and the cvxopt_ library to solve semidefinite programs.
57 |
58 | .. _cvxopt: https://cvxopt.org/
59 |
60 | .. [bliss] To test graph isomorphism, *GraKeL* extended `PyBliss`_, a Python wrapper for bliss. This allowed *GraKeL* to remain compatible with Python 2/3 and its installation on Windows. Among all the candidate packages, PyBliss was chosen thanks to the information shared by `Tamás Nepusz`_ (developer of the `iGraph`_ library), who pointed out that this package was the most efficient (both in terms of time and memory) for deciding isomorphism between small graphs in experiments conducted using the iGraph library. Other candidate packages include `pynauty`_ (a Python extension of `nauty`_) and `networkx`_ (contains an implementation of the `VF2`_ algorithm).
61 |
62 | .. _PyBliss: http://www.tcs.hut.fi/Software/bliss/
63 | .. _Tamás Nepusz: http://hal.elte.hu/~nepusz/
64 | .. _iGraph: http://igraph.org/
65 | .. _pynauty: https://web.cs.dal.ca/~peter/software/pynauty/html/
66 | .. _nauty: http://users.cecs.anu.edu.au/~bdm/nauty/
67 | .. _networkx: https://networkx.github.io/
68 | .. _VF2: https://networkx.github.io/documentation/networkx-1.10/reference/algorithms.isomorphism.vf2.html
69 | .. _PyPI: https://pypi.org/project/grakel-dev/
70 | .. _anaconda: https://anaconda.org/ysig/grakel-dev
71 | .. _issue: https://github.com/ysig/GraKeL/issues
72 | .. _here: https://docs.microsoft.com/en-us/visualstudio/python/working-with-c-cpp-python-in-visual-studio?view=vs-2019#prerequisites
73 |
--------------------------------------------------------------------------------
/doc/graph.rst:
--------------------------------------------------------------------------------
1 | .. _graph:
2 |
3 | Graph (class)
4 | =============
5 | Documentation for the graph class.

6 | .. currentmodule:: grakel
7 |
8 | .. autosummary::
9 | :toctree: generated/
10 |
11 | grakel.Graph
12 |
13 | .. currentmodule:: grakel.graph
14 |
15 | .. autosummary::
16 | :toctree: generated/
17 |
18 | grakel.graph.is_adjacency
19 | grakel.graph.is_edge_dictionary
20 | grakel.graph.laplacian
21 | grakel.graph.floyd_warshall
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | .. grakel documentation master file, created by
2 | sphinx-quickstart on Mon Jan 18 14:44:12 2016.
3 |
4 | ========
5 | Overview
6 | ========
7 |
8 | *GraKeL* is a Python package which provides implementations of several graph kernels, a family of powerful methods which allow kernel-based learning approaches such as SVMs to work directly on graphs.
9 |
10 | Getting Started
11 |
12 | .. toctree::
13 | :maxdepth: 2
14 |
15 | documentation
16 |
17 | ==========
18 | Benchmarks
19 | ==========
20 |
21 | To demonstrate the efficiency of the algorithms implemented in *GraKeL*, we present a comparison of the running times of the implementations of some graph kernels from *GraKeL* and from other packages. We also compare the running times of the different kernels to each other.
22 |
23 | .. toctree::
24 | :maxdepth: 2
25 |
26 | benchmarks
27 |
28 | =================
29 | Package Reference
30 | =================
31 |
32 | A collection of all classes and functions important for the use and understanding of the *GraKeL* package.
33 |
34 | Specifically, *GraKeL* provides the following:
35 |
36 | .. toctree::
37 | :maxdepth: 1
38 |
39 | api
40 | classes
41 | auto_examples/index
42 | tutorials
43 |
44 |
45 | ==========
46 | What's New
47 | ==========
48 |
49 | - Version **0.1a8**
50 |
51 | + Added a new kernel: `Weisfeiler-Lehman-Optimal-Assignment <https://ysig.github.io/GraKeL/0.1a8/kernels/weisfeiler_lehman_optimal_assignment.html>`_.
52 | + Removed MultiScaleLaplacian (as it was very slow and of little practical use) and renamed MultiScaleLaplacianFast to MultiScaleLaplacian.
53 | + Fixed minor issues (joblib deprecation, skbunch, etc.) from `0.1a7`.
54 |
55 | - Version **0.1a7**
56 |
57 | + Detailed installation instructions for the C++ extensions on Windows.
58 | + Renamed the `base_kernel` argument of frameworks to `base_graph_kernel`, to disambiguate it from vectorial kernels.
59 | + Speed-up for floyd_warshall calculation in graph.py.
60 | + Large update throughout all the documentation.
61 |
62 | - Version **0.1a6**
63 |
64 | + More scikit-learn compatibility:
65 |
66 | 1. Initialization of kernels by name and alias on GraphKernel (e.g. GraphKernel(kernel="shortest_path")).
67 | 2. Fit and instantiation with default parameters.
68 | 3. Random number generation standardized through `check_random_state`; `random_seed` arguments are now `random_state`.
69 | 4. Doctests.
70 |
71 | + Miscellaneous:
72 |
73 | 1. Detailed unsupported kernel output.
74 | 2. More detailed licensing information concerning **cvxopt** and **BLISS**.
75 | 3. Small bugfix inside the (Count Sensitive) Neighborhood Hash Kernel.
76 | 4. Added sparse-compatibility for VertexHistogram and for EdgeHistogram.
77 |
78 | - Version **0.1a5**
79 |
80 | + Various bugfixes in kernel implementations.
81 | + Added a collection of :code:`utils` functions for external operations: transforming existing *graph formats* (csv, pandas, networkx) to the native grakel format, *k-fold cross validation* with an SVM, and a *kernel matrix transformer* for manipulating precomputed kernel matrices in a :code:`Transformer` fashion.
82 | + **Conda** compatibility: visit the `grakel-dev anaconda page <https://anaconda.org/ysig/grakel-dev>`_.
83 |
84 | ==================
85 | Indices and tables
86 | ==================
87 |
88 | * :ref:`genindex`
89 | * :ref:`modindex`
90 | * :ref:`search`
91 |
--------------------------------------------------------------------------------
/doc/kernels.rst:
--------------------------------------------------------------------------------
1 | .. _kernels:
2 |
3 | Kernels (between graphs)
4 | ========================
5 | .. module:: grakel.kernels
6 |
7 | The documentation of the `kernels` submodule.
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 | :glob:
12 |
13 | kernels/*
14 |
--------------------------------------------------------------------------------
/doc/kernels/core_framework.rst:
--------------------------------------------------------------------------------
1 | .. _core_framework:
2 |
3 | Core Kernel Framework
4 | =====================
5 |
6 | The core framework is a tool for increasing the expressive power of graph kernels :cite:`nikolentzos2018degeneracy`.
7 | The framework is not restricted to graph kernels, but can be applied to any graph comparison algorithm.
8 | It capitalizes on the :math:`k`-core decomposition which is capable of uncovering topological and hierarchical properties of graphs.
9 | Specifically, the :math:`k`-core decomposition is a powerful tool for network analysis and it is commonly used as a measure of importance and well connectedness for vertices in a broad spectrum of applications.
10 | The notion of :math:`k`-core was first introduced by Seidman to study the cohesion of social networks :cite:`seidman1983network`.
11 | In recent years, the :math:`k`-core decomposition has been established as a standard tool in many application domains such as in network visualization :cite:`alvarez2006large`, in protein function prediction :cite:`wuchty2005peeling` and in graph clustering :cite:`giatsidis2014corecluster`.
12 |
13 | Core Decomposition
14 | ------------------
15 |
16 | Let :math:`G = (V,E)` be an undirected and unweighted graph.
17 | Let :math:`n` and :math:`m` denote the number of vertices and number of edges, respectively.
18 | Given a subset of vertices :math:`S \subseteq V`, let :math:`E(S)` be the set of edges that have both end-points in :math:`S`.
19 | Then, :math:`G'=(S,E(S))` is the subgraph induced by :math:`S`.
20 | We use :math:`G' \subseteq G` to denote that :math:`G'` is a subgraph of :math:`G`.
21 | The degree of a vertex :math:`v \in S`, :math:`d_{G'}(v)`, is equal to the number of vertices that are adjacent to :math:`v` in :math:`G'`.
22 | Let :math:`G` be a graph and :math:`G'` a subgraph of :math:`G` induced by a set of vertices :math:`S`.
23 | Then, :math:`G'` is defined to be a :math:`k`-core of :math:`G`, denoted by :math:`C_k`, if it is a maximal subgraph of :math:`G` in which all vertices have degree at least :math:`k`.
24 | Hence, if :math:`G'` is a :math:`k`-core of :math:`G`, then :math:`\forall v \in S`, :math:`d_{G'}(v) \geq k`.
25 | Each :math:`k`-core is a unique subgraph of :math:`G`, and it is not necessarily connected.
26 | The core number :math:`c(v)` of a vertex :math:`v` is equal to the highest-order core that :math:`v` belongs to.
27 | In other words, :math:`v` has core number :math:`c(v) = k`, if it belongs to a :math:`k`-core but not to a :math:`(k+1)`-core.
28 | The degeneracy :math:`\delta^*(G)` of a graph :math:`G` is defined as the maximum :math:`k` for which graph :math:`G` contains a non-empty :math:`k`-core subgraph, :math:`\delta^*(G) = \max_{v \in V}c(v)`.
29 | Furthermore, assuming that :math:`\mathcal{C} = \{ C_0, C_1, \ldots, C_{\delta^*(G)} \}` is the set of all :math:`k`-cores, then :math:`\mathcal{C}` forms a nested chain
30 |
31 | .. math::
32 |
33 | C_{\delta^*(G)} \subseteq \ldots \subseteq C_1 \subseteq C_0 = G
34 |
35 | Therefore, the :math:`k`-core decomposition is a very useful tool for discovering the hierarchical structure of graphs.
36 | The :math:`k`-core decomposition of a graph can be computed in :math:`\mathcal{O}(n+m)` time :cite:`matula1983smallest`, :cite:`batagelj2011fast`.
37 | The underlying idea is that we can obtain the :math:`i`-core of a graph if we recursively remove all vertices with degree less than :math:`i` and their incident edges from the graph until no other vertex can be removed.
38 |
39 |
40 | Core Kernels
41 | ------------
42 |
43 | The :math:`k`-core decomposition builds a hierarchy of nested subgraphs, each having stronger connectedness properties compared to the previous ones.
44 | The core framework measures the similarity between the subgraphs that correspond to each other in this hierarchy and aggregates the results.
45 | Let :math:`G=(V,E)` and :math:`G'=(V',E')` be two graphs.
46 | Let also :math:`k` be any kernel for graphs.
47 | Then, the core variant of the base kernel :math:`k` is defined as
48 |
49 | .. math::
50 |
51 | k_c(G, G') = k(C_0,C'_0) + k(C_1,C'_1) + \ldots + k(C_{\delta^*_{min}},C'_{\delta^*_{min}})
52 |
53 | where :math:`\delta^*_{min}` is the minimum of the degeneracies of the two graphs, and :math:`C_0,C_1,\ldots,C_{\delta^*_{min}}` and :math:`C'_0,C'_1,\ldots,C'_{\delta^*_{min}}` are the :math:`0`-core, :math:`1`-core, :math:`\ldots`, :math:`\delta^*_{min}`-core subgraphs of :math:`G` and :math:`G'`, respectively.
54 | By decomposing graphs into subgraphs of increasing importance, the algorithm is capable of more accurately capturing their underlying structure.
55 |
56 | The computational complexity of the core framework depends on the complexity of the base kernel and the degeneracy of the graphs under comparison.
57 | Given a pair of graphs :math:`G, G'` and an algorithm :math:`A` for comparing the two graphs, let :math:`\mathcal{O}_A` be the time complexity of algorithm :math:`A`.
58 | Let also :math:`\delta^*_{min} = \min \big( \delta^*(G),\delta^*(G') \big)` be the minimum of the degeneracies of the two graphs.
59 | Then, the complexity of computing the core variant of algorithm :math:`A` is :math:`\mathcal{O}_{c}=\delta^*_{min}\mathcal{O}_A`.
60 | It is well-known that the degeneracy of a graph is upper bounded by the maximum of the degrees of its vertices and by the largest eigenvalue of its adjacency matrix :math:`\lambda_1`.
61 | Since in most real-world graphs it holds that :math:`\lambda_1 \ll n`, it also holds that :math:`\delta^*_{max} \ll n`, and hence, the time complexity added by the core framework is not very high.
62 |
63 | The implementation of the core framework can be found below
64 |
65 | .. currentmodule:: grakel
66 |
67 | .. autosummary::
68 |
69 | CoreFramework
70 |
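A usage sketch follows; the toy graphs are arbitrary, and passing the base kernel through a :code:`base_graph_kernel` argument follows the naming convention for frameworks mentioned in the package changelog, so treat the exact signature as an assumption.

.. code-block:: python

    from grakel import Graph, CoreFramework, WeisfeilerLehman

    g1 = Graph([[0, 1, 1], [1, 0, 1], [1, 1, 0]], node_labels={0: 'a', 1: 'b', 2: 'a'})
    g2 = Graph([[0, 1, 1, 0], [1, 0, 0, 1], [1, 0, 0, 1], [0, 1, 1, 0]],
               node_labels={0: 'a', 1: 'a', 2: 'b', 3: 'b'})

    # The framework evaluates the base kernel on the nested k-core subgraphs and
    # sums the resulting values, as in the definition of k_c above.
    ck = CoreFramework(base_graph_kernel=WeisfeilerLehman, normalize=True)
    K = ck.fit_transform([g1, g2])
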
71 | Bibliography
72 | ------------
73 | .. bibliography:: graph_kernels.bib
74 | :filter: docname in docnames
75 |
--------------------------------------------------------------------------------
/doc/kernels/edge_histogram.rst:
--------------------------------------------------------------------------------
1 | .. _edge_histogram:
2 |
3 | Edge Histogram Kernel
4 | =====================
5 |
6 | The edge histogram kernel is a basic linear kernel on edge label histograms.
7 | The kernel assumes edge-labeled graphs.
8 | Let :math:`\mathcal{G}` be a collection of graphs, and assume that each of their edges comes from an abstract edge space :math:`\mathcal{E}`.
9 | Given a set of edge labels :math:`\mathcal{L}`, :math:`\ell : \mathcal{E} \rightarrow \mathcal{L}` is a function that assigns labels to the edges of the graphs.
10 | Assume that there are :math:`d` labels in total, that is :math:`d = |\mathcal{L}|`.
11 | Then, the edge label histogram of a graph :math:`G=(V,E)` is a vector :math:`\mathbf{f} = (f_1, f_2, \ldots, f_d)`, such that :math:`f_i = |\{ (v,u) \in E : \ell(v,u) = i \}|` for each :math:`i \in \mathcal{L}`.
12 | Let :math:`\mathbf{f}, \mathbf{f}'` be the edge label histograms of two graphs :math:`G, G'`, respectively.
13 | The edge histogram kernel is then defined as the linear kernel between :math:`\mathbf{f}` and :math:`\mathbf{f}'`, that is
14 |
15 | .. math::
16 |
17 | k(G, G') = \langle \mathbf{f}, \mathbf{f}' \rangle
18 |
19 | The complexity of the edge histogram kernel is linear in the number of edges of the graphs.
20 |
21 | An implementation of that kernel can be found below
22 |
23 | .. currentmodule:: grakel
24 |
25 | .. autosummary::
26 |
27 | EdgeHistogram
28 |
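As a usage sketch (the particular label values are arbitrary, and the edge-label dictionary format, keyed by vertex-index pairs in both directions, is an assumption about the input format):

.. code-block:: python

    from grakel import Graph, EdgeHistogram

    # Two toy edge-labeled graphs given as adjacency matrices.
    g1 = Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]],
               edge_labels={(0, 1): 'a', (1, 0): 'a', (0, 2): 'b', (2, 0): 'b'})
    g2 = Graph([[0, 1], [1, 0]],
               edge_labels={(0, 1): 'a', (1, 0): 'a'})

    # Each entry of K is the dot product of the two edge-label histograms.
    K = EdgeHistogram().fit_transform([g1, g2])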
--------------------------------------------------------------------------------
/doc/kernels/graph_hopper.rst:
--------------------------------------------------------------------------------
1 | .. _graph_hopper:
2 |
3 | Graph Hopper Kernel
4 | ===================
5 |
6 | Given two graphs, the GraphHopper kernel compares shortest paths between pairs of vertices from the two graphs :cite:`feragen2013scalable`.
7 | The kernel takes into account both path lengths and the vertices encountered while "hopping" along shortest paths.
8 | The kernel is equivalent to a weighted sum of node kernels.
9 |
10 |
11 | Let :math:`G=(V,E)` be a graph.
12 | The graph contains either discrete node labels or continuous node attributes.
13 | Let :math:`\ell : \mathcal{V} \rightarrow \mathcal{L}` be a labeling function that assigns either discrete labels or continuous attributes to vertices.
14 | The kernel compares node labels/attributes using a kernel :math:`k_n` (e.g. a delta kernel in the case of node labels, and a linear or Gaussian kernel in the case of node attributes).
15 | Given two vertices :math:`v,u \in V`, a path :math:`\pi` from :math:`v` to :math:`u` in :math:`G` is defined as a sequence of vertices
16 |
17 | .. math::
18 |
19 | \pi = [v_1, v_2, v_3, \ldots, v_l]
20 |
21 | where :math:`v_1 = v`, :math:`v_l = u` and :math:`(v_i, v_{i+1}) \in E` for all :math:`i=1,\ldots,l-1`.
22 | Let :math:`\pi(i) = v_i` denote the :math:`i^{th}` vertex encountered when "hopping" along the path.
23 | Denote by :math:`l(\pi)` the weighted length of :math:`\pi` and by :math:`|\pi|` its discrete length, defined as the number of vertices in :math:`\pi`.
24 | The shortest path :math:`\pi_{ij}` from :math:`v_i` to :math:`v_j` is defined in terms of weighted length.
25 | The diameter :math:`\delta(G)` of :math:`G` is the maximal number of nodes in a shortest path in :math:`G`, with respect to weighted path length.
26 |
27 | The GraphHopper kernel is defined as a sum of path kernels :math:`k_p` over the families :math:`P, P'` of shortest
28 | paths in :math:`G,G'`
29 |
30 | .. math::
31 |
32 | k(G,G') = \sum_{\pi \in P} \sum_{\pi' \in P'} k_p(\pi, \pi')
33 |
34 | The path kernel :math:`k_p(\pi, \pi')` is a sum of node kernels :math:`k_n` on the vertices encountered while simultaneously hopping along paths :math:`\pi` and :math:`\pi'` of equal discrete length, that is
35 |
36 | .. math::
37 |
38 | k_p(\pi, \pi') = \begin{cases}
39 | \sum_{j=1}^{|\pi|} k_n(\pi(j), \pi'(j)), & \text{if $|\pi| = |\pi'|$},\\
40 | 0, & \text{otherwise.}
41 | \end{cases}
42 |
43 | The :math:`k(G,G')` kernel can be decomposed into a weighted sum of node kernels
44 |
45 | .. math::
46 |
47 | k(G,G') = \sum_{v \in V} \sum_{v' \in V'} w(v,v') k_n(v, v')
48 |
49 | where :math:`w(v,v')` counts the number of times :math:`v` and :math:`v'` appear at the same hop, or coordinate, :math:`i` of shortest paths :math:`\pi,\pi'` of equal discrete length :math:`|\pi| = |\pi'|`.
50 | We can decompose the weight :math:`w(v,v')` as
51 |
52 | .. math::
53 |
54 | w(v,v') = \sum_{j=1}^\delta \sum_{i=1}^\delta | \{ (\pi,\pi') : \pi(i)=v, \pi'(i)=v', |\pi|=|\pi'|=j \} | = \sum_{j=1}^\delta \sum_{i=1}^\delta [\mathbf{M_v}]_{ij} [\mathbf{M_{v'}}]_{ij}
55 |
56 | where :math:`\mathbf{M_v}` is a :math:`\delta \times \delta` matrix whose entry :math:`[\mathbf{M_v}]_{ij}` counts how many times :math:`v` appears at the :math:`i^{th}` coordinate of a shortest path in :math:`G` of discrete length :math:`j`, and :math:`\delta = \max(\delta(G), \delta(G'))`.
57 | The components of these matrices can be computed efficiently using recursive message-passing algorithms.
58 | The total complexity of computing :math:`k(G,G')` is :math:`\mathcal{O}(n^2(m + \log n + d + \delta^2))` where :math:`n` is the number of vertices, :math:`m` is the number of edges and :math:`d` is the dimensionality of the node attributes (:math:`d=1` in the case of discrete node labels).
59 |
60 | The implementation of the GraphHopper kernel can be found below
61 |
62 | .. currentmodule:: grakel
63 |
64 | .. autosummary::
65 |
66 | GraphHopper
67 |
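A usage sketch for the attributed case follows (the 2-dimensional attribute vectors are arbitrary, and passing them through :code:`node_labels` is an assumption about the input format; the node kernel is left to its default).

.. code-block:: python

    from grakel import Graph, GraphHopper

    # Toy graphs whose vertices carry continuous attribute vectors.
    g1 = Graph([[0, 1, 1], [1, 0, 1], [1, 1, 0]],
               node_labels={0: [1.0, 0.5], 1: [0.2, 0.3], 2: [0.9, 0.1]})
    g2 = Graph([[0, 1], [1, 0]],
               node_labels={0: [1.0, 0.4], 1: [0.1, 0.2]})

    K = GraphHopper(normalize=True).fit_transform([g1, g2])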
68 | Bibliography
69 | ------------
70 | .. bibliography:: graph_kernels.bib
71 | :filter: docname in docnames
72 |
--------------------------------------------------------------------------------
/doc/kernels/graphlet_sampling.rst:
--------------------------------------------------------------------------------
1 | .. _graphlet_sampling:
2 |
3 | Graphlet Sampling Kernel
4 | ========================
5 |
6 | The graphlet sampling kernel decomposes graphs into graphlets (i.e. small subgraphs with :math:`k` nodes
7 | where :math:`k \in \{ 3,4,5, \ldots \}`) :cite:`prvzulj2007biological` and counts matching graphlets
8 | in the input graphs. Let :math:`\mathcal{G} = \{ graphlet_1,graphlet_2, \ldots, graphlet_r\}` be the set
9 | of size-:math:`k` graphlets.
10 | Let also :math:`f_G \in \mathbb{N}^r` be a vector such that its :math:`i`-th entry is equal to the
11 | frequency of occurrence of :math:`graphlet_i` in :math:`G`, :math:`f_{G,i} = \#(graphlet_i \sqsubseteq G)`.
12 | Then, the graphlet kernel is defined as follows.
13 |
14 | Graphlet of size :math:`k` Kernel
15 | ---------------------------------
16 | Let :math:`G_i`, :math:`G_j` be two graphs of size :math:`n \geq k`, and :math:`f_{G_i}, f_{G_j}` vectors
17 | that count the occurrence of each graphlet of size :math:`k` in the two graphs.
18 | Then the graphlet kernel is defined as
19 |
20 | .. math::
21 | :nowrap:
22 |
23 | \begin{equation}
24 | k(G_i,G_j) = f_{G_i}^\top \ f_{G_j}
25 | \end{equation}
26 |
27 | As is evident from the above definition, the graphlet kernel is computed by explicit feature maps.
28 | First, the representation of each graph in the feature space is computed.
29 | And then, the kernel value is computed as the dot product of the two feature vectors.
30 | The main problem of the graphlet kernel is that an exhaustive enumeration of graphlets is very expensive.
31 | Since there are :math:`\binom{n}{k}` size-:math:`k` subgraphs in a graph, computing the feature vector
32 | for a graph of size :math:`n` requires :math:`\mathcal{O}(n^k)` time.
33 | To account for that, Shervashidze et al. resorted to sampling :cite:`shervashidze2009efficient`.
34 | Following Weissman et al. :cite:`weissman2003inequalities`, they showed that by sampling a fixed number
35 | of graphlets the empirical distribution of graphlets will be sufficiently close to their actual distribution
36 | in the graph.
37 |
38 | Below follows the implemented graphlet sampling kernel. By using the parameter *sampling*, the user
39 | can explore various possibilities, from sampling all graphlets to sampling probabilistically, based
40 | either on the number of samples or on the satisfaction of a certain probabilistic bound on the error.
41 |
42 | .. currentmodule:: grakel
43 |
44 | .. autosummary::
45 |
46 | GraphletSampling
47 |
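A usage sketch follows; the graphs are arbitrary, and the dictionary passed to the *sampling* parameter (here requesting a fixed number of sampled graphlets) is an assumption about its accepted format.

.. code-block:: python

    from grakel import Graph, GraphletSampling

    # Two unlabeled toy graphs given as adjacency matrices.
    g1 = Graph([[0, 1, 1, 0], [1, 0, 1, 1], [1, 1, 0, 1], [0, 1, 1, 0]])
    g2 = Graph([[0, 1, 0, 1], [1, 0, 1, 0], [0, 1, 0, 1], [1, 0, 1, 0]])

    # k is the graphlet size; sampling is assumed to accept a dictionary such as
    # {"n_samples": ...} fixing the number of graphlets drawn per graph.
    gs = GraphletSampling(k=3, sampling={"n_samples": 150})
    K = gs.fit_transform([g1, g2])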
48 | Bibliography
49 | ------------
50 | .. bibliography:: graph_kernels.bib
51 | :filter: docname in docnames
52 |
--------------------------------------------------------------------------------
/doc/kernels/hadamard_code.rst:
--------------------------------------------------------------------------------
1 | .. _hadamard_code:
2 |
3 | Hadamard Code Kernel
4 | ====================
5 |
6 | A framework similar to the neighborhood hash kernel and the Weisfeiler-Lehman kernel was introduced by Tetsuya Kataoka and Akihito Inokuchi in :cite:`icpram16`, known as the Hadamard code kernel.
7 | Given a collection of **labeled** graphs :math:`\mathbf{G}=[G_{i}]^{N}_{i=1}`, we collect the set :math:`\Sigma` of all distinct labels appearing in :math:`\mathbf{G}`. The Hadamard code matrix :math:`H_{2^{k}}` of order :math:`2^{k}` is defined recursively as follows:
8 |
9 | .. math::
10 |
11 | H_{2^{k+1}}= \begin{cases}
12 | \begin{pmatrix}
13 | 1 & 1\\
14 | 1 & -1
15 | \end{pmatrix},\text{ if }k = 0
16 | \\\\
17 | \begin{pmatrix}
18 | H_{2^{k}} & H_{2^{k}}\\
19 | H_{2^{k}} & -H_{2^{k}}
20 | \end{pmatrix},\text{if } k > 0
21 | \end{cases}
22 |
23 | Now, by defining a Hadamard matrix :math:`\mathbb{H} = H_{2^{\lceil \log_{2}|\Sigma|\rceil}}`, we initially label each node inside a graph as:
24 |
25 | .. math::
26 |
27 | l^{(0)}(v) = \mathtt{row}_{i}\mathbb{H},\text{ }\textbf{iff}\text{ }label(v) = \Sigma_{i}
28 |
29 | Based on this initial labeling, the following relabeling rule is applied:
30 |
31 | .. math::
32 |
33 | l^{(k+1)}(v) = l^{(k)}(v) + \sum_{u \in N(v)}l^{(k)}(u)
34 |
35 | Here, :math:`N(v)` is used to denote the neighborhood of a node :math:`v`.
36 | Following the above scheme, relabeling is applied iteratively for a fixed number of iterations, while each kernel matrix (calculated from a given *base-kernel*) between the relabeled graphs is aggregated into a total one through summation.
37 |
38 | .. figure:: ../_static/kataoka1.png
39 |
40 | An example of the relabeling procedure of the Hadamard code kernel for a single graph.
41 |
42 |
43 |
44 | The implementation of the Hadamard code kernel framework can be found below. Note that you can use :code:`base_kernel` to attach as a base kernel any kernel for **labeled** graphs.
45 |
46 | .. currentmodule:: grakel
47 |
48 | .. autosummary::
49 |
50 | HadamardCode
51 |
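A usage sketch follows (the toy labeled graphs are arbitrary; the number of relabeling iterations and the base kernel applied to the relabeled graphs can be tuned through the constructor, and the defaults are kept here):

.. code-block:: python

    from grakel import Graph, HadamardCode

    g1 = Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]], node_labels={0: 'a', 1: 'b', 2: 'b'})
    g2 = Graph([[0, 1, 1, 1], [1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]],
               node_labels={0: 'a', 1: 'b', 2: 'b', 3: 'b'})

    # Iterative Hadamard-code relabeling, followed by summation of the
    # per-iteration base-kernel matrices.
    K = HadamardCode(normalize=True).fit_transform([g1, g2])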
52 | Bibliography
53 | ------------
54 | .. bibliography:: graph_kernels.bib
55 | :filter: docname in docnames
56 |
--------------------------------------------------------------------------------
/doc/kernels/kernel.rst:
--------------------------------------------------------------------------------
1 | .. _kernel:
2 |
3 | Kernel (general class)
4 | ======================
5 |
6 | In the literature, a graph kernel appears as a function: :math:`k \; : \; \mathcal{G} \times \mathcal{G} \rightarrow \mathbb{R}` for which there exists a map:
7 |
8 | .. math::
9 |
10 | \phi \; :\; \mathcal{G} \rightarrow \mathbb{H}, \text{for a Hilbert space } \mathbb{H}
11 |
12 | where each kernel value can be computed as :math:`k(G_{i}, G_{j}) = \langle \phi(G_{i}), \phi(G_{j}) \rangle` where :math:`\langle \cdot , \cdot \rangle` denotes the inner product inside this space. The emerging matrix :math:`\mathbf{K}_{ij} = k(G_{i}, G_{j})` is known as the kernel matrix. For a kernel matrix to be valid it is required to be *positive semidefinite* (i.e. :math:`\lambda_{min} \ge 0`).
13 |
14 | .. note::
15 |
16 | The kernels implemented in this package all have **polynomial** time complexity.
17 |
18 | In many cases there is a significant computational advantage in computing the kernel for a whole collection of graphs :math:`[G_{i}]_{i=1}^{N}` at once, instead of separately for each pair of graphs. Given two collections of graphs :math:`\mathcal{G}^{n}, \mathcal{G}^{m}`, the full kernel matrix :math:`\mathcal{K}` looks like:
19 |
20 | .. math::
21 |
22 | \mathcal{K} =
23 | \left[
24 | \begin{array}{c||c}
25 | \mathcal{K}^{n\times n} & \mathcal{K}^{n\times m} \\
26 | \hline
27 | \hline
28 | \mathcal{K}^{m\times n} & \mathcal{K}^{m\times m}
29 | \end{array}
30 | \right]
31 |
32 | Any class inheriting from :code:`Kernel` should match the following behavior:
33 |
34 | - :math:`\mathcal{K}^{n\times n}=\texttt{.fit_transform}(\mathcal{G}^{n})`
35 | - :math:`\mathcal{K}^{m\times n}=\texttt{.fit}(\mathcal{G}^{\text{n}}).\texttt{transform}(\mathcal{G}^{\text{m}})`
36 | - :math:`\mathcal{K}=\texttt{.fit_transform}([\mathcal{G}^{n}\; \mathcal{G}^{m}])`
37 |
38 | In graph classification, a problem tackled mainly by graph kernels, we are usually interested in calculating the matrices :math:`\mathcal{K}^{n\times n}` and :math:`\mathcal{K}^{m\times n}`.
39 |
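As a sketch of this behavior (the toy graphs are arbitrary, and any concrete kernel, here the shortest-path kernel, can stand in for :code:`Kernel`):

.. code-block:: python

    from grakel import Graph, ShortestPath

    train = [Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]], node_labels={0: 'a', 1: 'b', 2: 'b'}),
             Graph([[0, 1], [1, 0]], node_labels={0: 'a', 1: 'a'})]
    test = [Graph([[0, 1, 0], [1, 0, 1], [0, 1, 0]], node_labels={0: 'b', 1: 'a', 2: 'b'})]

    kernel = ShortestPath(normalize=True)
    K_train = kernel.fit_transform(train)   # the n x n matrix K^{n x n}
    K_test = kernel.transform(test)         # the m x n matrix K^{m x n}
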
40 | .. currentmodule:: grakel
41 |
42 | Parametrization
43 | ---------------
44 |
45 | Any object of a class inheriting from :code:`Kernel` comes with three parameters:
46 |
47 | - :code:`verbose` is a :code:`bool` parameter that makes the kernel print messages related to the progress of the execution.
48 | - :code:`normalize` is a :code:`bool` parameter which ensures that the kernel output is normalized, that is :math:`[\mathcal{\hat{K}}]_{ij} = \frac{[\mathcal{K}]_{ij}}{\sqrt[]{[\mathcal{K}]_{ii}*[\mathcal{K}]_{jj}}}`.
49 | - :code:`n_jobs` is an :code:`int` parameter that defines the number of parallel jobs over which parts of the kernel computation are executed (where a parallel implementation exists).
50 |
51 | .. note::
52 |
53 | In order for normalization to work even inside a framework scheme, the underlying :code:`Kernel` must implement a :code:`diagonal` method.
54 |
55 | The :code:`Kernel` class as discussed above can be found below:
56 |
57 | .. autosummary::
58 |
59 | Kernel
60 |
--------------------------------------------------------------------------------
/doc/kernels/lovasz_theta.rst:
--------------------------------------------------------------------------------
1 | .. _lovasz_theta:
2 |
3 | Lovasz Theta Kernel
4 | ===================
5 |
6 | The Lovász number :math:`\vartheta(G)` of a graph :math:`G=(V,E)` is a real number that is an upper bound on the Shannon capacity of the graph.
7 | It was introduced by László Lovász in :math:`1979` :cite:`lovasz1979shannon`.
8 | The Lovász number is intimately connected with the notion of orthonormal representations of graphs.
9 | An orthonormal representation of a graph :math:`G` consists of a set of unit vectors :math:`U_G = \{ \mathbf{u}_i \in \mathbb{R}^d : || \mathbf{u}_i || = 1 \}_{i \in V}` where each vertex :math:`i` is assigned a unit vector :math:`\mathbf{u}_i` such that :math:`(i,j) \not \in E \implies \mathbf{u}_i^\top \mathbf{u}_j = 0`.
10 | Specifically, the Lovász number of a graph :math:`G` is defined as
11 |
12 | .. math::
13 |
14 | \vartheta(G) = \min_{\mathbf{c}, U_G} \max_{i \in V} \frac{1}{(\mathbf{c}^\top \mathbf{u}_i)^2}
15 |
16 | where :math:`\mathbf{c} \in \mathbb{R}^d` is a unit vector and :math:`U_G` is an orthonormal representation of :math:`G`.
17 | Geometrically, :math:`\vartheta(G)` is defined by the smallest cone enclosing a valid orthonormal representation :math:`U_G`.
18 | The Lovász number :math:`\vartheta(G)` of a graph :math:`G` can be computed to arbitrary precision in polynomial time by solving a semidefinite program.
19 |
20 | The Lovász :math:`\vartheta` kernel utilizes the orthonormal representations associated with the Lovász number to compare graphs :cite:`johansson2014global`.
21 | The kernel is applicable only to unlabeled graphs.
22 | Given a collection of graphs, it first generates orthonormal representations for the vertices of each graph by computing the Lovász :math:`\vartheta` number.
23 | Hence, :math:`U_G` is a set that contains the orthonormal representations of :math:`G`.
24 | Let :math:`S \subseteq V` be a subset of the vertex set of :math:`G`.
25 | Then, the Lovász value of the set of vertices :math:`S` is defined as
26 |
27 | .. math::
28 |
29 | \vartheta_S(G) = \min_{\mathbf{c}} \max_{i \in S} \frac{1}{(\mathbf{c}^\top \mathbf{u}_i)^2}
30 |
31 | where :math:`\mathbf{c} \in \mathbb{R}^d` is a unit vector and :math:`\mathbf{u}_i` is the representation of vertex :math:`i` obtained by computing the Lovász number :math:`\vartheta(G)` of :math:`G`.
32 | The Lovász value of a set of vertices :math:`S` represents the angle of the smallest cone enclosing the set of orthonormal representations of these vertices (i.e. the subset of :math:`U_G` defined as :math:`\{ \mathbf{u}_i : \mathbf{u}_i \in U_G, i \in S \}`).
33 |
34 | The Lovász :math:`\vartheta` kernel between two graphs :math:`G, G'` is then defined as follows:
35 |
36 | .. math::
37 |
38 | k_{Lo}(G, G') = \sum_{S \subseteq V} \sum_{S' \subseteq V'} \delta(|S|, |S'|) \frac{1}{Z_{|S|}} k(\vartheta_S(G), \vartheta_{S'}(G'))
39 |
40 | where :math:`Z_{|S|} = \binom{|V|}{|S|} \binom{|V'|}{|S|}`, :math:`\delta(|S|, |S'|)` is a delta kernel (equal to :math:`1` if :math:`|S|=|S'|`, and :math:`0` otherwise), and :math:`k` is a positive semidefinite kernel between Lovász values (e.g. a linear or Gaussian kernel).
41 |
42 | The Lovász :math:`\vartheta` kernel consists of two main steps: (:math:`1`) computing the Lovász number :math:`\vartheta` of each graph and obtaining the associated orthonormal representations, and (:math:`2`) computing the Lovász value for all subgraphs (i.e. subsets of vertices :math:`S \subseteq V`) of each graph.
43 | Exact computation of the Lovász :math:`\vartheta` kernel is in most real settings infeasible since it requires computing the minimum enclosing cones of :math:`2^n` sets of vertices.
44 |
45 | When dealing with large graphs, it is thus necessary to resort to sampling.
46 | Given a graph :math:`G`, instead of evaluating the Lovász value on all :math:`2^n` sets of vertices, the algorithm evaluates it on a smaller collection of subsets :math:`\mathfrak{S} \subseteq 2^V`.
47 | Then, the Lovász :math:`\vartheta` kernel is defined as follows
48 |
49 | .. math::
50 |
51 | \hat{k}_{Lo}(G, G') = \sum_{S \in \mathfrak{S}} \sum_{S' \in \mathfrak{S}'} \delta(|S|, |S'|) \frac{1}{\hat{Z}_{|S|}} k(\vartheta_S(G), \vartheta_{S'}(G'))
52 |
53 | where :math:`\hat{Z}_{|S|} = |\mathfrak{S}_{|S|}| |\mathfrak{S}'_{|S|}|` and :math:`\mathfrak{S}_{|S|}` denotes the subset of :math:`\mathfrak{S}` consisting of all sets of cardinality :math:`|S|`, that is :math:`\mathfrak{S}_{|S|} = \{ B \in \mathfrak{S} : |B| = |S| \}`.
54 |
55 | The time complexity of computing :math:`\hat{k}_{Lo}(G, G')` is :math:`\mathcal{O}(n^2 m \epsilon^{-1} + s^2 T(k) + sn)` where :math:`T(k)` is the complexity of computing the base kernel :math:`k`, :math:`n = |V|`, :math:`m = |E|` and :math:`s = \max(|\mathfrak{S}|, |\mathfrak{S}'|)`.
56 | The first term represents the cost of solving the semi-definite program that computes the Lovász number :math:`\vartheta`.
57 | The second term corresponds to the worst-case complexity of computing the sum of the Lovász values.
58 | And finally, the third term is the cost of computing the Lovász values of the sampled subsets of vertices.
59 |
60 | The implementation of the Lovász :math:`\vartheta` kernel can be found below
61 |
62 | .. currentmodule:: grakel
63 |
64 | .. autosummary::
65 |
66 | LovaszTheta
67 |
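A usage sketch follows (the unlabeled toy graphs are arbitrary; solving the underlying semidefinite programs relies on the cvxopt package, and the number of sampled vertex subsets is left to its default).

.. code-block:: python

    from grakel import Graph, LovaszTheta

    # Unlabeled toy graphs given as adjacency matrices.
    g1 = Graph([[0, 1, 1, 0], [1, 0, 1, 1], [1, 1, 0, 1], [0, 1, 1, 0]])
    g2 = Graph([[0, 1, 0], [1, 0, 1], [0, 1, 0]])

    K = LovaszTheta(normalize=True).fit_transform([g1, g2])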
68 | Bibliography
69 | ------------
70 | .. bibliography:: graph_kernels.bib
71 | :filter: docname in docnames
72 |
--------------------------------------------------------------------------------
/doc/kernels/neighborhood_subgraph_pairwise_distance.rst:
--------------------------------------------------------------------------------
1 | .. _nspdk:
2 |
3 | Neighborhood Subgraph Pairwise Distance Kernel
4 | ==============================================
5 | The neighborhood subgraph pairwise distance kernel extracts pairs of rooted subgraphs from each graph whose roots are located at a certain distance from each other, and which contain vertices up to a certain distance from the root :cite:`costa2010fast`.
6 | It then compares graphs based on these pairs of rooted subgraphs.
7 | To avoid isomorphism checking, graph invariants are employed to encode each rooted subgraph.
8 |
9 | Let :math:`G=(V,E)` be a graph.
10 | The distance between two vertices :math:`u,v \in V`, denoted :math:`D(u,v)`, is the length of the shortest path between them.
11 | The neighborhood of radius :math:`r` of a vertex :math:`v` is the set of vertices at a distance less than or equal to :math:`r` from :math:`v`, that is :math:`\{ u \in V : D(u,v) \leq r\}`.
12 | Given a subset of vertices :math:`S \subseteq V`, let :math:`E(S)` be the set of edges that have both end-points in :math:`S`.
13 | Then, the subgraph with vertex set :math:`S` and edge set :math:`E(S)` is known as the subgraph induced by :math:`S`.
14 | The neighborhood subgraph of radius :math:`r` of vertex :math:`v` is the subgraph induced by the neighborhood of radius :math:`r` of :math:`v` and is denoted by :math:`N_r^v`.
15 | Let also :math:`R_{r,d}(A_v,B_u,G)` be a relation between two rooted graphs :math:`A_v`, :math:`B_u` and a graph :math:`G=(V,E)` that is true if and only if both :math:`A_v` and :math:`B_u` are in :math:`\{N_r^v : v \in V \}`, where we require :math:`A_v, B_u` to be isomorphic to some :math:`N_r^v` to verify the set inclusion, and that :math:`D(u,v) = d`.
16 | We denote with :math:`R^{-1}(G)` the inverse relation that yields all the pairs of rooted graphs :math:`A_v`, :math:`B_u` satisfying the above constraints.
17 | Hence, :math:`R^{-1}(G)` selects all pairs of neighborhood graphs of radius :math:`r` whose roots are at distance :math:`d` in a given graph :math:`G`.
18 | The neighborhood subgraph pairwise distance kernel utilizes the following kernel
19 |
20 | .. math::
21 |
22 | k_{r,d}(G, G') = \sum_{A_v, B_v \in R_{r,d}^{-1}(G)} \quad \sum_{A'_{v'}, B'_{v'} \in R_{r,d}^{-1}(G')} \delta(A_v, A'_{v'}) \delta(B_v, B'_{v'})
23 |
24 | where :math:`\delta` is :math:`1` if its input subgraphs are isomorphic, and :math:`0` otherwise.
25 | The above kernel counts the number of identical pairs of neighboring graphs of radius :math:`r` at distance :math:`d` between two graphs.
26 | Then, the neighborhood subgraph pairwise distance kernel is defined as
27 |
28 | .. math::
29 |
30 | k(G, G') = \sum_{r=0}^{r^*} \sum_{d=0}^{d^*} \hat{k}_{r,d}(G, G')
31 |
32 | where :math:`\hat{k}_{r,d}` is a normalized version of :math:`k_{r,d}`, that is
33 |
34 | .. math::
35 |
36 | \hat{k}_{r,d}(G,G') = \frac{k_{r,d}(G,G')}{\sqrt{k_{r,d}(G,G) k_{r,d}(G',G')}}
37 |
38 | The above version ensures that relations of all orders are equally weighted regardless of the size of the induced part sets.
39 |
40 | The neighborhood subgraph pairwise distance kernel includes an exact matching kernel over two graphs (i.e. the :math:`\delta` kernel), which is equivalent to solving the graph isomorphism problem.
41 | Repeatedly solving the graph isomorphism problem exactly is not feasible in practice.
42 | Therefore, the kernel resorts to an approximate solution instead.
43 | Given a subgraph :math:`G_S` induced by the set of vertices :math:`S`, the kernel computes a graph invariant encoding for the subgraph via a label function :math:`\mathcal{L}^g : \mathcal{G} \rightarrow \Sigma^*`, where :math:`\mathcal{G}` is the set of rooted graphs and :math:`\Sigma^*` is the set of strings over a finite alphabet :math:`\Sigma`.
44 | The function :math:`\mathcal{L}^g` makes use of two other label functions: (:math:`1`) a function :math:`\mathcal{L}^n` for vertices, and (:math:`2`) a function :math:`\mathcal{L}^e` for edges.
45 | The :math:`\mathcal{L}^n` function assigns to a vertex :math:`v` the concatenation of the lexicographically sorted list of distance, distance-from-root, label triplets :math:`\langle D(v,u), D(v,h), \mathcal{L}(u) \rangle` for all :math:`u \in S`, where :math:`h` is the root of the subgraph and :math:`\mathcal{L}` is a function that maps vertices/edges to their label symbol.
46 | Hence, the above function relabels each vertex with a string that encodes the initial label of the vertex, the vertex distance from all other labeled vertices, and the distance from the root vertex.
47 | The :math:`\mathcal{L}^e(u,v)` function assigns to edge :math:`(u,v)` the label :math:`\langle \mathcal{L}^n(u)`, :math:`\mathcal{L}^n(v)`, :math:`\mathcal{L}((u,v)) \rangle`.
48 | The :math:`\mathcal{L}^e(u,v)` function thus annotates each edge based on the new labels of its endpoints, and its initial label, if any.
49 | Finally, the function :math:`\mathcal{L}^g(G_S)` assigns to the rooted graph induced by :math:`S` the concatenation of the lexicographically sorted list of :math:`\mathcal{L}^e(u,v)` for all :math:`(u,v) \in E(S)`.
50 | The kernel then employs a hashing function from strings to natural numbers :math:`H : \Sigma^* \rightarrow \mathbb{N}` to obtain a unique identifier for each subgraph.
51 | Hence, instead of testing pairs of subgraphs for isomorphism, the kernel just checks if the subgraphs share the same identifier.
52 |
53 | The computational complexity of the neighborhood subgraph pairwise distance kernel is :math:`\mathcal{O}(|V| |S| |E(S)| \log |E(S)|)` and is dominated by the repeated computation of the graph invariant for each vertex of the graph.
54 | Since for small values of :math:`d^*` and :math:`r^*` the extracted subgraphs are of bounded size, this is in practice a constant-time procedure, and the complexity of the kernel becomes linear in the size of the graph.
55 |
56 | The implementation of the neighborhood subgraph pairwise distance kernel can be found below
57 |
58 | .. currentmodule:: grakel
59 |
60 | .. autosummary::
61 |
62 | NeighborhoodSubgraphPairwiseDistance
63 |
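A usage sketch follows; the toy graphs are arbitrary, the edge-label dictionary format is an assumption about the input format, and :code:`r` and :code:`d` are assumed to be the constructor names of the bounds :math:`r^*` and :math:`d^*`.

.. code-block:: python

    from grakel import Graph, NeighborhoodSubgraphPairwiseDistance

    g1 = Graph([[0, 1, 1], [1, 0, 1], [1, 1, 0]],
               node_labels={0: 'A', 1: 'B', 2: 'A'},
               edge_labels={(0, 1): 'x', (1, 0): 'x', (0, 2): 'y', (2, 0): 'y',
                            (1, 2): 'x', (2, 1): 'x'})
    g2 = Graph([[0, 1], [1, 0]],
               node_labels={0: 'A', 1: 'A'},
               edge_labels={(0, 1): 'y', (1, 0): 'y'})

    # r bounds the neighborhood radius and d the distance between the roots of
    # the extracted subgraph pairs (assumed parameter names).
    K = NeighborhoodSubgraphPairwiseDistance(r=3, d=4).fit_transform([g1, g2])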
64 | Bibliography
65 | ------------
66 | .. bibliography:: graph_kernels.bib
67 | :filter: docname in docnames
68 |
--------------------------------------------------------------------------------
/doc/kernels/odd_sth.rst:
--------------------------------------------------------------------------------
1 | .. _odd_sth:
2 |
3 | ODD-STh Kernel
4 | ==============
5 | The ODD-STh kernel is a kernel between labeled graphs. Its approach derives from the idea of utilizing tree-based kernels, i.e. kernels that take as input graphs that are trees. Such kernels are in general more computationally efficient, as trees have structural constraints that can be exploited.
6 | The idea behind the ODD-STh kernel, proposed in :cite:`Martino2012ATK`, is to decompose the two graphs into ordered DAGs and to sum the kernel values between all pairs of DAGs of the original graphs:
7 |
8 | .. math::
9 |
10 | K_{K_{DAG}}(G_{1}, G_{2}) = \sum_{\substack{D_{1} \in DD(G_{1}) \\ D_{2} \in DD(G_{2})}} K_{DAG}(D1, D2)
11 |
12 | where :math:`DD(G_{i})` corresponds to the DAG decomposition of the graph :math:`G_{i}` and :math:`K_{DAG}` is a kernel between DAGs. As the DAG decomposition of each graph, they considered the set of all directed BFS explorations starting from each node of the graph, as shown in the following picture:
13 |
14 | .. figure:: ../_static/odd_sth_1.png
15 | :scale: 100 %
16 |
17 | A simple DAG decomposition of a single graph
18 |
19 |
20 |
21 | Now, in order to move from DAGs to trees, each :math:`K_{DAG}` kernel is calculated as the sum of tree kernels between the trees derived from each of the two DAGs:
22 |
23 | .. math::
24 |
25 | K_{DAG} = \sum_{\substack{v_{1} \in V(D_{1}) \\ v_{2} \in V(D_{2})}} C(root(T(v_{1})), root(T(v_{2})))
26 |
27 | where :math:`T()` corresponds to the tree-visits on DAGs (which preserve a notion of *ordering*, as described in :cite:`Martino2012ATK`, Section 5.2). An example of such tree visits follows:
28 |
29 | .. figure:: ../_static/odd_sth_2.png
30 | :scale: 100 %
31 |
32 | Ordered tree visits on a DAG decomposed from a graph
33 |
34 | :math:`C()` is a kernel between trees, where in our case it will be the **S**\ ub-\ **T**\ ree Kernel (as found in :cite:`STKernel`).
35 |
36 | .. note::
37 | Tree isomorphism can be decided in time linear in the sum of the number of nodes and the number of edges.
38 |
39 | To increase the efficiency of this algorithm, the aggregation of all the DAGs of the decomposition, known as ODD (*Ordered DAG Decomposition*), into a single DAG, denoted :math:`BigDAG`, was proposed. This method, introduced in :cite:`Martino2006` (MinimalDAG: Figure 2, p. 3), merges nodes having the same labels, keeping frequency counters, if they correspond to the same path in each DAG, while preserving the nodes that cannot be merged.
40 |
41 | .. figure:: ../_static/odd_sth_3.png
42 | :scale: 100 %
43 |
44 | Construction of a :math:`BigDAG` from two DAGs
45 |
46 | Doing so allows us to replace the kernel computation:
47 |
48 | .. math::
49 |
50 | K_{K_{DAG}}(G_{1}, G_{2}) = \sum_{\substack{D_{1} \in DD(G_{1}) \\ D_{2} \in DD(G_{2})}} K_{DAG}(D1, D2)
51 |
52 | with:
53 |
54 | .. math::
55 |
56 | K_{BigDAG}(G_{1}, G_{2}) = \sum_{\substack{u_{1} \in V(BigDAG(G_{1}))\\ u_{2} \in V(BigDAG(G_{2}))}} f_{u_{1}}f_{u_{2}}C(u_{1}, u_{2})
57 |
58 | where :math:`f_{u}` is the frequency counter of the node :math:`u` and :math:`C(u, v)` is the number of matching proper subtrees rooted at :math:`u` and :math:`v`. Going one step further, a :math:`Big^{2}DAG` was created, where all the :math:`BigDAGs` (one per graph) are aggregated into a single DAG in the same way, except that instead of incrementing frequencies on common nodes, a frequency vector holding the appended frequencies for each DAG is constructed.
59 |
60 | .. figure:: ../_static/odd_sth_4.png
61 | :scale: 100 %
62 |
63 | Construction of a :math:`Big^{2}DAG` from two :math:`BigDAGs`
64 |
65 | In the final :math:`Big^{2}DAG` graph, the computation of the kernel matrix is all about calculating the following formula:
66 |
67 | .. math::
68 |
69 | K_{Big^{2}DAG}(G_{i}, G_{j}) = \sum_{u_{1}, u_{2} \in V(Big^{2}DAG)} F_{u_{1}}[i] * F_{u_{2}}[j] C(u_{1}, u_{2})
70 |
71 | which is equivalent to:
72 |
73 | .. math::
74 |
75 | K_{Big^{2}DAG}(G_{i}, G_{j}) = \sum_{u \in V(Big^{2}DAG)} F_{u}[i] * F_{u}[j] C(u, u)
76 |
77 | because the subtree kernel will have a match only between identical subtrees, that is:
78 |
79 | .. math::
80 |
81 | C(u_{1}, u_{2}) \not= 0 \leftrightarrow T(u_{1}) = T(u_{2})
82 |
83 | Finally, in order to construct the :math:`Big^{2}DAG`, each vertex is represented by a tuple containing a unique hash (whose uniqueness has to do with the ordering), a frequency vector and a depth, which are utilized for calculating the kernel value. In order to restrict the size of the produced graphs, a parameter :math:`h` was introduced, which restricts the maximum depth of the BFS exploration when doing the graph decomposition.
84 |
85 | The Ordered Dag Decomposition - Sub-Tree :math:`h` (ODD-STh) kernel can be found implemented below:
86 |
87 | .. currentmodule:: grakel
88 |
89 | .. autosummary::
90 |
91 | OddSth
92 |
93 | .. note::
94 |
95 | Because the :math:`Big^{2}DAG` graph should be preserved through consequent transformations, the cost of copying it may make a single :code:`fit_transform` call on all the *train* and *test* graphs faster than fitting on the *train* graphs and transforming the *test* graphs (see the sketch below).
96 |
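A minimal sketch of that usage pattern follows (the toy graphs and labels are arbitrary, and :code:`h` is assumed to be the constructor name of the depth bound described above):

.. code-block:: python

    from grakel import Graph, OddSth

    train = [Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]], node_labels={0: 'a', 1: 'b', 2: 'b'}),
             Graph([[0, 1], [1, 0]], node_labels={0: 'a', 1: 'a'})]
    test = [Graph([[0, 1, 0], [1, 0, 1], [0, 1, 0]], node_labels={0: 'b', 1: 'a', 2: 'b'})]

    # A single fit_transform call on train + test, followed by slicing, avoids
    # copying the Big^2DAG between fit and transform.
    K = OddSth(h=3).fit_transform(train + test)

    n = len(train)
    K_train = K[:n, :n]   # train x train block
    K_test = K[n:, :n]    # test x train block
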
97 | Bibliography
98 | ------------
99 | .. bibliography:: graph_kernels.bib
100 | :filter: docname in docnames
--------------------------------------------------------------------------------
/doc/kernels/propagation.rst:
--------------------------------------------------------------------------------
1 | .. _propagation:
2 |
3 | Propagation Kernel
4 | ======================
5 | Propagation kernels were introduced as a general framework in :cite:`neumann2015propagation`. They are based on the idea of propagating label information between the nodes of a graph, guided by the graph structure.
6 | A graph is considered to have **attributes** on its nodes, where, in the case of labels, these correspond to one-hot vectors over the full dictionary of labels.
7 | The totality of nodes of each graph can then be seen as a matrix of probability distributions :math:`P` of size :math:`n \times d`, where :math:`n` corresponds to the number of nodes and :math:`d` to the size of the attributes.
8 | The idea of diffusion is then applied in order to construct the algorithmic framework of propagation kernels.
9 | Given a transition matrix :math:`T` that is row-normalized, an iterative propagation scheme is built on the basis of the following simple substitution rule:
10 |
11 | .. math::
12 |
13 | P_{t+1} \leftarrow T P_{t}
14 |
15 | Unless a transition matrix is supplied by the user, :math:`T = D^{-1}A` is used as the default for each graph, where :math:`D = diag(\sum_{j} A_{ij})` and :math:`A` corresponds to the adjacency matrix of this graph.
16 | The general algorithm for propagation kernels is as follows:
17 |
18 | .. figure:: ../_static/marion1.png
19 |
20 | The general algorithmic scheme for propagation kernels.
21 |
22 |
23 | The kernel computation :math:`\langle \Phi, \Phi \rangle_{ij}` at iteration :math:`t`, between two graphs :math:`i, j`, is equivalent to:
24 |
25 | .. math::
26 |
27 | K(G^{(i)}_{t}, G^{(j)}_{t}) = \sum_{u \in G^{(i)}_{t}} \sum_{v \in G^{(j)}_{t}} k(u, v)
28 |
29 | where the node kernel :math:`k(u, v)` is resolved through binning.
30 | In order to bin nodes, a method was needed that is both efficient and expressive.
31 | A simple hashing function was considered a bad idea, as it would place similar values into different bins. A sense of *locality* was needed when binning, in order to group similar diffusion patterns into the same bin, similar to what is shown in the following:
32 |
33 | .. figure:: ../_static/marion2.png
34 |
35 | A binning scheme between a two step label propagation.
36 |
37 | For that, the technique of locality-sensitive hashing [**LSH**] was utilized, which is applied to all the input graphs as shown in the following:
38 |
39 | .. figure:: ../_static/marion3.png
40 |
41 | The locally sensitive function implemented inside the kernel.
42 |
43 | Finally, the following algorithm was implemented for our case, where all graphs are considered to be *fully labeled*:
44 |
45 | .. figure:: ../_static/marion4.png
46 |
47 | The propagation kernel algorithm, which was implemented inside the package.
48 |
49 | In the case of an attributed graph, the initialization :math:`P_{0} \leftarrow \delta_{l(V)}` is replaced by :math:`P_{0} \leftarrow attr(V)`, assuming that all node attributes have the same dimension.
50 |
51 | Implementations of the propagation kernel, both for labeled and for attributed graphs, can be found below:
52 |
53 | .. currentmodule:: grakel
54 |
55 | .. autosummary::
56 |
57 | Propagation
58 | PropagationAttr
59 |
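A usage sketch for both variants follows (the toy graphs are arbitrary; :code:`t_max`, assumed here to be the constructor name of the number of propagation iterations, and the attribute-vector input format are assumptions):

.. code-block:: python

    from grakel import Graph, Propagation, PropagationAttr

    # Labeled graphs are handled by Propagation.
    g1 = Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]], node_labels={0: 'a', 1: 'b', 2: 'b'})
    g2 = Graph([[0, 1], [1, 0]], node_labels={0: 'a', 1: 'a'})
    K_labeled = Propagation(t_max=5).fit_transform([g1, g2])

    # Attributed graphs (fixed-dimensional vectors per node) are handled by PropagationAttr.
    h1 = Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]],
               node_labels={0: [1.0, 0.2], 1: [0.1, 0.9], 2: [0.0, 0.7]})
    h2 = Graph([[0, 1], [1, 0]], node_labels={0: [0.9, 0.1], 1: [0.2, 0.8]})
    K_attr = PropagationAttr(t_max=5).fit_transform([h1, h2])
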
60 | Bibliography
61 | ------------
62 | .. bibliography:: graph_kernels.bib
63 | :filter: docname in docnames
--------------------------------------------------------------------------------
/doc/kernels/pyramid_match.rst:
--------------------------------------------------------------------------------
1 | .. _pyramid_match:
2 |
3 | Pyramid Match Kernel
4 | ====================
5 |
6 | The pyramid match kernel is a very popular algorithm in Computer Vision, and has proven useful for many applications including object recognition and image retrieval :cite:`grauman2007pyramid`, :cite:`lazebnik2006beyond`.
7 | The pyramid match graph kernel extends its applicability to graph-structured data :cite:`nikolentzos2017matching`.
8 | The kernel can handle both unlabeled graphs and graphs that contain discrete node labels.
9 |
10 | The pyramid match graph kernel first embeds the vertices of each graph into a low-dimensional vector space using the eigenvectors of the :math:`d` largest in magnitude eigenvalues of the adjacency matrix of the graph.
11 | Since the signs of these eigenvectors are arbitrary, it replaces all their components by their absolute values.
12 | Each vertex is thus a point in the :math:`d`-dimensional unit hypercube.
13 | To find an approximate correspondence between the sets of vertices of two graphs, the kernel maps these points to multi-resolution histograms, and compares the emerging histograms with a weighted histogram intersection function.
14 |
15 | Initially, the kernel partitions the feature space into regions of increasingly larger size and takes a weighted sum of the matches that occur at each level.
16 | Two points match with each other if they fall into the same region.
17 | Matches made within larger regions are weighted less than those found in smaller regions.
18 | The kernel repeatedly fits a grid with cells of increasing size to the :math:`d`-dimensional unit hypercube.
19 | Each cell is related only to a specific dimension and its size along that dimension is doubled at each iteration, while its size along the other dimensions stays constant and equal to :math:`1`.
20 | Given a sequence of levels from :math:`0` to :math:`L`, then at level :math:`l`, the :math:`d`-dimensional unit hypercube has :math:`2^l` cells along each dimension and :math:`D = 2^{l}d` cells in total.
21 | Given a pair of graphs :math:`G,G'`, let :math:`H_G^l` and :math:`H_{G'}^l` denote the histograms of :math:`G` and :math:`G'` at level :math:`l` and :math:`H_G^l(i)`, :math:`H_{G'}^l(i)`, the number of vertices of :math:`G`, :math:`G'` that lie in the :math:`i^{th}` cell.
22 | The number of points in two sets which match at level :math:`l` is then computed using the histogram intersection function
23 |
24 | .. math::
25 |
26 | I(H_G^l,H_{G'}^l) = \sum_{i=1}^D \min\big(H_G^l(i),H_{G'}^l(i)\big)
27 |
28 | The matches that occur at level :math:`l` also occur at levels :math:`0, \ldots, l-1`.
29 | We are interested in the number of new matches found at each level which is given by :math:`I(H_{G_1}^l,H_{G_2}^l) - I(H_{G_1}^{l+1},H_{G_2}^{l+1})` for :math:`l=0,\ldots,L-1`.
30 | The number of new matches found at each level in the pyramid is weighted according to the size of that level's cells.
31 | Matches found within smaller cells are weighted more than those made in larger cells.
32 | Specifically, the weight for level :math:`l` is set equal to :math:`\frac{1}{2^{L-l}}`.
33 | Hence, the weights are inversely proportional to the length of the side of the cells that varies in size as the levels increase.
34 | The pyramid match kernel is then defined as follows
35 |
36 | .. math::
37 |
38 | k(G,G') = I(H_G^L,H_{G'}^L) + \sum_{l=0}^{L-1} \frac{1}{2^{L-l}}\big(I(H_G^l,H_{G'}^l) - I(H_G^{l+1},H_{G'}^{l+1})\big)
39 |
40 | The complexity of the pyramid match kernel is :math:`\mathcal{O}(dnL)` where :math:`n` is the number of nodes of the graphs under comparison.
41 |
42 | In the case of labeled graphs, the kernel restricts matchings to occur only between vertices that share same labels.
43 | It represents each graph as a set of sets of vectors, and matches pairs of sets of two graphs corresponding to the same label using the pyramid match kernel.
44 | The emerging kernel for labeled graphs corresponds to the sum of the separate kernels
45 |
46 | .. math::
47 |
48 | k(G, G') = \sum_{i=1}^c k^i(G,G')
49 |
50 | where :math:`c` is the number of distinct labels and :math:`k^i(G_1,G_2)` is the pyramid match kernel between the sets of vertices of the two graphs which are assigned the label :math:`i`.
51 |
52 | The above kernel is implemented below
53 |
54 | .. currentmodule:: grakel
55 |
56 | .. autosummary::
57 |
58 | PyramidMatch
59 |
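A usage sketch follows (the toy labeled graphs are arbitrary; the number of levels :math:`L`, the embedding dimension :math:`d` and the use of labels are controlled through the constructor, whose defaults are kept here):

.. code-block:: python

    from grakel import Graph, PyramidMatch

    g1 = Graph([[0, 1, 1, 0], [1, 0, 1, 1], [1, 1, 0, 1], [0, 1, 1, 0]],
               node_labels={0: 'a', 1: 'b', 2: 'a', 3: 'b'})
    g2 = Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]], node_labels={0: 'a', 1: 'b', 2: 'b'})

    K = PyramidMatch(normalize=True).fit_transform([g1, g2])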
60 | Bibliography
61 | ------------
62 | .. bibliography:: graph_kernels.bib
63 | :filter: docname in docnames
64 |
--------------------------------------------------------------------------------
/doc/kernels/random_walk.rst:
--------------------------------------------------------------------------------
1 | .. _random_walk:
2 |
3 | .. raw:: latex
4 |
5 | \newtheorem{definition}{Definition}
6 |
7 | Random Walk Kernel
8 | ==================
9 | The most well-studied family of graph kernels is probably the *random walk kernels* which quantify the similarity between a pair of graphs based on the number of common walks in the two graphs
10 | :cite:`kashima2003marginalized`, :cite:`gartner2003graph`, :cite:`mahe2004extensions`, :cite:`borgwardt2005protein`, :cite:`vishwanathan2010graph`, :cite:`sugiyama2015halting`.
11 |
12 | Kernels belonging to this family have concentrated mainly on counting matching walks in the two input graphs. There are several variations of random walk kernels. The :math:`k`-step random walk kernel compares random walks up to length :math:`k` in the two graphs. The most widely-used kernel from this family is the geometric random walk kernel :cite:`gartner2003graph` which compares walks up to infinity assigning a weight :math:`\lambda^k` (:math:`\lambda < 1`) to walks of length :math:`k` in order to ensure convergence of the corresponding geometric series. We next give the formal definition of the geometric random walk kernel.
13 | Given two node-labeled graphs :math:`G_i=(V_i,E_i)` and :math:`G_j=(V_j,E_j)`, their direct product
14 | :math:`G_\times=(V_\times,E_\times)` is a graph with vertex set:
15 |
16 | .. math::
17 | :nowrap:
18 |
19 | \begin{equation}
20 | V_{\times} = \{(v_i,v_j) : v_i \in V_i \wedge v_j \in V_j \wedge \ell(v_i) = \ell(v_j) \}
21 | \end{equation}
22 |
23 | and edge set:
24 |
25 | .. math::
26 | :nowrap:
27 |
28 | \begin{equation}
29 | E_{\times} = \{\{(v_i,v_j),(u_i,u_j)\} : \{v_i,u_i\} \in E_i \wedge \{v_j,u_j\} \in E_j\}
30 | \end{equation}
31 |
32 | Performing a random walk on :math:`G_{\times}` is equivalent to performing a simultaneous random walk
33 | on :math:`G_i` and :math:`G_j`.
34 | The geometric random walk kernel counts common walks (of potentially infinite length) in two graphs
35 | and is defined as follows.
36 |
37 | Definition: Geometric Random Walk Kernel
38 | ----------------------------------------
39 |
40 | Let :math:`G_i` and :math:`G_j` be two graphs, let :math:`A_\times` denote the adjacency matrix of their
41 | product graph :math:`G_\times`, and let :math:`V_\times` denote the vertex set of the product
42 | graph :math:`G_\times`.
43 |
44 | Then, the geometric random walk kernel is defined as
45 |
46 | .. math::
47 | :nowrap:
48 |
49 | \begin{equation}
50 | K_{\times}^{\infty}(G_i,G_j) = \sum_{p,q=1}^{|V_{\times}|} \Big[ \sum_{l=0}^{\infty} \lambda^l A_{\times}^l \Big]_{pq} = e^T(I - \lambda A_{\times})^{-1} e
51 | \end{equation}
52 |
53 | where :math:`I` is the identity matrix, :math:`e` is the all-ones vector, and :math:`\lambda`
54 | is a positive, real-valued weight. The geometric random walk kernel converges only if
55 | :math:`\lambda < \frac{1}{\lambda_\times}` where :math:`\lambda_\times` is the largest eigenvalue of
56 | :math:`A_{\times}`.
57 |
58 | Direct computation of the geometric random walk kernel requires :math:`\mathcal{O}(n^6)` time.
59 | The computational complexity of the method severely limits its applicability to real-world applications.
60 | To account for this, Vishwanathan et al. proposed in :cite:`vishwanathan2010graph` four efficient
61 | methods to compute random walk graph kernels which generally reduce the computational complexity from
62 | :math:`\mathcal{O}(n^6)` to :math:`\mathcal{O}(n^3)`.
63 | Mahé et al. proposed in :cite:`mahe2004extensions` some other extensions of random walk kernels.
64 | Specifically, they proposed a label enrichment approach which increases specificity and in most
65 | cases also reduces computational complexity.
66 | They also employed a second order Markov random walk to deal with the problem of "tottering".
67 | Sugiyama and Borgwardt focused in :cite:`sugiyama2015halting` on a different problem of random walk
68 | kernels, a phenomenon referred to as "halting".
69 |
70 | Next follow two implementations of this kernel (one for unlabeled graphs and one for graphs with discrete node labels)
71 |
72 | .. currentmodule:: grakel
73 |
74 | .. autosummary::
75 | RandomWalk
76 | RandomWalkLabeled
77 |
78 |
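A usage sketch of the two variants follows (the toy graphs are arbitrary; the decay weight :math:`\lambda` and related options can be set through the constructor, whose defaults are kept here):

.. code-block:: python

    from grakel import Graph, RandomWalk, RandomWalkLabeled

    # Unlabeled graphs.
    g1 = Graph([[0, 1, 1, 0], [1, 0, 1, 1], [1, 1, 0, 1], [0, 1, 1, 0]])
    g2 = Graph([[0, 1, 0], [1, 0, 1], [0, 1, 0]])
    K_unlabeled = RandomWalk(normalize=True).fit_transform([g1, g2])

    # Graphs with discrete node labels.
    h1 = Graph([[0, 1, 1], [1, 0, 0], [1, 0, 0]], node_labels={0: 'a', 1: 'b', 2: 'b'})
    h2 = Graph([[0, 1], [1, 0]], node_labels={0: 'a', 1: 'b'})
    K_labeled = RandomWalkLabeled(normalize=True).fit_transform([h1, h2])
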
79 | Bibliography
80 | ------------
81 | .. bibliography:: graph_kernels.bib
82 | :filter: docname in docnames
83 |
--------------------------------------------------------------------------------
/doc/kernels/shortest_path.rst:
--------------------------------------------------------------------------------
1 | .. _shortest_path:
2 |
3 | Shortest Path Kernel
4 | ====================
5 | The shortest-path kernel decomposes graphs into shortest paths and compares pairs of shortest paths
6 | according to their lengths and the labels of their endpoints.
7 | The first step of the shortest-path kernel is to transform the input graphs into shortest-paths graphs.
8 | Given an input graph :math:`G=(V,E)`, we create a new graph :math:`S=(V,E_s)` (i.e. its shortest-path graph).
9 | The shortest-path graph :math:`S` contains the same set of vertices as the graph from which it originates.
10 | The edge set of the former is a superset of that of the latter, since in the shortest-path graph :math:`S`,
11 | there exists an edge between every pair of vertices that are connected by a path in the original graph :math:`G`.
12 | To complete the transformation, we assign labels to all the edges of the shortest-path graph :math:`S`.
13 | The label of each edge is set equal to the shortest distance between its endpoints in the original graph :math:`G`.
14 |
15 | Given the above procedure for transforming a graph into a shortest-path graph, the shortest-path kernel is defined as follows.
16 |
17 | Definition: Shortest-Path Kernel
18 | --------------------------------
19 | Let :math:`G_i`, :math:`G_j` be two graphs, and :math:`S_i`, :math:`S_j` their corresponding shortest-path graphs.
20 |
21 | The shortest-path kernel is then defined on :math:`S_i=(V_i,E_i)` and :math:`S_j=(V_j,E_j)` as
22 |
23 | .. math::
24 | :nowrap:
25 |
26 | \begin{equation}
27 | k(S_i,S_j) = \sum_{e_i \in E_i} \sum_{e_j \in E_j} k_{walk}^{(1)}(e_i, e_j)
28 | \end{equation}
29 |
30 | where :math:`k_{walk}^{(1)}(e_i, e_j)` is a positive semidefinite kernel on edge walks of length :math:`1`.
31 |
32 | In labeled graphs, the :math:`k_{walk}^{(1)}(e_i, e_j)` kernel is designed to compare both the lengths
33 | of the shortest paths corresponding to edges :math:`e_i` and :math:`e_j`, and the labels of their endpoint vertices.
34 |
35 | Let :math:`e_i = \{v_i, u_i\}` and :math:`e_j = \{v_j, u_j\}`.
36 | Then, :math:`k_{walk}^{(1)}(e_i, e_j)` is usually defined as:
37 |
38 | .. math::
39 | :nowrap:
40 |
41 | \begin{equation}
42 | \begin{split}
43 | k_{walk}^{(1)}(e_i, e_j) &= k_v(\ell(v_i),\ell(v_j)) \ k_e(\ell(e_i),\ell(e_j)) \ k_v(\ell(u_i),\ell(u_j)) \\
44 | &+ k_v(\ell(v_i),\ell(u_j)) \ k_e(\ell(e_i),\ell(e_j)) \ k_v(\ell(u_i),\ell(v_j))
45 | \end{split}
46 | \end{equation}
47 |
48 | where :math:`k_v` is a kernel comparing vertex labels, and :math:`k_e` a kernel comparing shortest path lengths.
49 | Vertex labels are usually compared via a Dirac kernel, while shortest path lengths may also be compared via
50 | a Dirac kernel or, more rarely, via a Brownian bridge kernel :cite:`borgwardt2005shortest`.
51 |
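As an illustration, a Dirac-based instantiation of :math:`k_{walk}^{(1)}` could be written as follows (a toy sketch; the function names and the way labels and lengths are passed are assumptions, not GraKeL internals).

.. code-block:: python

    def dirac(a, b):
        return 1 if a == b else 0

    def k_walk_1(e_i, e_j, len_i, len_j, lab_i, lab_j):
        """Compare two shortest-path edges via Dirac kernels on endpoint labels and path lengths."""
        (v_i, u_i), (v_j, u_j) = e_i, e_j
        k_e = dirac(len_i, len_j)   # compare shortest-path lengths
        return (dirac(lab_i[v_i], lab_j[v_j]) * k_e * dirac(lab_i[u_i], lab_j[u_j])
                + dirac(lab_i[v_i], lab_j[u_j]) * k_e * dirac(lab_i[u_i], lab_j[v_j]))
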
52 | In terms of runtime complexity, the shortest-path kernel is very expensive since its computation takes :math:`\mathcal{O}(n^4)` time.
53 |
54 |
55 | Two versions of this kernel are implemented below. The first takes as input graphs with discrete node labels and applies a speed-up technique for faster kernel computation, while the second operates on graphs whose vertices are annotated with attributes.
56 |
57 | .. currentmodule:: grakel
58 |
59 | .. autosummary::
60 |
61 | ShortestPath
62 | ShortestPathAttr
63 |
64 | Bibliography
65 | ------------
66 | .. bibliography:: graph_kernels.bib
67 | :filter: docname in docnames
68 |
--------------------------------------------------------------------------------
/doc/kernels/subgraph_matching.rst:
--------------------------------------------------------------------------------
1 | .. _subgraph_matching:
2 |
3 | Subgraph Matching Kernel
4 | ========================
5 |
6 | The subgraph matching kernel counts the number of matchings between subgraphs of bounded size in two graphs :cite:`kriege2012subgraph`.
7 | The kernel is very general since it can be applied to graphs that contain node labels, edge labels, node attributes or edge attributes.
8 |
9 | Let :math:`\mathcal{G}` be a set of graphs.
10 | We assume that the graphs that are contained in the set are labeled or attributed.
11 | Specifically, let :math:`\ell : \mathcal{V} \cup \mathcal{E} \rightarrow \mathcal{L}` be a labeling function that assigns either discrete labels or continuous attributes to vertices and edges.
12 | A graph isomorphism between two labeled/attributed graphs :math:`G=(V,E)` and :math:`G'=(V',E')` is a bijection :math:`\phi : V \rightarrow V'` that preserves adjacencies, i.e. :math:`\forall v,u \in V : (v,u) \in E \Leftrightarrow (\phi(v), \phi(u)) \in E'`, and labels, i.e. if :math:`\psi : V \times V \rightarrow V' \times V'` is the mapping of vertex pairs induced by the bijection :math:`\phi` such that :math:`\psi((v,u)) = (\phi(v), \phi(u))`, then the conditions :math:`\forall v \in V : \ell(v) \equiv \ell(\phi(v))` and :math:`\forall e \in E : \ell(e) \equiv \ell(\psi(e))` must hold, where :math:`\equiv` denotes that two labels are considered equivalent.
13 |
14 | Given two graphs :math:`G=(V,E)` and :math:`G'=(V',E')`, let :math:`\mathcal{B}(G,G')` denote the set of all bijections between sets :math:`S \subseteq V` and :math:`S' \subseteq V'`, and let :math:`\lambda : \mathcal{B}(G,G') \rightarrow \mathbb{R}^+` be a weight function.
15 | The subgraph matching kernel is defined as
16 |
17 | .. math::
18 |
19 | k(G, G') = \sum_{\phi \in \mathcal{B}(G,G')} \lambda(\phi) \prod_{v \in S} \kappa_V(v, \phi(v)) \prod_{e \in S \times S} \kappa_E(e, \psi(e))
20 |
21 | where :math:`S = dom(\phi)` and :math:`\kappa_V, \kappa_E` are kernel functions defined on vertices and edges, respectively.
22 |
23 | The instance of the subgraph matching kernel that is obtained if we set the :math:`\kappa_V, \kappa_E` functions as follows
24 |
25 | .. math::
26 |
27 | \begin{split}
28 | \kappa_V(v,v') &= \begin{cases}
29 | 1, & \text{if } \ell(v) \equiv \ell(v'),\\
30 | 0, & \text{otherwise and}
31 | \end{cases}\\
32 | \kappa_E(e,e') &= \begin{cases}
33 | 1, & \text{if } e \in E \wedge e' \in E' \wedge \ell(e) \equiv \ell(e') \text{ or } e \not \in E \wedge e' \not \in E',\\
34 | 0, & \text{otherwise.}
35 | \end{cases}
36 | \end{split}
37 |
38 | is known as the common subgraph isomorphism kernel.
39 | This kernel counts the number of isomorphic subgraphs contained in two graphs.
40 |
41 | To count the number of isomorphisms between subgraphs, the kernel capitalizes on a classical result of Levi :cite:`levi1973note` which makes a connection between common subgraphs of two graphs and cliques in their product graph.
42 | More specifically, each maximum clique in the product graph is associated with a maximum common subgraph of the factor graphs.
43 | This allows one to compute the common subgraph isomorphism kernel by enumerating the cliques of the product graph.
44 |
45 | The general subgraph matching kernel extends the theory of Levi and builds a weighted product graph to allow a more flexible scoring of bijections.
46 | Given two graphs :math:`G=(V,E)`, :math:`G'=(V',E')`, and vertex and edge kernels :math:`\kappa_V` and :math:`\kappa_E`, the weighted product graph :math:`G_P=(V_P, E_P)` of :math:`G` and :math:`G'` is defined as
47 |
48 | .. math::
49 |
50 | \begin{split}
51 | V_P &= \{ (v,v') \in V \times V' : \kappa_V(v,v') > 0 \} \\
52 | E_P &= \{ ((v,v'),(u,u')) \in V_P \times V_P : v \neq u \wedge v' \neq u' \wedge \kappa_E((v,v'),(u,u')) > 0 \} \\
53 | c(u) &= \kappa_V(v,v') \quad \forall u=(v,v') \in V_P \\
54 | c(e) &= \kappa_E((v,u),(v',u')) \quad \forall e \in E_P, \\
55 | \text{where } &e=((v,v'),(u,u'))
56 | \end{split}
57 |
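A toy sketch of this construction for node-labeled graphs with a Dirac vertex kernel is given below (edge labels are ignored for brevity; the graph representation and the function name are illustrative assumptions, not the GraKeL implementation).

.. code-block:: python

    def weighted_product_graph(V1, lab1, E1, V2, lab2, E2):
        """V*: node lists, lab*: node-label dicts, E*: sets of undirected edge pairs."""
        Vp = [(v, w) for v in V1 for w in V2 if lab1[v] == lab2[w]]   # kappa_V > 0
        Ep = {}
        for (v, w) in Vp:
            for (u, x) in Vp:
                if v != u and w != x:
                    in_E1 = (v, u) in E1 or (u, v) in E1
                    in_E2 = (w, x) in E2 or (x, w) in E2
                    if in_E1 == in_E2:                    # both edges present or both absent
                        Ep[((v, w), (u, x))] = 1.0        # Dirac edge kernel value
        return Vp, Ep

    V1, lab1, E1 = [0, 1, 2], {0: 'a', 1: 'b', 2: 'a'}, {(0, 1), (1, 2)}
    V2, lab2, E2 = [0, 1], {0: 'a', 1: 'b'}, {(0, 1)}
    print(weighted_product_graph(V1, lab1, E1, V2, lab2, E2))
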
58 | After creating the weighted product graph, the kernel enumerates its cliques.
59 | The kernel starts from an empty clique and extends it stepwise by all vertices preserving the clique property.
60 | Let :math:`w` be the weight of a clique :math:`C`.
61 | Whenever the clique :math:`C` is extended by a new vertex :math:`v`, the weight of the clique is updated as follows: first it is multiplied by the weight of the new vertex, :math:`w' = w \cdot c(v)`, and then by the weights of all the edges connecting :math:`v` to a vertex in :math:`C`, that is :math:`w' \leftarrow w' \cdot \prod_{u \in C} c((v,u))`.
62 | The algorithm effectively avoids duplicates by removing a vertex from the candidate set after all cliques containing it have been exhaustively explored.
63 |
64 | The runtime of the subgraph matching kernel depends on the number of cliques in the product graph.
65 | The worst-case runtime complexity of the kernel when considering subgraphs of size up to :math:`k` is :math:`\mathcal{O}(kn^{k+1})`, where :math:`n=|V|+|V'|` is the sum of the number of vertices of the two graphs.
66 |
67 | The implementation of the subgraph matching kernel can be found below
68 |
69 | .. currentmodule:: grakel
70 |
71 | .. autosummary::
72 |
73 | SubgraphMatching
74 |
75 | Bibliography
76 | ------------
77 | .. bibliography:: graph_kernels.bib
78 | :filter: docname in docnames
79 |
--------------------------------------------------------------------------------
/doc/kernels/svm_theta.rst:
--------------------------------------------------------------------------------
1 | .. _svm_theta:
2 |
3 | SVM Theta Kernel
4 | ================
5 |
6 | The SVM-:math:`\vartheta` kernel is closely related to the Lovász :math:`\vartheta` kernel :cite:`johansson2014global`.
7 | The Lovász :math:`\vartheta` kernel suffers from high computational complexity, and the SVM-:math:`\vartheta` kernel was developed as a more efficient alternative.
8 | Similar to the Lovász :math:`\vartheta` kernel, this kernel also assumes unlabeled graphs.
9 |
10 | Given a graph :math:`G=(V,E)` such that :math:`|V| = n`, the Lovász number of :math:`G` can be defined as
11 |
12 | .. math::
13 |
14 | \vartheta(G) = \min_{\mathbf{K} \in L} \omega(\mathbf{K})
15 |
16 | where :math:`\omega(\mathbf{K})` is the one-class SVM given by
17 |
18 | .. math:: \omega(\mathbf{K}) = \max_{\alpha_i > 0} 2\sum_{i=1}^{n} \alpha_i - \sum_{i=1}^{n} \sum_{j=1}^{n} \alpha_i \alpha_j \mathbf{K}_{ij}
19 | :label: oneclass_svm
20 |
21 |
22 | and :math:`L` is a set of positive semidefinite matrices defined as
23 |
24 | .. math::
25 |
26 | L = \{ \mathbf{K} \in S_{n}^+ : \mathbf{K}_{ii} = 1, \mathbf{K}_{ij}=0 \: \forall (i,j) \not \in E \}
27 |
28 | where :math:`S_{n}^+` is the set of all :math:`n \times n` positive semidefinite matrices.
29 |
30 | The SVM-:math:`\vartheta` kernel first computes the matrix :math:`\mathbf{K}_{LS}` which is equal to
31 |
32 | .. math::
33 |
34 | \mathbf{K}_{LS} = \frac{\mathbf{A}}{\rho} + \mathbf{I}
35 |
36 | where :math:`\mathbf{A}` is the adjacency matrix of :math:`G`, :math:`\mathbf{I}` is the :math:`n \times n` identity matrix, and :math:`\rho \geq -\lambda_n` with :math:`\lambda_n` the minimum eigenvalue of :math:`\mathbf{A}`.
37 | The matrix :math:`\mathbf{K}_{LS}` is positive semidefinite by construction and it has been shown in :cite:`jethava2013lovasz` that
38 |
39 | .. math::
40 |
41 | \omega(\mathbf{K}_{LS}) = \sum_{i=1}^n \alpha_i
42 |
43 | where :math:`\alpha_i` are the maximizers of Equation :eq:`oneclass_svm`.
44 | Furthermore, it was shown that on certain families of graphs (e.g. Erdős–Rényi random graphs), :math:`\omega(\mathbf{K}_{LS})` is with high probability a constant factor approximation to :math:`\vartheta(G)`.
45 |
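The construction of :math:`\mathbf{K}_{LS}` is simple enough to sketch directly. The snippet below is an illustrative NumPy sketch, not GraKeL code; in particular, the small guard on :math:`\rho` for edgeless graphs is an assumption of this sketch.

.. code-block:: python

    import numpy as np

    def K_LS(A):
        lam_min = np.linalg.eigvalsh(A).min()      # minimum eigenvalue of A
        rho = max(-lam_min, 1e-12)                 # any rho >= -lambda_n works
        return A / rho + np.eye(A.shape[0])        # positive semidefinite by construction

    A = np.array([[0, 1, 1],
                  [1, 0, 0],
                  [1, 0, 0]], dtype=float)
    print(np.linalg.eigvalsh(K_LS(A)).min() >= -1e-9)  # True
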
46 | Then, the SVM-:math:`\vartheta` kernel is defined as follows
47 |
48 | .. math::
49 |
50 | k_{SVM}(G, G') = \sum_{S \subseteq V} \sum_{S' \subseteq V'} \delta(|S|, |S'|) \frac{1}{Z_{|S|}} k \Big(\sum_{i \in S} \alpha_i, \sum_{j \in S'} \alpha_j \Big)
51 |
52 | where :math:`Z_{|S|} = \binom{|V|}{|S|} \binom{|V'|}{|S|}`, :math:`\delta(|S|, |S'|)` is a delta kernel (equal to :math:`1` if :math:`|S|=|S'|`, and :math:`0` otherwise), and :math:`k` is a positive semidefinite kernel between real values (e.g. the linear or the Gaussian kernel).
53 |
54 | The SVM-:math:`\vartheta` kernel consists of three main steps: (:math:`1`) constructing the matrix :math:`\mathbf{K}_{LS}` of :math:`G`, which takes :math:`\mathcal{O}(n^3)` time, (:math:`2`) solving the one-class SVM problem in :math:`\mathcal{O}(n^2)` time to obtain the :math:`\alpha_i` values, and (:math:`3`) computing the sum of the :math:`\alpha_i` values for all subgraphs (i.e. subsets of vertices :math:`S \subseteq V`) of each graph.
55 | Computing the above quantity for all :math:`2^n` sets of vertices is not feasible in real-world scenarios.
56 |
57 | To address the above issue, the SVM-:math:`\vartheta` kernel employs sampling schemes.
58 | Given a graph :math:`G`, the kernel samples a specific number of subgraphs, forming a collection :math:`\mathfrak{S} \subseteq 2^V`.
59 | Then, the SVM-:math:`\vartheta` kernel is defined as follows
60 |
61 | .. math::
62 |
63 | \hat{k}_{SVM}(G, G') = \sum_{S \in \mathfrak{S}} \sum_{S' \in \mathfrak{S}'} \delta(|S|, |S'|) \frac{1}{\hat{Z}_{|S|}} k \Big(\sum_{i \in S} \alpha_i, \sum_{j \in S'} \alpha_j \Big)
64 |
65 | where :math:`\hat{Z}_{|S|} = |\mathfrak{S}_{|S|}| |\mathfrak{S}'_{|S|}|` and :math:`\mathfrak{S}_{|S|}` denotes the subset of :math:`\mathfrak{S}` consisting of all sets of cardinality :math:`|S|`, that is :math:`\mathfrak{S}_{|S|} = \{ B \in \mathfrak{S} : |B| = |S| \}`.
66 |
67 | The time complexity of computing :math:`\hat{k}_{SVM}(G, G')` is :math:`\mathcal{O}(n^3 + s^2 T(k) + sn)` where :math:`T(k)` is the complexity of computing the base kernel :math:`k` and :math:`s = \max(|\mathfrak{S}|, |\mathfrak{S}'|)`.
68 | The first term represents the cost of computing :math:`\mathbf{K}_{LS}` (dominated by the eigenvalue decomposition).
69 | The second term corresponds to the worst-case complexity of comparing the sums of the :math:`\alpha_i` values.
70 | And finally, the third term is the cost of computing the sum of the :math:`\alpha_i` values for the sampled subsets of vertices.
71 |
72 | The implementation of the SVM-:math:`\vartheta` kernel can be found below
73 |
74 | .. currentmodule:: grakel
75 |
76 | .. autosummary::
77 |
78 | SvmTheta
79 |
80 | Bibliography
81 | ------------
82 | .. bibliography:: graph_kernels.bib
83 | :filter: docname in docnames
84 |
--------------------------------------------------------------------------------
/doc/kernels/vertex_histogram.rst:
--------------------------------------------------------------------------------
1 | .. _vertex_histogram:
2 |
3 | Vertex Histogram Kernel
4 | =======================
5 |
6 | The vertex histogram kernel is a basic linear kernel on vertex label histograms.
7 | The kernel assumes node-labeled graphs.
8 | Let :math:`\mathcal{G}` be a collection of graphs, and assume that each of their vertices comes from an abstract vertex space :math:`\mathcal{V}`.
9 | Given a set of node labels :math:`\mathcal{L}`, :math:`\ell : \mathcal{V} \rightarrow \mathcal{L}` is a function that assigns labels to the vertices of the graphs.
10 | Assume that there are :math:`d` labels in total, that is :math:`d = |\mathcal{L}|`.
11 | Then, the vertex label histogram of a graph :math:`G=(V,E)` is a vector :math:`\mathbf{f} = (f_1, f_2, \ldots, f_d)`, such that :math:`f_i = |\{ v \in V : \ell(v) = i \}|` for each :math:`i \in \mathcal{L}`.
12 | Let :math:`\mathbf{f}, \mathbf{f}'` be the vertex label histograms of two graphs :math:`G, G'`, respectively.
13 | The vertex histogram kernel is then defined as the linear kernel between :math:`\mathbf{f}` and :math:`\mathbf{f}'`, that is
14 |
15 | .. math::
16 |
17 | k(G, G') = \langle \mathbf{f}, \mathbf{f}' \rangle
18 |
19 | The complexity of the vertex histogram kernel is linear in the number of vertices of the graphs.
20 |
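For intuition, the feature map and the kernel value can be reproduced with a few lines of plain Python (a toy sketch; the helper and the example labelings below are made up for illustration).

.. code-block:: python

    import numpy as np

    def vertex_histogram(node_labels, label_set):
        return np.array([sum(1 for l in node_labels.values() if l == c) for c in label_set])

    labels_G1 = {0: 'a', 1: 'b', 2: 'a'}
    labels_G2 = {0: 'a', 1: 'b', 2: 'c'}
    label_set = sorted(set(labels_G1.values()) | set(labels_G2.values()))

    f1 = vertex_histogram(labels_G1, label_set)   # [2, 1, 0]
    f2 = vertex_histogram(labels_G2, label_set)   # [1, 1, 1]
    print(np.dot(f1, f2))                         # k(G, G') = <f, f'> = 3
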
21 | An implementation of that kernel can be found below
22 |
23 | .. currentmodule:: grakel
24 |
25 | .. autosummary::
26 |
27 | VertexHistogram
28 |
--------------------------------------------------------------------------------
/doc/kernels/weisfeiler_lehman.rst:
--------------------------------------------------------------------------------
1 | .. _weisfeiler_lehman:
2 |
3 | Weisfeiler Lehman Framework
4 | ===========================
5 |
6 | The Weisfeiler-Lehman framework operates on top of existing graph kernels and is inspired by the
7 | Weisfeiler-Lehman test of graph isomorphism :cite:`weisfeiler1968reduction`.
8 | The key idea of the Weisfeiler-Lehman algorithm is to replace the label of each vertex with a multiset
9 | label consisting of the original label of the vertex and the sorted set of labels of its neighbors.
10 | The resultant multiset is then compressed into a new, short label.
11 | This relabeling procedure is then repeated for :math:`h` iterations.
12 | Note that this procedure is performed simultaneously on all input graphs.
13 | Therefore, two vertices from different graphs will get identical new labels
14 | if and only if they have identical multiset labels.
15 |
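A toy sketch of a single relabeling iteration, run jointly over two graphs, might look as follows (the adjacency-dict representation and the function name are illustrative assumptions; GraKeL's internal implementation differs).

.. code-block:: python

    def wl_iteration(graphs, labels):
        """graphs: list of adjacency dicts; labels: list of {node: label} dicts."""
        compressed = {}   # multiset label -> new short label, shared across all graphs
        new_labels = []
        for adj, lab in zip(graphs, labels):
            new_lab = {}
            for v in adj:
                multiset = (lab[v], tuple(sorted(lab[u] for u in adj[v])))
                new_lab[v] = compressed.setdefault(multiset, len(compressed))
            new_labels.append(new_lab)
        return new_labels

    G1 = {0: [1], 1: [0, 2], 2: [1]}            # path graph
    G2 = {0: [1, 2], 1: [0, 2], 2: [0, 1]}      # triangle
    print(wl_iteration([G1, G2],
                       [{0: 'a', 1: 'b', 2: 'a'}, {0: 'a', 1: 'a', 2: 'b'}]))
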
16 | More formally, given a graph :math:`G=(V,E)` endowed with a labeling function :math:`\ell=\ell_0`,
17 | the Weisfeiler-Lehman graph of :math:`G` at height :math:`i` is a graph :math:`G_i=(V,E)` endowed
18 | with a labeling function :math:`\ell_i` which has emerged after :math:`i` iterations of the
19 | relabeling procedure described above.
20 |
21 | The Weisfeiler-Lehman sequence up to height :math:`h` of :math:`G` consists of the Weisfeiler-Lehman
22 | graphs of :math:`G` at heights from :math:`0` to :math:`h`, :math:`\{ G_0,G_1,\ldots,G_h\}`.
23 |
24 |
25 | Definition: Weisfeiler-Lehman Framework
26 | ---------------------------------------
27 |
28 | Let :math:`k` be any kernel for graphs, that we will call the base kernel.
29 | Then the Weisfeiler-Lehman kernel with :math:`h` iterations with the base
30 | kernel :math:`k` between two graphs :math:`G` and :math:`G'` is defined as
31 |
32 | .. math::
33 | :nowrap:
34 |
35 | \begin{equation}
36 | k_{WL}(G,G') = k(G_0,G_0') + k(G_1,G_1') + \ldots + k(G_h,G_h')
37 | \end{equation}
38 |
39 | where :math:`h` is the number of Weisfeiler-Lehman iterations, and
40 | :math:`\{ G_0,G_1,\ldots,G_h\}` and :math:`\{ G_0',G_1',\ldots,G_h'\}`
41 | are the Weisfeiler-Lehman sequences of :math:`G` and :math:`G'` respectively.
42 |
43 | From the above definition, it is clear that any graph kernel that takes into
44 | account discrete node labels can take advantage of the Weisfeiler-Lehman framework
45 | and compare graphs based on the whole Weisfeiler-Lehman sequence.
46 |
47 | The general implementation of this framework can be found here:
48 |
49 | .. currentmodule:: grakel
50 |
51 | .. autosummary::
52 |
53 | WeisfeilerLehman
54 |
55 | It supports all :code:`Kernel` objects through its :code:`base_graph_kernel` parameter (provided they are formulated appropriately).
56 |
57 |
58 | Weisfeiler-Lehman Subtree Kernel
59 | --------------------------------
60 | The Weisfeiler-Lehman subtree kernel is one of the most popular graph kernels and achieves state-of-the-art accuracy on many graph classification benchmarks.
61 | Let :math:`G`, :math:`G'` be two graphs. Define :math:`\Sigma_i \subseteq \Sigma` as the set of letters that occur as node labels
62 | at least once in :math:`G` or :math:`G'` at the end of the :math:`i^{th}` iteration of the Weisfeiler-Lehman algorithm.
63 | Let :math:`\Sigma_0` be the set of original node labels of :math:`G` and :math:`G'`.
64 | Assume all :math:`\Sigma_i` are pairwise disjoint.
65 | Without loss of generality, assume that every :math:`\Sigma_i = \{ \sigma_{i1},\ldots,\sigma_{i|\Sigma_i|} \}` is ordered.
66 | Define a map :math:`c_i : \{ G,G' \} \times \Sigma_i \rightarrow \mathbb{N}` such that :math:`c_i(G, \sigma_{ij})`
67 | is the number of occurrences of the letter :math:`\sigma_{ij}` in the graph :math:`G`.
68 |
69 | The Weisfeiler-Lehman subtree kernel on two graphs :math:`G` and :math:`G'` with :math:`h` iterations is defined as
70 |
71 | .. math::
72 | :nowrap:
73 |
74 | \begin{equation}
75 | k(G,G') = \langle \phi(G),\phi(G') \rangle
76 | \end{equation}
77 |
78 | where
79 |
80 | .. math::
81 | :nowrap:
82 |
83 | \begin{equation}
84 | \phi(G) = (c_0(G,\sigma_{01}),\ldots,c_0(G,\sigma_{0|\Sigma_0|}),\ldots,c_h(G,\sigma_{h1}),\ldots,c_h(G,\sigma_{h|\Sigma_h|}))
85 | \end{equation}
86 |
87 | and
88 |
89 | .. math::
90 | :nowrap:
91 |
92 | \begin{equation}
93 | \phi(G') = (c_0(G',\sigma_{01}),\ldots,c_0(G',\sigma_{0|\Sigma_0|}),\ldots,c_h(G',\sigma_{h1}),\ldots,c_h(G',\sigma_{h|\Sigma_h|}))
94 | \end{equation}
95 |
96 | It can be shown that the above definition is equivalent to comparing the number of shared subtrees between the two input graphs :cite:`shervashidze2011weisfeiler`.
97 | It is interesting to note that the Weisfeiler-Lehman subtree kernel exhibits an attractive computational complexity since it can be computed in :math:`\mathcal{O}(hm)` time.
98 |
99 | .. note::
100 |
101 | To create an instance of the above kernel, use the :ref:`vertex_histogram` kernel as the :code:`base_graph_kernel`.
102 |
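A minimal usage sketch, mirroring the examples shipped with GraKeL, is shown below; the number of iterations and the :code:`graphs` variable are placeholders.

.. code-block:: python

    from grakel.kernels import WeisfeilerLehman, VertexHistogram

    # 'graphs' is assumed to be a list of graphs in any input format GraKeL accepts.
    gk = WeisfeilerLehman(n_iter=5, base_graph_kernel=VertexHistogram, normalize=True)
    # K = gk.fit_transform(graphs)
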
103 | Bibliography
104 | ------------
105 | .. bibliography:: graph_kernels.bib
106 | :filter: docname in docnames
107 |
--------------------------------------------------------------------------------
/doc/sphinxext/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include tests *.py
2 | include *.txt
3 |
--------------------------------------------------------------------------------
/doc/sphinxext/github_link.py:
--------------------------------------------------------------------------------
1 | from operator import attrgetter
2 | import inspect
3 | import subprocess
4 | import os
5 | import sys
6 | from functools import partial
7 |
8 | REVISION_CMD = 'git rev-parse --short HEAD'
9 |
10 |
11 | def _get_git_revision():
12 | try:
13 | revision = subprocess.check_output(REVISION_CMD.split()).strip()
14 | except (subprocess.CalledProcessError, OSError):
15 | print('Failed to execute git to get revision')
16 | return None
17 | return revision.decode('utf-8')
18 |
19 |
20 | def _linkcode_resolve(domain, info, package, url_fmt, revision):
21 | """Determine a link to online source for a class/method/function
22 |
23 | This is called by sphinx.ext.linkcode
24 |
25 | An example with a long-untouched module that everyone has
26 | >>> _linkcode_resolve('py', {'module': 'tty',
27 | ... 'fullname': 'setraw'},
28 | ... package='tty',
29 | ... url_fmt='http://hg.python.org/cpython/file/'
30 | ... '{revision}/Lib/{package}/{path}#L{lineno}',
31 | ... revision='xxxx')
32 | 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18'
33 | """
34 |
35 | if revision is None:
36 | return
37 | if domain not in ('py', 'pyx'):
38 | return
39 | if not info.get('module') or not info.get('fullname'):
40 | return
41 |
42 | class_name = info['fullname'].split('.')[0]
43 | if type(class_name) != str:
44 | # Python 2 only
45 | class_name = class_name.encode('utf-8')
46 | module = __import__(info['module'], fromlist=[class_name])
47 | obj = attrgetter(info['fullname'])(module)
48 |
49 | try:
50 | fn = inspect.getsourcefile(obj)
51 | except Exception:
52 | fn = None
53 | if not fn:
54 | try:
55 | fn = inspect.getsourcefile(sys.modules[obj.__module__])
56 | except Exception:
57 | fn = None
58 | if not fn:
59 | return
60 |
61 | fn = os.path.relpath(fn,
62 | start=os.path.dirname(__import__(package).__file__))
63 | try:
64 | lineno = inspect.getsourcelines(obj)[1]
65 | except Exception:
66 | lineno = ''
67 | return url_fmt.format(revision=revision, package=package,
68 | path=fn, lineno=lineno)
69 |
70 |
71 | def make_linkcode_resolve(package, url_fmt):
72 | """Returns a linkcode_resolve function for the given URL format
73 |
74 | revision is a git commit reference (hash or name)
75 |
76 | package is the name of the root module of the package
77 |
78 | url_fmt is along the lines of ('https://github.com/USER/PROJECT/'
79 | 'blob/{revision}/{package}/'
80 | '{path}#L{lineno}')
81 | """
82 | revision = _get_git_revision()
83 | return partial(_linkcode_resolve, revision=revision, package=package,
84 | url_fmt=url_fmt)
85 |
--------------------------------------------------------------------------------
/doc/sphinxext/sphinx_issues.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """A Sphinx extension for linking to your project's issue tracker.
3 |
4 | Copyright 2014 Steven Loria
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | THE SOFTWARE.
23 | """
24 |
25 | from docutils import nodes, utils
26 | from sphinx.util.nodes import split_explicit_title
27 |
28 | __version__ = '0.2.0'
29 | __author__ = 'Steven Loria'
30 | __license__ = 'MIT'
31 |
32 |
33 | def user_role(name, rawtext, text, lineno,
34 | inliner, options=None, content=None):
35 | """Sphinx role for linking to a user profile. Defaults to linking to
36 | GitHub profiles, but the profile URIS can be configured via the
37 | ``issues_user_uri`` config value.
38 |
39 | Example: ::
40 |
41 | :user:`sloria`
42 | """
43 | options = options or {}
44 | content = content or []
45 | has_explicit_title, title, target = split_explicit_title(text)
46 |
47 | target = utils.unescape(target).strip()
48 | title = utils.unescape(title).strip()
49 | config = inliner.document.settings.env.app.config
50 | if config.issues_user_uri:
51 | ref = config.issues_user_uri.format(user=target)
52 | else:
53 | ref = 'https://github.com/{0}'.format(target)
54 | if has_explicit_title:
55 | text = title
56 | else:
57 | text = '@{0}'.format(target)
58 |
59 | link = nodes.reference(text=text, refuri=ref, **options)
60 | return [link], []
61 |
62 |
63 | def _make_issue_node(issue_no, config, options=None):
64 | options = options or {}
65 | if issue_no not in ('-', '0'):
66 | if config.issues_uri:
67 | ref = config.issues_uri.format(issue=issue_no)
68 | elif config.issues_github_path:
69 | ref = 'https://github.com/{0}/issues/{1}'.format(
70 | config.issues_github_path, issue_no
71 | )
72 | issue_text = '#{0}'.format(issue_no)
73 | link = nodes.reference(text=issue_text, refuri=ref, **options)
74 | else:
75 | link = None
76 | return link
77 |
78 |
79 | def issue_role(name, rawtext, text, lineno,
80 | inliner, options=None, content=None):
81 | """Sphinx role for linking to an issue. Must have
82 | `issues_uri` or `issues_github_path` configured in ``conf.py``.
83 |
84 | Examples: ::
85 |
86 | :issue:`123`
87 | :issue:`42,45`
88 | """
89 | options = options or {}
90 | content = content or []
91 | issue_nos = [each.strip() for each in utils.unescape(text).split(',')]
92 | config = inliner.document.settings.env.app.config
93 | ret = []
94 | for i, issue_no in enumerate(issue_nos):
95 | node = _make_issue_node(issue_no, config, options=options)
96 | ret.append(node)
97 | if i != len(issue_nos) - 1:
98 | sep = nodes.raw(text=', ', format='html')
99 | ret.append(sep)
100 | return ret, []
101 |
102 |
103 | def setup(app):
104 | # Format template for issues URI
105 | # e.g. 'https://github.com/sloria/marshmallow/issues/{issue}
106 | app.add_config_value('issues_uri', default=None, rebuild='html')
107 | # Shortcut for GitHub, e.g. 'sloria/marshmallow'
108 | app.add_config_value('issues_github_path', default=None, rebuild='html')
109 | # Format template for user profile URI
110 | # e.g. 'https://github.com/{user}'
111 | app.add_config_value('issues_user_uri', default=None, rebuild='html')
112 | app.add_role('issue', issue_role)
113 | app.add_role('user', user_role)
114 |
--------------------------------------------------------------------------------
/doc/sphinxext/xref.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Taken from: https://github.com/michaeljones/sphinx-xref/blob/master/xref.py
3 |
4 | from docutils import nodes
5 |
6 | from sphinx.util import caption_ref_re
7 |
8 | def xref( typ, rawtext, text, lineno, inliner, options={}, content=[] ):
9 |
10 | title = target = text
11 | titleistarget = True
12 | # look if explicit title and target are given with `foo <target>` syntax
13 | brace = text.find('<')
14 | if brace != -1:
15 | titleistarget = False
16 | m = caption_ref_re.match(text)
17 | if m:
18 | target = m.group(2)
19 | title = m.group(1)
20 | else:
21 | # fallback: everything after '<' is the target
22 | target = text[brace+1:]
23 | title = text[:brace]
24 |
25 | link = xref.links[target]
26 |
27 | if brace != -1:
28 | pnode = nodes.reference(target, title, refuri=link[1])
29 | else:
30 | pnode = nodes.reference(target, link[0], refuri=link[1])
31 |
32 | return [pnode], []
33 |
34 | def get_refs(app):
35 |
36 | xref.links = app.config.xref_links
37 |
38 | def setup(app):
39 |
40 | app.add_config_value('xref_links', {}, True)
41 | app.add_role('xref', xref)
42 | app.connect("builder-inited", get_refs)
43 |
44 |
--------------------------------------------------------------------------------
/doc/tutorials.rst:
--------------------------------------------------------------------------------
1 | .. _tutorials:
2 |
3 | Tutorials
4 | =========
5 | Here is a list of available tutorials:
6 |
7 |
8 | `Digit Classification `_
9 | ------------------------------------------------------------------------------------------------------------------------------------------------------
10 | This tutorial explores how an object recognition task can be formulated as a graph classification problem and solved using graph kernels.
11 |
12 |
13 | `Text Categorization `_
14 | ----------------------------------------------------------------------------------------------------------------------------------------------------
15 | In this tutorial, graph kernels are applied to the task of text categorization. Documents are represented as word co-occurrence networks and then graph kernels are employed to classify the emerging graphs.
--------------------------------------------------------------------------------
/examples/README.txt:
--------------------------------------------------------------------------------
1 | .. _general_examples:
2 |
3 | Examples
4 | ========
5 |
6 | Examples that use graph kernels to perform various tasks (e.g., graph classification, retrieval of most similar document, etc).
7 |
--------------------------------------------------------------------------------
/examples/document_retrieval_example.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============================================================================
3 | Retrieval of most similar document using the Weisfeiler-Lehman subtree kernel.
4 | ==============================================================================
5 | Script makes use of :class:`grakel.WeisfeilerLehman`, :class:`grakel.VertexHistogram`
6 | """
7 | from __future__ import print_function
8 | print(__doc__)
9 |
10 | import numpy as np
11 | import time
12 |
13 | from nltk import word_tokenize
14 | from nltk.corpus import sentence_polarity
15 |
16 | from grakel.kernels import WeisfeilerLehman, VertexHistogram
17 | from grakel import Graph
18 |
19 | sents = sentence_polarity.sents()
20 | sents = [sent for sent in sents if len(sent) > 1]
21 | n_sents = 3000
22 | sents = sents[:n_sents]
23 | print("Loaded %d sentences\n" % n_sents)
24 |
25 | print("Creating word co-occurrence networks\n")
26 | word_networks = list()
27 | for sent in sents:
28 |
29 | node_labels = dict()
30 | tokens_to_ids = dict()
31 | for token in sent:
32 | if token not in tokens_to_ids:
33 | tokens_to_ids[token] = len(tokens_to_ids)
34 | node_labels[tokens_to_ids[token]] = token
35 |
36 | edges = list()
37 | for i in range(len(sent)-1):
38 | edges.append((tokens_to_ids[sent[i]], tokens_to_ids[sent[i+1]]))
39 |
40 | word_networks.append(Graph(edges, node_labels=node_labels))
41 |
42 | query_sent_id = 54
43 | query_sent = [word_networks[query_sent_id]]
44 |
45 | # Initialize Weisfeiler-Lehman subtree kernel
46 | gk = WeisfeilerLehman(n_iter=2, normalize=True, base_graph_kernel=VertexHistogram)
47 |
48 | print("Computing similarities\n")
49 | t0 = time.time()
50 | gk.fit(query_sent)
51 | K = gk.transform(word_networks)
52 | print("done in %0.3fs\n" % (time.time() - t0))
53 |
54 | print("Query sentence")
55 | print("--------------")
56 | print(" ".join(sents[query_sent_id]))
57 | print()
58 | print("Most similar sentence")
59 | print("---------------------")
60 | print(" ".join(sents[np.argsort(K[:,0])[-2]]))
61 |
--------------------------------------------------------------------------------
/examples/erdos_renyi.py:
--------------------------------------------------------------------------------
1 | """
2 | ===========================================================================
3 | Graph classification on a randomly generated dataset of Erdos-Renyi graphs.
4 | ===========================================================================
5 |
6 | Script makes use of :class:`grakel.Graph` and :class:`grakel.ShortestPath`
7 | """
8 | from __future__ import print_function
9 | print(__doc__)
10 |
11 | import numpy as np
12 |
13 | from random import random
14 |
15 | from sklearn.model_selection import train_test_split
16 | from sklearn.svm import SVC
17 | from sklearn.metrics import accuracy_score
18 |
19 | from grakel import Graph
20 | from grakel.kernels import ShortestPath
21 |
22 | # Generates 3 sets of Erdos-Renyi graphs. Each edge is included in the graph with probability p
23 | # independent from every other edge. The probability p is set equal to 0.25, 0.5 and 0.75 for
24 | # the graphs of the 1st, 2nd and 3rd set, respectively
25 | Gs = list()
26 | y = list()
27 | probs = [0.25, 0.5, 0.75]
28 | for i in range(len(probs)):
29 | for j in range(5, 35):
30 | edges = list()
31 | for n1 in range(j):
32 | for n2 in range(n1+1, j):
33 | if random() <= probs[i]:
34 | edges.append((n1, n2))
35 | edges.append((n2, n1))
36 |
37 | Gs.append(Graph(edges))
38 | y.append(i)
39 |
40 | # Splits the dataset into a training and a test set
41 | G_train, G_test, y_train, y_test = train_test_split(Gs, y, test_size=0.1, random_state=42)
42 |
43 | # Uses the shortest path kernel to generate the kernel matrices
44 | gk = ShortestPath(normalize=True, with_labels=False)
45 | K_train = gk.fit_transform(G_train)
46 | K_test = gk.transform(G_test)
47 |
48 | # Uses the SVM classifier to perform classification
49 | clf = SVC(kernel="precomputed")
50 | clf.fit(K_train, y_train)
51 | y_pred = clf.predict(K_test)
52 |
53 | # Computes and prints the classification accuracy
54 | acc = accuracy_score(y_test, y_pred)
55 | print("Accuracy:", str(round(acc*100, 2)) + "%")
56 |
--------------------------------------------------------------------------------
/examples/node_attributed_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | =======================================================================
3 | Graph classification on a dataset that contains node-attributed graphs.
4 | =======================================================================
5 |
6 | Script makes use of :class:`grakel.PropagationAttr`
7 | """
8 | from __future__ import print_function
9 | print(__doc__)
10 |
11 | import numpy as np
12 |
13 | from sklearn.model_selection import train_test_split
14 | from sklearn.svm import SVC
15 | from sklearn.metrics import accuracy_score
16 |
17 | from grakel.datasets import fetch_dataset
18 | from grakel.kernels import PropagationAttr
19 |
20 | # Loads the ENZYMES dataset
21 | ENZYMES_attr = fetch_dataset("ENZYMES", prefer_attr_nodes=True, verbose=False)
22 | G, y = ENZYMES_attr.data, ENZYMES_attr.target
23 |
24 | # Splits the dataset into a training and a test set
25 | G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1, random_state=42)
26 |
27 | # Uses the propagation kernel for attributed nodes to generate the kernel matrices
28 | gk = PropagationAttr(normalize=True)
29 | K_train = gk.fit_transform(G_train)
30 | K_test = gk.transform(G_test)
31 |
32 | # Uses the SVM classifier to perform classification
33 | clf = SVC(kernel="precomputed")
34 | clf.fit(K_train, y_train)
35 | y_pred = clf.predict(K_test)
36 |
37 | # Computes and prints the classification accuracy
38 | acc = accuracy_score(y_test, y_pred)
39 | print("Accuracy:", str(round(acc*100, 2)) + "%")
40 |
--------------------------------------------------------------------------------
/examples/nx_to_grakel.py:
--------------------------------------------------------------------------------
1 | """
2 | =========================================================
3 | Example of transforming NetworkX graphs to GraKeL graphs.
4 | =========================================================
5 | """
6 | from __future__ import print_function
7 | print(__doc__)
8 |
9 | import numpy as np
10 | import networkx as nx
11 |
12 | from grakel.utils import graph_from_networkx
13 |
14 | # Creates a list of two simple graphs
15 | G1 = nx.Graph()
16 | G1.add_nodes_from([0,1,2])
17 | G1.add_edges_from([(0,1), (1,2)])
18 |
19 | G2 = nx.Graph()
20 | G2.add_nodes_from([0,1,2])
21 | G2.add_edges_from([(0,1), (0,2), (1,2)])
22 |
23 | G_nx = [G1, G2]
24 |
25 | # Transforms list of NetworkX graphs into a list of GraKeL graphs
26 | G = graph_from_networkx(G_nx)
27 | print("1 - Simple graphs transformed\n")
28 |
29 |
30 | # Creates a list of two node-labeled graphs
31 | G1 = nx.Graph()
32 | G1.add_nodes_from([0,1,2])
33 | G1.add_edges_from([(0,1), (1,2)])
34 | nx.set_node_attributes(G1, {0:'a', 1:'b', 2:'a'}, 'label')
35 |
36 | G2 = nx.Graph()
37 | G2.add_nodes_from([0,1,2])
38 | G2.add_edges_from([(0,1), (0,2), (1,2)])
39 | nx.set_node_attributes(G2, {0:'a', 1:'b', 2:'c'}, 'label')
40 |
41 | G_nx = [G1, G2]
42 |
43 | # Transforms list of NetworkX graphs into a list of GraKeL graphs
44 | G = graph_from_networkx(G_nx, node_labels_tag='label')
45 | print("2 - Node-labeled graphs transformed\n")
46 |
47 |
48 | # Creates a list of two node-attributed graphs
49 | G1 = nx.Graph()
50 | G1.add_nodes_from([0,1,2])
51 | G1.add_edges_from([(0,1), (1,2)])
52 | nx.set_node_attributes(G1, {0:np.array([1.1, 0.8]),
53 | 1:np.array([0.2, -0.3]), 2:np.array([0.9, 1.0])}, 'attributes')
54 |
55 | G2 = nx.Graph()
56 | G2.add_nodes_from([0,1,2])
57 | G2.add_edges_from([(0,1), (0,2), (1,2)])
58 | nx.set_node_attributes(G2, {0:np.array([1.8, 0.5]),
59 | 1:np.array([-0.1, 0.2]), 2:np.array([2.3, 1.2])}, 'attributes')
60 |
61 | G_nx = [G1, G2]
62 |
63 | # Transforms list of NetworkX graphs into a list of GraKeL graphs
64 | G = graph_from_networkx(G_nx, node_labels_tag='attributes')
65 | print("3 - Node-attributed graphs transformed")
--------------------------------------------------------------------------------
/examples/optimizing_hyperparameters.py:
--------------------------------------------------------------------------------
1 | """
2 | ===================================================================================
3 | Performing cross-validation n times, optimizing SVM's and kernel's hyperparameters.
4 | ===================================================================================
5 |
6 | Script makes use of :class:`grakel.WeisfeilerLehman`, :class:`grakel.VertexHistogram`
7 | """
8 | from __future__ import print_function
9 | print(__doc__)
10 |
11 | import numpy as np
12 |
13 | from grakel.datasets import fetch_dataset
14 | from grakel.utils import cross_validate_Kfold_SVM
15 | from grakel.kernels import WeisfeilerLehman, VertexHistogram
16 |
17 | # Loads the MUTAG dataset
18 | MUTAG = fetch_dataset("MUTAG", verbose=False)
19 | G, y = MUTAG.data, MUTAG.target
20 |
21 | # Generates a list of kernel matrices using the Weisfeiler-Lehman subtree kernel
22 | # Each kernel matrix is generated by setting the number of iterations of the
23 | # kernel to a different value (from 1 to 6)
24 | Ks = list()
25 | for i in range(1, 7):
26 | gk = WeisfeilerLehman(n_iter=i, base_graph_kernel=VertexHistogram, normalize=True)
27 | K = gk.fit_transform(G)
28 | Ks.append(K)
29 |
30 |
31 | # Performs 10-fold cross-validation over different kernels and the parameter C of
32 | # SVM and repeats the experiment 10 times with different folds
33 | accs = cross_validate_Kfold_SVM([Ks], y, n_iter=10)
34 | print("Average accuracy:", str(round(np.mean(accs[0])*100, 2)) + "%")
35 | print("Standard deviation:", str(round(np.std(accs[0])*100, 2)) + "%")
--------------------------------------------------------------------------------
/examples/plot_pipeline_example.py:
--------------------------------------------------------------------------------
1 | """
2 | ====================================================
3 | Example of building a graph classification pipeline.
4 | ====================================================
5 |
6 | Script makes use of :class:`grakel.ShortestPath`
7 | """
8 | from __future__ import print_function
9 | print(__doc__)
10 |
11 | import numpy as np
12 |
13 | from sklearn.svm import SVC
14 | from sklearn.model_selection import GridSearchCV
15 | from sklearn.model_selection import cross_val_predict
16 | from sklearn.pipeline import make_pipeline
17 | from sklearn.metrics import accuracy_score
18 |
19 | from grakel.datasets import fetch_dataset
20 | from grakel.kernels import ShortestPath
21 |
22 | # Loads the MUTAG dataset
23 | MUTAG = fetch_dataset("MUTAG", verbose=False)
24 | G, y = MUTAG.data, MUTAG.target
25 |
26 | # Values of C parameter of SVM
27 | C_grid = (10. ** np.arange(-4,6,1) / len(G)).tolist()
28 |
29 | # Creates pipeline
30 | estimator = make_pipeline(
31 | ShortestPath(normalize=True),
32 | GridSearchCV(SVC(kernel='precomputed'), dict(C=C_grid),
33 | scoring='accuracy', cv=10))
34 |
35 | # Performs cross-validation and computes accuracy
36 | n_folds = 10
37 | acc = accuracy_score(y, cross_val_predict(estimator, G, y, cv=n_folds))
38 | print("Accuracy:", str(round(acc*100, 2)) + "%")
--------------------------------------------------------------------------------
/examples/shortest_path.py:
--------------------------------------------------------------------------------
1 | """
2 | =============================================================
3 | Graph classification on MUTAG using the shortest path kernel.
4 | =============================================================
5 |
6 | Script makes use of :class:`grakel.ShortestPath`
7 | """
8 | from __future__ import print_function
9 | print(__doc__)
10 |
11 | import numpy as np
12 |
13 | from sklearn.model_selection import train_test_split
14 | from sklearn.svm import SVC
15 | from sklearn.metrics import accuracy_score
16 |
17 | from grakel.datasets import fetch_dataset
18 | from grakel.kernels import ShortestPath
19 |
20 | # Loads the MUTAG dataset
21 | MUTAG = fetch_dataset("MUTAG", verbose=False)
22 | G, y = MUTAG.data, MUTAG.target
23 |
24 | # Splits the dataset into a training and a test set
25 | G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1, random_state=42)
26 |
27 | # Uses the shortest path kernel to generate the kernel matrices
28 | gk = ShortestPath(normalize=True)
29 | K_train = gk.fit_transform(G_train)
30 | K_test = gk.transform(G_test)
31 |
32 | # Uses the SVM classifier to perform classification
33 | clf = SVC(kernel="precomputed")
34 | clf.fit(K_train, y_train)
35 | y_pred = clf.predict(K_test)
36 |
37 | # Computes and prints the classification accuracy
38 | acc = accuracy_score(y_test, y_pred)
39 | print("Accuracy:", str(round(acc*100, 2)) + "%")
40 |
--------------------------------------------------------------------------------
/examples/weisfeiler_lehman_subtree.py:
--------------------------------------------------------------------------------
1 | """
2 | =========================================================================
3 | Graph classification on MUTAG using the Weisfeiler-Lehman subtree kernel.
4 | =========================================================================
5 |
6 | Script makes use of :class:`grakel.WeisfeilerLehman`, :class:`grakel.VertexHistogram`
7 | """
8 | from __future__ import print_function
9 | print(__doc__)
10 |
11 | import numpy as np
12 |
13 | from sklearn.model_selection import train_test_split
14 | from sklearn.svm import SVC
15 | from sklearn.metrics import accuracy_score
16 |
17 | from grakel.datasets import fetch_dataset
18 | from grakel.kernels import WeisfeilerLehman, VertexHistogram
19 |
20 | # Loads the MUTAG dataset
21 | MUTAG = fetch_dataset("MUTAG", verbose=False)
22 | G, y = MUTAG.data, MUTAG.target
23 |
24 | # Splits the dataset into a training and a test set
25 | G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.1, random_state=42)
26 |
27 | # Uses the Weisfeiler-Lehman subtree kernel to generate the kernel matrices
28 | gk = WeisfeilerLehman(n_iter=4, base_graph_kernel=VertexHistogram, normalize=True)
29 | K_train = gk.fit_transform(G_train)
30 | K_test = gk.transform(G_test)
31 |
32 | # Uses the SVM classifier to perform classification
33 | clf = SVC(kernel="precomputed")
34 | clf.fit(K_train, y_train)
35 | y_pred = clf.predict(K_test)
36 |
37 | # Computes and prints the classification accuracy
38 | acc = accuracy_score(y_test, y_pred)
39 | print("Accuracy:", str(round(acc*100, 2)) + "%")
40 |
--------------------------------------------------------------------------------
/git:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/git
--------------------------------------------------------------------------------
/grakel/__init__.py:
--------------------------------------------------------------------------------
1 | """Init file for the whole grakel project."""
2 | from grakel import datasets
3 |
4 | from grakel.graph import Graph
5 |
6 | from grakel.graph_kernels import GraphKernel
7 |
8 |
9 | from grakel.kernels import Kernel
10 |
11 | from grakel.kernels import GraphletSampling
12 | from grakel.kernels import RandomWalk
13 | from grakel.kernels import RandomWalkLabeled
14 | from grakel.kernels import ShortestPath
15 | from grakel.kernels import ShortestPathAttr
16 | from grakel.kernels import WeisfeilerLehman
17 | from grakel.kernels import NeighborhoodHash
18 | from grakel.kernels import PyramidMatch
19 | from grakel.kernels import SubgraphMatching
20 | from grakel.kernels import NeighborhoodSubgraphPairwiseDistance
21 | from grakel.kernels import LovaszTheta
22 | from grakel.kernels import SvmTheta
23 | from grakel.kernels import OddSth
24 | from grakel.kernels import Propagation
25 | from grakel.kernels import PropagationAttr
26 | from grakel.kernels import HadamardCode
27 | from grakel.kernels import MultiscaleLaplacian
28 | from grakel.kernels import VertexHistogram
29 | from grakel.kernels import EdgeHistogram
30 | from grakel.kernels import GraphHopper
31 | from grakel.kernels import CoreFramework
32 | from grakel.kernels import WeisfeilerLehmanOptimalAssignment
33 |
34 | from grakel.utils import KMTransformer
35 | from grakel.utils import cross_validate_Kfold_SVM
36 | from grakel.utils import graph_from_networkx
37 | from grakel.utils import graph_from_pandas
38 | from grakel.utils import graph_from_csv
39 | from grakel.utils import graph_from_torch_geometric
40 |
41 | __all__ = [
42 | "datasets",
43 | "GraphKernel",
44 | "Graph",
45 | "Kernel",
46 | "GraphletSampling",
47 | "RandomWalk",
48 | "RandomWalkLabeled",
49 | "ShortestPath",
50 | "ShortestPathAttr",
51 | "WeisfeilerLehman",
52 | "NeighborhoodHash",
53 | "PyramidMatch",
54 | "SubgraphMatching",
55 | "NeighborhoodSubgraphPairwiseDistance",
56 | "LovaszTheta",
57 | "SvmTheta",
58 | "OddSth",
59 | "Propagation",
60 | "PropagationAttr",
61 | "HadamardCode",
62 | "MultiscaleLaplacian",
63 | "VertexHistogram",
64 | "EdgeHistogram",
65 | "GraphHopper",
66 | "CoreFramework",
67 | "WeisfeilerLehmanOptimalAssignment",
68 | "graph_from_networkx",
69 | "graph_from_pandas",
70 | "graph_from_csv",
71 | "graph_from_torch_geometric",
72 | "KMTransformer",
73 | "cross_validate_Kfold_SVM"
74 | ]
75 |
76 | # Generic release markers:
77 | # X.Y
78 | # X.Y.Z # For bugfix releases
79 | #
80 | # Admissible pre-release markers:
81 | # X.YaN # Alpha release
82 | # X.YbN # Beta release
83 | # X.YrcN # Release Candidate
84 | # X.Y # Final release
85 | #
86 | # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
87 | # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
88 | #
89 | __version__ = '0.1.8'
90 |
--------------------------------------------------------------------------------
/grakel/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | """Import datasets related with graph kernels, from a large collection."""
2 | from grakel.datasets.base import fetch_dataset
3 | from grakel.datasets.base import get_dataset_info
4 | from grakel.datasets.testing import generate_dataset
5 |
6 | __all__ = [
7 | "get_dataset_info",
8 | "fetch_dataset",
9 | "generate_dataset"
10 | ]
11 |
--------------------------------------------------------------------------------
/grakel/kernels/__init__.py:
--------------------------------------------------------------------------------
1 | """__init__ file for kernel sub-module of grakel."""
2 | # Author: Ioannis Siglidis
3 | # License: BSD 3 clause
4 | from grakel.kernels.kernel import Kernel
5 |
6 | from grakel.kernels.graphlet_sampling import GraphletSampling
7 | from grakel.kernels.random_walk import RandomWalk
8 | from grakel.kernels.random_walk import RandomWalkLabeled
9 | from grakel.kernels.shortest_path import ShortestPath
10 | from grakel.kernels.shortest_path import ShortestPathAttr
11 | from grakel.kernels.weisfeiler_lehman import WeisfeilerLehman
12 | from grakel.kernels.neighborhood_hash import NeighborhoodHash
13 | from grakel.kernels.pyramid_match import PyramidMatch
14 | from grakel.kernels.subgraph_matching import SubgraphMatching
15 | from grakel.kernels.neighborhood_subgraph_pairwise_distance import \
16 | NeighborhoodSubgraphPairwiseDistance
17 | from grakel.kernels.lovasz_theta import LovaszTheta
18 | from grakel.kernels.svm_theta import SvmTheta
19 | from grakel.kernels.odd_sth import OddSth
20 | from grakel.kernels.propagation import Propagation
21 | from grakel.kernels.propagation import PropagationAttr
22 | from grakel.kernels.hadamard_code import HadamardCode
23 | from grakel.kernels.multiscale_laplacian import MultiscaleLaplacian
24 | from grakel.kernels.vertex_histogram import VertexHistogram
25 | from grakel.kernels.edge_histogram import EdgeHistogram
26 | from grakel.kernels.graph_hopper import GraphHopper
27 | from grakel.kernels.core_framework import CoreFramework
28 | from grakel.kernels.weisfeiler_lehman_optimal_assignment import WeisfeilerLehmanOptimalAssignment
29 |
30 | __all__ = [
31 | "default_executor",
32 | "Kernel",
33 | "GraphletSampling",
34 | "RandomWalk",
35 | "RandomWalkLabeled",
36 | "ShortestPath",
37 | "ShortestPathAttr",
38 | "WeisfeilerLehman",
39 | "NeighborhoodHash",
40 | "PyramidMatch",
41 | "SubgraphMatching",
42 | "NeighborhoodSubgraphPairwiseDistance",
43 | "LovaszTheta",
44 | "SvmTheta",
45 | "OddSth",
46 | "Propagation",
47 | "PropagationAttr",
48 | "HadamardCode",
49 | "MultiscaleLaplacian",
50 | "VertexHistogram",
51 | "EdgeHistogram",
52 | "GraphHopper",
53 | "CoreFramework",
54 | "WeisfeilerLehmanOptimalAssignment"
55 | ]
56 |
--------------------------------------------------------------------------------
/grakel/kernels/_c_functions/__init__.pyx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ysig/GraKeL/6a9cebf185aa4e71b1e05f8d4e91edf8ce818aeb/grakel/kernels/_c_functions/__init__.pyx
--------------------------------------------------------------------------------
/grakel/kernels/_c_functions/header.pxd:
--------------------------------------------------------------------------------
1 | cimport cython
2 | cdef extern from "include/functions.hpp":
3 | unsigned int ArashPartov(const char* str, unsigned int length)
4 | void sm_core_init(double value, int* d, int nv, int kappa, double *cost_vertices, double **cost_edges, double *total_value)
5 |
--------------------------------------------------------------------------------
/grakel/kernels/_c_functions/include/functions.hpp:
--------------------------------------------------------------------------------
1 | #ifndef FUNCTIONS_H_
2 | #define FUNCTIONS_H_
3 |
4 | unsigned int ArashPartov(const char* str, unsigned int length);
5 | void sm_core_init(double value, int* d, int nv, int kappa, double *cost_vertices, double **cost_edges, double *total_value);
6 |
7 | #endif
--------------------------------------------------------------------------------
/grakel/kernels/_c_functions/src/ArashPartov.cpp:
--------------------------------------------------------------------------------
1 | /* Arash Partov Function
2 | * Author: Ioannis Siglidis
3 | * License: BSD 3 clause"
4 | * Code taken from: http://www.partow.net/programming/hashfunctions/#APHashFunction
5 | */
6 | #include "../include/functions.hpp"
7 |
8 | unsigned int ArashPartov(const char* str, unsigned int length)
9 | {
10 | unsigned int hash = 0xAAAAAAAA;
11 | unsigned int i = 0;
12 |
13 | for (i = 0; i < length; ++str, ++i)
14 | {
15 | hash ^= ((i & 1) == 0) ? ( (hash << 7) ^ (*str) * (hash >> 3)) :
16 | (~((hash << 11) + ((*str) ^ (hash >> 5))));
17 | }
18 |
19 | return hash;
20 | }
21 |
--------------------------------------------------------------------------------
/grakel/kernels/_c_functions/src/sm_core.cpp:
--------------------------------------------------------------------------------
1 | /* Subgraph Matching Kernel (Supplementary Functions)
2 | * Author: Ioannis Siglidis
3 | * License: BSD 3 clause"
4 | * Code taken from: http://www.partow.net/programming/hashfunctions/#APHashFunction
5 | */
6 | #include "../include/functions.hpp"
7 | #include <list>
8 | #include <cstdlib>
9 | #include <cmath>
10 |
11 | using namespace std;
12 |
13 | double *cv;
14 | double **ce;
15 | double *totalValue;
16 | unsigned int k;
17 |
18 | void sm_core(double value, list<int> c, list<int> p, int* d, int lBound, int uBound) {
19 |
20 | while (!p.empty()) {
21 | int i = p.front();
22 | p.pop_front();
23 |
24 | double nValue = value * cv[i];
25 | double* iEdgeValue = ce[i];
26 | for (list<int>::const_iterator it = c.begin(); it != c.end(); it++) {
27 | nValue *= abs(iEdgeValue[*it]);
28 | }
29 |
30 | totalValue[c.size()] += nValue;
31 |
32 | if (c.size()+1 < k) {
33 | c.push_back(i);
34 |
35 | // prepare candidate set for recursive call
36 | list<int> newP;
37 | for (list<int>::const_iterator it = p.begin(); it != p.end(); it++) {
38 | int v = *it;
39 | if (iEdgeValue[v] != 0)
40 | newP.push_back(v);
41 | }
42 |
43 | int newUBound = uBound;
44 | int newLBound = lBound;
45 | if (lBound <= uBound) {
46 | int tmp;
47 | while (iEdgeValue[d[newUBound]] == 0 && --newUBound > lBound);
48 | while (iEdgeValue[d[newLBound]] == 0 && ++newLBound < newUBound);
49 | int nm = newLBound-1;
50 | while (++nm <= newUBound) {
51 | if (iEdgeValue[d[nm]] < 0) continue;
52 | if (iEdgeValue[d[nm]] > 0)
53 | newP.push_back(d[nm]);
54 | // swap
55 | tmp = d[newLBound];
56 | d[newLBound] = d[nm];
57 | d[nm] = tmp;
58 | newLBound++;
59 | }
60 | }
61 |
62 | sm_core(nValue, c, newP, d, newLBound, newUBound);
63 | c.pop_back();
64 | }
65 | }
66 | }
67 |
68 |
69 | void sm_core_init(double value, int* d, int nv, int kappa, double *cost_vertices, double **cost_edges, double *total_value) {
70 |
71 | cv = cost_vertices;
72 | ce = cost_edges;
73 | totalValue = total_value;
74 | k = (unsigned int) kappa;
75 |
76 | list<int> c;
77 | int lBound = 0;
78 | int uBound = nv-1;
79 |
80 | for (int it=lBound; it<=uBound; it++) {
81 | int i = d[it];
82 | double nValue = value * cv[i];
83 |
84 | totalValue[0] += nValue;
85 |
86 | if (k > 1) {
87 | c.push_back(i);
88 |
89 | // prepare candidate set for recursive call
90 | list<int> p;
91 | int tmp;
92 | int newUBound = uBound;
93 | double *iEdgeValue = ce[i];
94 | while (iEdgeValue[d[newUBound]] == 0 && --newUBound > it);
95 | int newLBound = it;
96 | while (++newLBound <= newUBound && iEdgeValue[d[newLBound]] == 0);
97 | int nm = newLBound-1;
98 | while (++nm <= newUBound) {
99 | if (iEdgeValue[d[nm]] < 0) continue;
100 | if (iEdgeValue[d[nm]] > 0)
101 | p.push_back(d[nm]);
102 | // swap
103 | tmp = d[newLBound];
104 | d[newLBound] = d[nm];
105 | d[nm] = tmp;
106 | newLBound++;
107 | }
108 |
109 | sm_core(nValue, c, p, d, newLBound, newUBound);
110 | c.pop_back();
111 | }
112 | }
113 | }
114 |
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/__init__.py:
--------------------------------------------------------------------------------
1 | """Init file for the _isomorphism submodule project."""
2 | # Author: Ioannis Siglidis
3 | # This file is a modification and extension of the [GNU LPGL] licensed
4 | # PyBliss which can be found at: http://www.tcs.hut.fi/Software/bliss/
5 | # PyBliss and Bliss are copyright of their respective owners.
6 | # License: BSD 3 clause"
7 | from grakel.kernels._isomorphism.bliss import Graph
8 |
9 | __all__ = [
10 | "Graph",
11 | ]
12 |
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/bignum.hh:
--------------------------------------------------------------------------------
1 | #ifndef BLISS_BIGNUM_HH
2 | #define BLISS_BIGNUM_HH
3 |
4 | /*
5 | * Copyright (c) Tommi Junttila
6 | * Released under the GNU General Public License version 2.
7 | */
8 |
9 | #if defined(BLISS_USE_GMP)
10 | #include <gmp.h>
11 | #endif
12 |
13 | #include <cstdlib>
14 | #include <cstdio>
15 | #include "defs.hh"
16 |
17 | namespace bliss {
18 |
19 | /**
20 | * \brief A very simple class for big integers (or approximation of them).
21 | *
22 | * If the compile time flag BLISS_USE_GMP is set,
23 | * then the GNU Multiple Precision Arithmetic library (GMP) is used to
24 | * obtain arbitrary precision, otherwise "long double" is used to
25 | * approximate big integers.
26 | */
27 |
28 |
29 | #if defined(BLISS_USE_GMP)
30 |
31 |
32 | class BigNum
33 | {
34 | mpz_t v;
35 | public:
36 | /**
37 | * Create a new big number and set it to zero.
38 | */
39 | BigNum() {mpz_init(v); }
40 |
41 | /**
42 | * Destroy the number.
43 | */
44 | ~BigNum() {mpz_clear(v); }
45 |
46 | /**
47 | * Set the number to 'n'.
48 | */
49 | void assign(const int n) {mpz_set_si(v, n); }
50 |
51 | /**
52 | * Multiply the number with 'n'.
53 | */
54 | void multiply(const int n) {mpz_mul_si(v, v, n); }
55 |
56 | /**
57 | * Print the number in the file stream 'fp'.
58 | */
59 | int print(FILE *fp) {return mpz_out_str(fp, 10, v); }
60 | };
61 |
62 | #else
63 |
64 | class BigNum
65 | {
66 | long double v;
67 | public:
68 | /**
69 | * Create a new big number and set it to zero.
70 | */
71 | BigNum(): v(0.0) {}
72 |
73 | /**
74 | * Set the number to 'n'.
75 | */
76 | void assign(const int n) {v = (long double)n; }
77 |
78 | /**
79 | * Multiply the number with 'n'.
80 | */
81 | void multiply(const int n) {v *= (long double)n; }
82 |
83 | /**
84 | * Print the number in the file stream 'fp'.
85 | */
86 | int print(FILE *fp) {return fprintf(fp, "%Lg", v); }
87 | };
88 |
89 | #endif
90 |
91 | } //namespace bliss
92 |
93 | #endif
94 |
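A minimal usage sketch of the BigNum class declared above (not part of bliss or GraKeL), assuming bignum.hh is on the include path. The same calls work whether or not BLISS_USE_GMP is defined, since both variants expose assign(), multiply() and print().

#include <cstdio>
#include "bignum.hh"

int main() {
  bliss::BigNum n;            // starts at zero
  n.assign(1);
  for (int i = 2; i <= 20; i++)
    n.multiply(i);            // n = 20! (exact with GMP, approximated otherwise)
  n.print(stdout);            // write the value to stdout
  std::printf("\n");
  return 0;
}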
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/bliss_C.cc:
--------------------------------------------------------------------------------
1 | #include <cstdio>
2 | #include <cstdlib>
3 | #include <cassert>
4 | #include "graph.hh"
5 | extern "C" {
6 | #include "bliss_C.h"
7 | }
8 |
9 | struct bliss_graph_struct {
10 | bliss::Graph *g;
11 | };
12 |
13 | extern "C"
14 | BlissGraph *bliss_new(const unsigned int n)
15 | {
16 | BlissGraph *graph = new bliss_graph_struct;
17 | assert(graph);
18 | graph->g = new bliss::Graph(n);
19 | assert(graph->g);
20 | return graph;
21 | }
22 |
23 | extern "C"
24 | BlissGraph *bliss_read_dimacs(FILE *fp)
25 | {
26 | bliss::Graph *g = bliss::Graph::read_dimacs(fp);
27 | if(!g)
28 | return 0;
29 | BlissGraph *graph = new bliss_graph_struct;
30 | assert(graph);
31 | graph->g = g;
32 | return graph;
33 | }
34 |
35 | extern "C"
36 | void bliss_write_dimacs(BlissGraph *graph, FILE *fp)
37 | {
38 | assert(graph);
39 | assert(graph->g);
40 | graph->g->write_dimacs(fp);
41 | }
42 |
43 | extern "C"
44 | void bliss_release(BlissGraph *graph)
45 | {
46 | assert(graph);
47 | assert(graph->g);
48 | delete graph->g; graph->g = 0;
49 | delete graph;
50 | }
51 |
52 | extern "C"
53 | void bliss_write_dot(BlissGraph *graph, FILE *fp)
54 | {
55 | assert(graph);
56 | assert(graph->g);
57 | graph->g->write_dot(fp);
58 | }
59 |
60 | extern "C"
61 | unsigned int bliss_get_nof_vertices(BlissGraph *graph)
62 | {
63 | assert(graph);
64 | assert(graph->g);
65 | return graph->g->get_nof_vertices();
66 | }
67 |
68 | extern "C"
69 | unsigned int bliss_add_vertex(BlissGraph *graph, unsigned int l)
70 | {
71 | assert(graph);
72 | assert(graph->g);
73 | return graph->g->add_vertex(l);
74 | }
75 |
76 | extern "C"
77 | void bliss_add_edge(BlissGraph *graph, unsigned int v1, unsigned int v2)
78 | {
79 | assert(graph);
80 | assert(graph->g);
81 | graph->g->add_edge(v1, v2);
82 | }
83 |
84 | extern "C"
85 | int bliss_cmp(BlissGraph *graph1, BlissGraph *graph2)
86 | {
87 | assert(graph1);
88 | assert(graph1->g);
89 | assert(graph2);
90 | assert(graph2->g);
91 | return graph1->g->cmp(graph2->g);
92 | }
93 |
94 | extern "C"
95 | unsigned int bliss_hash(BlissGraph *graph)
96 | {
97 | assert(graph);
98 | assert(graph->g);
99 | return graph->g->get_hash();
100 | }
101 |
102 | extern "C"
103 | BlissGraph *bliss_permute(BlissGraph *graph, const unsigned int *perm)
104 | {
105 | assert(graph);
106 | assert(graph->g);
107 | assert(graph->g->get_nof_vertices() == 0 || perm);
108 | BlissGraph *permuted_graph = new bliss_graph_struct;
109 | assert(permuted_graph);
110 | permuted_graph->g = graph->g->permute(perm);
111 | return permuted_graph;
112 | }
113 |
114 | extern "C"
115 | void
116 | bliss_find_automorphisms(BlissGraph *graph,
117 | void (*hook)(void *user_param,
118 | unsigned int n,
119 | const unsigned int *aut),
120 | void *hook_user_param,
121 | BlissStats *stats)
122 | {
123 | bliss::Stats s;
124 | assert(graph);
125 | assert(graph->g);
126 | graph->g->find_automorphisms(s, hook, hook_user_param);
127 |
128 | if(stats)
129 | {
130 | stats->group_size_approx = s.group_size_approx;
131 | stats->nof_nodes = s.nof_nodes;
132 | stats->nof_leaf_nodes = s.nof_leaf_nodes;
133 | stats->nof_bad_nodes = s.nof_bad_nodes;
134 | stats->nof_canupdates = s.nof_canupdates;
135 | stats->nof_generators = s.nof_generators;
136 | stats->max_level = s.max_level;
137 | }
138 | }
139 |
140 |
141 | extern "C"
142 | const unsigned int *
143 | bliss_find_canonical_labeling(BlissGraph *graph,
144 | void (*hook)(void *user_param,
145 | unsigned int n,
146 | const unsigned int *aut),
147 | void *hook_user_param,
148 | BlissStats *stats)
149 | {
150 | bliss::Stats s;
151 | const unsigned int *canonical_labeling = 0;
152 | assert(graph);
153 | assert(graph->g);
154 |
155 | canonical_labeling = graph->g->canonical_form(s, hook, hook_user_param);
156 |
157 | if(stats)
158 | {
159 | stats->group_size_approx = s.group_size_approx;
160 | stats->nof_nodes = s.nof_nodes;
161 | stats->nof_leaf_nodes = s.nof_leaf_nodes;
162 | stats->nof_bad_nodes = s.nof_bad_nodes;
163 | stats->nof_canupdates = s.nof_canupdates;
164 | stats->nof_generators = s.nof_generators;
165 | stats->max_level = s.max_level;
166 | }
167 |
168 | return canonical_labeling;
169 | }
170 |
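A minimal sketch (not part of the library) of the C wrapper API defined above: it builds a small vertex-colored graph, asks for a canonical labeling and releases the graph. It assumes the program is compiled and linked together with the bliss sources in this directory, and that the underlying bliss::Graph::canonical_form accepts a null hook when no automorphisms need to be reported.

#include <cstdio>
extern "C" {
#include "bliss_C.h"
}

int main() {
  BlissGraph *g = bliss_new(0);              // start from an empty graph
  unsigned int a = bliss_add_vertex(g, 0);   // second argument is the vertex color
  unsigned int b = bliss_add_vertex(g, 0);
  unsigned int c = bliss_add_vertex(g, 1);
  bliss_add_edge(g, a, b);
  bliss_add_edge(g, b, c);

  // No automorphism hook and no statistics are requested here (null pointers).
  const unsigned int *canon = bliss_find_canonical_labeling(g, 0, 0, 0);
  for (unsigned int i = 0; i < bliss_get_nof_vertices(g); i++)
    std::printf("%u -> %u\n", i, canon[i]);

  bliss_release(g);
  return 0;
}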
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/defs.hh:
--------------------------------------------------------------------------------
1 | #ifndef BLISS_DEFS_HH
2 | #define BLISS_DEFS_HH
3 |
4 | #include <cassert>
5 |
6 | namespace bliss {
7 |
8 | /**
9 | * The version number of bliss.
10 | */
11 | static const char * const version = "0.50";
12 |
13 |
14 | #if defined(BLISS_DEBUG)
15 | #define BLISS_CONSISTENCY_CHECKS
16 | #define BLISS_EXPENSIVE_CONSISTENCY_CHECKS
17 | #endif
18 |
19 | #if defined(BLISS_CONSISTENCY_CHECKS)
20 | #define BLISS_ASSERT(a) assert(a)
21 | //inline void BLISS_ASSERT(const int c) {assert(c); }
22 | #else
23 | #define BLISS_ASSERT(a) ;
24 | //inline void BLISS_ASSERT(const int c) {}
25 | #endif
26 |
27 |
28 | #if defined(BLISS_CONSISTENCY_CHECKS)
29 | /* Force a check that the found automorphisms are valid */
30 | #define BLISS_VERIFY_AUTOMORPHISMS
31 | #endif
32 |
33 |
34 | #if defined(BLISS_CONSISTENCY_CHECKS)
35 | /* Force a check that the generated partitions are equitable */
36 | #define BLISS_VERIFY_EQUITABLEDNESS
37 | #endif
38 |
39 |
40 | } // namespace bliss
41 |
42 |
43 |
44 | /*! \mainpage Bliss
45 | *
46 | * \section intro_sec Introduction
47 | *
48 | * This is the source code documentation of bliss,
49 | * produced by running doxygen in
50 | * the source directory.
51 | * The algorithms and data structures used in bliss are documented in
52 | * the papers found at the
53 | * bliss web site.
54 | *
55 | *
56 | * \section compile_sec Compiling
57 | *
58 | * Compiling bliss in Linux should be easy, just execute
59 | * \code
60 | * make
61 | * \endcode
62 | * in the bliss source directory.
63 | * This will produce the executable program \c bliss as well as
64 | * the library file \c libbliss.a that can be linked in other programs.
65 | * If you have the GNU Multiple Precision
66 | * Arithmetic Library (GMP) installed in your machine, you can also use
67 | * \code
68 | * make gmp
69 | * \endcode
70 | * to enable exact computation of automorphism group sizes.
71 | *
72 | * When linking the bliss library \c libbliss.a in other programs,
73 | * remember to include the standard c++ library
74 | * (and the GMP library if you compiled bliss to include it).
75 | * For instance,
76 | * \code gcc -o test test.c -lstdc++ -lgmp -lbliss\endcode
77 | *
78 | * \section cppapi_sec The C++ language API
79 | *
80 | * The C++ language API is the main API to bliss;
81 | * all other APIs are just more or less complete variants of it.
82 | * The C++ API consists basically of the public methods in
83 | * the classes bliss::AbstractGraph, bliss::Graph, and bliss::Digraph.
84 | * For an example of its use,
85 | * see the \ref executable "source of the bliss executable".
86 | *
87 | *
88 | * \section capi_sec The C language API
89 | *
90 | * The C language API is given in the file bliss_C.h.
91 | * It is currently more restricted than the C++ API so
92 | * consider using the C++ API whenever possible.
93 | */
94 |
95 |
96 | #endif
97 |
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/heap.cc:
--------------------------------------------------------------------------------
1 | #include <cstdio>
2 | #include <cstdlib>
3 | #include <cassert>
4 | #include "defs.hh"
5 | #include "heap.hh"
6 |
7 | namespace bliss {
8 |
9 | Heap::~Heap()
10 | {
11 | if(array)
12 | {
13 | free(array);
14 | array = 0;
15 | n = 0;
16 | N = 0;
17 | }
18 | }
19 |
20 | void Heap::upheap(unsigned int index)
21 | {
22 | BLISS_ASSERT(n >= 1);
23 | BLISS_ASSERT(index >= 1 && index <= n);
24 | const unsigned int v = array[index];
25 | array[0] = 0;
26 | while(array[index/2] > v)
27 | {
28 | array[index] = array[index/2];
29 | index = index/2;
30 | }
31 | array[index] = v;
32 | }
33 |
34 | void Heap::downheap(unsigned int index)
35 | {
36 | const unsigned int v = array[index];
37 | while(index <= n/2)
38 | {
39 | unsigned int new_index = index + index;
40 | if((new_index < n) && (array[new_index] > array[new_index+1])){
41 | new_index++;}
42 | if(v <= array[new_index]){
43 | break;}
44 | array[index] = array[new_index];
45 | index = new_index;
46 | }
47 | array[index] = v;
48 | }
49 |
50 | void Heap::init(const unsigned int size)
51 | {
52 | BLISS_ASSERT(size > 0);
53 | if(size > N)
54 | {
55 | if(array)
56 | free(array);
57 | array = (unsigned int*)malloc((size + 1) * sizeof(unsigned int));
58 | N = size;
59 | }
60 | n = 0;
61 | }
62 |
63 | void Heap::insert(const unsigned int v)
64 | {
65 | BLISS_ASSERT(n < N);
66 | array[++n] = v;
67 | upheap(n);
68 | }
69 |
70 | unsigned int Heap::remove()
71 | {
72 | BLISS_ASSERT(n >= 1 && n <= N);
73 | const unsigned int v = array[1];
74 | array[1] = array[n--];
75 | downheap(1);
76 | return v;
77 | }
78 |
79 | } // namespace bliss
80 |
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/heap.hh:
--------------------------------------------------------------------------------
1 | #ifndef BLISS_HEAP_HH
2 | #define BLISS_HEAP_HH
3 |
4 | namespace bliss {
5 |
6 | /**
7 | * \brief A capacity bounded heap data structure.
8 | */
9 |
10 | class Heap
11 | {
12 | unsigned int N;
13 | unsigned int n;
14 | unsigned int *array;
15 | void upheap(unsigned int k);
16 | void downheap(unsigned int k);
17 | public:
18 | /**
19 | * Create a new heap.
20 | * init() must be called after this.
21 | */
22 | Heap() {array = 0; n = 0; N = 0; }
23 | ~Heap();
24 |
25 | /**
26 | * Initialize the heap to have the capacity to hold \e size elements.
27 | */
28 | void init(const unsigned int size);
29 |
30 | /**
31 | * Is the heap empty?
32 | * Time complexity is O(1).
33 | */
34 | bool is_empty() const {return(n==0); }
35 |
36 | /**
37 | * Remove all the elements in the heap.
38 | * Time complexity is O(1).
39 | */
40 | void clear() {n = 0;}
41 |
42 | /**
43 | * Insert the element \a e in the heap.
44 | * Time complexity is O(log(N)), where N is the number of elements
45 | * currently in the heap.
46 | */
47 | void insert(const unsigned int e);
48 |
49 | /**
50 | * Remove and return the smallest element in the heap.
51 | * Time complexity is O(log(N)), where N is the number of elements
52 | * currently in the heap.
53 | */
54 | unsigned int remove();
55 | };
56 |
57 | } // namespace bliss
58 |
59 | #endif
60 |
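A minimal usage sketch for the Heap above (not part of bliss or GraKeL), assuming heap.cc is compiled into the same program. init() must reserve capacity before any insert(), and remove() returns the smallest element.

#include <cstdio>
#include "heap.hh"

int main() {
  bliss::Heap h;
  h.init(8);                         // capacity for 8 elements
  h.insert(5);
  h.insert(1);
  h.insert(3);
  while (!h.is_empty())
    std::printf("%u ", h.remove());  // prints: 1 3 5
  std::printf("\n");
  return 0;
}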
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/kqueue.hh:
--------------------------------------------------------------------------------
1 | #ifndef BLISS_KQUEUE_HH
2 | #define BLISS_KQUEUE_HH
3 |
4 | /*
5 | * Copyright (c) Tommi Junttila
6 | * Released under the GNU General Public License version 2.
7 | */
8 |
9 | #include "defs.hh"
10 |
11 | namespace bliss {
12 |
13 | /**
14 | * \brief A very simple implementation of queues with fixed capacity.
15 | */
16 |
17 | template <class Type>
18 | class KQueue
19 | {
20 | public:
21 | /**
22 | * Create a new queue with capacity zero.
23 | * The function init() should be called next.
24 | */
25 | KQueue();
26 |
27 | ~KQueue();
28 |
29 | /**
30 | * Initialize the queue to have the capacity to hold at most \a N elements.
31 | */
32 | void init(const unsigned int N);
33 |
34 | /** Is the queue empty? */
35 | bool is_empty() const;
36 |
37 | /** Return the number of elements in the queue. */
38 | unsigned int size() const;
39 |
40 | /** Remove all the elements in the queue. */
41 | void clear();
42 |
43 | /** Return (but don't remove) the first element in the queue. */
44 | Type front() const;
45 |
46 | /** Remove and return the first element of the queue. */
47 | Type pop_front();
48 |
49 | /** Push the element \a e in the front of the queue. */
50 | void push_front(Type e);
51 |
52 | /** Remove and return the last element of the queue. */
53 | Type pop_back();
54 |
55 | /** Push the element \a e in the back of the queue. */
56 | void push_back(Type e);
57 | private:
58 | Type *entries, *end;
59 | Type *head, *tail;
60 | };
61 |
62 | template <class Type>
63 | KQueue<Type>::KQueue()
64 | {
65 | entries = 0;
66 | end = 0;
67 | head = 0;
68 | tail = 0;
69 | }
70 |
71 | template <class Type>
72 | KQueue<Type>::~KQueue()
73 | {
74 | if(entries)
75 | free(entries);
76 | }
77 |
78 | template <class Type>
79 | void KQueue<Type>::init(const unsigned int k)
80 | {
81 | assert(k > 0);
82 | if(entries)
83 | free(entries);
84 | entries = (Type*)malloc((k + 1) * sizeof(Type));
85 | end = entries + k + 1;
86 | head = entries;
87 | tail = head;
88 | }
89 |
90 | template <class Type>
91 | void KQueue<Type>::clear()
92 | {
93 | head = entries;
94 | tail = head;
95 | }
96 |
97 | template <class Type>
98 | bool KQueue<Type>::is_empty() const
99 | {
100 | return(head == tail);
101 | }
102 |
103 | template <class Type>
104 | unsigned int KQueue<Type>::size() const
105 | {
106 | if(tail >= head)
107 | return(tail - head);
108 | return((end - head) + (tail - entries));
109 | }
110 |
111 | template <class Type>
112 | Type KQueue<Type>::front() const
113 | {
114 | BLISS_ASSERT(head != tail);
115 | return *head;
116 | }
117 |
118 | template <class Type>
119 | Type KQueue<Type>::pop_front()
120 | {
121 | BLISS_ASSERT(head != tail);
122 | Type *old_head = head;
123 | head++;
124 | if(head == end)
125 | head = entries;
126 | return *old_head;
127 | }
128 |
129 | template <class Type>
130 | void KQueue<Type>::push_front(Type e)
131 | {
132 | if(head == entries)
133 | head = end - 1;
134 | else
135 | head--;
136 | BLISS_ASSERT(head != tail);
137 | *head = e;
138 | }
139 |
140 | template <class Type>
141 | void KQueue<Type>::push_back(Type e)
142 | {
143 | *tail = e;
144 | tail++;
145 | if(tail == end)
146 | tail = entries;
147 | BLISS_ASSERT(head != tail);
148 | }
149 |
150 | } // namespace bliss
151 |
152 | #endif
153 |
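A minimal usage sketch for KQueue (not part of bliss or GraKeL). The template is header-only, so including kqueue.hh is enough; <cstdlib> is included first because the template body calls malloc/free.

#include <cstdlib>   // malloc/free are used inside kqueue.hh
#include <cstdio>
#include "kqueue.hh"

int main() {
  bliss::KQueue<unsigned int> q;
  q.init(4);                              // room for at most 4 elements
  q.push_back(1);
  q.push_back(2);
  q.push_front(0);                        // wraps around in the circular buffer
  std::printf("size = %u\n", q.size());   // size = 3
  while (!q.is_empty())
    std::printf("%u ", q.pop_front());    // prints: 0 1 2
  std::printf("\n");
  return 0;
}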
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/kstack.hh:
--------------------------------------------------------------------------------
1 | #ifndef BLISS_KSTACK_H
2 | #define BLISS_KSTACK_H
3 |
4 | /*
5 | * Copyright (c) Tommi Junttila
6 | * Released under the GNU General Public License version 2.
7 | */
8 |
9 | #include <cassert>
10 | #include "defs.hh"
11 |
12 | namespace bliss {
13 |
14 | /**
15 | * \brief A very simple implementation of a stack with fixed capacity.
16 | */
17 | template <class Type>
18 | class KStack {
19 | public:
20 | /**
21 | * Create a new stack with zero capacity.
22 | * The function init() should be called next.
23 | */
24 | KStack();
25 |
26 | /**
27 | * Create a new stack with the capacity to hold at most \a N elements.
28 | */
29 | KStack(int N);
30 |
31 | ~KStack();
32 |
33 | /**
34 | * Initialize the stack to have the capacity to hold at most \a N elements.
35 | */
36 | void init(int N);
37 |
38 | /**
39 | * Is the stack empty?
40 | */
41 | bool is_empty() const {return(cursor == entries); }
42 |
43 | /**
44 | * Return (but don't remove) the top element of the stack.
45 | */
46 | Type top() const {BLISS_ASSERT(cursor > entries); return *cursor; }
47 |
48 | /**
49 | * Pop (remove) the top element of the stack.
50 | */
51 | Type pop()
52 | {
53 | BLISS_ASSERT(cursor > entries);
54 | return *cursor--;
55 | }
56 |
57 | /**
58 | * Push the element \a e in the stack.
59 | */
60 | void push(Type e)
61 | {
62 | BLISS_ASSERT(cursor < entries + kapacity);
63 | *(++cursor) = e;
64 | }
65 |
66 | /** Remove all the elements in the stack. */
67 | void clean() {cursor = entries; }
68 |
69 | /**
70 | * Get the number of elements in the stack.
71 | */
72 | unsigned int size() const {return(cursor - entries); }
73 |
74 | /**
75 | * Return the i:th element in the stack, where \a i is in the range
76 | * 0,...,this.size()-1; the 0:th element is the bottom element
77 | * in the stack.
78 | */
79 | Type element_at(unsigned int i)
80 | {
81 | assert(i < size());
82 | return entries[i+1];
83 | }
84 |
85 | /** Return the capacity (NOT the number of elements) of the stack. */
86 | int capacity() {return kapacity; }
87 | private:
88 | int kapacity;
89 | Type *entries;
90 | Type *cursor;
91 | };
92 |
93 | template <class Type>
94 | KStack<Type>::KStack()
95 | {
96 | kapacity = 0;
97 | entries = 0;
98 | cursor = 0;
99 | }
100 |
101 | template <class Type>
102 | KStack<Type>::KStack(int k)
103 | {
104 | assert(k > 0);
105 | kapacity = k;
106 | entries = (Type*)malloc((k+1) * sizeof(Type));
107 | cursor = entries;
108 | }
109 |
110 | template <class Type>
111 | void KStack<Type>::init(int k)
112 | {
113 | assert(k > 0);
114 | if(entries)
115 | free(entries);
116 | kapacity = k;
117 | entries = (Type*)malloc((k+1) * sizeof(Type));
118 | cursor = entries;
119 | }
120 |
121 | template <class Type>
122 | KStack<Type>::~KStack()
123 | {
124 | free(entries);
125 | }
126 |
127 | } // namespace bliss
128 |
129 | #endif
130 |
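A minimal usage sketch for KStack (not part of bliss or GraKeL); as with KQueue, <cstdlib> is included first because the template body calls malloc/free.

#include <cstdlib>   // malloc/free are used inside kstack.hh
#include <cstdio>
#include "kstack.hh"

int main() {
  bliss::KStack<int> s;
  s.init(4);                      // capacity for 4 elements
  s.push(10);
  s.push(20);
  std::printf("top = %d, size = %u\n", s.top(), s.size());  // top = 20, size = 2
  while (!s.is_empty())
    std::printf("%d ", s.pop());  // prints: 20 10
  std::printf("\n");
  return 0;
}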
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/orbit.cc:
--------------------------------------------------------------------------------
1 | #include <cassert>
2 | #include <cstdlib>
3 | #include "defs.hh"
4 | #include "orbit.hh"
5 |
6 | /*
7 | * Copyright (c) Tommi Junttila
8 | * Released under the GNU General Public License version 2.
9 | */
10 |
11 | namespace bliss {
12 |
13 | Orbit::Orbit()
14 | {
15 | orbits = 0;
16 | in_orbit = 0;
17 | nof_elements = 0;
18 | }
19 |
20 |
21 | Orbit::~Orbit()
22 | {
23 | if(orbits)
24 | {
25 | free(orbits);
26 | orbits = 0;
27 | }
28 | if(in_orbit)
29 | {
30 | free(in_orbit);
31 | in_orbit = 0;
32 | }
33 | nof_elements = 0;
34 | }
35 |
36 |
37 | void Orbit::init(const unsigned int n)
38 | {
39 | assert(n > 0);
40 | if(orbits) free(orbits);
41 | orbits = (OrbitEntry*)malloc(n * sizeof(OrbitEntry));
42 | if(in_orbit) free(in_orbit);
43 | in_orbit = (OrbitEntry**)malloc(n * sizeof(OrbitEntry*));
44 | nof_elements = n;
45 |
46 | reset();
47 | }
48 |
49 |
50 | void Orbit::reset()
51 | {
52 | assert(orbits);
53 | assert(in_orbit);
54 |
55 | for(unsigned int i = 0; i < nof_elements; i++)
56 | {
57 | orbits[i].element = i;
58 | orbits[i].next = 0;
59 | orbits[i].size = 1;
60 | in_orbit[i] = &orbits[i];
61 | }
62 | _nof_orbits = nof_elements;
63 | }
64 |
65 |
66 | void Orbit::merge_orbits(OrbitEntry *orbit1, OrbitEntry *orbit2)
67 | {
68 | BLISS_ASSERT((orbit1 == orbit2) == (orbit1->element == orbit2->element));
69 | BLISS_ASSERT(orbit1->element < nof_elements);
70 | BLISS_ASSERT(orbit2->element < nof_elements);
71 |
72 | if(orbit1 != orbit2)
73 | {
74 | _nof_orbits--;
75 | /* Only update the elements in the smaller orbit */
76 | if(orbit1->size > orbit2->size)
77 | {
78 | OrbitEntry * const temp = orbit2;
79 | orbit2 = orbit1;
80 | orbit1 = temp;
81 | }
82 | /* Link the elements of orbit1 to the almost beginning of orbit2 */
83 | OrbitEntry *e = orbit1;
84 | while(e->next)
85 | {
86 | in_orbit[e->element] = orbit2;
87 | e = e->next;
88 | }
89 | in_orbit[e->element] = orbit2;
90 | e->next = orbit2->next;
91 | orbit2->next = orbit1;
92 | /* Keep the minimal orbit representative in the beginning */
93 | if(orbit1->element < orbit2->element)
94 | {
95 | const unsigned int temp = orbit1->element;
96 | orbit1->element = orbit2->element;
97 | orbit2->element = temp;
98 | }
99 | orbit2->size += orbit1->size;
100 | }
101 | }
102 |
103 |
104 | void Orbit::merge_orbits(unsigned int e1, unsigned int e2)
105 | {
106 | BLISS_ASSERT(e1 < nof_elements);
107 | BLISS_ASSERT(e2 < nof_elements);
108 |
109 | merge_orbits(in_orbit[e1], in_orbit[e2]);
110 | }
111 |
112 |
113 | bool Orbit::is_minimal_representative(unsigned int element) const
114 | {
115 | return(get_minimal_representative(element) == element);
116 | }
117 |
118 |
119 | unsigned int Orbit::get_minimal_representative(unsigned int element) const
120 | {
121 | BLISS_ASSERT(element < nof_elements);
122 |
123 | OrbitEntry * const orbit = in_orbit[element];
124 |
125 | BLISS_ASSERT(orbit->element <= element);
126 | return(orbit->element);
127 | }
128 |
129 |
130 | unsigned int Orbit::orbit_size(unsigned int element) const
131 | {
132 | BLISS_ASSERT(element < nof_elements);
133 |
134 | return(in_orbit[element]->size);
135 | }
136 |
137 |
138 | } // namespace bliss
139 |
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/orbit.hh:
--------------------------------------------------------------------------------
1 | #ifndef BLISS_ORBIT_HH
2 | #define BLISS_ORBIT_HH
3 |
4 | /*
5 | * Copyright (c) Tommi Junttila
6 | * Released under the GNU General Public License version 2.
7 | */
8 |
9 | namespace bliss {
10 |
11 | /**
12 | * \brief A class for representing orbit information.
13 | *
14 | * Given a set {0,...,N-1} of N elements, represent equivalence
15 | * classes (that is, unordered partitions) of the elements.
16 | * Supports only equivalence class merging, not splitting.
17 | * Merging two classes requires time O(k), where k is the number of
18 | * the elements in the smaller of the merged classes.
19 | * Getting the smallest representative in a class (and thus testing
20 | * whether two elements belong to the same class) is a constant time operation.
21 | */
22 | class Orbit
23 | {
24 | class OrbitEntry
25 | {
26 | public:
27 | unsigned int element;
28 | OrbitEntry *next;
29 | unsigned int size;
30 | };
31 |
32 | OrbitEntry *orbits;
33 | OrbitEntry **in_orbit;
34 | unsigned int nof_elements;
35 | unsigned int _nof_orbits;
36 | void merge_orbits(OrbitEntry *o1, OrbitEntry *o2);
37 |
38 | public:
39 | /**
40 | * Create a new orbit information object.
41 | * The init() function must be called next to actually initialize
42 | * the object.
43 | */
44 | Orbit();
45 | ~Orbit();
46 |
47 | /**
48 | * Initialize the orbit information to consider sets of \a N elements.
49 | * It is required that \a N > 0.
50 | * The orbit information is reset so that each element forms
51 | * an orbit of its own.
52 | * Time complexity is O(N).
53 | * \sa reset()
54 | */
55 | void init(const unsigned int N);
56 |
57 | /**
58 | * Reset the orbits so that each element forms an orbit of its own.
59 | * Time complexity is O(N).
60 | */
61 | void reset();
62 |
63 | /**
64 | * Merge the orbits of the elements \a e1 and \a e2.
65 | * Time complexity is O(k), where k is the number of elements in
66 | * the smaller of the merged orbits.
67 | */
68 | void merge_orbits(unsigned int e1, unsigned int e2);
69 |
70 | /**
71 | * Is the element \a e the smallest element in its orbit?
72 | * Time complexity is O(1).
73 | */
74 | bool is_minimal_representative(unsigned int e) const;
75 |
76 | /**
77 | * Get the smallest element in the orbit of the element \a e.
78 | * Time complexity is O(1).
79 | */
80 | unsigned int get_minimal_representative(unsigned int e) const;
81 |
82 | /**
83 | * Get the number of elements in the orbit of the element \a e.
84 | * Time complexity is O(1).
85 | */
86 | unsigned int orbit_size(unsigned int e) const;
87 |
88 | /**
89 | * Get the number of orbits.
90 | * Time complexity is O(1).
91 | */
92 | unsigned int nof_orbits() const {return _nof_orbits; }
93 | };
94 |
95 | } // namespace bliss
96 |
97 | #endif
98 |
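A minimal usage sketch for Orbit (not part of bliss or GraKeL), assuming orbit.cc is compiled into the same program: merge a few elements and query representatives and orbit sizes.

#include <cstdio>
#include "orbit.hh"

int main() {
  bliss::Orbit o;
  o.init(5);                     // elements 0..4, each initially in its own orbit
  o.merge_orbits(0, 3);
  o.merge_orbits(3, 4);          // orbits are now {0,3,4}, {1}, {2}
  std::printf("nof_orbits = %u\n", o.nof_orbits());                   // 3
  std::printf("rep(4)     = %u\n", o.get_minimal_representative(4));  // 0
  std::printf("|orbit(4)| = %u\n", o.orbit_size(4));                  // 3
  return 0;
}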
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/uintseqhash.cc:
--------------------------------------------------------------------------------
1 | #include "uintseqhash.hh"
2 |
3 | namespace bliss {
4 |
5 | /*
6 | * Random bits generated by
7 | * http://www.fourmilab.ch/hotbits/
8 | */
9 | static unsigned int rtab[256] = {
10 | 0xAEAA35B8, 0x65632E16, 0x155EDBA9, 0x01349B39,
11 | 0x8EB8BD97, 0x8E4C5367, 0x8EA78B35, 0x2B1B4072,
12 | 0xC1163893, 0x269A8642, 0xC79D7F6D, 0x6A32DEA0,
13 | 0xD4D2DA56, 0xD96D4F47, 0x47B5F48A, 0x2587C6BF,
14 | 0x642B71D8, 0x5DBBAF58, 0x5C178169, 0xA16D9279,
15 | 0x75CDA063, 0x291BC48B, 0x01AC2F47, 0x5416DF7C,
16 | 0x45307514, 0xB3E1317B, 0xE1C7A8DE, 0x3ACDAC96,
17 | 0x11B96831, 0x32DE22DD, 0x6A1DA93B, 0x58B62381,
18 | 0x283810E2, 0xBC30E6A6, 0x8EE51705, 0xB06E8DFB,
19 | 0x729AB12A, 0xA9634922, 0x1A6E8525, 0x49DD4E19,
20 | 0xE5DB3D44, 0x8C5B3A02, 0xEBDE2864, 0xA9146D9F,
21 | 0x736D2CB4, 0xF5229F42, 0x712BA846, 0x20631593,
22 | 0x89C02603, 0xD5A5BF6A, 0x823F4E18, 0x5BE5DEFF,
23 | 0x1C4EBBFA, 0x5FAB8490, 0x6E559B0C, 0x1FE528D6,
24 | 0xB3198066, 0x4A965EB5, 0xFE8BB3D5, 0x4D2F6234,
25 | 0x5F125AA4, 0xBCC640FA, 0x4F8BC191, 0xA447E537,
26 | 0xAC474D3C, 0x703BFA2C, 0x617DC0E7, 0xF26299D7,
27 | 0xC90FD835, 0x33B71C7B, 0x6D83E138, 0xCBB1BB14,
28 | 0x029CF5FF, 0x7CBD093D, 0x4C9825EF, 0x845C4D6D,
29 | 0x124349A5, 0x53942D21, 0x800E60DA, 0x2BA6EB7F,
30 | 0xCEBF30D3, 0xEB18D449, 0xE281F724, 0x58B1CB09,
31 | 0xD469A13D, 0x9C7495C3, 0xE53A7810, 0xA866C08E,
32 | 0x832A038B, 0xDDDCA484, 0xD5FE0DDE, 0x0756002B,
33 | 0x2FF51342, 0x60FEC9C8, 0x061A53E3, 0x47B1884E,
34 | 0xDC17E461, 0xA17A6A37, 0x3158E7E2, 0xA40D873B,
35 | 0x45AE2140, 0xC8F36149, 0x63A4EE2D, 0xD7107447,
36 | 0x6F90994F, 0x5006770F, 0xC1F3CA9A, 0x91B317B2,
37 | 0xF61B4406, 0xA8C9EE8F, 0xC6939B75, 0xB28BBC3B,
38 | 0x36BF4AEF, 0x3B12118D, 0x4D536ECF, 0x9CF4B46B,
39 | 0xE8AB1E03, 0x8225A360, 0x7AE4A130, 0xC4EE8B50,
40 | 0x50651797, 0x5BB4C59F, 0xD120EE47, 0x24F3A386,
41 | 0xBE579B45, 0x3A378EFC, 0xC5AB007B, 0x3668942B,
42 | 0x2DBDCC3A, 0x6F37F64C, 0xC24F862A, 0xB6F97FCF,
43 | 0x9E4FA23D, 0x551AE769, 0x46A8A5A6, 0xDC1BCFDD,
44 | 0x8F684CF9, 0x501D811B, 0x84279F80, 0x2614E0AC,
45 | 0x86445276, 0xAEA0CE71, 0x0812250F, 0xB586D18A,
46 | 0xC68D721B, 0x44514E1D, 0x37CDB99A, 0x24731F89,
47 | 0xFA72E589, 0x81E6EBA2, 0x15452965, 0x55523D9D,
48 | 0x2DC47E14, 0x2E7FA107, 0xA7790F23, 0x40EBFDBB,
49 | 0x77E7906B, 0x6C1DB960, 0x1A8B9898, 0x65FA0D90,
50 | 0xED28B4D8, 0x34C3ED75, 0x768FD2EC, 0xFAB60BCB,
51 | 0x962C75F4, 0x304F0498, 0x0A41A36B, 0xF7DE2A4A,
52 | 0xF4770FE2, 0x73C93BBB, 0xD21C82C5, 0x6C387447,
53 | 0x8CDB4CB9, 0x2CC243E8, 0x41859E3D, 0xB667B9CB,
54 | 0x89681E8A, 0x61A0526C, 0x883EDDDC, 0x539DE9A4,
55 | 0xC29E1DEC, 0x97C71EC5, 0x4A560A66, 0xBD7ECACF,
56 | 0x576AE998, 0x31CE5616, 0x97172A6C, 0x83D047C4,
57 | 0x274EA9A8, 0xEB31A9DA, 0x327209B5, 0x14D1F2CB,
58 | 0x00FE1D96, 0x817DBE08, 0xD3E55AED, 0xF2D30AFC,
59 | 0xFB072660, 0x866687D6, 0x92552EB9, 0xEA8219CD,
60 | 0xF7927269, 0xF1948483, 0x694C1DF5, 0xB7D8B7BF,
61 | 0xFFBC5D2F, 0x2E88B849, 0x883FD32B, 0xA0331192,
62 | 0x8CB244DF, 0x41FAF895, 0x16902220, 0x97FB512A,
63 | 0x2BEA3CC4, 0xAF9CAE61, 0x41ACD0D5, 0xFD2F28FF,
64 | 0xE780ADFA, 0xB3A3A76E, 0x7112AD87, 0x7C3D6058,
65 | 0x69E64FFF, 0xE5F8617C, 0x8580727C, 0x41F54F04,
66 | 0xD72BE498, 0x653D1795, 0x1275A327, 0x14B499D4,
67 | 0x4E34D553, 0x4687AA39, 0x68B64292, 0x5C18ABC3,
68 | 0x41EABFCC, 0x92A85616, 0x82684CF8, 0x5B9F8A4E,
69 | 0x35382FFE, 0xFB936318, 0x52C08E15, 0x80918B2E,
70 | 0x199EDEE0, 0xA9470163, 0xEC44ACDD, 0x612D6735,
71 | 0x8F88EA7D, 0x759F5EA4, 0xE5CC7240, 0x68CFEB8B,
72 | 0x04725601, 0x0C22C23E, 0x5BC97174, 0x89965841,
73 | 0x5D939479, 0x690F338A, 0x3C2D4380, 0xDAE97F2B
74 | };
75 |
76 |
77 | void UintSeqHash::update(unsigned int i)
78 | {
79 | i++;
80 | while(i > 0)
81 | {
82 | h ^= rtab[i & 0xff];
83 | #if 1
84 | const unsigned int b = (h & 0x80000000) >> 31;
85 | i = i >> 8;
86 | h = (h << 1) | b;
87 | #else
88 | const unsigned int b = h & 0x80000000;
89 | h = h << 1;
90 | if(b != 0)
91 | h++;
92 | i = i >> 8;
93 | #endif
94 | }
95 | }
96 |
97 |
98 | } // namespace bliss
99 |
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/uintseqhash.hh:
--------------------------------------------------------------------------------
1 | #ifndef BLISS_UINTSEQHASH_HH
2 | #define BLISS_UINTSEQHASH_HH
3 |
4 | #include <cstdio>
5 |
6 | namespace bliss {
7 |
8 | /**
9 | * \brief A hash for sequences of unsigned ints.
10 | */
11 | class UintSeqHash
12 | {
13 | protected:
14 | unsigned int h;
15 | public:
16 | UintSeqHash() {h = 0; }
17 | UintSeqHash(const UintSeqHash &other) {h = other.h; }
18 | UintSeqHash& operator=(const UintSeqHash &other) {h = other.h; return *this; }
19 |
20 | /** Reset the hash value. */
21 | void reset() {h = 0; }
22 |
23 | /** Add the unsigned int \a n to the sequence. */
24 | void update(unsigned int n);
25 |
26 | /** Get the hash value of the sequence seen so far. */
27 | unsigned int get_value() const {return h; }
28 |
29 | /** Compare the hash values of this and \a other.
30 | * Return -1/0/1 if the value of this is smaller/equal/greater than
31 | * that of \a other. */
32 | int cmp(const UintSeqHash &other) const {
33 | return (h < other.h)?-1:((h == other.h)?0:1);
34 | }
35 | /** An abbreviation for cmp(other) < 0 */
36 | bool is_lt(const UintSeqHash &other) const {return(cmp(other) < 0); }
37 | /** An abbreviation for cmp(other) <= 0 */
38 | bool is_le(const UintSeqHash &other) const {return(cmp(other) <= 0); }
39 | /** An abbreviation for cmp(other) == 0 */
40 | bool is_equal(const UintSeqHash &other) const {return(cmp(other) == 0); }
41 | };
42 |
43 |
44 | } // namespace bliss
45 |
46 | #endif
47 |
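A minimal usage sketch for UintSeqHash (not part of bliss or GraKeL), assuming uintseqhash.cc is compiled in: two hashes fed the same sequence compare equal.

#include <cstdio>
#include "uintseqhash.hh"

int main() {
  bliss::UintSeqHash h1, h2;
  const unsigned int seq[] = {3, 1, 4, 1, 5};
  for (unsigned int i = 0; i < 5; i++) {
    h1.update(seq[i]);           // feed the same sequence into both hashes
    h2.update(seq[i]);
  }
  std::printf("value = %u, equal = %d\n", h1.get_value(), h1.is_equal(h2));
  return 0;
}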
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/utils.cc:
--------------------------------------------------------------------------------
1 | #include <cassert>
2 | #include "utils.hh"
3 |
4 | namespace bliss {
5 |
6 | void print_permutation(FILE *fp,
7 | const unsigned int N,
8 | const unsigned int *perm,
9 | const unsigned int offset)
10 | {
11 | assert(N > 0);
12 | assert(perm);
13 | for(unsigned int i = 0; i < N; i++) {
14 | unsigned int j = perm[i];
15 | if(j == i)
16 | continue;
17 | bool is_first = true;
18 | while(j != i) {
19 | if(j < i) {
20 | is_first = false;
21 | break;
22 | }
23 | j = perm[j];
24 | }
25 | if(!is_first)
26 | continue;
27 | fprintf(fp, "(%u,", i+offset);
28 | j = perm[i];
29 | while(j != i) {
30 | fprintf(fp, "%u", j+offset);
31 | j = perm[j];
32 | if(j != i)
33 | fprintf(fp, ",");
34 | }
35 | fprintf(fp, ")");
36 | }
37 | }
38 |
39 | } // namespace bliss
40 |
--------------------------------------------------------------------------------
/grakel/kernels/_isomorphism/bliss-0.50/utils.hh:
--------------------------------------------------------------------------------
1 | #ifndef BLISS_UTILS_HH
2 | #define BLISS_UTILS_HH
3 |
4 | /**
5 | * \file
6 | * \brief Some small utilities.
7 | *
8 | */
9 |
10 | /*
11 | * Copyright (c) Tommi Junttila
12 | * Released under the GNU General Public License version 2.
13 | */
14 |
15 | #include <cstdio>
16 |
17 | namespace bliss {
18 |
19 | /**
20 | * Print the permutation \a perm of {0,...,N-1} in the cycle format
21 | * in the file stream \a fp.
22 | * The amount \a offset is added to each element before printing,
23 | * e.g. the permutation (2 4) is printed as (3 5) when \a offset is 1.
24 | */
25 | void print_permutation(FILE *fp,
26 | const unsigned int N,
27 | const unsigned int *perm,
28 | const unsigned int offset = 0);
29 |
30 | } // namespace bliss
31 |
32 | #endif
33 |
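A minimal usage sketch for print_permutation (not part of bliss or GraKeL), assuming utils.cc is compiled in: the permutation mapping 0->1, 1->2, 2->0 and fixing 3 is printed in cycle format, shifted by the offset.

#include <cstdio>
#include "utils.hh"

int main() {
  const unsigned int perm[4] = {1, 2, 0, 3};     // the cycle (0 1 2); 3 is fixed
  bliss::print_permutation(stdout, 4, perm, 1);  // with offset 1 this prints (1,2,3)
  std::printf("\n");
  return 0;
}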
--------------------------------------------------------------------------------
/grakel/tests/__main__.py:
--------------------------------------------------------------------------------
1 | """The main function for the tests sub-module."""
2 | # Author: Ioannis Siglidis
3 | # License: BSD 3 clause
4 |
5 | if __name__ == '__main__':
6 | import os
7 | import sys
8 | import warnings
9 |
10 | from subprocess import check_call
11 |
12 | warnings.filterwarnings('ignore', category=UserWarning)
13 |
14 | python_executable_address = str(sys.executable)
15 | test_dir = str(os.path.dirname(os.path.realpath(__file__)))
16 | project_dir = str(os.path.realpath(os.path.join(__file__, "../../../")))
17 |
18 | print('Installing the latest "GraKeL"..')
19 | print('--------------------------------')
20 |
21 | cwd = os.getcwd()
22 | os.chdir(project_dir)
23 | try:
24 | check_call([python_executable_address, project_dir + "/setup.py",
25 | "install"])
26 | finally:
27 | os.chdir(cwd)
28 |
29 | print('................................................................\n')
30 |
31 | print('Testing Graph..')
32 | print('---------------')
33 | check_call([python_executable_address, test_dir + "/test_graph.py",
34 | "--ignore_warnings", "--verbose"])
35 | print('................................................................\n')
36 |
37 | print('Testing Kernels..')
38 | print('-----------------')
39 | check_call([python_executable_address, test_dir + "/test_kernels.py",
40 | "--verbose", "--time", "--ignore_warnings", "--all"])
41 | print('................................................................\n')
42 |
43 | print('Testing Graph Kernels..')
44 | print('-----------------------')
45 | check_call([python_executable_address,
46 | test_dir + "/test_graph_kernels.py",
47 | "--verbose", "--time", "--ignore_warnings", "--all"])
48 | print('................................................................')
49 |
--------------------------------------------------------------------------------
/grakel/tests/data/Cuneiform/Cuneiform_graph_labels.txt:
--------------------------------------------------------------------------------
1 | 0
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 | 10
12 | 11
13 | 12
14 | 13
15 | 14
16 | 15
17 | 16
18 | 17
19 | 18
20 | 19
21 | 20
22 | 21
23 | 22
24 | 23
25 | 24
26 | 25
27 | 26
28 | 0
29 | 1
30 | 2
31 | 3
32 | 4
33 | 5
34 | 6
35 | 7
36 | 8
37 | 9
38 | 10
39 | 11
40 | 12
41 | 13
42 | 14
43 | 15
44 | 16
45 | 17
46 | 18
47 | 19
48 | 20
49 | 21
50 | 22
51 | 23
52 | 24
53 | 25
54 | 26
55 | 27
56 | 28
57 | 29
58 | 0
59 | 1
60 | 2
61 | 3
62 | 4
63 | 5
64 | 6
65 | 7
66 | 8
67 | 9
68 | 10
69 | 11
70 | 12
71 | 13
72 | 14
73 | 15
74 | 16
75 | 17
76 | 18
77 | 19
78 | 20
79 | 21
80 | 22
81 | 23
82 | 24
83 | 25
84 | 26
85 | 27
86 | 28
87 | 29
88 | 0
89 | 1
90 | 2
91 | 3
92 | 4
93 | 5
94 | 6
95 | 7
96 | 8
97 | 9
98 | 10
99 | 11
100 | 12
101 | 13
102 | 14
103 | 15
104 | 16
105 | 17
106 | 18
107 | 19
108 | 20
109 | 21
110 | 22
111 | 23
112 | 24
113 | 25
114 | 26
115 | 27
116 | 28
117 | 29
118 | 0
119 | 1
120 | 2
121 | 3
122 | 4
123 | 5
124 | 6
125 | 7
126 | 8
127 | 9
128 | 10
129 | 11
130 | 12
131 | 13
132 | 14
133 | 15
134 | 16
135 | 17
136 | 18
137 | 19
138 | 20
139 | 21
140 | 22
141 | 23
142 | 24
143 | 25
144 | 26
145 | 27
146 | 28
147 | 29
148 | 0
149 | 1
150 | 2
151 | 3
152 | 4
153 | 5
154 | 6
155 | 7
156 | 8
157 | 9
158 | 10
159 | 11
160 | 12
161 | 13
162 | 14
163 | 15
164 | 16
165 | 17
166 | 18
167 | 19
168 | 20
169 | 21
170 | 22
171 | 23
172 | 24
173 | 25
174 | 26
175 | 27
176 | 28
177 | 29
178 | 0
179 | 1
180 | 2
181 | 3
182 | 4
183 | 5
184 | 6
185 | 7
186 | 8
187 | 9
188 | 10
189 | 11
190 | 12
191 | 13
192 | 14
193 | 15
194 | 16
195 | 17
196 | 18
197 | 19
198 | 20
199 | 21
200 | 22
201 | 23
202 | 24
203 | 25
204 | 26
205 | 27
206 | 28
207 | 29
208 | 0
209 | 1
210 | 2
211 | 3
212 | 4
213 | 5
214 | 6
215 | 7
216 | 8
217 | 9
218 | 10
219 | 11
220 | 12
221 | 13
222 | 14
223 | 15
224 | 16
225 | 17
226 | 18
227 | 19
228 | 20
229 | 21
230 | 22
231 | 23
232 | 24
233 | 25
234 | 26
235 | 27
236 | 28
237 | 29
238 | 0
239 | 1
240 | 2
241 | 3
242 | 4
243 | 5
244 | 6
245 | 7
246 | 8
247 | 9
248 | 10
249 | 11
250 | 12
251 | 13
252 | 14
253 | 15
254 | 16
255 | 17
256 | 18
257 | 19
258 | 20
259 | 21
260 | 22
261 | 23
262 | 24
263 | 25
264 | 26
265 | 27
266 | 28
267 | 29
268 |
--------------------------------------------------------------------------------
/grakel/tests/data/Cuneiform/README.txt:
--------------------------------------------------------------------------------
1 | README for dataset Cuneiform
2 |
3 |
4 | === Usage ===
5 |
6 | This folder contains the following comma separated text files
7 | (replace DS by the name of the dataset):
8 |
9 | n = total number of nodes
10 | m = total number of edges
11 | N = number of graphs
12 |
13 | (1) DS_A.txt (m lines)
14 | sparse (block diagonal) adjacency matrix for all graphs,
15 | each line corresponds to (row, col) resp. (node_id, node_id)
16 |
17 | (2) DS_graph_indicator.txt (n lines)
18 | column vector of graph identifiers for all nodes of all graphs,
19 | the value in the i-th line is the graph_id of the node with node_id i
20 |
21 | (3) DS_graph_labels.txt (N lines)
22 | class labels for all graphs in the dataset,
23 | the value in the i-th line is the class label of the graph with graph_id i
24 |
25 | (4) DS_node_labels.txt (n lines)
26 | column vector of node labels,
27 | the value in the i-th line corresponds to the node with node_id i
28 |
29 | There are OPTIONAL files if the respective information is available:
30 |
31 | (5) DS_edge_labels.txt (m lines; same size as DS_A.txt)
32 | labels for the edges in DS_A.txt
33 |
34 | (6) DS_edge_attributes.txt (m lines; same size as DS_A.txt)
35 | attributes for the edges in DS_A.txt
36 |
37 | (7) DS_node_attributes.txt (n lines)
38 | matrix of node attributes,
39 | the comma-separated values in the i-th line form the attribute vector of the node with node_id i
40 |
41 | (8) DS_graph_attributes.txt (N lines)
42 | regression values for all graphs in the dataset,
43 | the value in the i-th line is the attribute of the graph with graph_id i
44 |
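As an illustration only (this snippet is not part of the dataset or of GraKeL), the following C++ sketch reads DS_graph_indicator.txt and DS_A.txt in the format described above and counts, for each graph, its nodes and its entries in DS_A.txt; the file names below assume DS has been replaced by Cuneiform.

#include <cstdio>
#include <map>

int main() {
  std::map<int, int> nodes, edges, graph_of_node;

  // One graph id per line of the indicator file (node ids are 1-based).
  FILE *fi = std::fopen("Cuneiform_graph_indicator.txt", "r");
  int gid, node_id = 1;
  while (fi && std::fscanf(fi, "%d", &gid) == 1) {
    graph_of_node[node_id++] = gid;
    nodes[gid]++;
  }
  if (fi) std::fclose(fi);

  // One "row, col" pair per line of the block-diagonal adjacency matrix.
  FILE *fa = std::fopen("Cuneiform_A.txt", "r");
  int row, col;
  while (fa && std::fscanf(fa, " %d , %d", &row, &col) == 2)
    edges[graph_of_node[row]]++;   // both endpoints belong to the same graph
  if (fa) std::fclose(fa);

  for (std::map<int, int>::const_iterator it = nodes.begin(); it != nodes.end(); ++it)
    std::printf("graph %d: %d nodes, %d entries in DS_A\n",
                it->first, it->second, edges[it->first]);
  return 0;
}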
45 |
46 | === Description ===
47 |
48 | The Cuneiform dataset contains graphs representing 30 different Hittite cuneiform signs.
49 | The data was obtained from nine cuneiform tablets written by scholars of Hittitology in
50 | the course of a study about individualistic characteristics of cuneiform handwriting.
51 | After automated extraction of the individual wedges, the affiliation of the wedges to the
52 | cuneiform signs was determined manually. The graph model is explained in detail in the
53 | referenced publication.
54 |
55 |
56 | === References ===
57 |
58 | Nils M. Kriege, Matthias Fey, Denis Fisseler, Petra Mutzel, Frank Weichert
59 | Recognizing Cuneiform Signs Using Graph Based Methods. 2018. arXiv:1802.05908
60 | https://arxiv.org/abs/1802.05908
61 |
62 |
63 | === Description of Labels ===
64 |
65 | Node labels were converted to integer values using this map:
66 |
67 | Component 0:
68 | 0 depthPoint
69 | 1 tailVertex
70 | 2 leftVertex
71 | 3 rightVertex
72 |
73 | Component 1:
74 | 0 vertical
75 | 1 Winkelhaken
76 | 2 horizontal
77 |
78 |
79 |
80 | Edge labels were converted to integer values using this map:
81 |
82 | Component 0:
83 | 0 wedge
84 | 1 arrangement
85 |
86 |
87 |
88 | Class labels were converted to integer values using this map:
89 |
90 | 0 tu
91 | 1 ta
92 | 2 ti
93 | 3 nu
94 | 4 na
95 | 5 ni
96 | 6 bu
97 | 7 ba
98 | 8 bi
99 | 9 zu
100 | 10 za
101 | 11 zi
102 | 12 su
103 | 13 sa
104 | 14 si
105 | 15 hu
106 | 16 ha
107 | 17 hi
108 | 18 du
109 | 19 da
110 | 20 di
111 | 21 ru
112 | 22 ra
113 | 23 ri
114 | 24 ku
115 | 25 ka
116 | 26 ki
117 | 27 lu
118 | 28 la
119 | 29 li
120 |
--------------------------------------------------------------------------------
/grakel/tests/data/MUTAG/MUTAG_graph_labels.txt:
--------------------------------------------------------------------------------
1 | 1
2 | -1
3 | -1
4 | 1
5 | -1
6 | 1
7 | -1
8 | 1
9 | -1
10 | 1
11 | 1
12 | 1
13 | 1
14 | -1
15 | 1
16 | 1
17 | -1
18 | 1
19 | -1
20 | 1
21 | 1
22 | 1
23 | 1
24 | 1
25 | 1
26 | 1
27 | 1
28 | 1
29 | 1
30 | 1
31 | 1
32 | 1
33 | 1
34 | -1
35 | 1
36 | -1
37 | 1
38 | -1
39 | -1
40 | -1
41 | 1
42 | -1
43 | 1
44 | 1
45 | 1
46 | 1
47 | 1
48 | 1
49 | 1
50 | 1
51 | 1
52 | 1
53 | 1
54 | 1
55 | -1
56 | 1
57 | 1
58 | 1
59 | 1
60 | 1
61 | 1
62 | -1
63 | 1
64 | 1
65 | -1
66 | -1
67 | 1
68 | 1
69 | 1
70 | -1
71 | 1
72 | 1
73 | -1
74 | 1
75 | 1
76 | -1
77 | -1
78 | -1
79 | 1
80 | 1
81 | 1
82 | 1
83 | 1
84 | -1
85 | 1
86 | 1
87 | 1
88 | -1
89 | -1
90 | 1
91 | 1
92 | 1
93 | 1
94 | 1
95 | 1
96 | 1
97 | 1
98 | -1
99 | 1
100 | -1
101 | 1
102 | 1
103 | 1
104 | 1
105 | 1
106 | 1
107 | 1
108 | 1
109 | 1
110 | -1
111 | -1
112 | 1
113 | -1
114 | -1
115 | 1
116 | -1
117 | 1
118 | 1
119 | -1
120 | -1
121 | 1
122 | 1
123 | -1
124 | -1
125 | 1
126 | 1
127 | 1
128 | 1
129 | -1
130 | -1
131 | -1
132 | -1
133 | -1
134 | 1
135 | -1
136 | 1
137 | 1
138 | -1
139 | -1
140 | 1
141 | -1
142 | -1
143 | -1
144 | -1
145 | 1
146 | 1
147 | -1
148 | 1
149 | 1
150 | -1
151 | 1
152 | 1
153 | 1
154 | -1
155 | -1
156 | -1
157 | 1
158 | 1
159 | 1
160 | -1
161 | 1
162 | 1
163 | 1
164 | 1
165 | 1
166 | 1
167 | 1
168 | -1
169 | 1
170 | 1
171 | 1
172 | 1
173 | 1
174 | 1
175 | -1
176 | 1
177 | 1
178 | 1
179 | -1
180 | 1
181 | -1
182 | -1
183 | 1
184 | 1
185 | -1
186 | -1
187 | 1
188 | -1
189 |
--------------------------------------------------------------------------------
/grakel/tests/data/MUTAG/README.txt:
--------------------------------------------------------------------------------
1 | README for dataset MUTAG
2 |
3 |
4 | === Usage ===
5 |
6 | This folder contains the following comma separated text files
7 | (replace DS by the name of the dataset):
8 |
9 | n = total number of nodes
10 | m = total number of edges
11 | N = number of graphs
12 |
13 | (1) DS_A.txt (m lines)
14 | sparse (block diagonal) adjacency matrix for all graphs,
15 | each line corresponds to (row, col) resp. (node_id, node_id)
16 |
17 | (2) DS_graph_indicator.txt (n lines)
18 | column vector of graph identifiers for all nodes of all graphs,
19 | the value in the i-th line is the graph_id of the node with node_id i
20 |
21 | (3) DS_graph_labels.txt (N lines)
22 | class labels for all graphs in the dataset,
23 | the value in the i-th line is the class label of the graph with graph_id i
24 |
25 | (4) DS_node_labels.txt (n lines)
26 | column vector of node labels,
27 | the value in the i-th line corresponds to the node with node_id i
28 |
29 | There are OPTIONAL files if the respective information is available:
30 |
31 | (5) DS_edge_labels.txt (m lines; same size as DS_A.txt)
32 | labels for the edges in DS_A.txt
33 |
34 | (6) DS_edge_attributes.txt (m lines; same size as DS_A.txt)
35 | attributes for the edges in DS_A.txt
36 |
37 | (7) DS_node_attributes.txt (n lines)
38 | matrix of node attributes,
39 | the comma-separated values in the i-th line form the attribute vector of the node with node_id i
40 |
41 | (8) DS_graph_attributes.txt (N lines)
42 | regression values for all graphs in the dataset,
43 | the value in the i-th line is the attribute of the graph with graph_id i
44 |
45 |
46 | === Description of the dataset ===
47 |
48 | The MUTAG dataset consists of 188 chemical compounds divided into two
49 | classes according to their mutagenic effect on a bacterium.
50 |
51 | The chemical data was obtained from http://cdb.ics.uci.edu and converted
52 | to graphs, where vertices represent atoms and edges represent chemical
53 | bonds. Explicit hydrogen atoms have been removed and vertices are labeled
54 | by atom type and edges by bond type (single, double, triple or aromatic).
55 | Chemical data was processed using the Chemistry Development Kit (v1.4).
56 |
57 | Node labels:
58 |
59 | 0 C
60 | 1 N
61 | 2 O
62 | 3 F
63 | 4 I
64 | 5 Cl
65 | 6 Br
66 |
67 | Edge labels:
68 |
69 | 0 aromatic
70 | 1 single
71 | 2 double
72 | 3 triple
73 |
74 |
75 | === Previous Use of the Dataset ===
76 |
77 | Kriege, N., Mutzel, P.: Subgraph matching kernels for attributed graphs. In: Proceedings
78 | of the 29th International Conference on Machine Learning (ICML-2012) (2012).
79 |
80 |
81 | === References ===
82 |
83 | Debnath, A.K., Lopez de Compadre, R.L., Debnath, G., Shusterman, A.J., and Hansch, C.
84 | Structure-activity relationship of mutagenic aromatic and heteroaromatic nitro compounds.
85 | Correlation with molecular orbital energies and hydrophobicity. J. Med. Chem. 34(2):786-797 (1991).
86 |
--------------------------------------------------------------------------------
/grakel/tests/test_graph.py:
--------------------------------------------------------------------------------
1 | """Tests for the Graph class."""
2 | # Author: Ioannis Siglidis
3 | # License: BSD 3 clause
4 | import numpy as np
5 | import numpy.testing as npt
6 |
7 | from grakel.graph import Graph
8 |
9 | global verbose
10 |
11 | # Add extra arguments for allowing unit testing
12 | if __name__ == '__main__':
13 | import argparse
14 | parser = argparse.ArgumentParser(
15 | description='A test file for all `Graph` type objects')
16 | parser.add_argument(
17 | '--verbose',
18 | help='verbose outputs on stdout',
19 | action="store_true")
20 | parser.add_argument(
21 | '--ignore_warnings',
22 | help='ignore warnings produced by kernel executions',
23 | action="store_true")
24 |
25 | args = parser.parse_args()
26 | verbose = bool(args.verbose)
27 |
28 | if bool(args.ignore_warnings):
29 | import warnings
30 | warnings.filterwarnings('ignore', category=UserWarning)
31 | else:
32 | import warnings
33 | warnings.filterwarnings('ignore', category=UserWarning)
34 | verbose = False
35 |
36 |
37 | def test_graph_adjacency():
38 | """Testing Graph object consistency for an adjacency-type initialization object."""
39 | # Input
40 | X = np.array([[1, 1, 0, 3], [1, 0, 0, 2], [2, 3, 0, 1], [1, 0, 0, 0]])
41 | labels = {0: 'banana', 1: 'cherry', 2: 'banana', 3: 'cherry'}
42 |
43 | # try all formats
44 | g = dict()
45 | g["auto"] = Graph(X, labels, {}, "auto")
46 | g["dict"] = Graph(X, labels, {}, "dictionary")
47 | g["adjc"] = Graph(X, labels, {}, "adjacency")
48 | g["all"] = Graph(X, labels, {}, "all")
49 |
50 | # Desired output label group
51 | desired_output_label_group = {'cherry': [1, 3], 'banana': [0, 2]}
52 |
53 | for k in g.keys():
54 | gklg = g[k].get_label_group()
55 | if verbose:
56 | print(k)
57 | print(gklg, '\n')
58 | else:
59 | npt.assert_equal(desired_output_label_group, gklg)
60 |
61 | # Desired Shortest path matrix
62 | spm_do = [[0., 1., float("Inf"), 3.],
63 | [1., 0., float("Inf"), 2.],
64 | [2., 3., 0., 1.],
65 | [1., 2., float("Inf"), 0.]]
66 |
67 | for k in g.keys():
68 | spm, spl = g[k].build_shortest_path_matrix(algorithm_type="auto")
69 | if verbose:
70 | print(k)
71 | print(spm, '\n', spl, '\n')
72 | else:
73 | npt.assert_array_equal(spm, spm_do)
74 | npt.assert_equal(spl, labels)
75 |
76 |
77 | def test_graph_edge_dictionary():
78 | """Testing Graph object consistency for an edge-dictionary-type initialization object."""
79 | # Input
80 | X = {'a': {'a': 1, 'b': 1, 'd': 3},
81 | 'b': {'a': 1, 'd': 2},
82 | 'c': {'a': 2, 'b': 3, 'd': 1},
83 | 'd': {'a': 1}}
84 |
85 | labels = {'a': 'banana', 'b': 'cherry', 'c': 'banana', 'd': 'cherry'}
86 |
87 | # Test for all Graph formats
88 | g = dict()
89 | g["auto"] = Graph(X, labels, {}, "auto")
90 | g["dict"] = Graph(X, labels, {}, "dictionary")
91 | g["adjc"] = Graph(X, labels, {}, "adjacency")
92 | g["all"] = Graph(X, labels, {}, "all")
93 |
94 | # Desired output label group
95 | desired_output_label_group = {'cherry': set(['d', 'b']),
96 | 'banana': set(['a', 'c'])}
97 | desired_output_label_group_idx = {'banana': set([0, 2]),
98 | 'cherry': set([1, 3])}
99 |
100 | def proper_dict(x):
101 | return {key: set(x[key]) for key in x.keys()}
102 |
103 | for k in g.keys():
104 | gklg = g[k].get_label_group()
105 | if verbose:
106 | print(k)
107 | print(gklg, '\n')
108 | else:
109 | if (k == "adjc"):
110 | npt.assert_equal(
111 | desired_output_label_group_idx,
112 | proper_dict(gklg))
113 | else:
114 | npt.assert_equal(
115 | desired_output_label_group,
116 | proper_dict(gklg))
117 |
118 | # Desired Shortest path matrix
119 | spm_do = [[0., 1., float("Inf"), 3.],
120 | [1., 0., float("Inf"), 2.],
121 | [2., 3., 0., 1.],
122 | [1., 2., float("Inf"), 0.]]
123 |
124 | desired_labels = {0: 'banana', 1: 'cherry', 2: 'banana', 3: 'cherry'}
125 |
126 | for k in g.keys():
127 | spm, spl = g[k].build_shortest_path_matrix(algorithm_type="auto")
128 | if verbose:
129 | print(k)
130 | print(spm, '\n', spl, '\n')
131 | else:
132 | npt.assert_array_equal(spm, spm_do)
133 | npt.assert_equal(spl, desired_labels)
134 |
135 |
136 | if __name__ == '__main__':
137 | test_graph_adjacency()
138 | test_graph_edge_dictionary()
139 |
--------------------------------------------------------------------------------
/grakel/tests/test_windows_sdp_issue.py:
--------------------------------------------------------------------------------
1 | # This test case was found to be inconsistent when running on GitHub Actions runners.
2 | # However, the `test_lovasz` tests seem to pass locally. The test body comes from
3 | # `LovaszTheta.parse_input` and is a minimal example which causes those tests to fail.
4 | #
5 | # To enable the test, change `DISABLED` to False
6 | #
7 | # This test is here in case debugging is required in the future.
8 | import pytest
9 |
10 | DISABLED = True
11 | cvxopt = True
12 | try:
13 | import cvxopt
14 | except ImportError:
15 | cvxopt = False
16 |
17 | @pytest.mark.skipif(DISABLED or not cvxopt, reason="Skipping debugging test")
18 | @pytest.mark.parametrize(
19 | "nv, ne, e_list, x_list",
20 | [
21 | (
22 | 9,
23 | 15,
24 | [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
25 | 12, 12, 13, 13, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15],
26 | [2, 2, 3, 3, 6, 6, 7, 7, 12, 12, 14, 14, 15, 15, 21, 21, 22, 22, 23, 23, 25,
27 | 25, 41, 41, 44, 44, 53, 53, 71, 71, 0, 10, 20, 30, 40, 50, 60, 70, 80],
28 | )
29 | ],
30 | )
31 | def test_windows_sdp(nv, ne, e_list, x_list) -> None:
32 |     # initialise g_sparse to the value -1, based on two lists that define
33 |     # the (row, column) indices and a tuple that defines the shape
34 | from cvxopt.base import matrix, spmatrix
35 | from cvxopt.solvers import sdp
36 |
37 | print(nv, ne, e_list, x_list)
38 | g_sparse = spmatrix(-1, x_list, e_list, (nv * nv, ne + 1))
39 |
40 | # Initialise optimization parameters
41 | h = matrix(-1.0, (nv, nv))
42 | c = matrix([0.0] * ne + [1.0])
43 |
44 | # Solve the convex optimization problem
45 | # Should raise here on windows
46 | sol = sdp(c, Gs=[g_sparse], hs=[h])
47 | assert sol is not None
48 |
--------------------------------------------------------------------------------
/meta.yaml:
--------------------------------------------------------------------------------
1 | {% set name = "grakel-dev" %}
2 | {% set version = "0.1a5" %}
3 |
4 | package:
5 | name: '{{ name|lower }}'
6 | version: '{{ version }}'
7 |
8 | source:
9 | path: ../GraKeL
10 |
11 | build:
12 | number: 0
13 | script: python setup.py install --single-version-externally-managed --record=record.txt
14 |
15 | requirements:
16 | host:
17 | - python
18 | - setuptools
19 | - pytest
20 | - numpy >=1.14.0
21 | - scikit-learn >=0.19.0
22 | - scipy >=1.0.1
23 | - cython >=0.27.3
24 | - future >=0.16.0
25 | - six >=1.11.0
26 | - cvxopt >=1.2.0
27 | run:
28 | - python
29 | - pytest
30 | - numpy >=1.14.0
31 | - scikit-learn >=0.19.0
32 | - scipy >=1.0.1
33 | - cython >=0.27.3
34 | - future >=0.16.0
35 | - six >=1.11.0
36 | - cvxopt >=1.2.0
37 | build:
38 | - {{ compiler('cxx') }}
39 |
40 | test:
41 | imports:
42 | - grakel
43 | - grakel.datasets
44 | - grakel.kernels
45 | - grakel.kernels._c_functions
46 | - grakel.kernels._isomorphism
47 | - grakel.tests
48 |
49 | about:
50 | home: https://github.com/ysig/GraKeL
51 | license: BSD-3-Clause
52 | license_family: BSD
53 | license_file: ''
54 | summary: A scikit-learn compatible library for graph kernels.
55 | description: "For more information, please visit the documentation or the GitHub page."
56 | doc_url: 'https://ysig.github.io/GraKeL/dev/'
57 |
58 | extra:
59 | recipe-maintainers: 'Ioannis Siglidis'
60 |
--------------------------------------------------------------------------------
/misc/implement_list:
--------------------------------------------------------------------------------
1 | Current:
2 |
3 |
4 | Archive:
5 | - Calculates the full graphlet kernel
6 | as proposed by Shervashidze and Vishwanathan (2009)
7 | for graphlets of size 3,4,5
8 |
9 | TOSOLVE:
10 | - overflow error for some executions of the multiscale laplacian
11 |
12 | TODO:
13 | - correct the build framework
14 | - unit test by asserting the existing values
15 | - add new kernels to graph_kernels.py
16 | - support input format for dictionaries as list of tuples,
17 | list of truples, dictionary of tuples
18 | - support sparse matrix format.
19 | (maybe change adjacency mat to scipy csr_mat?)
20 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0.0", "cython", "numpy>=1.14.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "GraKeL"
7 | version='0.1.10'
8 | dependencies = [
9 | "numpy >= 1.14.0",
10 | "cython >= 0.27.3",
11 | "scikit-learn >= 0.19",
12 | "six >= 1.11.0",
13 | "future >= 0.16.0",
14 | "joblib"
15 | ]
16 | requires-python=">=3.5"
17 | authors = [
18 | { name = "Ioannis-Siglidis", email = "y.siglidis@gmail.com" }
19 | ]
20 | readme = "README.md"
21 | description='A scikit-learn compatible library for graph kernels'
22 | classifiers=[
23 | 'Intended Audience :: Science/Research',
24 | 'Intended Audience :: Developers',
25 | 'License :: OSI Approved',
26 | 'Programming Language :: C',
27 | 'Programming Language :: Python',
28 | 'Topic :: Software Development',
29 | 'Topic :: Scientific/Engineering',
30 | 'Operating System :: POSIX',
31 | 'Operating System :: Unix',
32 | 'Operating System :: MacOS',
33 | 'Programming Language :: Python :: 3',
34 | ]
35 | license= { file = "LICENSE" }
36 |
37 | [project.optional-dependencies]
38 | lovasz = ["cvxopt>=1.2.0"]
39 | dev = ["cvxopt>=1.2.0", "pytest", "pytest-coverage", "torch_geometric"]
40 | test = ["pytest", "pytest-coverage", "torch_geometric"]
41 | wheel = ["pytest", "pytest-coverage"]
42 |
43 | [project.urls]
44 | documentation = 'https://ysig.github.io/GraKeL/'
45 | feedback = 'http://www.lix.polytechnique.fr/dascim/contact/'
46 | source = 'https://github.com/ysig/GraKeL/'
47 | tracker = 'https://github.com/ysig/GraKeL/issues'
48 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.14.0
2 | cython>=0.27.3
3 | scikit-learn>=0.19
4 | scipy>=1.12.0
5 | six>=1.11.0
6 | future>=0.16.0
7 | joblib
8 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """The general `setup.py` file."""
2 | # Author: Ioannis Siglidis
3 | # License: BSD 3 clause
4 | import sys
5 | from platform import system
6 |
7 | from setuptools import Extension, find_packages, setup
8 | from numpy import get_include
9 | from Cython.Build import build_ext
10 |
11 | # Compile extensions
12 |
13 | # Set optimization arguments for compilation
14 | OS = system()
15 | if OS == "Windows":
16 | extra_compile_args = ["/O2", "/w"]
17 | elif OS in ["Linux", "Darwin"]:
18 | extra_compile_args = ["-O3", "-w"]
19 |
20 | # Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++.
21 | import distutils.sysconfig
22 |
23 | cfg_vars = distutils.sysconfig.get_config_vars()
24 | for key, value in cfg_vars.items():
25 | if type(value) == str:
26 | cfg_vars[key] = value.replace("-Wstrict-prototypes", "")
27 |
28 | # Add the _c_functions extension on kernels
29 | ext_address = "./grakel/kernels/_c_functions/"
30 | ext = Extension(
31 | name="grakel.kernels._c_functions",
32 | sources=[
33 | ext_address + "functions.pyx",
34 | ext_address + "src/ArashPartov.cpp",
35 | ext_address + "src/sm_core.cpp",
36 | ],
37 | include_dirs=[ext_address + "include", get_include()],
38 | depends=[ext_address + "include/functions.hpp"],
39 | language="c++",
40 | extra_compile_args=extra_compile_args,
41 | )
42 |
43 | # Add the bliss library extension for calculating isomorphism
44 | isodir = "./grakel/kernels/_isomorphism/"
45 | blissdir = isodir + "bliss-0.50/"
46 |
47 | # The essential bliss source files
48 | blisssrcs = ["graph.cc", "heap.cc", "orbit.cc", "partition.cc", "uintseqhash.cc"]
49 | blisssrcs = [blissdir + src for src in blisssrcs]
50 | pn = str(sys.version_info[0])
51 |
52 | # Compile intpybliss
53 | intpybliss = Extension(
54 | name="grakel.kernels._isomorphism.intpybliss",
55 | define_macros=[("MAJOR_VERSION", "0"), ("MINOR_VERSION", "50beta")],
56 | include_dirs=[blissdir],
57 | language="c++",
58 | extra_compile_args=extra_compile_args,
59 | sources=[isodir + "intpyblissmodule_" + pn + ".cc"] + blisssrcs,
60 | )
61 |
62 | # Make bliss extension
63 | bliss = Extension(
64 | name="grakel.kernels._isomorphism.bliss",
65 | include_dirs=[isodir],
66 | language="c++",
67 | extra_compile_args=extra_compile_args,
68 | sources=[isodir + "bliss.pyx"],
69 | )
70 |
71 | setup(
72 | packages=find_packages(),
73 | package_data={"grakel.tests": ["data/Cuneiform/*.txt", "data/MUTAG/*.txt"]},
74 | ext_modules=[intpybliss, bliss, ext],
75 | cmdclass={"build_ext": build_ext},
76 | )
77 |
--------------------------------------------------------------------------------