├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── comment-pr.yml │ ├── doc.yml │ ├── pycodestyle.yml │ ├── pydocstyle.yml │ ├── pylint.yml │ ├── pytest.yml │ └── receive-pr.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── benchmarks ├── GraphClassification │ ├── README.md │ ├── config_gen.py │ ├── configs │ │ ├── ChebNet.yaml │ │ ├── DGN.yaml │ │ ├── GIN.yaml │ │ ├── model_default.yaml │ │ └── train_default.yaml │ ├── grid │ │ └── grid_example.yaml │ ├── models │ │ ├── cheb_net.py │ │ ├── dgn.py │ │ ├── gcn.py │ │ ├── gin.py │ │ └── mlp_readout_layer.py │ ├── train.py │ └── utils.py └── NodeClassification │ ├── README.md │ ├── config_gen.py │ ├── configs │ ├── APPNP.yaml │ ├── GAT.yaml │ ├── GATv2.yaml │ ├── GCNII.yaml │ ├── GraphSAGE.yaml │ ├── LINKX.yaml │ ├── LINKX_train.yaml │ ├── MixHop.yaml │ ├── MoNet.yaml │ ├── SGC.yaml │ ├── TAGCN.yaml │ ├── catboost.yaml │ ├── lightgbm.yaml │ ├── model_default.yaml │ └── train_default.yaml │ ├── grid │ └── grid_example.yaml │ ├── models │ ├── appnp.py │ ├── gat.py │ ├── gatv2.py │ ├── gbdt.py │ ├── gcn.py │ ├── gcn2.py │ ├── gcn_minibatch.py │ ├── graph_sage.py │ ├── graph_sage_minibatch.py │ ├── linkx.py │ ├── mixhop.py │ ├── mlp.py │ ├── monet.py │ ├── sgc.py │ └── tagcn.py │ ├── train.py │ ├── train_gbdt.py │ ├── train_minibatch.py │ └── utils.py ├── datasets ├── FB13 │ ├── FB13.ipynb │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── FB15K │ ├── FB15K.ipynb │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── FB15K237 │ ├── FB15K237.ipynb │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── KGMicrobe │ ├── 
KGMicrobe.ipynb │ ├── LICENSE │ ├── README.md │ └── metadata.json ├── NELL-995 │ ├── LICENSE │ ├── NELL-995.ipynb │ ├── README.md │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── WN11 │ ├── LICENSE │ ├── README.md │ ├── WN11.ipynb │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── WN18 │ ├── LICENSE │ ├── README.md │ ├── WN18.ipynb │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── WN18RR │ ├── LICENSE │ ├── README.md │ ├── WN18RR.ipynb │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── YAGO3-10 │ ├── LICENSE │ ├── README.md │ ├── YAGO3-10.ipynb │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── actor │ ├── LICENSE │ ├── README.md │ ├── actor.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── arxiv-year │ ├── LICENSE │ ├── README.md │ ├── arxiv-year.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── chameleon │ ├── LICENSE │ ├── README.md │ ├── chameleon.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── cifar │ ├── LICENSE │ ├── README.md │ ├── cifar.ipynb │ ├── metadata.json │ └── task_graph_classification_1.json ├── citeseer │ ├── LICENSE │ ├── README.md │ ├── citeseer.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── cora │ ├── LICENSE │ ├── README.md │ ├── cora.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── cornell │ ├── LICENSE │ ├── README.md │ ├── cornell.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── genius │ ├── LICENSE │ ├── README.md │ ├── genius.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── mnist │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── mnist.ipynb │ └── task_graph_classification_1.json ├── ogbg-molbace │ ├── LICENSE │ ├── README.md │ ├── 
metadata.json │ ├── ogbg-molbace.ipynb │ └── task_graph_classification_1.json ├── ogbg-molclintox │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molclintox.ipynb │ └── task_graph_classification_1.json ├── ogbg-molfreesolv │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molfreesolv.ipynb │ └── task_graph_regression_1.json ├── ogbg-molhiv │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molhiv.ipynb │ └── task_graph_classification_1.json ├── ogbg-molmuv │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molmuv.ipynb │ └── task_graph_classification_1.json ├── ogbg-molpcba │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molpcba.ipynb │ └── task_graph_classification_1.json ├── ogbg-molsider │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molsider.ipynb │ └── task_graph_classification_1.json ├── ogbl-collab │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbl-collab.ipynb │ ├── task_time_dependent_link_prediction_1.json │ └── task_time_dependent_link_prediction_2.json ├── ogbn-arxiv │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbn-arxiv.ipynb │ └── task_node_classification_1.json ├── ogbn-mag │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbn-mag.ipynb │ └── task_node_classification_1.json ├── ogbn-products │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbn-products.ipynb │ └── task_node_classification_1.json ├── ogbn-proteins │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbn-proteins.ipynb │ └── task_node_classification_1.json ├── penn94 │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── penn94.ipynb │ └── task_node_classification_1.json ├── pokec │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── pokec.ipynb │ └── task_node_classification_1.json ├── pubmed │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── pubmed.ipynb │ └── task_node_classification_1.json ├── reddit │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── 
reddit.ipynb │ └── task_node_classification_1.json ├── snap-patents │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── snap-patents.ipynb │ └── task_node_classification_1.json ├── squirrel │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── squirrel.ipynb │ └── task_node_classification_1.json ├── texas │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_node_classification_1.json │ └── texas.ipynb ├── twitch-gamers │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_node_classification_1.json │ └── twitch-gamers.ipynb ├── wiki │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_node_classification_1.json │ └── wiki.ipynb └── wisconsin │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_node_classification_1.json │ └── wisconsin.ipynb ├── docs ├── .readthedocs.yaml ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _templates │ ├── classtemplate.rst │ └── functemplate.rst │ ├── api │ ├── dataset.rst │ ├── gli.rst │ ├── graph.rst │ ├── io.rst │ ├── task.rst │ └── utils.rst │ ├── conf.py │ ├── format │ ├── citation.rst │ └── file.rst │ ├── generated │ ├── gli.dataloading.combine_graph_and_task.rst │ ├── gli.dataloading.get_gli_dataset.rst │ ├── gli.dataloading.get_gli_graph.rst │ ├── gli.dataloading.get_gli_task.rst │ ├── gli.graph.read_gli_graph.rst │ ├── gli.io.Attribute.rst │ ├── gli.io.save_heterograph.rst │ ├── gli.io.save_homograph.rst │ ├── gli.io.save_task_node_classification.rst │ ├── gli.io.save_task_node_regression.rst │ ├── gli.task.GLITask.rst │ ├── gli.task.GraphClassificationTask.rst │ ├── gli.task.GraphRegressionTask.rst │ ├── gli.task.KGEntityPredictionTask.rst │ ├── gli.task.KGRelationPredictionTask.rst │ ├── gli.task.LinkPredictionTask.rst │ ├── gli.task.NodeClassificationTask.rst │ ├── gli.task.NodeRegressionTask.rst │ ├── gli.task.TimeDependentLinkPredictionTask.rst │ └── gli.task.read_gli_task.rst │ ├── index.rst │ └── start │ ├── contribute.rst │ ├── install.rst │ └── tutorial.rst ├── 
example.py ├── gli ├── __init__.py ├── config.py ├── dataloading.py ├── dataset.py ├── graph.py ├── io │ ├── __init__.py │ ├── edge_task.py │ ├── graph.py │ ├── graph_task.py │ ├── kg_task.py │ ├── node_task.py │ └── utils.py ├── tags.py ├── task.py └── utils.py ├── img ├── GLI-Contribution-Workflow.png ├── GLI-File-Structure.png ├── flowchart.png ├── gli-banner.jpg └── gli-banner.png ├── pyproject.toml ├── requirements.txt ├── setup.py ├── templates └── dataset-folder │ ├── LICENSE │ ├── README.md │ ├── metadata.hjson │ └── preprocess.ipynb └── tests ├── config.yaml ├── conftest.py ├── kg_utils.py ├── preprocess.py ├── test_data_loading.py ├── test_files.py ├── test_io.py ├── test_kg_training.py ├── test_metadata.py ├── test_node_classification_training.py ├── test_node_regression_training.py ├── test_task.py ├── training_utils.py └── utils.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | ``` 15 | Steps to reproduce the behavior 16 | ``` 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **Screenshots** 22 | If applicable, add screenshots to help explain your problem. 23 | 24 | **Additional context** 25 | Add any other context about the problem here. 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEATURE REQUEST]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? 
Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Description 4 | 5 | 6 | ## Related Issue 7 | 8 | 9 | 10 | 11 | 12 | ## Motivation and Context 13 | 14 | 15 | 16 | ## How Has This Been Tested? 17 | 18 | 19 | 20 | 21 | ## Screenshots (if appropriate): 22 | -------------------------------------------------------------------------------- /.github/workflows/doc.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | on: [push, pull_request, workflow_dispatch] 3 | jobs: 4 | docs: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | matrix: 8 | python-version: ["3.8"] 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions/setup-python@v2 12 | with: 13 | python-version: ${{ matrix.python-version }} 14 | - name: Install dependencies 15 | run: | 16 | # pip install sphinx sphinx_rtd_theme 17 | pip install -e ".[doc]" 18 | - name: Sphinx build 19 | run: | 20 | sphinx-build docs/source _build 21 | - name: Deploy 22 | uses: peaceiris/actions-gh-pages@v3 23 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} 24 | with: 25 | publish_branch: gh-pages 26 | github_token: ${{ secrets.GITHUB_TOKEN }} 27 | publish_dir: _build/ 28 | force_orphan: true -------------------------------------------------------------------------------- /.github/workflows/pycodestyle.yml: 
-------------------------------------------------------------------------------- 1 | name: Pycodestyle 2 | 3 | on: [pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8"] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pycodestyle 21 | - name: Analysing the code with pylint 22 | run: | 23 | pycodestyle $(git ls-files '*.py') 24 | -------------------------------------------------------------------------------- /.github/workflows/pydocstyle.yml: -------------------------------------------------------------------------------- 1 | name: Pydocstyle 2 | 3 | on: [pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8"] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pydocstyle 21 | - name: Analysing the code with pydocstyle 22 | run: | 23 | pydocstyle $(git ls-files '*.py') 24 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | 3 | on: [pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8"] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ 
matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install numpy torch scipy 21 | pip install --force-reinstall pylint==2.17.5 22 | - name: Analysing the code with pylint 23 | run: | 24 | pylint $(git ls-files '*.py') --rcfile .pylintrc --recursive y 25 | -------------------------------------------------------------------------------- /.github/workflows/pytest.yml: -------------------------------------------------------------------------------- 1 | name: Pytest 2 | 3 | on: [pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: self-hosted 8 | strategy: 9 | matrix: 10 | python-version: ["3.7"] 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | fetch-depth: 0 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v3 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install pytest 23 | pip install -e . 24 | pip install pyyaml 25 | - name: Get changed files using defaults 26 | id: changed-files 27 | uses: tj-actions/changed-files@v23.1 28 | - name: List all changed files 29 | run: | 30 | for file in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do 31 | echo "$file was changed" 32 | done 33 | - name: Test with pytest, if triggered by PR 34 | run: | 35 | if ${{ github.event_name == 'pull_request' }} 36 | then 37 | dataset_list=() 38 | for path in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do 39 | dir="$(dirname "${path}")" ; 40 | if [ ! -d "$dir" ]; then 41 | echo "$dir doesn't exist, continue" 42 | continue; 43 | fi 44 | dataset=$(echo $path | grep "datasets" | sed -r 's/datasets\/([_a-zA-Z0-9-]+)\/.*/\1/') 45 | if [ -z "$dataset" ]; then continue; fi 46 | if [[ ! 
" ${dataset_list[*]} " =~ " ${dataset} " ]]; then 47 | echo "add dataset: $dataset" 48 | dataset_list+=($dataset) 49 | fi 50 | done 51 | echo "datasets list is ${dataset_list[*]}" 52 | mkdir temp 53 | echo "${dataset_list[*]}" > temp/changed_datasets 54 | python tests/preprocess.py 55 | pytest tests/ 56 | fi 57 | - name: Test all datasets with pytest, if triggered by workflow_dispatch 58 | run: | 59 | if ${{ github.event_name == 'workflow_dispatch' }} 60 | then 61 | python tests/preprocess.py 62 | pytest tests/ 63 | fi 64 | -------------------------------------------------------------------------------- /.github/workflows/receive-pr.yml: -------------------------------------------------------------------------------- 1 | name: Receive PR 2 | 3 | # read-only repo token 4 | # no access to secrets 5 | on: 6 | pull_request: 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | - name: Get changed files using defaults 17 | id: changed-files 18 | uses: tj-actions/changed-files@v23.1 19 | - name: List all changed files 20 | run: | 21 | for file in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do 22 | echo "$file was changed" 23 | done 24 | - name: Check large datasets 25 | id: main 26 | run: | 27 | dataset_list=() 28 | for path in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do 29 | dir="$(dirname "${path}")" ; 30 | if [ ! -d "$dir" ]; then 31 | echo "$dir doesn't exist, continue" 32 | continue; 33 | fi 34 | dataset=$(echo $path | grep "datasets" | sed -r 's/datasets\/([_a-zA-Z0-9-]+)\/.*/\1/') 35 | if [ -z "$dataset" ]; then continue; fi 36 | if [[ ! 
" ${dataset_list[*]} " =~ " ${dataset} " ]]; then 37 | echo "add dataset: $dataset" 38 | dataset_list+=($dataset) 39 | fi 40 | done 41 | dataset_to_comment=() 42 | large_dataset_list=$(cat tests/config.yaml | sed -r 's/large_dataset_to_skip: \[(.*)\]/\1/') 43 | for dataset in "${dataset_list[@]}"; do 44 | if [[ "$large_dataset_list" == *"$dataset"* ]]; then 45 | echo "add ${dataset} to dataset_to_comment" 46 | dataset_to_comment+=($dataset) 47 | fi 48 | done 49 | if [ ${#dataset_to_comment[@]} -ne 0 ]; then 50 | echo "dataset to be commented are: ${dataset_to_comment[*]}" 51 | fi 52 | echo "::set-output name=DATASETS::${dataset_to_comment[*]}" 53 | 54 | - name: Save PR number 55 | run: | 56 | mkdir -p ./pr 57 | echo ${{ github.event.number }} > ./pr/NR 58 | echo ${{ steps.main.outputs.DATASETS }} >> ./pr/NR 59 | - uses: actions/upload-artifact@v3 60 | with: 61 | name: pr 62 | path: pr/ 63 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - repo: local 12 | hooks: 13 | - id: pylint 14 | name: pylint 15 | entry: pylint 16 | language: system 17 | types: [python] 18 | args: 19 | [ 20 | "-rn", # Only display messages 21 | "-sn", # Don't display the score 22 | "--rcfile=.pylintrc", # Link to your config file 23 | ] 24 | - repo: local 25 | hooks: 26 | - id: pycodestyle 27 | name: pycodestyle 28 | entry: pycodestyle 29 | language: system 30 | types: [python] 31 | - repo: local 32 | hooks: 33 | - id: pydocstyle 34 | name: pydocstyle 35 | entry: pydocstyle 36 | language: system 37 | types: [python] 38 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Graph-Learning-Benchmarks 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Signifies our desired python version 2 | PYTHON = python 3 | PYTHON_FILES := gli/ benchmarks/ tests/ example.py 4 | 5 | # .PHONY defines parts of the makefile that are not dependant on any specific file 6 | # This is most often used to store functions 7 | .PHONY = help setup test run clean 8 | 9 | # The @ makes sure that the command itself isn't echoed in the terminal 10 | help: 11 | @echo "Usage: make " 12 | @echo " Available commands:" 13 | @echo " setup: install the full project." 14 | @echo " clean: remove all data files (npz)." 15 | @echo " test: run all tests (pystyle, pylint, pytest). Stop on failure." 16 | @echo " pystyle: run pycodestyle and pydocstyle tests." 17 | @echo " pylint: run pylint." 18 | @echo " pytest: run pytests on all datasets by default." 19 | @echo " run pytest on a single dataset by arg DATASET." 20 | @echo " e.g., make pytest DATASET=cora" 21 | @echo " donwload: download and preprocess all data files (npz)." 22 | @echo " logs: create logs directory." 23 | @echo " build: build the package." 24 | 25 | setup: 26 | ${PYTHON} -m pip install -e ".[test,full]" 27 | 28 | clean: 29 | find datasets -name '*.npz' -delete 30 | 31 | test: pystyle pylint pytest 32 | 33 | # The leading '-' will execute following command to ignore errors. 
34 | pystyle: logs 35 | -pycodestyle ${PYTHON_FILES} | tee logs/pycodestyle.log 36 | -pydocstyle ${PYTHON_FILES} | tee logs/pydocstyle.log 37 | 38 | pylint: logs 39 | -pylint ${PYTHON_FILES} --rcfile .pylintrc --recursive y | tee logs/pylint.log 40 | 41 | pytest: logs 42 | ifndef DATASET 43 | -pytest -v tests/ | tee logs/pytest.log 44 | else 45 | mkdir -p temp 46 | @echo $$DATASET > temp/changed_datasets 47 | -pytest -v tests/ | tee logs/pytest.log 48 | rm temp/changed_datasets 49 | rmdir temp 50 | endif 51 | 52 | download: 53 | ${PYTHON} tests/preprocess.py 54 | 55 | logs: 56 | -mkdir logs 57 | 58 | build: 59 | ${PYTHON} -m build -------------------------------------------------------------------------------- /benchmarks/GraphClassification/README.md: -------------------------------------------------------------------------------- 1 | # GLI Benchmarking on `GraphClassification` Task 2 | 3 | The code in this folder can be used to benchmark some popular models on `GraphClassification` task. 4 | 5 | ## How to run 6 | 7 | Example commands to run the code: 8 | 9 | ```bash 10 | python train.py --dataset --model GCN 11 | python train.py --dataset --model DGN --model-cfg configs/DGN.yaml 12 | python train.py --dataset --model ChebNet --model-cfg configs/ChebNet.yaml 13 | python train.py --dataset --model GIN --model-cfg configs/GIN.yaml 14 | ``` 15 | 16 | One can provide a `yaml` file to arguments `--model-cfg` or `--train-cfg` respectively for model configuration or training configuration. If not provided, default configurations (see [model_default.yaml](https://github.com/Graph-Learning-Benchmarks/gli/blob/main/benchmarks/GraphClassification/configs/model_default.yaml) and [train_default.yaml](https://github.com/Graph-Learning-Benchmarks/gli/blob/main/benchmarks/GraphClassification/configs/train_default.yaml)) will be used. 17 | 18 | Note that some models may have unique hyperparameters not included in the default configuration files. 
In this case, one should pass the model-specific coniguration files to `train.py`. 19 | 20 | ## Supported models 21 | 22 | The following list of models are supported by this benchmark. 23 | 24 | - `GCN` 25 | - `DGN` 26 | - `ChebNet` 27 | - `GIN` 28 | 29 | To add a new model, one should add the model implementation under the `models` folder, and add model specific confgurations under the `configs` folder when needed. We have tried to implement `train.py` in a generic way so one may only need to make minimal modifications to `train.py` and `utils.py`. 30 | 31 | Contributions of new models are welcome through pull requests. 32 | 33 | ## Supported datasets 34 | 35 | This benchmark should work for most datasets with a `GraphClassification` task associated. The following datasets have been tested for this code. 36 | 37 | - `mnist` 38 | - `ogbg-molpcba` 39 | - `ogbg-molhiv` 40 | - `ogbg-molsider` 41 | - `ogbg-molbace` 42 | - `ogbg-molmuv` 43 | - `cifar` 44 | - `ogbg-molclintox` 45 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/configs/ChebNet.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 128 2 | num_layers: 4 3 | k: 3 4 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/configs/DGN.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 4 2 | hidden_dim: 128 3 | # aggregators: ['dir1-av', 'dir1-dx', 'sum'] 4 | aggregators: ['mean'] 5 | scalers: ['identity'] 6 | delta: 2.5 7 | dropout: 0 8 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/configs/GIN.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 16 2 | num_layers: 5 3 | dropout: .5 -------------------------------------------------------------------------------- 
/benchmarks/GraphClassification/configs/model_default.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 146 2 | num_layers: 4 3 | dropout: 0.0 4 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/configs/train_default.yaml: -------------------------------------------------------------------------------- 1 | loss_fun: cross_entropy 2 | self_loop: True 3 | to_dense: False 4 | lr: .0005 5 | weight_decay: 0.0 6 | max_epoch: 10000 7 | early_stopping: True 8 | seed: 0 9 | batch_size: 1024 10 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/grid/grid_example.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: [64, 128] 2 | lr: [0.0005, 0.001, 0.005, 0.01] 3 | dropout: [0.0, 0.2, 0.4, 0.6, 0.8] 4 | weight_decay: [0, .0001, .001, .01] 5 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/models/cheb_net.py: -------------------------------------------------------------------------------- 1 | """ 2 | ChebNet model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/blob/195f99362d883f8b6d131b70a7868a 6 | 537e55b786/examples/pytorch/model_zoo/citation_network/models.py 7 | """ 8 | 9 | import dgl 10 | from torch import nn 11 | from dgl.nn.pytorch import ChebConv 12 | from models.mlp_readout_layer import MLPReadout 13 | 14 | 15 | class ChebNet(nn.Module): 16 | """ChebNet network.""" 17 | 18 | def __init__(self, 19 | in_feats, 20 | n_hidden, 21 | n_classes, 22 | n_layers, 23 | k): 24 | """Initiate model.""" 25 | super().__init__() 26 | self.layers = nn.ModuleList() 27 | self.layers.append( 28 | ChebConv(in_feats, n_hidden, k) 29 | ) 30 | for _ in range(n_layers - 2): 31 | self.layers.append( 32 | ChebConv(n_hidden, n_hidden, k) 33 | ) 34 | 35 | self.layers.append( 36 | ChebConv(n_hidden, n_hidden, k) 37 | ) 38 | self.mlp_layer = MLPReadout(n_hidden, n_classes) 39 | 40 | def forward(self, g, features): 41 | """Forward.""" 42 | h = features 43 | for layer in self.layers: 44 | h = layer(g, h) 45 | g.ndata["h"] = h 46 | hg = dgl.mean_nodes(g, "h") 47 | return self.mlp_layer(hg) 48 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/models/gcn.py: -------------------------------------------------------------------------------- 1 | """ 2 | GCN model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn 6 | https://docs.dgl.ai/tutorials/blitz/5_graph_classification.html# 7 | sphx-glr-tutorials-blitz-5-graph-classification-py 8 | """ 9 | 10 | import dgl 11 | from torch import nn 12 | from dgl.nn.pytorch import GraphConv 13 | from models.mlp_readout_layer import MLPReadout 14 | 15 | 16 | class GCNgraph(nn.Module): 17 | """GCN network.""" 18 | 19 | def __init__(self, 20 | in_feats, 21 | n_hidden, 22 | n_classes, 23 | n_layers, 24 | activation, 25 | dropout): 26 | """Initiate model.""" 27 | super().__init__() 28 | self.layers = nn.ModuleList() 29 | # embedded layer 30 | self.embedding_h = nn.Linear(in_feats, n_hidden) 31 | 32 | # hidden layers 33 | for _ in range(n_layers - 2): 34 | self.layers.append(GraphConv(n_hidden, n_hidden, 35 | activation=activation)) 36 | # output layer 37 | self.layers.append(GraphConv(n_hidden, n_hidden)) 38 | self.dropout = nn.Dropout(p=dropout) 39 | 40 | # readout layer 41 | self.mlp_layer = MLPReadout(n_hidden, n_classes) 42 | 43 | def forward(self, g, features): 44 | """Forward.""" 45 | h = features 46 | h = self.embedding_h(h) 47 | for i, layer in enumerate(self.layers): 48 | if i != 0: 49 | h = self.dropout(h) 50 | h = layer(g, h) 51 | g.ndata["h"] = h 52 | hg = dgl.mean_nodes(g, "h") 53 | return self.mlp_layer(hg) 54 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/models/mlp_readout_layer.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | MLP Layer used after graph vector representation. 
4 | 5 | References: 6 | https://github.com/graphdeeplearning/benchmarking-gnns/blob/master/layers/mlp_readout_layer.py 7 | """ 8 | 9 | from torch import nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class MLPReadout(nn.Module): 14 | """MLPReadout layer in GLI.""" 15 | 16 | def __init__(self, input_dim, output_dim, L=2): 17 | """Initiate layer, L=nb_hidden_layers.""" 18 | super().__init__() 19 | list_fc_layers = [nn.Linear(input_dim // 2 ** layer, input_dim // 2 ** 20 | (layer + 1), bias=True) for layer in range(L)] 21 | list_fc_layers.append(nn.Linear(input_dim // 2 ** L, output_dim, 22 | bias=True)) 23 | self.fc_layers = nn.ModuleList(list_fc_layers) 24 | self.n_layers = L 25 | 26 | def forward(self, x): 27 | """Forward.""" 28 | y = x 29 | for layer in range(self.n_layers): 30 | y = self.fc_layers[layer](y) 31 | y = F.relu(y) 32 | y = self.fc_layers[self.n_layers](y) 33 | return y 34 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/APPNP.yaml: -------------------------------------------------------------------------------- 1 | in_drop: .5 2 | edge_drop: .5 3 | hidden_sizes: [64] 4 | k: 10 5 | alpha: .1 6 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/GAT.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 8 3 | num_heads: 8 4 | num_out_heads: 2 5 | residual: False 6 | dropout: .6 7 | negative_slope: .2 8 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/GATv2.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 1 2 | num_hidden: 8 3 | num_heads: 8 4 | num_out_heads: 2 5 | residual: False 6 | feat_drop: .7 7 | attn_drop: .7 8 | negative_slope: .2 9 | 
-------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/GCNII.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 64 2 | num_layers: 64 3 | dropout: .5 4 | lambda_: .5 5 | alpha: .2 -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/GraphSAGE.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 8 3 | dropout: .6 4 | aggregator_type: gcn 5 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/LINKX.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 16 2 | num_layers: 1 3 | dropout: .5 4 | inner_activation: False 5 | inner_dropout: False 6 | init_layers_A: 1 7 | init_layers_X: 1 8 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/LINKX_train.yaml: -------------------------------------------------------------------------------- 1 | loss_fun: cross_entropy 2 | self_loop: False 3 | to_dense: False 4 | lr: .01 5 | weight_decay: 0.001 6 | max_epoch: 10000 7 | early_stopping: True 8 | seed: 0 9 | batch_size: 256 10 | to_undirected: False 11 | optimizer: "AdamW" 12 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/MixHop.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 8 2 | p: [0, 1, 2] 3 | num_layers: 2 4 | dropout: .5 5 | layer_dropout: 0.9 6 | batchnorm: False 7 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/MoNet.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 8 3 
| dropout: .6 4 | pseudo_dim: 2 5 | num_kernels: 3 6 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/SGC.yaml: -------------------------------------------------------------------------------- 1 | k: 2 -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/TAGCN.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 16 3 | k: 2 4 | dropout: .5 5 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/catboost.yaml: -------------------------------------------------------------------------------- 1 | hp: 2 | lr: 3 | - 0.01 4 | - 0.1 5 | depth: 6 | - 4 7 | - 6 8 | l2_leaf_reg: 9 | - null 10 | num_epochs: 1000 11 | patience: 100 12 | verbose: false 13 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/lightgbm.yaml: -------------------------------------------------------------------------------- 1 | hp: 2 | lr: 3 | - 0.01 4 | - 0.1 5 | num_leaves: 6 | - 15 7 | - 63 8 | lambda_l2: 9 | - 0.0 10 | boosting: 11 | - gbdt 12 | num_epochs: 1000 13 | patience: 100 14 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/model_default.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 8 3 | dropout: .6 4 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/train_default.yaml: -------------------------------------------------------------------------------- 1 | loss_fun: cross_entropy 2 | self_loop: True 3 | to_dense: False 4 | lr: .01 5 | weight_decay: 0.001 6 | max_epoch: 10000 7 | early_stopping: True 8 | seed: 0 9 | batch_size: 256 10 | 
num_hidden: [32, 64]
self.activation(layer(h)) 57 | h = self.layers[-1](self.feat_drop(h)) 58 | # propagation step 59 | h = self.propagate(self.g, h) 60 | return h 61 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/gat.py: -------------------------------------------------------------------------------- 1 | """ 2 | GAT model in GLI. 3 | 4 | References: 5 | https://github.com/dmlc/dgl/tree/master/examples/pytorch/gat 6 | """ 7 | 8 | from torch import nn 9 | from dgl.nn import GATConv 10 | 11 | 12 | class GAT(nn.Module): 13 | """GAT network.""" 14 | 15 | def __init__(self, 16 | g, 17 | num_layers, 18 | in_dim, 19 | num_hidden, 20 | num_classes, 21 | heads, 22 | activation, 23 | feat_drop, 24 | attn_drop, 25 | negative_slope, 26 | residual): 27 | """Initiate model.""" 28 | super().__init__() 29 | self.g = g 30 | self.num_layers = num_layers 31 | self.gat_layers = nn.ModuleList() 32 | self.activation = activation 33 | 34 | # input projection (no residual) 35 | self.gat_layers.append(GATConv( 36 | in_dim, num_hidden, heads[0], 37 | feat_drop, attn_drop, negative_slope, False, self.activation)) 38 | # hidden layers 39 | for layer in range(1, num_layers - 2): 40 | # due to multi-head, the in_dim = num_hidden * num_heads 41 | self.gat_layers.append(GATConv(num_hidden * heads[layer-1], 42 | num_hidden, heads[layer], 43 | feat_drop, attn_drop, 44 | negative_slope, residual, 45 | self.activation)) 46 | # output projection 47 | self.gat_layers.append(GATConv( 48 | num_hidden * heads[-2], num_classes, heads[-1], 49 | feat_drop, attn_drop, negative_slope, residual, None)) 50 | 51 | def forward(self, inputs): 52 | """Forward.""" 53 | h = inputs 54 | for layer in range(self.num_layers): 55 | h = self.gat_layers[layer](self.g, h) 56 | h = h.flatten(1) if layer != self.num_layers - 1 else h.mean(1) 57 | return h 58 | -------------------------------------------------------------------------------- 
/benchmarks/NodeClassification/models/gcn.py: -------------------------------------------------------------------------------- 1 | """ 2 | GCN model in GLI. 3 | 4 | References: 5 | https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn 6 | """ 7 | 8 | from torch import nn 9 | from dgl.nn.pytorch import GraphConv 10 | 11 | 12 | class GCN(nn.Module): 13 | """GCN network.""" 14 | 15 | def __init__(self, 16 | g, 17 | in_feats, 18 | n_hidden, 19 | n_classes, 20 | n_layers, 21 | activation, 22 | dropout): 23 | """Initiate model.""" 24 | super().__init__() 25 | self.g = g 26 | self.layers = nn.ModuleList() 27 | # input layer 28 | self.layers.append(GraphConv(in_feats, n_hidden, 29 | activation=activation)) 30 | # hidden layers 31 | for _ in range(n_layers - 2): 32 | self.layers.append(GraphConv(n_hidden, n_hidden, 33 | activation=activation)) 34 | # output layer 35 | self.layers.append(GraphConv(n_hidden, n_classes)) 36 | self.dropout = nn.Dropout(p=dropout) 37 | 38 | def forward(self, features): 39 | """Forward.""" 40 | h = features 41 | for i, layer in enumerate(self.layers): 42 | if i != 0: 43 | h = self.dropout(h) 44 | h = layer(self.g, h) 45 | return h 46 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/gcn_minibatch.py: -------------------------------------------------------------------------------- 1 | """ 2 | GCN model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn 6 | https://docs.dgl.ai/guide/minibatch-node.html?highlight=sampling 7 | """ 8 | 9 | from torch import nn 10 | from dgl.nn.pytorch import GraphConv 11 | 12 | 13 | class GCNminibatch(nn.Module): 14 | """GCN network.""" 15 | 16 | def __init__(self, 17 | in_feats, 18 | n_hidden, 19 | n_classes, 20 | n_layers, 21 | activation, 22 | dropout): 23 | """Initiate model.""" 24 | super().__init__() 25 | self.layers = nn.ModuleList() 26 | # input layer 27 | self.layers.append(GraphConv(in_feats, n_hidden, 28 | activation=activation, 29 | norm='none')) 30 | # hidden layers 31 | for _ in range(n_layers - 2): 32 | self.layers.append(GraphConv(n_hidden, n_hidden, 33 | activation=activation, 34 | norm='none')) 35 | # output layer 36 | self.layers.append(GraphConv(n_hidden, n_classes, 37 | norm='none')) 38 | self.dropout = nn.Dropout(p=dropout) 39 | 40 | def forward(self, blocks, features): 41 | """Forward.""" 42 | h = features 43 | for i, layer in enumerate(self.layers): 44 | if i != 0: 45 | h = self.dropout(h) 46 | h = layer(blocks[i], h) 47 | return h 48 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/graph_sage.py: -------------------------------------------------------------------------------- 1 | """ 2 | GraphSAGE model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/blob/master/examples/pytorch/graphsage/train_full.py 6 | """ 7 | 8 | from torch import nn 9 | from dgl.nn.pytorch.conv import SAGEConv 10 | 11 | 12 | class GraphSAGE(nn.Module): 13 | """GraphSAGE model.""" 14 | 15 | def __init__(self, 16 | g, 17 | in_feats, 18 | n_hidden, 19 | n_classes, 20 | n_layers, 21 | activation, 22 | dropout, 23 | aggregator_type): 24 | """Initiate model.""" 25 | super().__init__() 26 | self.g = g 27 | self.layers = nn.ModuleList() 28 | self.dropout = nn.Dropout(dropout) 29 | self.activation = activation 30 | 31 | # input layer 32 | self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type)) 33 | # hidden layers 34 | for _ in range(n_layers - 2): 35 | self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type)) 36 | # output layer 37 | self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type)) 38 | 39 | def forward(self, inputs): 40 | """Forward.""" 41 | h = self.dropout(inputs) 42 | for length, layer in enumerate(self.layers): 43 | h = layer(self.g, h) 44 | if length != len(self.layers) - 1: 45 | h = self.activation(h) 46 | h = self.dropout(h) 47 | return h 48 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/graph_sage_minibatch.py: -------------------------------------------------------------------------------- 1 | """ 2 | GraphSAGE model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/blob/master/examples/pytorch/graphsage/train_full.py 6 | https://docs.dgl.ai/guide/minibatch-node.html?highlight=sampling 7 | """ 8 | 9 | from torch import nn 10 | from dgl.nn.pytorch.conv import SAGEConv 11 | 12 | 13 | class GraphSAGEminibatch(nn.Module): 14 | """GraphSAGE model.""" 15 | 16 | def __init__(self, 17 | in_feats, 18 | n_hidden, 19 | n_classes, 20 | n_layers, 21 | activation, 22 | dropout, 23 | aggregator_type): 24 | """Initiate model.""" 25 | super().__init__() 26 | self.layers = nn.ModuleList() 27 | self.dropout = nn.Dropout(dropout) 28 | self.activation = activation 29 | 30 | # input layer 31 | self.layers.append(SAGEConv(in_feats, n_hidden, 32 | aggregator_type)) 33 | # hidden layers 34 | for _ in range(n_layers - 2): 35 | self.layers.append(SAGEConv(n_hidden, n_hidden, 36 | aggregator_type)) 37 | # output layer 38 | self.layers.append(SAGEConv(n_hidden, n_classes, 39 | aggregator_type)) 40 | 41 | def forward(self, blocks, inputs): 42 | """Forward.""" 43 | h = self.dropout(inputs) 44 | for length, layer in enumerate(self.layers): 45 | h = layer(blocks[length], h) 46 | if length != len(self.layers) - 1: 47 | h = self.activation(h) 48 | h = self.dropout(h) 49 | return h 50 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/mlp.py: -------------------------------------------------------------------------------- 1 | """ 2 | MLP model in GLI. 
MoNet model in GLI.
3 | 4 | References: 5 | https://github.com/dmlc/dgl/blob/master/examples/pytorch/monet/citation.py 6 | """ 7 | 8 | import torch 9 | from torch import nn 10 | from dgl.nn.pytorch.conv import GMMConv 11 | 12 | 13 | class MoNet(nn.Module): 14 | """Monet model.""" 15 | 16 | def __init__(self, 17 | g, 18 | in_feats, 19 | n_hidden, 20 | out_feats, 21 | n_layers, 22 | dim, 23 | n_kernels, 24 | dropout): 25 | """Initiate model.""" 26 | super().__init__() 27 | self.g = g 28 | self.layers = nn.ModuleList() 29 | self.pseudo_proj = nn.ModuleList() 30 | 31 | # process pseudo 32 | us, vs = g.edges(order="eid") 33 | udeg, vdeg = 1 / torch.sqrt(g.in_degrees(us).float()), 1 / \ 34 | torch.sqrt(g.in_degrees(vs).float()) 35 | self.pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1) 36 | 37 | # Input layer 38 | self.layers.append( 39 | GMMConv(in_feats, n_hidden, dim, n_kernels)) 40 | self.pseudo_proj.append( 41 | nn.Sequential(nn.Linear(2, dim), nn.Tanh())) 42 | 43 | # Hidden layer 44 | for _ in range(n_layers - 2): 45 | self.layers.append(GMMConv(n_hidden, n_hidden, dim, n_kernels)) 46 | self.pseudo_proj.append( 47 | nn.Sequential(nn.Linear(2, dim), nn.Tanh())) 48 | 49 | # Output layer 50 | self.layers.append(GMMConv(n_hidden, out_feats, dim, n_kernels)) 51 | self.pseudo_proj.append( 52 | nn.Sequential(nn.Linear(2, dim), nn.Tanh())) 53 | self.dropout = nn.Dropout(dropout) 54 | 55 | def forward(self, feat): 56 | """Forward.""" 57 | h = feat 58 | for i in range(len(self.layers)): 59 | if i != 0: 60 | h = self.dropout(h) 61 | h = self.layers[i]( 62 | self.g, h, self.pseudo_proj[i](self.pseudo)) 63 | return h 64 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/sgc.py: -------------------------------------------------------------------------------- 1 | """ 2 | SGConv model in GLI. 
3 | 4 | References: 5 | https://docs.dgl.ai/generated/dgl.nn.pytorch.conv.SGConv.html 6 | https://github.com/dmlc/dgl/blob/master/examples/pytorch/sgc/sgc.py 7 | """ 8 | 9 | from torch import nn 10 | from dgl.nn.pytorch import SGConv 11 | 12 | 13 | class SGC(nn.Module): 14 | """SGC network.""" 15 | 16 | def __init__(self, 17 | g, 18 | in_feats, 19 | n_classes, 20 | k) -> None: 21 | """Initiate model.""" 22 | super().__init__() 23 | self.g = g 24 | self.layer = SGConv(in_feats, n_classes, k) 25 | 26 | def forward(self, features): 27 | """Forward.""" 28 | h = features 29 | h = self.layer(self.g, h) 30 | return h 31 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/tagcn.py: -------------------------------------------------------------------------------- 1 | """ 2 | TAGCN model in GLI. 3 | 4 | References: 5 | https://docs.dgl.ai/generated/dgl.nn.pytorch.conv.TAGConv.html 6 | """ 7 | 8 | from dgl.nn.pytorch.conv import TAGConv 9 | from torch import nn 10 | 11 | 12 | class TAGCN(nn.Module): 13 | """TAGCN network.""" 14 | 15 | def __init__( 16 | self, 17 | g, 18 | in_feats, 19 | n_hidden, 20 | n_classes, 21 | n_layers, 22 | k, 23 | activation, 24 | dropout 25 | ): 26 | """Initiate model.""" 27 | super().__init__() 28 | self.g = g 29 | self.layers = nn.ModuleList() 30 | # input layer 31 | self.layers.append(TAGConv(in_feats, 32 | n_hidden, 33 | k=k, 34 | activation=activation)) 35 | # hidden layers 36 | for _ in range(n_layers - 1): 37 | self.layers.append( 38 | TAGConv(n_hidden, n_hidden, activation=activation) 39 | ) 40 | # output layer 41 | self.layers.append(TAGConv(n_hidden, n_classes)) # activation=None 42 | self.dropout = nn.Dropout(p=dropout) 43 | 44 | def forward(self, features): 45 | """Forward.""" 46 | h = features 47 | for i, layer in enumerate(self.layers): 48 | if i != 0: 49 | h = self.dropout(h) 50 | h = layer(self.g, h) 51 | return h 52 | 
-------------------------------------------------------------------------------- /datasets/FB13/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/FB13/LICENSE -------------------------------------------------------------------------------- /datasets/FB13/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB13.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB13.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB13.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 13 18 | } 19 | -------------------------------------------------------------------------------- /datasets/FB13/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB13.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB13.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB13.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 13, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/FB15K/LICENSE: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/FB15K/LICENSE -------------------------------------------------------------------------------- /datasets/FB15K/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB15K.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB15K.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB15K.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 1345 18 | } 19 | -------------------------------------------------------------------------------- /datasets/FB15K/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB15K.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB15K.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB15K.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 1345, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/FB15K237/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/FB15K237/LICENSE -------------------------------------------------------------------------------- /datasets/FB15K237/task_kg_entity_prediction_1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB15K237.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB15K237.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB15K237.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 237 18 | } 19 | -------------------------------------------------------------------------------- /datasets/FB15K237/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB15K237.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB15K237.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB15K237.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 237, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/KGMicrobe/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2020, Lawrence Berkeley National Laboratory 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 
10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | * Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /datasets/KGMicrobe/README.md: -------------------------------------------------------------------------------- 1 | # KGMicrobe 2 | 3 | ## Dataset Description 4 | 5 | KG-Microbe is a microbe-centric Knowledge Graph (KG) to support tasks such as querying and graph link prediction in many use cases 6 | including microbiology, biomedicine, and the environment. 
@article{reese2021kg,
35 | 36 | ``` 37 | scipy==1.7.1 38 | ``` 39 | -------------------------------------------------------------------------------- /datasets/KGMicrobe/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "KGMicrobe dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in KGMicrobe dataset, strings.", 7 | "type": "string", 8 | "format": "Tensor", 9 | "file": "KGMicrobe.npz", 10 | "key": "node_name" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of KGMicrobe dataset, int ranged from 0 to 10.", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "KGMicrobe.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "KGMicrobe.npz", 23 | "key": "edge" 24 | }, 25 | "EdgeClass": { 26 | "description": "Relation type-id of the Edge in the KGMicrobe dataset, int ranged from 0 to 8.", 27 | "type": "int", 28 | "format": "Tensor", 29 | "file": "KGMicrobe.npz", 30 | "key": "edge_class" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "KGMicrobe.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "KGMicrobe.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@inproceedings{reese2021kg,\ntitle={KG-COVID-19: a framework to produce customized knowledge graphs for COVID-19 response},\nauthor={Reese, Justin T and Unni, Deepak and Callahan, Tiffany J and Cappelletti, Luca and Ravanmehr, Vida and Carbon, Seth and Shefchek, Kent A and Good, Benjamin M and Balhoff, James P and Fontana, Tommaso and others},\njournal={Patterns},\nvolume={2},\nnumber={1},\npages={100155},\nyear={2021},\npublisher={Elsevier}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/NELL-995/LICENSE: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/NELL-995/LICENSE -------------------------------------------------------------------------------- /datasets/NELL-995/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "NELL-995 dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in NELL-995 dataset, strings.", 7 | "type": "string", 8 | "format": "Tensor", 9 | "file": "NELL-995.npz", 10 | "key": "node_name" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "NELL-995.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeClass": { 19 | "description": "Relation type-id of the Edge in the NELL-995 dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "NELL-995.npz", 23 | "key": "edge_class" 24 | }, 25 | "EdgeName": { 26 | "description": "Relation name of the Edge in the NELL-995 dataset.", 27 | "type": "string", 28 | "format": "Tensor", 29 | "file": "NELL-995.npz", 30 | "key": "edge_name" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "NELL-995.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "NELL-995.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@article{xiong2017deeppath,\ntitle={Deeppath: A reinforcement learning method for knowledge graph reasoning},\nauthor={Xiong, Wenhan and Hoang, Thien and Wang, William Yang},\njournal={arXiv preprint arXiv:1707.06690},\nyear={2017}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/NELL-995/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 
| "train_triplet_set": { 6 | "file": "NELL-995.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "NELL-995.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "NELL-995.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 200 18 | } 19 | -------------------------------------------------------------------------------- /datasets/NELL-995/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "NELL-995.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "NELL-995.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "NELL-995.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 200, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/WN11/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/WN11/LICENSE -------------------------------------------------------------------------------- /datasets/WN11/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN11.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN11.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN11.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | 
"num_relations": 11 18 | } 19 | -------------------------------------------------------------------------------- /datasets/WN11/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN11.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN11.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN11.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 11, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/WN18/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/WN18/LICENSE -------------------------------------------------------------------------------- /datasets/WN18/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "WN18 dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in WN18 dataset, strings.", 7 | "type": "string", 8 | "format": "Tensor", 9 | "file": "WN18.npz", 10 | "key": "node_name" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "WN18.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeClass": { 19 | "description": "Relation type-id of the Edge in the WN18 dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "WN18.npz", 23 | "key": "edge_class" 24 | }, 25 | "EdgeName": { 26 | "description": "Relation name of the Edge in the WN18 dataset.", 27 | "type": "string", 28 | "format": "Tensor", 29 | "file": "WN18.npz", 30 | "key": "edge_name" 31 | } 32 | }, 33 | 
"Graph": { 34 | "_NodeList": { 35 | "file": "WN18.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "WN18.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@article{bordes2013translating,\ntitle={Translating embeddings for modeling multi-relational data},\nauthor={Bordes, Antoine and Usunier, Nicolas and Garcia-Duran, Alberto and Weston, Jason and Yakhnenko, Oksana},\njournal={Advances in neural information processing systems},\nvolume={26},\nyear={2013}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/WN18/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN18.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN18.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN18.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 18 18 | } 19 | -------------------------------------------------------------------------------- /datasets/WN18/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN18.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN18.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN18.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 18, 18 | "target": "Edge/EdgeClass" 19 | } 20 | 
-------------------------------------------------------------------------------- /datasets/WN18RR/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/WN18RR/LICENSE -------------------------------------------------------------------------------- /datasets/WN18RR/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "WN18RR dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in WN18RR dataset, strings.", 7 | "type": "string", 8 | "format": "Tensor", 9 | "file": "WN18RR.npz", 10 | "key": "node_name" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "WN18RR.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeClass": { 19 | "description": "Relation type-id of the Edge in the WN18RR dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "WN18RR.npz", 23 | "key": "edge_class" 24 | }, 25 | "EdgeName": { 26 | "description": "Relation name of the Edge in the WN18RR dataset.", 27 | "type": "string", 28 | "format": "Tensor", 29 | "file": "WN18RR.npz", 30 | "key": "edge_name" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "WN18RR.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "WN18RR.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@inproceedings{dettmers2018convolutional,\ntitle={Convolutional 2d knowledge graph embeddings},\nauthor={Dettmers, Tim and Minervini, Pasquale and Stenetorp, Pontus and Riedel, Sebastian},\nbooktitle={Proceedings of the AAAI Conference on Artificial Intelligence},\nvolume={32},\nnumber={1},\nyear={2018}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/WN18RR/task_kg_entity_prediction_1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN18RR.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN18RR.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN18RR.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 11 18 | } 19 | -------------------------------------------------------------------------------- /datasets/WN18RR/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN18RR.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN18RR.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN18RR.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 11, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/YAGO3-10/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/YAGO3-10/LICENSE -------------------------------------------------------------------------------- /datasets/YAGO3-10/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "YAGO3-10 dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in YAGO3-10 dataset, strings.", 7 | "type": "string", 8 | "format": 
"Tensor", 9 | "file": "YAGO3-10.npz", 10 | "key": "node_name" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "YAGO3-10.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeClass": { 19 | "description": "Relation type-id of the Edge in the YAGO3-10 dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "YAGO3-10.npz", 23 | "key": "edge_class" 24 | }, 25 | "EdgeName": { 26 | "description": "Relation name of the Edge in the YAGO3-10 dataset.", 27 | "type": "string", 28 | "format": "Tensor", 29 | "file": "YAGO3-10.npz", 30 | "key": "edge_name" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "YAGO3-10.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "YAGO3-10.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@inproceedings{dettmers2018convolutional,\ntitle={Convolutional 2d knowledge graph embeddings},\nauthor={Dettmers, Tim and Minervini, Pasquale and Stenetorp, Pontus and Riedel, Sebastian},\nbooktitle={Proceedings of the AAAI Conference on Artificial Intelligence},\nvolume={32},\nnumber={1},\nyear={2018}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/YAGO3-10/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "YAGO3-10.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "YAGO3-10.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "YAGO3-10.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 37 18 | } 19 | -------------------------------------------------------------------------------- /datasets/YAGO3-10/task_kg_relation_prediction_1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "YAGO3-10.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "YAGO3-10.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "YAGO3-10.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 37, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/actor/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/actor/LICENSE -------------------------------------------------------------------------------- /datasets/actor/README.md: -------------------------------------------------------------------------------- 1 | # Actor 2 | 3 | ## Dataset Description 4 | This dataset is the actor-only induced subgraph of the film-director-actor-writer network (Tang et al., 2009). Each node corresponds to an actor, and an edge between two nodes denotes co-occurrence on the same Wikipedia page. Node features correspond to some keywords in the Wikipedia pages. The nodes are classified into five categories. 
5 | 6 | 7 | Statistics: 8 | - Nodes: 7600 9 | - Edges: 30019 10 | - Number of Classes: 5 11 | 12 | #### Citation 13 | - Original Source 14 | + [Website](https://www.aminer.org/lab-datasets/soinf/) 15 | + LICENSE: missing 16 | ``` 17 | @inproceedings{tang2009social, 18 | title={Social influence analysis in large-scale networks}, 19 | author={Tang, Jie and Sun, Jimeng and Wang, Chi and Yang, Zi}, 20 | booktitle={Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining}, 21 | pages={807--816}, 22 | year={2009} 23 | } 24 | ``` 25 | - Current Version 26 | + [Website](https://github.com/graphdml-uiuc-jlu/geom-gcn) 27 | + LICENSE: missing 28 | ``` 29 | @article{pei2020geom, 30 | title={Geom-gcn: Geometric graph convolutional networks}, 31 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 32 | journal={arXiv preprint arXiv:2002.05287}, 33 | year={2020} 34 | } 35 | ``` 36 | 37 | ## Available Tasks 38 | 39 | ### Actor 40 | 41 | - Task type: `NodeClassification` 42 | 43 | 44 | #### Citation 45 | 46 | ``` 47 | @article{pei2020geom, 48 | title={Geom-gcn: Geometric graph convolutional networks}, 49 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 50 | journal={arXiv preprint arXiv:2002.05287}, 51 | year={2020} 52 | } 53 | ``` 54 | 55 | ## Preprocessing 56 | The data files and task config file in GLI format are transformed from the [torch_geometric.datasets](https://pytorch-geometric.readthedocs.io/en/latest/modules/datasets.html). Check `actor.ipynb` for the preprocessing. 57 | 58 | 59 | ### Requirements 60 | 61 | The preprocessing code requires the following packages. 
62 | 63 | ``` 64 | numpy==1.22.3 65 | scipy==1.7.3 66 | torch==1.11.0 67 | torch_geometric==2.0.4 68 | ``` 69 | -------------------------------------------------------------------------------- /datasets/actor/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Actor dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Actor dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "actor_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Actor dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "actor.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "actor.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "actor.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "actor.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@inproceedings{tang2009social,\ntitle={Social influence analysis in large-scale networks},\nauthor={Tang, Jie and Sun, Jimeng and Wang, Chi and Yang, Zi},\nbooktitle={Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining},\npages={807--816},\nyear={2009}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/actor/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Actor dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "actor_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "actor_task.npz", 16 | "key": 
"val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "actor_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/arxiv-year/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/arxiv-year/LICENSE -------------------------------------------------------------------------------- /datasets/arxiv-year/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "arXiv-year dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of arXiv-year dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "arxiv_year.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of arXiv-year dataset, int ranged from 0 to 4.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "arxiv_year.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "arxiv_year.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "arxiv_year.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "arxiv_year.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/arxiv-year/task_node_classification_1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on arXiv-year dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 169343 13 | } 14 | -------------------------------------------------------------------------------- /datasets/chameleon/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/chameleon/LICENSE -------------------------------------------------------------------------------- /datasets/chameleon/README.md: -------------------------------------------------------------------------------- 1 | # Chameleon 2 | 3 | ## Dataset Description 4 | 5 | Chameleon is a page-page network on specific topics in Wikipedia. In this dataset, nodes represent web pages and edges are mutual links between pages. And node features correspond to several informative nouns in the Wikipedia pages. The nodes are classified into five categories. 
6 | 7 | 8 | Statistics: 9 | - Nodes: 2277 10 | - Edges: 36101 11 | - Number of Classes: 5 12 | 13 | #### Citation 14 | - Original Source 15 | + [Website](https://github.com/benedekrozemberczki/datasets#wikipedia-article-networks) 16 | + LICENSE: [MIT](https://github.com/benedekrozemberczki/datasets/blob/master/LICENSE) 17 | ``` 18 | @article{rozemberczki2021multi, 19 | title={Multi-scale attributed node embedding}, 20 | author={Rozemberczki, Benedek and Allen, Carl and Sarkar, Rik}, 21 | journal={Journal of Complex Networks}, 22 | volume={9}, 23 | number={2}, 24 | pages={cnab014}, 25 | year={2021}, 26 | publisher={Oxford University Press} 27 | } 28 | ``` 29 | - Current Version 30 | + [Website](https://github.com/graphdml-uiuc-jlu/geom-gcn) 31 | + LICENSE: missing 32 | ``` 33 | @article{pei2020geom, 34 | title={Geom-gcn: Geometric graph convolutional networks}, 35 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 36 | journal={arXiv preprint arXiv:2002.05287}, 37 | year={2020} 38 | } 39 | ``` 40 | 41 | ## Available Tasks 42 | 43 | ### MUSAE 44 | 45 | - Task type: `NodeClassification` 46 | 47 | This is a node classification task with fixed split from [MUSAE](https://github.com/benedekrozemberczki/MUSAE). 48 | 49 | #### Citation 50 | 51 | ``` 52 | @article{pei2020geom, 53 | title={Geom-gcn: Geometric graph convolutional networks}, 54 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 55 | journal={arXiv preprint arXiv:2002.05287}, 56 | year={2020} 57 | } 58 | ``` 59 | 60 | ## Preprocessing 61 | The data files and task config file in GLI format are transformed from the [torch_geometric.datasets](https://pytorch-geometric.readthedocs.io/en/latest/modules/datasets.html). Check `chameleon.ipynb` for the preprocessing. 62 | 63 | 64 | ### Requirements 65 | 66 | The preprocessing code requires the following packages. 
67 | 68 | ``` 69 | numpy==1.22.3 70 | scipy==1.7.3 71 | torch==1.11.0 72 | torch_geometric==2.0.4 73 | ``` 74 | -------------------------------------------------------------------------------- /datasets/chameleon/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Chameleon dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Chameleon dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "chameleon_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Chameleon dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "chameleon.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "chameleon.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "chameleon.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "chameleon.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{rozemberczki2021multi,\ntitle={Multi-scale attributed node embedding},\nauthor={Rozemberczki, Benedek and Allen, Carl and Sarkar, Rik},\njournal={Journal of Complex Networks},\nvolume={9},\nnumber={2},\npages={cnab014},\nyear={2021},\npublisher={Oxford University Press}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/chameleon/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Chameleon dataset. 
The split is introduced in the paper \"Multi-scale Attributed Node Embedding\", while the classification categories are introduced in the paper \"Geom-GCN: Geometric Graph Convolutional Networks\".", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "chameleon_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "chameleon_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "chameleon_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/cifar/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Vijay Prakash Dwivedi, Chaitanya K. Joshi, Anh Tuan Luu, Thomas Laurent, Yoshua Bengio, Xavier Bresson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /datasets/cifar/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "cifar dataset", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "cifar.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "cifar.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "SparseTensor", 22 | "file": "cifar.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "cifar_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "cifar.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@misc{https://doi.org/10.48550/arxiv.2003.00982,\ndoi = {10.48550/ARXIV.2003.00982},\nurl = {https://arxiv.org/abs/2003.00982},\nauthor = {Dwivedi, Vijay Prakash and Joshi, Chaitanya K. 
and Luu, Anh Tuan and Laurent, Thomas and Bengio, Yoshua and Bresson, Xavier},\nkeywords = {Machine Learning (cs.LG), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Computer and information sciences},\ntitle = {Benchmarking Graph Neural Networks},\npublisher = {arXiv},\nyear = {2020},\ncopyright = {arXiv.org perpetual, non-exclusive license}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/cifar/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to classify pictures into 10 graph classes including aeroplane, automobile, birds, cat, deer, dog, frog, horse, ship, truck.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 10, 10 | "train_set": { 11 | "file": "cifar_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "cifar_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "cifar_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/citeseer/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Zhilin Yang 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial 
portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /datasets/citeseer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "CITESEER dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Citeseer dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "citeseer__graph__Node_NodeFeature__48cffb6534f4b56a45196efa8b32cdac.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Citeseer dataset, int ranged from 1 to 6.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "citeseer__graph__aed93544b5c54381d05b5452603278fb.npz", 16 | "key": "Node_NodeLabel" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "citeseer__graph__aed93544b5c54381d05b5452603278fb.npz", 22 | "key": "Edge_Edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "citeseer__graph__Graph_NodeList__be3f84ead018cfb899bd6f98d0bb92db.sparse.npz" 28 | } 29 | } 30 | }, 31 | "citation": "@inproceedings{yang2016revisiting,\ntitle={Revisiting semi-supervised learning with graph embeddings},\nauthor={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan},\nbooktitle={International conference on machine learning},\npages={40--48},\nyear={2016},\norganization={PMLR}\n}", 32 | "is_heterogeneous": false 33 | } -------------------------------------------------------------------------------- 
/datasets/citeseer/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on CITESEER dataset. Planetoid split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 6, 9 | "train_set": { 10 | "file": "citeseer__task_node_classification_1__d0b7b5f7e7e7cb9b84e0b3e97354e16e.npz", 11 | "key": "train_set" 12 | }, 13 | "val_set": { 14 | "file": "citeseer__task_node_classification_1__d0b7b5f7e7e7cb9b84e0b3e97354e16e.npz", 15 | "key": "val_set" 16 | }, 17 | "test_set": { 18 | "file": "citeseer__task_node_classification_1__d0b7b5f7e7e7cb9b84e0b3e97354e16e.npz", 19 | "key": "test_set" 20 | } 21 | } -------------------------------------------------------------------------------- /datasets/cora/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Zhilin Yang 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /datasets/cora/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "CORA dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Cora dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "cora__graph__Node_NodeFeature__7032c9c380d1889061dcbbcd76b8c427.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Cora dataset, int ranged from 1 to 7.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "cora__graph__6c912909fa18eff10797210ea5e485fe.npz", 16 | "key": "Node_NodeLabel" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "cora__graph__6c912909fa18eff10797210ea5e485fe.npz", 22 | "key": "Edge_Edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "cora__graph__Graph_NodeList__23bbef862fd6037395412eb03b4e1d9c.sparse.npz" 28 | } 29 | } 30 | }, 31 | "citation": "@inproceedings{yang2016revisiting,\ntitle={Revisiting semi-supervised learning with graph embeddings},\nauthor={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan},\nbooktitle={International conference on machine learning},\npages={40--48},\nyear={2016},\norganization={PMLR}\n}", 32 | "is_heterogeneous": false 33 | } -------------------------------------------------------------------------------- /datasets/cora/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on CORA dataset. 
Planetoid split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 7, 9 | "train_set": { 10 | "file": "cora__task_node_classification_1__41e167258678b585872679839ce9c40f.npz", 11 | "key": "train_set" 12 | }, 13 | "val_set": { 14 | "file": "cora__task_node_classification_1__41e167258678b585872679839ce9c40f.npz", 15 | "key": "val_set" 16 | }, 17 | "test_set": { 18 | "file": "cora__task_node_classification_1__41e167258678b585872679839ce9c40f.npz", 19 | "key": "test_set" 20 | } 21 | } -------------------------------------------------------------------------------- /datasets/cornell/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/cornell/LICENSE -------------------------------------------------------------------------------- /datasets/cornell/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Cornell dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Cornell dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "cornell_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Cornell dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "cornell.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "cornell.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "cornell.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "cornell.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{garcia2016using,\ntitle={Using fuzzy logic to leverage HTML markup for web page representation},\nauthor={Garcia-Plaza, 
Alberto P and Fresno, Victor and Unanue, Raquel Martinez and Zubiaga, Arkaitz},\njournal={IEEE Transactions on Fuzzy Systems},\nvolume={25},\nnumber={4},\npages={919--933},\nyear={2016},\npublisher={IEEE}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/cornell/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Cornell dataset. Webkb split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "cornell_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "cornell_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "cornell_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/genius/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/genius/LICENSE -------------------------------------------------------------------------------- /datasets/genius/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "genius dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of genius dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "genius.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of genius dataset, 1/0-valued vectors.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "genius.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "genius.npz", 
23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "genius.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "genius.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/genius/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on genius dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 2, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 421961 13 | } 14 | -------------------------------------------------------------------------------- /datasets/mnist/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Vijay Prakash Dwivedi, Chaitanya K. 
Joshi, Anh Tuan Luu, Thomas Laurent, Yoshua Bengio, Xavier Bresson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /datasets/mnist/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "mnist dataset", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. 
This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "mnist.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "mnist.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "SparseTensor", 22 | "file": "mnist.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "mnist_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "mnist.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@misc{https://doi.org/10.48550/arxiv.2003.00982,\ndoi = {10.48550/ARXIV.2003.00982},\nurl = {https://arxiv.org/abs/2003.00982},\nauthor = {Dwivedi, Vijay Prakash and Joshi, Chaitanya K. 
and Luu, Anh Tuan and Laurent, Thomas and Bengio, Yoshua and Bresson, Xavier},\nkeywords = {Machine Learning (cs.LG), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Computer and information sciences},\ntitle = {Benchmarking Graph Neural Networks},\npublisher = {arXiv},\nyear = {2020},\ncopyright = {arXiv.org perpetual, non-exclusive license}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/mnist/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to classify each handwritten image into one of the ten digit classes (0-9).", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 10, 10 | "train_set": { 11 | "file": "mnist_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "mnist_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "mnist_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molbace/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /datasets/ogbg-molbace/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molbace dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molbace.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molbace.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. 
This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molbace.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molbace_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molbace.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molbace/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molbace_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molbace_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molbace_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molclintox/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of 
this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /datasets/ogbg-molclintox/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molclintox dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molclintox.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molclintox.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. 
This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molclintox.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molclintox_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molclintox.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molclintox/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molclintox_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molclintox_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molclintox_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molfreesolv/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /datasets/ogbg-molfreesolv/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molfreesolv dataset", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. 
This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molfreesolv.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molfreesolv.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molfreesolv.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molfreesolv_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molfreesolv.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molfreesolv/task_graph_regression_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphRegression", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "train_set": { 10 | "file": "ogbg-molfreesolv_task.npz", 11 | "key": "train" 12 | }, 13 | "val_set": { 14 | "file": "ogbg-molfreesolv_task.npz", 
15 | "key": "val" 16 | }, 17 | "test_set": { 18 | "file": "ogbg-molfreesolv_task.npz", 19 | "key": "test" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /datasets/ogbg-molhiv/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/ogbg-molhiv/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "OGBg-molhiv dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of ogbg-molhiv dataset.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molhiv.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molhiv.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Node features of ogbg-molhiv dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molhiv.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molhiv_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molhiv.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molhiv/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not. 
Note that some datasets (e.g., ogbg-molpcba) can have multiple tasks, and can contain nan that indicates the corresponding label is not assigned to the molecule.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molhiv_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molhiv_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molhiv_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molmuv/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/ogbg-molmuv/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molmuv dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molmuv.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molmuv.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molmuv.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molmuv_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molmuv.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh S Pappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513--520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molmuv/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target 
molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molmuv_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molmuv_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molmuv_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molpcba/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/ogbg-molpcba/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molpcba dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molpcba.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molpcba.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molpcba.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molpcba_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molpcba.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molpcba/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the 
target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molpcba_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molpcba_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molpcba_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molsider/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/ogbg-molsider/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molsider dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molsider.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molsider.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molsider.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molsider_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molsider.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molsider/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to 
predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molsider_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molsider_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molsider_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbl-collab/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "OGBL-COLLAB dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of ogbl-collab dataset.", 7 | "type": "float", 8 | "format": "Tensor", 9 | "file": "ogbl-collab.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbl-collab.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeWeight": { 19 | "description": "Number of co-authored papers published in that year", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbl-collab.npz", 23 | "key": "edge_weight" 24 | }, 25 | "EdgeYear": { 26 | "description": "Year of the collaboration represented by the Edge", 27 | "type": "int", 28 | "format": "Tensor", 29 | "file": "ogbl-collab.npz", 30 | "key": "edge_year" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "ogbl-collab.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "ogbl-collab.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@inproceedings{wang2020microsoft,\ntitle={Microsoft academic graph: When experts are not enough},\nauthor={Wang, Kuansan and Shen, Zhihong and Huang, Chiyuan and Wu, Chieh-Han and Dong, 
Yuxiao and Kanakia, Anshul},\nbooktitle={Quantitative Science Studies},\npages={396--413},\nyear={2020}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/ogbl-collab/task_time_dependent_link_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the future author collaboration relationships given the past collaborations. The goal is to rank true collaborations higher than false collaborations. Specifically, we rank each true collaboration among a set of 100,000 randomly-sampled negative collaborations, and count the ratio of positive edges that are ranked at K-place or above (Hits@K). We found K = 50 to be a good threshold in our preliminary experiments.", 3 | "type": "TimeDependentLinkPrediction", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeWeight" 7 | ], 8 | "time": "Edge/EdgeYear", 9 | "train_time_window": [1963, 2018], 10 | "val_time_window": [2018, 2019], 11 | "test_time_window": [2019, 2020] 12 | } 13 | -------------------------------------------------------------------------------- /datasets/ogbl-collab/task_time_dependent_link_prediction_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the future author collaboration relationships given the past collaborations. The goal is to rank true collaborations higher than false collaborations. Specifically, we rank each true collaboration among a set of 100,000 randomly-sampled negative collaborations, and count the ratio of positive edges that are ranked at K-place or above (Hits@K). 
We found K = 50 to be a good threshold in our preliminary experiments.", 3 | "type": "TimeDependentLinkPrediction", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeWeight" 7 | ], 8 | "time": "Edge/EdgeYear", 9 | "val_neg": { 10 | "file": "ogbl-collab_task_prestore_neg.npz", 11 | "key": "val_neg" 12 | }, 13 | "test_neg": { 14 | "file": "ogbl-collab_task_prestore_neg.npz", 15 | "key": "test_neg" 16 | }, 17 | "train_time_window": [1963, 2018], 18 | "val_time_window": [2018, 2019], 19 | "test_time_window": [2019, 2020] 20 | } 21 | -------------------------------------------------------------------------------- /datasets/ogbn-arxiv/README.md: -------------------------------------------------------------------------------- 1 | # Ogbn-arxiv 2 | ## Data Description 3 | 4 | The **ogbn-arxiv** dataset is a directed graph, representing the citation network between all Computer Science (CS) arXiv papers indexed by MAG. Each node is an arXiv paper and each directed edge indicates that one paper cites another one. Each paper comes with a 128-dimensional feature vector obtained by averaging the embeddings of words in its title and abstract. 5 | 6 | Statistics: 7 | 1. Nodes: 169343 8 | 2. Edges: 1166243 9 | 10 | 11 | #### Citation 12 | - Original Source 13 | - [Website](https://direct.mit.edu/qss/article/1/1/396/15572/Microsoft-Academic-Graph-When-experts-are-not) 14 | - LICENSE: Missing 15 | ``` 16 | @inproceedings{Wu2018Stanford, 17 | title={Microsoft academic graph: When experts are not enough. 
}, 18 | author={Kuansan Wang, Zhihong Shen, Chiyuan Huang, Chieh-Han Wu, Yuxiao Dong, and Anshul Kanakia}, 19 | booktitle={Quantitative Science Studies}, 20 | pages={396=413}, 21 | year={2020} 22 | } 23 | ``` 24 | - Current Version 25 | - [Website](https://ogb.stanford.edu/docs/linkprop/) 26 | - LICENSE: [ODC-BY](https://ogb.stanford.edu/docs/linkprop/) 27 | ``` 28 | @article{hu2022stanford, 29 | title={Open Graph Benchmark: Datasets for Machine Learning on Graphs}, 30 | author={Hu, Weihua and Fey, Matthias and Zitnik, Marinka and Dong, Yuxiao and Ren, Hongyu and Liu, Bowen and Catasta, Michele and Leskovec, Jure}, 31 | year={2021} 32 | } 33 | ``` 34 | 35 | ## Available Tasks 36 | ### [OGB](https://ogb.stanford.edu/docs/nodeprop/) 37 | - Task type: `NodeClassification` 38 | 39 | #### Citation 40 | ``` 41 | @inproceedings{ 42 | title={Distributed representationsof words and phrases and their compositionality}, 43 | author={Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg S Corrado, and Jeff Dean}, 44 | booktitle={In Advances in Neural Information Processing Systems (NeurIPS)}, 45 | pages={3111=3119}, 46 | year={2013} 47 | } 48 | ``` 49 | 50 | ## Preprocessing 51 | The data files and task config file in GLI format are transformed from the OGB implementation. 52 | 53 | ### Requirements 54 | The preprocessing code requires the following package. 55 | ``` 56 | ogb >= 1.1.1 57 | numpy 58 | torch 59 | ``` 60 | -------------------------------------------------------------------------------- /datasets/ogbn-arxiv/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbn-arxiv dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. 
This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbn-arxiv.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeYear": { 13 | "description": "Year of the arxiv paper represented by the node", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "ogbn-arxiv.npz", 17 | "key": "node_year" 18 | }, 19 | "NodeLabel": { 20 | "description": "Label of the arxiv paper represented by the node", 21 | "type": "int", 22 | "format": "Tensor", 23 | "file": "ogbn-arxiv.npz", 24 | "key": "node_label" 25 | } 26 | }, 27 | "Edge": { 28 | "_Edge": { 29 | "file": "ogbn-arxiv.npz", 30 | "key": "edge" 31 | }, 32 | "ID": { 33 | "description": "ID of the edge, range from 0 to 1166243", 34 | "file": "ogbn-arxiv.npz", 35 | "key": "edge_id" 36 | } 37 | }, 38 | "Graph": { 39 | "_NodeList": { 40 | "file": "ogbn-arxiv.npz", 41 | "key": "node_list" 42 | }, 43 | "_EdgeList": { 44 | "file": "ogbn-arxiv.npz", 45 | "key": "edge_list" 46 | } 47 | } 48 | }, 49 | "citation": "@inproceedings{\ntitle={Microsoft academic graph: When experts are not enough. 
},\nauthor={Kuansan Wang, Zhihong Shen, Chiyuan Huang, Chieh-Han Wu, Yuxiao Dong, and Anshul Kanakia.},\nbooktitle={Quantitative Science Studies},\npages={396=413},\nyear={2020}\n}", 50 | "is_heterogeneous": false 51 | } 52 | -------------------------------------------------------------------------------- /datasets/ogbn-arxiv/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the 40 subject areas of arXiv CS papers, e.g., cs.AI, cs.LG, and cs.OS, which are manually determined (i.e., labeled) by the paper’s authors and arXiv moderators", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Node/NodeLabel", 9 | "num_classes":40, 10 | "train_set": { 11 | "file": "ogbn-arxiv_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbn-arxiv_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbn-arxiv_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbn-mag/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The ogbn-mag dataset is a heterogeneous network composed of a subset of the Microsoft Academic Graph (MAG) [1]. It contains four types of entities\u2014papers (736,389 nodes), authors (1,134,649 nodes), institutions (8,740 nodes), and fields of study (59,965 nodes)\u2014as well as four types of directed relations connecting two types of entities\u2014an author is \u201caffiliated with\u201d an institution, an author \u201cwrites\u201d a paper, a paper \u201ccites\u201d a paper, and a paper \u201chas a topic of\u201d a field of study. 
Similar to ogbn-arxiv, each paper
You may not (a) link or associate content in the Reviews Library with any personal information (including Amazon customer accounts), or (b) attempt to determine the identity of the author of any content in the Reviews Library. If you violate any of the foregoing conditions, your license to access and use the Reviews Library will automatically terminate without prejudice to any of the other rights or remedies Amazon may have. 6 | -------------------------------------------------------------------------------- /datasets/ogbn-products/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbn-products dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbn-products.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "47 Labels of each node", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "ogbn-products.npz", 17 | "key": "node_label" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "ogbn-products.npz", 23 | "key": "edge" 24 | }, 25 | "ID": { 26 | "description": "ID of the edge, range from 0 to 1166243", 27 | "file": "ogbn-products.npz", 28 | "key": "edge_id" 29 | } 30 | }, 31 | "Graph": { 32 | "_NodeList": { 33 | "file": "ogbn-products.npz", 34 | "key": "node_list" 35 | }, 36 | "_EdgeList": { 37 | "file": "ogbn-products.npz", 38 | "key": "edge_list" 39 | } 40 | } 41 | }, 42 | "citation": "@Misc{Bhatia16,\nauthor = {Bhatia, K. and Dahiya, K. and Jain, H. and Kar, P. and Mittal, A. and Prabhu, Y. 
and Varma, M.},\ntitle = {The extreme classification repository: Multi-label datasets and code},\nurl = {http://manikvarma.org/downloads/XC/XMLRepository.html},\nyear = {2016}}", 43 | "is_heterogeneous": false 44 | } 45 | -------------------------------------------------------------------------------- /datasets/ogbn-products/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the category of a product in a multi-class classification setup, where the 47 top-level categories are used for target labels.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Node/NodeLabel", 9 | "num_classes":47, 10 | "train_set": { 11 | "file": "ogbn-products_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbn-products_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbn-products_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbn-proteins/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbn-proteins dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeSpecies": { 6 | "description": "Species of the proteins represented by the node", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbn-proteins.npz", 10 | "key": "node_species" 11 | }, 12 | "NodeLabel": { 13 | "description": "Labels of the proteins represented by the node", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "ogbn-proteins.npz", 17 | "key": "node_label" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "ogbn-proteins.npz", 23 | "key": "edge" 24 | }, 25 | "EdgeFeature": { 26 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row 
represents the feature of i-th edge. This can be None if no input edge features are available.", 27 | "type": "int", 28 | "format": "Tensor", 29 | "file": "ogbn-proteins.npz", 30 | "key": "edge_feats" 31 | }, 32 | "ID": { 33 | "description": "ID of the edge, range from 0 to 30387995", 34 | "file": "ogbn-proteins.npz", 35 | "key": "edge_id" 36 | } 37 | }, 38 | "Graph": { 39 | "_NodeList": { 40 | "file": "ogbn-proteins.npz", 41 | "key": "node_list" 42 | }, 43 | "_EdgeList": { 44 | "file": "ogbn-proteins.npz", 45 | "key": "edge_list" 46 | } 47 | } 48 | }, 49 | "citation": "@inproceedings{title={STRING v11: protein–protein association networks with increased coverage, supporting functional discovery in genome-wide experimental datasets.},\nauthor={Damian Szklarczyk, Annika L Gable, David Lyon, Alexander Junge, Stefan Wyder, Jaime Huerta- Cepas, Milan Simonovic, Nadezhda T Doncheva, John H Morris, Peer Bork, et al.},\nbooktitle={Nucleic Acids Research},\npages={607=613},\nyear={2029}}", 50 | "is_heterogeneous": false 51 | } 52 | -------------------------------------------------------------------------------- /datasets/ogbn-proteins/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the presence of protein functions in a multi-label binary classification setup, where there are 112 kinds of labels to predict in total. 
", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeSpecies", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Node/NodeLabel", 9 | "num_classes":2, 10 | "train_set": { 11 | "file": "ogbn-proteins_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbn-proteins_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbn-proteins_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/penn94/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/penn94/LICENSE -------------------------------------------------------------------------------- /datasets/penn94/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Penn94 dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Penn94 dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "penn94.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of Penn94 dataset, 1/0-valued vectors.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "penn94.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "penn94.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "penn94.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "penn94.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information 
Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/penn94/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Penn94 dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 2, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 41554 13 | } 14 | -------------------------------------------------------------------------------- /datasets/pokec/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/pokec/LICENSE -------------------------------------------------------------------------------- /datasets/pokec/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "pokec dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of pokec dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "pokec.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of pokec dataset, 1/0-valued vectors.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "pokec.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "pokec.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "pokec.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "pokec.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks 
and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/pokec/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Pokec dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 2, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 1632803 13 | } 14 | -------------------------------------------------------------------------------- /datasets/pubmed/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/pubmed/LICENSE -------------------------------------------------------------------------------- /datasets/pubmed/README.md: -------------------------------------------------------------------------------- 1 | # PUBMED 2 | 3 | ## Dataset Description 4 | 5 | The PUBMED dataset contains a citation network with with documents as nodes and citations as edges. Each node has bag-of-words features of the document and a class label represents the research area this document belongs to. 
6 | 7 | Statistics: 8 | - Nodes: 19717 9 | - Edges: 88651 10 | - Number of Classes: 3 11 | 12 | #### Citation 13 | - Original Source 14 | + [Website](https://linqs.org/datasets/#pubmed-diabetes) 15 | + LICENSE: missing 16 | ``` 17 | @inproceedings{namata:mlg12, 18 | title = {Query-Driven Active Surveying for Collective Classification}, 19 | author = {Galileo Mark Namata and Ben London and Lise Getoor and Bert Huang}, 20 | booktitle = {International Workshop on Mining and Learning with Graphs (MLG)}, 21 | year = {2012}, 22 | _publisher = {MLG}, 23 | address = {Edinburgh, Scotland}, 24 | } 25 | ``` 26 | - Current Version 27 | + [Website](https://github.com/kimiyoung/planetoid) 28 | + LICENSE: [MIT](https://github.com/kimiyoung/planetoid/blob/master/LICENSE) 29 | ``` 30 | @inproceedings{yang2016revisiting, 31 | title={Revisiting semi-supervised learning with graph embeddings}, 32 | author={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan}, 33 | booktitle={International conference on machine learning}, 34 | pages={40--48}, 35 | year={2016}, 36 | organization={PMLR} 37 | } 38 | ``` 39 | 40 | ## Available Tasks 41 | 42 | ### Planetoid 43 | 44 | - Task type: `NodeClassification` 45 | 46 | This is a node classification task with fixed split from [planetoid](https://github.com/kimiyoung/planetoid). 47 | 48 | #### Citation 49 | 50 | ``` 51 | @inproceedings{yang2016revisiting, 52 | title={Revisiting semi-supervised learning with graph embeddings}, 53 | author={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan}, 54 | booktitle={International conference on machine learning}, 55 | pages={40--48}, 56 | year={2016}, 57 | organization={PMLR} 58 | } 59 | ``` 60 | 61 | ## Preprocessing 62 | 63 | The data files and task config file in GLI format are transformed from the [DGL](https://www.dgl.ai) implementation. Check `pubmed.ipynb` for the preprocessing. 64 | 65 | 66 | ### Requirements 67 | 68 | The preprocessing code requires the following packages. 
69 | 70 | ``` 71 | scipy==1.7.1 72 | dgl-cuda11.3==0.7.2 73 | ``` 74 | -------------------------------------------------------------------------------- /datasets/pubmed/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "PUBMED dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Pubmed dataset.", 7 | "type": "float", 8 | "format": "SparseTensor", 9 | "file": "pubmed_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Pubmed dataset, int ranged from 1 to 3.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "pubmed.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "pubmed.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "pubmed.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "pubmed.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@inproceedings{yang2016revisiting,\ntitle={Revisiting semi-supervised learning with graph embeddings},\nauthor={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan},\nbooktitle={International conference on machine learning},\npages={40--48},\nyear={2016},\norganization={PMLR}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/pubmed/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on PUBMED dataset. 
Planetoid split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 6, 9 | "train_set": { 10 | "file": "pubmed_task.npz", 11 | "key": "train" 12 | }, 13 | "val_set": { 14 | "file": "pubmed_task.npz", 15 | "key": "val" 16 | }, 17 | "test_set": { 18 | "file": "pubmed_task.npz", 19 | "key": "test" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /datasets/reddit/README.md: -------------------------------------------------------------------------------- 1 | # Reddit 2 | 3 | ## Dataset Description 4 | 5 | The Reddit dataset is a graph dataset from Reddit posts made in the month of September, 2014. The node label in this case is the community that a post belongs to. 50 large communities have been sampled to build a post-to-post graph, connecting posts if the same user comments on both. 6 | 7 | Statistics: 8 | - Nodes: 232965 9 | - Edges: 114615892 10 | - Number of Classes: 41 11 | 12 | #### Citation 13 | - Original Source 14 | + [Website](http://snap.stanford.edu/graphsage/) 15 | + LICENSE: [MIT](https://github.com/williamleif/GraphSAGE/blob/master/LICENSE.txt) 16 | ``` 17 | @article{hamilton2017inductive, 18 | title={Inductive representation learning on large graphs}, 19 | author={Hamilton, Will and Ying, Zhitao and Leskovec, Jure}, 20 | journal={Advances in neural information processing systems}, 21 | volume={30}, 22 | year={2017} 23 | } 24 | ``` 25 | 26 | - Current Version 27 | + [Website](http://snap.stanford.edu/graphsage/) 28 | + LICENSE: [MIT](https://github.com/williamleif/GraphSAGE/blob/master/LICENSE.txt) 29 | ``` 30 | @article{hamilton2017inductive, 31 | title={Inductive representation learning on large graphs}, 32 | author={Hamilton, Will and Ying, Zhitao and Leskovec, Jure}, 33 | journal={Advances in neural information processing systems}, 34 | volume={30}, 35 | year={2017} 36 | } 37 | ``` 38 | 39 | - Previous Version 40 | 41 
| 42 | 43 | 44 | ## Available Tasks 45 | 46 | - Task type: `NodeClassification` 47 | 48 | 49 | #### Citation 50 | 51 | ``` 52 | @article{hamilton2017inductive, 53 | title={Inductive representation learning on large graphs}, 54 | author={Hamilton, Will and Ying, Zhitao and Leskovec, Jure}, 55 | journal={Advances in neural information processing systems}, 56 | volume={30}, 57 | year={2017} 58 | } 59 | ``` 60 | 61 | ## Preprocessing 62 | 63 | The data files and task config file in GLI format are transformed from the [DGL](https://www.dgl.ai) implementation (check docs for [Reddit Dataset](https://docs.dgl.ai/en/0.9.x/generated/dgl.data.RedditDataset.html?highlight=reddit#dgl.data.RedditDataset)). Check `reddit.ipynb` for the preprocessing. 64 | 65 | 66 | ### Requirements 67 | 68 | The preprocessing code requires the following packages. 69 | 70 | ``` 71 | numpy 72 | torch 73 | dgl==1.1.2 74 | ``` 75 | -------------------------------------------------------------------------------- /datasets/reddit/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Reddit dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Reddit dataset, incorporating pretrained GloVe CommonCrawl word embeddings.", 7 | "type": "float", 8 | "format": "Tensor", 9 | "file": "reddit__graph__bfb7717c1f9b72842adc4af257467122.npz", 10 | "key": "Node_NodeFeature" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of Reddit dataset, int ranged from 0 to 40.", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "reddit__graph__bfb7717c1f9b72842adc4af257467122.npz", 17 | "key": "Node_NodeLabel" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "reddit__graph__bfb7717c1f9b72842adc4af257467122.npz", 23 | "key": "Edge_Edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "reddit__graph__Graph_NodeList__e4f77fbbcc4906feaf9f51e8d2a6da98.sparse.npz" 29 | } 30 | } 
31 | }, 32 | "citation": "@article{hamilton2017inductive,\ntitle={Inductive representation learning on large graphs},\nauthor={Hamilton, Will and Ying, Zhitao and Leskovec, Jure},\njournal={Advances in neural information processing systems},\nvolume={30},\nyear={2017}}", 33 | "is_heterogeneous": false 34 | } -------------------------------------------------------------------------------- /datasets/reddit/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Reddit dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 41, 9 | "train_set": { 10 | "file": "reddit__task_node_classification_1__f966ab3b42876ca118130cd1ea52237f.npz", 11 | "key": "train_set" 12 | }, 13 | "val_set": { 14 | "file": "reddit__task_node_classification_1__f966ab3b42876ca118130cd1ea52237f.npz", 15 | "key": "val_set" 16 | }, 17 | "test_set": { 18 | "file": "reddit__task_node_classification_1__f966ab3b42876ca118130cd1ea52237f.npz", 19 | "key": "test_set" 20 | } 21 | } -------------------------------------------------------------------------------- /datasets/snap-patents/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/snap-patents/LICENSE -------------------------------------------------------------------------------- /datasets/snap-patents/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "snap-patents dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of snap-patents dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "snap_patents.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node 
labels of snap-patents dataset, int ranged from 0 to 4.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "snap_patents.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "snap_patents.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "snap_patents.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "snap_patents.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/snap-patents/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on snap-patents dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 2923922 13 | } 14 | -------------------------------------------------------------------------------- /datasets/squirrel/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/squirrel/LICENSE -------------------------------------------------------------------------------- /datasets/squirrel/README.md: -------------------------------------------------------------------------------- 1 | # Squirrel 2 | 3 | ## Dataset 
Description 4 | 5 | Squirrel is a page-page network on specific topics in Wikipedia. In this dataset, nodes represent web pages and edges are mutual links between pages. And node features correspond to several informative nouns in the Wikipedia pages. The nodes are classified into five categories. 6 | 7 | Statistics: 8 | - Nodes: 5201 9 | - Edges: 217073 10 | - Number of Classes: 5 11 | 12 | #### Citation 13 | - Original Source 14 | + [Website](https://github.com/benedekrozemberczki/datasets#wikipedia-article-networks) 15 | + LICENSE: [MIT](https://github.com/benedekrozemberczki/datasets/blob/master/LICENSE) 16 | ``` 17 | @article{rozemberczki2021multi, 18 | title={Multi-scale attributed node embedding}, 19 | author={Rozemberczki, Benedek and Allen, Carl and Sarkar, Rik}, 20 | journal={Journal of Complex Networks}, 21 | volume={9}, 22 | number={2}, 23 | pages={cnab014}, 24 | year={2021}, 25 | publisher={Oxford University Press} 26 | } 27 | ``` 28 | - Current Version 29 | + [Website](https://github.com/graphdml-uiuc-jlu/geom-gcn) 30 | + LICENSE: missing 31 | ``` 32 | @article{pei2020geom, 33 | title={Geom-gcn: Geometric graph convolutional networks}, 34 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 35 | journal={arXiv preprint arXiv:2002.05287}, 36 | year={2020} 37 | } 38 | ``` 39 | 40 | ## Available Tasks 41 | 42 | ### MUSAE 43 | 44 | - Task type: `NodeClassification` 45 | 46 | This is a node classification task with fixed split from [MUSAE](https://github.com/benedekrozemberczki/MUSAE). 
47 | 48 | #### Citation 49 | 50 | ``` 51 | @article{pei2020geom, 52 | title={Geom-gcn: Geometric graph convolutional networks}, 53 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 54 | journal={arXiv preprint arXiv:2002.05287}, 55 | year={2020} 56 | } 57 | ``` 58 | 59 | ## Preprocessing 60 | The data files and task config file in GLI format are transformed from the [torch_geometric.datasets](https://pytorch-geometric.readthedocs.io/en/latest/modules/datasets.html). Check `squirrel.ipynb` for the preprocessing. 61 | 62 | 63 | ### Requirements 64 | 65 | The preprocessing code requires the following packages. 66 | 67 | ``` 68 | numpy==1.22.3 69 | scipy==1.7.3 70 | torch==1.11.0 71 | torch_geometric==2.0.4 72 | ``` 73 | -------------------------------------------------------------------------------- /datasets/squirrel/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Squirrel dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Squirrel dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "squirrel_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Squirrel dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "squirrel.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "squirrel.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "squirrel.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "squirrel.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{rozemberczki2021multi,\ntitle={Multi-scale attributed node embedding},\nauthor={Rozemberczki, Benedek and Allen, Carl and Sarkar, Rik},\njournal={Journal of Complex 
Networks},\nvolume={9},\nnumber={2},\npages={cnab014},\nyear={2021},\npublisher={Oxford University Press}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/squirrel/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Squirrel dataset. The split is introduced in the paper \"Multi-scale Attributed Node Embedding\", while the classification categories are introduced in the paper \"Geom-GCN: Geometric Graph Convolutional Networks\".", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "squirrel_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "squirrel_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "squirrel_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/texas/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/texas/LICENSE -------------------------------------------------------------------------------- /datasets/texas/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Texas dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Texas dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "texas_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of texas dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "texas.npz", 
16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "texas.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "texas.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "texas.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{garcia2016using,\ntitle={Using fuzzy logic to leverage HTML markup for web page representation},\nauthor={Garcia-Plaza, Alberto P and Fresno, Victor and Unanue, Raquel Martinez and Zubiaga, Arkaitz},\njournal={IEEE Transactions on Fuzzy Systems},\nvolume={25},\nnumber={4},\npages={919--933},\nyear={2016},\npublisher={IEEE}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/texas/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Texas dataset. Webkb split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "texas_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "texas_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "texas_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/twitch-gamers/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/twitch-gamers/LICENSE -------------------------------------------------------------------------------- /datasets/twitch-gamers/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "twitch-gamers 
dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of twitch-gamers dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "twitch_gamers.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of twitch-gamers dataset, int ranged from 0 to 1.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "twitch_gamers.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "twitch_gamers.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "twitch_gamers.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "twitch_gamers.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/twitch-gamers/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on twitch-gamers dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 2, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 168114 13 | } 14 | -------------------------------------------------------------------------------- /datasets/wiki/LICENSE: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/wiki/LICENSE -------------------------------------------------------------------------------- /datasets/wiki/README.md: -------------------------------------------------------------------------------- 1 | # wiki 2 | 3 | ## Dataset Description 4 | Wiki is a dataset of Wikipedia articles, where nodes represent pages and edges represent links between them. This dataset was collected by Lim et al. [1]. Node features are constructed using averaged title and abstract GloVe embeddings. Labels represent total page views over 60 days, which are partitioned into quintiles to make five classes. 5 | 6 | Statistics: 7 | - Nodes: 1925342 8 | - Edges: 303434860 9 | - Number of Classes: 5 10 | 11 | [1]: Lim, Derek, Felix Hohne, Xiuyu Li, Sijia Linda Huang, Vaishnavi Gupta, Omkar Bhalerao, and Ser Nam Lim. "Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods." Advances in Neural Information Processing Systems 34 (2021): 20887-20902. 
12 | 13 | 14 | #### Citation 15 | - Original Source 16 | 17 | - [Website](https://github.com/CUAI/Non-Homophily-Large-Scale) 18 | - LICENSE: missing 19 | ``` 20 | @article{lim2021large, 21 | title={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods}, 22 | author={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam}, 23 | journal={Advances in Neural Information Processing Systems}, 24 | volume={34}, 25 | pages={20887--20902}, 26 | year={2021} 27 | } 28 | ``` 29 | ## Available Tasks 30 | 31 | ### wiki 32 | 33 | - Task type: `NodeClassification` 34 | 35 | 36 | #### Citation 37 | 38 | ``` 39 | @article{lim2021large, 40 | title={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods}, 41 | author={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam}, 42 | journal={Advances in Neural Information Processing Systems}, 43 | volume={34}, 44 | pages={20887--20902}, 45 | year={2021} 46 | } 47 | ``` 48 | 49 | ## Preprocessing 50 | The data file in GLI format is transformed from the [CUAI](https://github.com/CUAI/Non-Homophily-Large-Scale). Check [Non-homo-datasets](https://github.com/GreatSnoopyMe/Non-homo-datasets) for the preprocessing. 51 | 52 | 53 | ### Requirements 54 | 55 | The preprocessing code requires the following packages. 
56 | 57 | ``` 58 | dataset==1.5.2 59 | numpy==1.22.3 60 | scipy==1.7.3 61 | torch==1.11.0 62 | ``` 63 | -------------------------------------------------------------------------------- /datasets/wiki/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "wiki dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of wiki dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "wiki.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of wiki dataset, int ranged from 0 to 4.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "wiki.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "wiki.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "wiki.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "wiki.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/wiki/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on wiki dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 1925342 13 | } 14 | 
-------------------------------------------------------------------------------- /datasets/wisconsin/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/wisconsin/LICENSE -------------------------------------------------------------------------------- /datasets/wisconsin/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Wisconsin dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Wisconsin dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "wisconsin_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Wisconsin dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "wisconsin.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "wisconsin.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "wisconsin.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "wisconsin.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{garcia2016using,\ntitle={Using fuzzy logic to leverage HTML markup for web page representation},\nauthor={Garcia-Plaza, Alberto P and Fresno, Victor and Unanue, Raquel Martinez and Zubiaga, Arkaitz},\njournal={IEEE Transactions on Fuzzy Systems},\nvolume={25},\nnumber={4},\npages={919--933},\nyear={2016},\npublisher={IEEE}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/wisconsin/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Wisconsin dataset. 
Webkb split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "wisconsin_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "wisconsin_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "wisconsin_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /docs/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/source/conf.py 17 | 18 | # We recommend specifying your dependencies to enable reproducible builds: 19 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 20 | python: 21 | install: 22 | - requirements: docs/requirements.txt 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-rtd-theme 2 | sphinx-copybutton 3 | numpy>=1.19 4 | scipy>=1.5 5 | torch>=1.10 6 | dgl>=0.6 7 | -------------------------------------------------------------------------------- /docs/source/_templates/classtemplate.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: {{ module }} 6 | 7 | 8 | {{ name | underline}} 9 | 10 | .. autoclass:: {{ name }} 11 | :show-inheritance: 12 | :members: 13 | 14 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/_templates/functemplate.rst: -------------------------------------------------------------------------------- 1 | 2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: {{ module }} 6 | 7 | 8 | {{ name | underline}} 9 | 10 | .. autofunction:: {{ name }} -------------------------------------------------------------------------------- /docs/source/api/dataset.rst: -------------------------------------------------------------------------------- 1 | .. _dataset: 2 | 3 | gli.dataset 4 | =========== 5 | 6 | .. currentmodule:: gli.dataset 7 | .. automodule:: gli.dataset 8 | -------------------------------------------------------------------------------- /docs/source/api/gli.rst: -------------------------------------------------------------------------------- 1 | .. _dataloading: 2 | 3 | gli.dataloading 4 | =============== 5 | 6 | .. currentmodule:: gli.dataloading 7 | .. automodule:: gli.dataloading 8 | 9 | Utility Function 10 | ---------------- 11 | 12 | .. autosummary:: 13 | :toctree: ../generated/ 14 | :nosignatures: 15 | :template: functemplate.rst 16 | 17 | get_gli_dataset 18 | get_gli_graph 19 | get_gli_task 20 | combine_graph_and_task -------------------------------------------------------------------------------- /docs/source/api/graph.rst: -------------------------------------------------------------------------------- 1 | .. _graph: 2 | 3 | gli.graph 4 | ========= 5 | 6 | .. currentmodule:: gli.graph 7 | .. automodule:: gli.graph 8 | 9 | Utility functions 10 | ----------------- 11 | 12 | .. 
autosummary:: 13 | :toctree: ../generated/ 14 | :nosignatures: 15 | :template: functemplate.rst 16 | 17 | read_gli_graph -------------------------------------------------------------------------------- /docs/source/api/io.rst: -------------------------------------------------------------------------------- 1 | .. _io: 2 | 3 | gli.io 4 | ====== 5 | 6 | .. currentmodule:: gli.io 7 | .. automodule:: gli.io 8 | 9 | Attribute Base Class 10 | -------------------- 11 | .. autosummary:: 12 | :toctree: ../generated/ 13 | :nosignatures: 14 | :template: classtemplate.rst 15 | 16 | Attribute 17 | 18 | Utility functions 19 | ----------------- 20 | 21 | .. autosummary:: 22 | :toctree: ../generated/ 23 | :nosignatures: 24 | :template: functemplate.rst 25 | 26 | save_homograph 27 | save_heterograph 28 | save_task_node_regression 29 | save_task_node_classification -------------------------------------------------------------------------------- /docs/source/api/task.rst: -------------------------------------------------------------------------------- 1 | .. _task: 2 | 3 | gli.task 4 | ======== 5 | 6 | .. currentmodule:: gli.task 7 | .. automodule:: gli.task 8 | 9 | Base Class 10 | ---------- 11 | 12 | .. autosummary:: 13 | :toctree: ../generated/ 14 | :nosignatures: 15 | :template: classtemplate.rst 16 | 17 | GLITask 18 | 19 | Utility Function 20 | ---------------- 21 | 22 | .. autosummary:: 23 | :toctree: ../generated/ 24 | :nosignatures: 25 | :template: functemplate.rst 26 | 27 | read_gli_task 28 | 29 | Available Tasks 30 | --------------- 31 | 32 | .. 
autosummary:: 33 | :toctree: ../generated/ 34 | :nosignatures: 35 | :template: classtemplate.rst 36 | 37 | NodeClassificationTask 38 | NodeRegressionTask 39 | GraphClassificationTask 40 | GraphRegressionTask 41 | LinkPredictionTask 42 | TimeDependentLinkPredictionTask 43 | KGEntityPredictionTask 44 | KGRelationPredictionTask -------------------------------------------------------------------------------- /docs/source/api/utils.rst: -------------------------------------------------------------------------------- 1 | .. _utils: 2 | 3 | gli.utils 4 | ========= 5 | 6 | .. currentmodule:: gli.utils 7 | .. automodule:: gli.utils -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | """Configuration file for the Sphinx documentation builder. 2 | 3 | For the full list of built-in configuration values, see the documentation: 4 | https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | """ 6 | # import sphinx_rtd_theme 7 | import os 8 | import sys 9 | 10 | sys.path.insert(0, os.path.abspath('../..')) 11 | # -- Project information ----------------------------------------------------- 12 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 13 | 14 | project = 'GLI' 15 | copyright = '2022, GLI Team' # pylint: disable=redefined-builtin 16 | author = 'GLI Team' 17 | release = '0.1' 18 | 19 | # -- General configuration --------------------------------------------------- 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 21 | 22 | extensions = [ 23 | 'sphinx.ext.autodoc', 24 | 'sphinx.ext.autosummary', 25 | 'sphinx.ext.napoleon', 26 | 'sphinx_copybutton' 27 | ] 28 | 29 | templates_path = ['_templates'] 30 | exclude_patterns = [] 31 | 32 | autosummary_generate = True # Turn on sphinx.ext.autosummary 33 | 34 | # -- Options for HTML output 
------------------------------------------------- 35 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 36 | 37 | html_theme = 'sphinx_rtd_theme' 38 | html_static_path = [] 39 | -------------------------------------------------------------------------------- /docs/source/format/citation.rst: -------------------------------------------------------------------------------- 1 | .. _citation: 2 | 3 | Citation and License 4 | ==================== -------------------------------------------------------------------------------- /docs/source/generated/gli.dataloading.combine_graph_and_task.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.dataloading 6 | 7 | 8 | combine_graph_and_task 9 | ====================== 10 | 11 | .. autofunction:: combine_graph_and_task -------------------------------------------------------------------------------- /docs/source/generated/gli.dataloading.get_gli_dataset.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.dataloading 6 | 7 | 8 | get_gli_dataset 9 | =============== 10 | 11 | .. autofunction:: get_gli_dataset -------------------------------------------------------------------------------- /docs/source/generated/gli.dataloading.get_gli_graph.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.dataloading 6 | 7 | 8 | get_gli_graph 9 | ============= 10 | 11 | .. autofunction:: get_gli_graph -------------------------------------------------------------------------------- /docs/source/generated/gli.dataloading.get_gli_task.rst: -------------------------------------------------------------------------------- 1 |  2 | .. 
role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.dataloading 6 | 7 | 8 | get_gli_task 9 | ============ 10 | 11 | .. autofunction:: get_gli_task -------------------------------------------------------------------------------- /docs/source/generated/gli.graph.read_gli_graph.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.graph 6 | 7 | 8 | read_gli_graph 9 | ============== 10 | 11 | .. autofunction:: read_gli_graph -------------------------------------------------------------------------------- /docs/source/generated/gli.io.Attribute.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | Attribute 9 | ========= 10 | 11 | .. autoclass:: Attribute 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.io.save_heterograph.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | save_heterograph 9 | ================ 10 | 11 | .. autofunction:: save_heterograph -------------------------------------------------------------------------------- /docs/source/generated/gli.io.save_homograph.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | save_homograph 9 | ============== 10 | 11 | .. 
autofunction:: save_homograph -------------------------------------------------------------------------------- /docs/source/generated/gli.io.save_task_node_classification.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | save_task_node_classification 9 | ============================= 10 | 11 | .. autofunction:: save_task_node_classification -------------------------------------------------------------------------------- /docs/source/generated/gli.io.save_task_node_regression.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | save_task_node_regression 9 | ========================= 10 | 11 | .. autofunction:: save_task_node_regression -------------------------------------------------------------------------------- /docs/source/generated/gli.task.GLITask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | GLITask 9 | ======= 10 | 11 | .. autoclass:: GLITask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.GraphClassificationTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | GraphClassificationTask 9 | ======================= 10 | 11 | .. autoclass:: GraphClassificationTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. 
automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.GraphRegressionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | GraphRegressionTask 9 | =================== 10 | 11 | .. autoclass:: GraphRegressionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.KGEntityPredictionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | KGEntityPredictionTask 9 | ====================== 10 | 11 | .. autoclass:: KGEntityPredictionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.KGRelationPredictionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | KGRelationPredictionTask 9 | ======================== 10 | 11 | .. autoclass:: KGRelationPredictionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.LinkPredictionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | LinkPredictionTask 9 | ================== 10 | 11 | .. autoclass:: LinkPredictionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. 
automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.NodeClassificationTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | NodeClassificationTask 9 | ====================== 10 | 11 | .. autoclass:: NodeClassificationTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.NodeRegressionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | NodeRegressionTask 9 | ================== 10 | 11 | .. autoclass:: NodeRegressionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.TimeDependentLinkPredictionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | TimeDependentLinkPredictionTask 9 | =============================== 10 | 11 | .. autoclass:: TimeDependentLinkPredictionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.read_gli_task.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | read_gli_task 9 | ============= 10 | 11 | .. 
autofunction:: read_gli_task -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. GLI documentation master file, created by 2 | sphinx-quickstart on Sun Oct 30 13:29:10 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to GLI's Tutorial and Documentation! 7 | ============================================ 8 | 9 | GLI is an easy-to-use graph learning platform with unique features that can better serve the dataset contributors, in comparison to existing graph learning libraries. It aims to ease and incentivize the creation and curation of datasets. 10 | 11 | Highlighted Features 12 | -------------------- 13 | 14 | Standard Data Format 15 | ~~~~~~~~~~~~~~~~~~~~ 16 | 17 | GLI defines a standard data format that has efficient storage and access to graphs. It unifies the storage for graphs of different scales and heterogeneity and is thus flexible to accommodate various graph-structured data. 18 | 19 | Explicit Separation of Data Storage and Task Configuration 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | GLI makes an explicit separation between the data storage and the task configuration for graph learning. i.e., Multiple tasks can be performed on the same dataset, or the same task can be performed on different datasets. The separation between graphs and tasks further allows users to use general datasets bound to every type of task that can be applied to every graph dataset. 23 | 24 | .. toctree:: 25 | :maxdepth: 1 26 | :caption: Get Started 27 | :hidden: 28 | :glob: 29 | 30 | 31 | start/install 32 | start/tutorial 33 | start/contribute 34 | 35 | 36 | .. 
toctree:: 37 | :maxdepth: 1 38 | :caption: API Reference 39 | :hidden: 40 | :glob: 41 | :titlesonly: 42 | 43 | api/gli 44 | api/task 45 | api/graph 46 | api/dataset 47 | api/utils 48 | api/io 49 | 50 | 51 | .. toctree:: 52 | :maxdepth: 2 53 | :caption: File Format 54 | :hidden: 55 | :glob: 56 | 57 | 58 | format/file 59 | format/citation -------------------------------------------------------------------------------- /docs/source/start/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | =============================== 3 | 4 | Currently, we support installation from the source. 5 | 6 | .. code:: bash 7 | 8 | git clone https://github.com/Graph-Learning-Benchmarks/gli.git 9 | cd gli 10 | pip install -e . # basic requirements 11 | pip install -e ".[test]" # test-related requirements 12 | pip install -e ".[doc]" # doc-related requirements 13 | pip install -e ".[full]" # all requirements 14 | 15 | To test the installation, run the following command: 16 | 17 | .. code:: bash 18 | 19 | python example.py --graph cora --task NodeClassification 20 | 21 | The output should be like this: 22 | 23 | :: 24 | 25 | > Graph(s) loading takes 0.0196 seconds and uses 0.9788 MB. 26 | > Task loading takes 0.0016 seconds and uses 0.1218 MB. 27 | > Combining(s) graph and task takes 0.0037 seconds and uses 0.0116 MB. 28 | Dataset("CORA dataset. NodeClassification", num_graphs=1, save_path=/Users/jimmy/.dgl/CORA dataset. NodeClassification)** -------------------------------------------------------------------------------- /gli/__init__.py: -------------------------------------------------------------------------------- 1 | """Root entry.""" 2 | from .config import * 3 | from . import dataloading 4 | from . import dataset 5 | from . import graph 6 | from . import task 7 | from . 
"""Configuration file."""
from os.path import realpath, dirname, expanduser, join

# Repository root (two levels up from this module).
ROOT_PATH = dirname(dirname(realpath(__file__)))
# Warn before densifying arrays larger than this many elements.
WARNING_DENSE_SIZE = 1e9
# Local cache directory for downloaded datasets.
# NOTE: components are passed separately so ``join`` emits the correct
# separator on every platform (".gli/datasets" as a single component would
# yield mixed separators on Windows).
DATASET_PATH = join(expanduser("~"), ".gli", "datasets")
# Index of download URLs for all hosted datasets.
GLOBAL_FILE_URL = "https://jiaqima.github.io/gli/global_urls.json"
# Fallback data-hosting server.
SERVER_IP = "http://34.211.28.138"
-------------------------------------------------------------------------------- /img/flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/img/flowchart.png -------------------------------------------------------------------------------- /img/gli-banner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/img/gli-banner.jpg -------------------------------------------------------------------------------- /img/gli-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/img/gli-banner.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "gl-indexer" 7 | version = "0.1.0" 8 | authors = [ 9 | { name = "Jiaqi Ma", email = "jiaqima@umich.edu" }, 10 | { name = "Xingjian Zhang", email = "jimmyzxj@umich.edu" }, 11 | ] 12 | description = "Contributor-friendly and metadata-rich platform for graph learning benchmarks." 
13 | readme = "README.md" 14 | requires-python = ">=3.6" 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | dependencies = ["numpy>=1.19", "scipy>=1.5", "torch>=1.10", "dgl>=0.6"] 21 | optional-dependencies = { test = [ 22 | "pytest", 23 | "pydocstyle", 24 | "pycodestyle", 25 | "pylint", 26 | "pyyaml", 27 | "pre-commit", 28 | ], doc = [ 29 | "sphinx", 30 | "sphinx-rtd-theme", 31 | "sphinx_copybutton" 32 | ], tag = [ 33 | "powerlaw", 34 | ] } 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/pypa/sampleproject" 38 | 39 | [tool.setuptools.packages.find] 40 | where = ["."] 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19 2 | scipy>=1.5 3 | torch>=1.10 4 | dgl>=0.6 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup script for the package.""" 2 | from setuptools import setup 3 | 4 | setup() 5 | -------------------------------------------------------------------------------- /templates/dataset-folder/LICENSE: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /templates/dataset-folder/README.md: -------------------------------------------------------------------------------- 1 | # 2 | 3 | 4 | 5 | ## Dataset Description 6 | 7 | 8 | 9 | #### Citation 10 | 11 | - Original Source 12 | + [Website]() 13 | + LICENSE: []() 14 | 15 | 16 | 17 | ``` 18 | 19 | ``` 20 | 21 | - Current Version 22 | + [Website]() 23 | + LICENSE: []() 24 | 25 | 26 | 27 | ``` 28 | 29 | ``` 30 | 31 | - Previous Version 32 | + [Website]() 33 | + LICENSE: []() 34 | 35 | 36 | 37 | 38 | 
``` 39 | 40 | ``` 41 | 42 | 43 | 44 | ## Available Tasks 45 | 46 | 47 | 48 | ### 49 | 50 | 51 | 52 | - Task type: `` 53 | 54 | 55 | 56 | #### Citation 57 | 58 | ``` 59 | 60 | ``` 61 | 62 | 63 | 64 | ## Preprocessing 65 | 66 | 67 | 68 | ### Requirements 69 | 70 | ``` 71 | 72 | ``` 73 | 74 | 75 | -------------------------------------------------------------------------------- /templates/dataset-folder/preprocess.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Please use this file to write codes that transform the data files and configuration files from the raw data. Typically this script should include following parts:\n", 8 | "1. Download raw data\n", 9 | "2. Process raw data\n", 10 | "3. Convert the raw data into gli format\n", 11 | "4. save the gli format data into\n", 12 | " 1. configuration files (e.g., metadata.json, ...)\n", 13 | " 2. data files (e.g., cora.npz, ...)\n", 14 | "\n", 15 | "_You can use a Python script (*.py) to preprocess, too._" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3 (ipykernel)", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.8.13" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 2 45 | } 46 | -------------------------------------------------------------------------------- /tests/config.yaml: -------------------------------------------------------------------------------- 1 | large_dataset_to_skip: ["wiki", "ogbg-code2"] 2 | 
"""Preprocess before tests."""
import os
import shutil
import subprocess

DATAFILES_URL = ""
NUM_TESTS_THRESHOLD = 999999999999999  # no need to preprocess yet


def _prepare_data_files():
    """Download the combined data archive and distribute its ``.npz`` files.

    Runs only when ``temp/changed_datasets`` exists and lists at least
    ``NUM_TESTS_THRESHOLD`` datasets; otherwise the download is skipped.
    """
    # Guard clauses: nothing to do unless enough datasets changed.
    if not os.path.exists("temp/changed_datasets"):
        return
    with open("temp/changed_datasets", encoding="utf-8") as handle:
        changed_datasets = handle.read().split()
    if len(changed_datasets) < NUM_TESTS_THRESHOLD:
        # do not download the combined data files if # of tests is small
        return

    # Fetch and unpack the combined archive, then delete it.
    archive = "datafiles.tar"
    subprocess.run(["wget", "-q", "-O", archive, DATAFILES_URL], check=True)
    shutil.unpack_archive(archive)
    os.remove(archive)

    # Move every .npz payload next to its dataset's metadata.
    for dataset in os.listdir("datafiles/"):
        src_dir = os.path.join("datafiles/", dataset)
        dst_dir = os.path.join("datasets/", dataset)
        if not os.path.isdir(src_dir):
            continue
        for file_name in os.listdir(src_dir):
            if os.path.splitext(file_name)[-1] == ".npz":
                shutil.move(os.path.join(src_dir, file_name),
                            os.path.join(dst_dir, file_name))
    shutil.rmtree("datafiles/")


if __name__ == "__main__":
    _prepare_data_files()
def check_essential_keys_task_json(dic):
    """Return the list of required keys missing from a task JSON dict.

    The "feature" key only needs to be present (its value may be null);
    every other required key must be present with a non-null value.
    """
    if "type" not in dic:
        return ["type"]
    missing_keys = []
    for key in SUPPORTED_TASK_REQUIRED_KEYS_HASH[dic["type"]]:
        if key == "feature":
            is_missing = key not in dic
        else:
            is_missing = dic.get(key) is None
        if is_missing:
            missing_keys.append(key)
    return missing_keys
"""Functions used in test_training."""
import os
import fnmatch
import json


def get_cfg(dataset):
    """Return fixed (args, model_cfg, train_cfg) dicts for test_training."""
    args = {
        "model": "GCN",
        "dataset": dataset,
        "task": "NodeClassification",
        "gpu": -1
    }

    model_cfg = {
        "num_layers": 2,
        "num_hidden": 8,
        "dropout": .6
    }

    train_cfg = {
        "loss_fun": "cross_entropy",
        "dataset": {
            "self_loop": True,
            "to_dense": True
        },
        "optim": {
            "lr": .005,
            "weight_decay": 0.0005
        },
        "num_trials": 1,
        "max_epoch": 3
    }
    return args, model_cfg, train_cfg


def check_multiple_split_v2(dataset):
    """Return 1 if any task of the dataset has multiple splits, else 0.

    Fixes two defects of the previous version: it returned after
    inspecting only the first matching task file (so a later task*.json
    with multiple splits was never seen), and it implicitly returned
    ``None`` when no task file existed.
    """
    dataset_directory = os.getcwd() + "/datasets/" + dataset
    for file in os.listdir(dataset_directory):
        if fnmatch.fnmatch(file, "task*.json"):
            with open(dataset_directory + "/" + file, encoding="utf-8") as f:
                task_dict = json.load(f)
            # Keep scanning: any task file may declare multiple splits.
            if task_dict.get("num_splits", 1) > 1:
                return 1
    return 0


def check_dataset_task(dataset, target_task):
    """Check whether the dataset supports target_task."""
    directory = os.getcwd() + "/datasets/" + dataset
    for file in os.listdir(directory):
        if fnmatch.fnmatch(file, "task*.json"):
            with open(directory + "/" + file, encoding="utf-8") as f:
                task_dict = json.load(f)
            if task_dict["type"] == target_task:
                return True
    return False


def get_label_number(labels):
    """Return the number of label columns (1 for a 1-D label array)."""
    if len(labels.shape) > 1:
        return labels.shape[1]
    return 1
--------------------------------------------------------------------------------