├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── comment-pr.yml │ ├── doc.yml │ ├── pycodestyle.yml │ ├── pydocstyle.yml │ ├── pylint.yml │ ├── pytest.yml │ └── receive-pr.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .pylintrc ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── benchmarks ├── GraphClassification │ ├── README.md │ ├── config_gen.py │ ├── configs │ │ ├── ChebNet.yaml │ │ ├── DGN.yaml │ │ ├── GIN.yaml │ │ ├── model_default.yaml │ │ └── train_default.yaml │ ├── grid │ │ └── grid_example.yaml │ ├── models │ │ ├── cheb_net.py │ │ ├── dgn.py │ │ ├── gcn.py │ │ ├── gin.py │ │ └── mlp_readout_layer.py │ ├── train.py │ └── utils.py └── NodeClassification │ ├── README.md │ ├── config_gen.py │ ├── configs │ ├── APPNP.yaml │ ├── GAT.yaml │ ├── GATv2.yaml │ ├── GCNII.yaml │ ├── GraphSAGE.yaml │ ├── LINKX.yaml │ ├── LINKX_train.yaml │ ├── MixHop.yaml │ ├── MoNet.yaml │ ├── SGC.yaml │ ├── TAGCN.yaml │ ├── catboost.yaml │ ├── lightgbm.yaml │ ├── model_default.yaml │ └── train_default.yaml │ ├── grid │ └── grid_example.yaml │ ├── models │ ├── appnp.py │ ├── gat.py │ ├── gatv2.py │ ├── gbdt.py │ ├── gcn.py │ ├── gcn2.py │ ├── gcn_minibatch.py │ ├── graph_sage.py │ ├── graph_sage_minibatch.py │ ├── linkx.py │ ├── mixhop.py │ ├── mlp.py │ ├── monet.py │ ├── sgc.py │ └── tagcn.py │ ├── train.py │ ├── train_gbdt.py │ ├── train_minibatch.py │ └── utils.py ├── datasets ├── FB13 │ ├── FB13.ipynb │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── FB15K │ ├── FB15K.ipynb │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── FB15K237 │ ├── FB15K237.ipynb │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── KGMicrobe │ ├── 
KGMicrobe.ipynb │ ├── LICENSE │ ├── README.md │ └── metadata.json ├── NELL-995 │ ├── LICENSE │ ├── NELL-995.ipynb │ ├── README.md │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── WN11 │ ├── LICENSE │ ├── README.md │ ├── WN11.ipynb │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── WN18 │ ├── LICENSE │ ├── README.md │ ├── WN18.ipynb │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── WN18RR │ ├── LICENSE │ ├── README.md │ ├── WN18RR.ipynb │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── YAGO3-10 │ ├── LICENSE │ ├── README.md │ ├── YAGO3-10.ipynb │ ├── metadata.json │ ├── task_kg_entity_prediction_1.json │ └── task_kg_relation_prediction_1.json ├── actor │ ├── LICENSE │ ├── README.md │ ├── actor.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── arxiv-year │ ├── LICENSE │ ├── README.md │ ├── arxiv-year.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── chameleon │ ├── LICENSE │ ├── README.md │ ├── chameleon.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── cifar │ ├── LICENSE │ ├── README.md │ ├── cifar.ipynb │ ├── metadata.json │ └── task_graph_classification_1.json ├── citeseer │ ├── LICENSE │ ├── README.md │ ├── citeseer.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── cora │ ├── LICENSE │ ├── README.md │ ├── cora.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── cornell │ ├── LICENSE │ ├── README.md │ ├── cornell.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── genius │ ├── LICENSE │ ├── README.md │ ├── genius.ipynb │ ├── metadata.json │ └── task_node_classification_1.json ├── mnist │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── mnist.ipynb │ └── task_graph_classification_1.json ├── ogbg-molbace │ ├── LICENSE │ ├── README.md │ ├── 
metadata.json │ ├── ogbg-molbace.ipynb │ └── task_graph_classification_1.json ├── ogbg-molclintox │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molclintox.ipynb │ └── task_graph_classification_1.json ├── ogbg-molfreesolv │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molfreesolv.ipynb │ └── task_graph_regression_1.json ├── ogbg-molhiv │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molhiv.ipynb │ └── task_graph_classification_1.json ├── ogbg-molmuv │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molmuv.ipynb │ └── task_graph_classification_1.json ├── ogbg-molpcba │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molpcba.ipynb │ └── task_graph_classification_1.json ├── ogbg-molsider │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbg-molsider.ipynb │ └── task_graph_classification_1.json ├── ogbl-collab │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbl-collab.ipynb │ ├── task_time_dependent_link_prediction_1.json │ └── task_time_dependent_link_prediction_2.json ├── ogbn-arxiv │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbn-arxiv.ipynb │ └── task_node_classification_1.json ├── ogbn-mag │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbn-mag.ipynb │ └── task_node_classification_1.json ├── ogbn-products │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbn-products.ipynb │ └── task_node_classification_1.json ├── ogbn-proteins │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── ogbn-proteins.ipynb │ └── task_node_classification_1.json ├── penn94 │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── penn94.ipynb │ └── task_node_classification_1.json ├── pokec │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── pokec.ipynb │ └── task_node_classification_1.json ├── pubmed │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── pubmed.ipynb │ └── task_node_classification_1.json ├── reddit │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── 
reddit.ipynb │ └── task_node_classification_1.json ├── snap-patents │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── snap-patents.ipynb │ └── task_node_classification_1.json ├── squirrel │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── squirrel.ipynb │ └── task_node_classification_1.json ├── texas │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_node_classification_1.json │ └── texas.ipynb ├── twitch-gamers │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_node_classification_1.json │ └── twitch-gamers.ipynb ├── wiki │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_node_classification_1.json │ └── wiki.ipynb └── wisconsin │ ├── LICENSE │ ├── README.md │ ├── metadata.json │ ├── task_node_classification_1.json │ └── wisconsin.ipynb ├── docs ├── .readthedocs.yaml ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _templates │ ├── classtemplate.rst │ └── functemplate.rst │ ├── api │ ├── dataset.rst │ ├── gli.rst │ ├── graph.rst │ ├── io.rst │ ├── task.rst │ └── utils.rst │ ├── conf.py │ ├── format │ ├── citation.rst │ └── file.rst │ ├── generated │ ├── gli.dataloading.combine_graph_and_task.rst │ ├── gli.dataloading.get_gli_dataset.rst │ ├── gli.dataloading.get_gli_graph.rst │ ├── gli.dataloading.get_gli_task.rst │ ├── gli.graph.read_gli_graph.rst │ ├── gli.io.Attribute.rst │ ├── gli.io.save_heterograph.rst │ ├── gli.io.save_homograph.rst │ ├── gli.io.save_task_node_classification.rst │ ├── gli.io.save_task_node_regression.rst │ ├── gli.task.GLITask.rst │ ├── gli.task.GraphClassificationTask.rst │ ├── gli.task.GraphRegressionTask.rst │ ├── gli.task.KGEntityPredictionTask.rst │ ├── gli.task.KGRelationPredictionTask.rst │ ├── gli.task.LinkPredictionTask.rst │ ├── gli.task.NodeClassificationTask.rst │ ├── gli.task.NodeRegressionTask.rst │ ├── gli.task.TimeDependentLinkPredictionTask.rst │ └── gli.task.read_gli_task.rst │ ├── index.rst │ └── start │ ├── contribute.rst │ ├── install.rst │ └── tutorial.rst ├── 
example.py ├── gli ├── __init__.py ├── config.py ├── dataloading.py ├── dataset.py ├── graph.py ├── io │ ├── __init__.py │ ├── edge_task.py │ ├── graph.py │ ├── graph_task.py │ ├── kg_task.py │ ├── node_task.py │ └── utils.py ├── tags.py ├── task.py └── utils.py ├── img ├── GLI-Contribution-Workflow.png ├── GLI-File-Structure.png ├── flowchart.png ├── gli-banner.jpg └── gli-banner.png ├── pyproject.toml ├── requirements.txt ├── setup.py ├── templates └── dataset-folder │ ├── LICENSE │ ├── README.md │ ├── metadata.hjson │ └── preprocess.ipynb └── tests ├── config.yaml ├── conftest.py ├── kg_utils.py ├── preprocess.py ├── test_data_loading.py ├── test_files.py ├── test_io.py ├── test_kg_training.py ├── test_metadata.py ├── test_node_classification_training.py ├── test_node_regression_training.py ├── test_task.py ├── training_utils.py └── utils.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | ``` 15 | Steps to reproduce the behavior 16 | ``` 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **Screenshots** 22 | If applicable, add screenshots to help explain your problem. 23 | 24 | **Additional context** 25 | Add any other context about the problem here. 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[FEATURE REQUEST]" 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? 
Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Description 4 | 5 | 6 | ## Related Issue 7 | 8 | 9 | 10 | 11 | 12 | ## Motivation and Context 13 | 14 | 15 | 16 | ## How Has This Been Tested? 17 | 18 | 19 | 20 | 21 | ## Screenshots (if appropriate): 22 | -------------------------------------------------------------------------------- /.github/workflows/doc.yml: -------------------------------------------------------------------------------- 1 | name: Docs 2 | on: [push, pull_request, workflow_dispatch] 3 | jobs: 4 | docs: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | matrix: 8 | python-version: ["3.8"] 9 | steps: 10 | - uses: actions/checkout@v2 11 | - uses: actions/setup-python@v2 12 | with: 13 | python-version: ${{ matrix.python-version }} 14 | - name: Install dependencies 15 | run: | 16 | # pip install sphinx sphinx_rtd_theme 17 | pip install -e ".[doc]" 18 | - name: Sphinx build 19 | run: | 20 | sphinx-build docs/source _build 21 | - name: Deploy 22 | uses: peaceiris/actions-gh-pages@v3 23 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} 24 | with: 25 | publish_branch: gh-pages 26 | github_token: ${{ secrets.GITHUB_TOKEN }} 27 | publish_dir: _build/ 28 | force_orphan: true -------------------------------------------------------------------------------- /.github/workflows/pycodestyle.yml: 
-------------------------------------------------------------------------------- 1 | name: Pycodestyle 2 | 3 | on: [pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8"] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pycodestyle 21 | - name: Analysing the code with pylint 22 | run: | 23 | pycodestyle $(git ls-files '*.py') 24 | -------------------------------------------------------------------------------- /.github/workflows/pydocstyle.yml: -------------------------------------------------------------------------------- 1 | name: Pydocstyle 2 | 3 | on: [pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8"] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pydocstyle 21 | - name: Analysing the code with pydocstyle 22 | run: | 23 | pydocstyle $(git ls-files '*.py') 24 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | 3 | on: [pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8"] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ 
matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install numpy torch scipy 21 | pip install --force-reinstall pylint==2.17.5 22 | - name: Analysing the code with pylint 23 | run: | 24 | pylint $(git ls-files '*.py') --rcfile .pylintrc --recursive y 25 | -------------------------------------------------------------------------------- /.github/workflows/pytest.yml: -------------------------------------------------------------------------------- 1 | name: Pytest 2 | 3 | on: [pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: self-hosted 8 | strategy: 9 | matrix: 10 | python-version: ["3.7"] 11 | steps: 12 | - uses: actions/checkout@v3 13 | with: 14 | fetch-depth: 0 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v3 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | pip install pytest 23 | pip install -e . 24 | pip install pyyaml 25 | - name: Get changed files using defaults 26 | id: changed-files 27 | uses: tj-actions/changed-files@v23.1 28 | - name: List all changed files 29 | run: | 30 | for file in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do 31 | echo "$file was changed" 32 | done 33 | - name: Test with pytest, if triggered by PR 34 | run: | 35 | if ${{ github.event_name == 'pull_request' }} 36 | then 37 | dataset_list=() 38 | for path in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do 39 | dir="$(dirname "${path}")" ; 40 | if [ ! -d "$dir" ]; then 41 | echo "$dir doesn't exist, continue" 42 | continue; 43 | fi 44 | dataset=$(echo $path | grep "datasets" | sed -r 's/datasets\/([_a-zA-Z0-9-]+)\/.*/\1/') 45 | if [ -z "$dataset" ]; then continue; fi 46 | if [[ ! 
" ${dataset_list[*]} " =~ " ${dataset} " ]]; then 47 | echo "add dataset: $dataset" 48 | dataset_list+=($dataset) 49 | fi 50 | done 51 | echo "datasets list is ${dataset_list[*]}" 52 | mkdir temp 53 | echo "${dataset_list[*]}" > temp/changed_datasets 54 | python tests/preprocess.py 55 | pytest tests/ 56 | fi 57 | - name: Test all datasets with pytest, if triggered by workflow_dispatch 58 | run: | 59 | if ${{ github.event_name == 'workflow_dispatch' }} 60 | then 61 | python tests/preprocess.py 62 | pytest tests/ 63 | fi 64 | -------------------------------------------------------------------------------- /.github/workflows/receive-pr.yml: -------------------------------------------------------------------------------- 1 | name: Receive PR 2 | 3 | # read-only repo token 4 | # no access to secrets 5 | on: 6 | pull_request: 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 0 16 | - name: Get changed files using defaults 17 | id: changed-files 18 | uses: tj-actions/changed-files@v23.1 19 | - name: List all changed files 20 | run: | 21 | for file in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do 22 | echo "$file was changed" 23 | done 24 | - name: Check large datasets 25 | id: main 26 | run: | 27 | dataset_list=() 28 | for path in ${{ steps.changed-files.outputs.all_changed_and_modified_files }}; do 29 | dir="$(dirname "${path}")" ; 30 | if [ ! -d "$dir" ]; then 31 | echo "$dir doesn't exist, continue" 32 | continue; 33 | fi 34 | dataset=$(echo $path | grep "datasets" | sed -r 's/datasets\/([_a-zA-Z0-9-]+)\/.*/\1/') 35 | if [ -z "$dataset" ]; then continue; fi 36 | if [[ ! 
" ${dataset_list[*]} " =~ " ${dataset} " ]]; then 37 | echo "add dataset: $dataset" 38 | dataset_list+=($dataset) 39 | fi 40 | done 41 | dataset_to_comment=() 42 | large_dataset_list=$(cat tests/config.yaml | sed -r 's/large_dataset_to_skip: \[(.*)\]/\1/') 43 | for dataset in "${dataset_list[@]}"; do 44 | if [[ "$large_dataset_list" == *"$dataset"* ]]; then 45 | echo "add ${dataset} to dataset_to_comment" 46 | dataset_to_comment+=($dataset) 47 | fi 48 | done 49 | if [ ${#dataset_to_comment[@]} -ne 0 ]; then 50 | echo "dataset to be commented are: ${dataset_to_comment[*]}" 51 | fi 52 | echo "::set-output name=DATASETS::${dataset_to_comment[*]}" 53 | 54 | - name: Save PR number 55 | run: | 56 | mkdir -p ./pr 57 | echo ${{ github.event.number }} > ./pr/NR 58 | echo ${{ steps.main.outputs.DATASETS }} >> ./pr/NR 59 | - uses: actions/upload-artifact@v3 60 | with: 61 | name: pr 62 | path: pr/ 63 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - repo: local 12 | hooks: 13 | - id: pylint 14 | name: pylint 15 | entry: pylint 16 | language: system 17 | types: [python] 18 | args: 19 | [ 20 | "-rn", # Only display messages 21 | "-sn", # Don't display the score 22 | "--rcfile=.pylintrc", # Link to your config file 23 | ] 24 | - repo: local 25 | hooks: 26 | - id: pycodestyle 27 | name: pycodestyle 28 | entry: pycodestyle 29 | language: system 30 | types: [python] 31 | - repo: local 32 | hooks: 33 | - id: pydocstyle 34 | name: pydocstyle 35 | entry: pydocstyle 36 | language: system 37 | types: [python] 38 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Graph-Learning-Benchmarks 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Signifies our desired python version 2 | PYTHON = python 3 | PYTHON_FILES := gli/ benchmarks/ tests/ example.py 4 | 5 | # .PHONY defines parts of the makefile that are not dependant on any specific file 6 | # This is most often used to store functions 7 | .PHONY = help setup test run clean 8 | 9 | # The @ makes sure that the command itself isn't echoed in the terminal 10 | help: 11 | @echo "Usage: make " 12 | @echo " Available commands:" 13 | @echo " setup: install the full project." 14 | @echo " clean: remove all data files (npz)." 15 | @echo " test: run all tests (pystyle, pylint, pytest). Stop on failure." 16 | @echo " pystyle: run pycodestyle and pydocstyle tests." 17 | @echo " pylint: run pylint." 18 | @echo " pytest: run pytests on all datasets by default." 19 | @echo " run pytest on a single dataset by arg DATASET." 20 | @echo " e.g., make pytest DATASET=cora" 21 | @echo " donwload: download and preprocess all data files (npz)." 22 | @echo " logs: create logs directory." 23 | @echo " build: build the package." 24 | 25 | setup: 26 | ${PYTHON} -m pip install -e ".[test,full]" 27 | 28 | clean: 29 | find datasets -name '*.npz' -delete 30 | 31 | test: pystyle pylint pytest 32 | 33 | # The leading '-' will execute following command to ignore errors. 
34 | pystyle: logs 35 | -pycodestyle ${PYTHON_FILES} | tee logs/pycodestyle.log 36 | -pydocstyle ${PYTHON_FILES} | tee logs/pydocstyle.log 37 | 38 | pylint: logs 39 | -pylint ${PYTHON_FILES} --rcfile .pylintrc --recursive y | tee logs/pylint.log 40 | 41 | pytest: logs 42 | ifndef DATASET 43 | -pytest -v tests/ | tee logs/pytest.log 44 | else 45 | mkdir -p temp 46 | @echo $$DATASET > temp/changed_datasets 47 | -pytest -v tests/ | tee logs/pytest.log 48 | rm temp/changed_datasets 49 | rmdir temp 50 | endif 51 | 52 | download: 53 | ${PYTHON} tests/preprocess.py 54 | 55 | logs: 56 | -mkdir logs 57 | 58 | build: 59 | ${PYTHON} -m build -------------------------------------------------------------------------------- /benchmarks/GraphClassification/README.md: -------------------------------------------------------------------------------- 1 | # GLI Benchmarking on `GraphClassification` Task 2 | 3 | The code in this folder can be used to benchmark some popular models on `GraphClassification` task. 4 | 5 | ## How to run 6 | 7 | Example commands to run the code: 8 | 9 | ```bash 10 | python train.py --dataset --model GCN 11 | python train.py --dataset --model DGN --model-cfg configs/DGN.yaml 12 | python train.py --dataset --model ChebNet --model-cfg configs/ChebNet.yaml 13 | python train.py --dataset --model GIN --model-cfg configs/GIN.yaml 14 | ``` 15 | 16 | One can provide a `yaml` file to arguments `--model-cfg` or `--train-cfg` respectively for model configuration or training configuration. If not provided, default configurations (see [model_default.yaml](https://github.com/Graph-Learning-Benchmarks/gli/blob/main/benchmarks/GraphClassification/configs/model_default.yaml) and [train_default.yaml](https://github.com/Graph-Learning-Benchmarks/gli/blob/main/benchmarks/GraphClassification/configs/train_default.yaml)) will be used. 17 | 18 | Note that some models may have unique hyperparameters not included in the default configuration files. 
In this case, one should pass the model-specific coniguration files to `train.py`. 19 | 20 | ## Supported models 21 | 22 | The following list of models are supported by this benchmark. 23 | 24 | - `GCN` 25 | - `DGN` 26 | - `ChebNet` 27 | - `GIN` 28 | 29 | To add a new model, one should add the model implementation under the `models` folder, and add model specific confgurations under the `configs` folder when needed. We have tried to implement `train.py` in a generic way so one may only need to make minimal modifications to `train.py` and `utils.py`. 30 | 31 | Contributions of new models are welcome through pull requests. 32 | 33 | ## Supported datasets 34 | 35 | This benchmark should work for most datasets with a `GraphClassification` task associated. The following datasets have been tested for this code. 36 | 37 | - `mnist` 38 | - `ogbg-molpcba` 39 | - `ogbg-molhiv` 40 | - `ogbg-molsider` 41 | - `ogbg-molbace` 42 | - `ogbg-molmuv` 43 | - `cifar` 44 | - `ogbg-molclintox` 45 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/configs/ChebNet.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 128 2 | num_layers: 4 3 | k: 3 4 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/configs/DGN.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 4 2 | hidden_dim: 128 3 | # aggregators: ['dir1-av', 'dir1-dx', 'sum'] 4 | aggregators: ['mean'] 5 | scalers: ['identity'] 6 | delta: 2.5 7 | dropout: 0 8 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/configs/GIN.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 16 2 | num_layers: 5 3 | dropout: .5 -------------------------------------------------------------------------------- 
/benchmarks/GraphClassification/configs/model_default.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 146 2 | num_layers: 4 3 | dropout: 0.0 4 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/configs/train_default.yaml: -------------------------------------------------------------------------------- 1 | loss_fun: cross_entropy 2 | self_loop: True 3 | to_dense: False 4 | lr: .0005 5 | weight_decay: 0.0 6 | max_epoch: 10000 7 | early_stopping: True 8 | seed: 0 9 | batch_size: 1024 10 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/grid/grid_example.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: [64, 128] 2 | lr: [0.0005, 0.001, 0.005, 0.01] 3 | dropout: [0.0, 0.2, 0.4, 0.6, 0.8] 4 | weight_decay: [0, .0001, .001, .01] 5 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/models/cheb_net.py: -------------------------------------------------------------------------------- 1 | """ 2 | ChebNet model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/blob/195f99362d883f8b6d131b70a7868a 6 | 537e55b786/examples/pytorch/model_zoo/citation_network/models.py 7 | """ 8 | 9 | import dgl 10 | from torch import nn 11 | from dgl.nn.pytorch import ChebConv 12 | from models.mlp_readout_layer import MLPReadout 13 | 14 | 15 | class ChebNet(nn.Module): 16 | """ChebNet network.""" 17 | 18 | def __init__(self, 19 | in_feats, 20 | n_hidden, 21 | n_classes, 22 | n_layers, 23 | k): 24 | """Initiate model.""" 25 | super().__init__() 26 | self.layers = nn.ModuleList() 27 | self.layers.append( 28 | ChebConv(in_feats, n_hidden, k) 29 | ) 30 | for _ in range(n_layers - 2): 31 | self.layers.append( 32 | ChebConv(n_hidden, n_hidden, k) 33 | ) 34 | 35 | self.layers.append( 36 | ChebConv(n_hidden, n_hidden, k) 37 | ) 38 | self.mlp_layer = MLPReadout(n_hidden, n_classes) 39 | 40 | def forward(self, g, features): 41 | """Forward.""" 42 | h = features 43 | for layer in self.layers: 44 | h = layer(g, h) 45 | g.ndata["h"] = h 46 | hg = dgl.mean_nodes(g, "h") 47 | return self.mlp_layer(hg) 48 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/models/gcn.py: -------------------------------------------------------------------------------- 1 | """ 2 | GCN model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn 6 | https://docs.dgl.ai/tutorials/blitz/5_graph_classification.html# 7 | sphx-glr-tutorials-blitz-5-graph-classification-py 8 | """ 9 | 10 | import dgl 11 | from torch import nn 12 | from dgl.nn.pytorch import GraphConv 13 | from models.mlp_readout_layer import MLPReadout 14 | 15 | 16 | class GCNgraph(nn.Module): 17 | """GCN network.""" 18 | 19 | def __init__(self, 20 | in_feats, 21 | n_hidden, 22 | n_classes, 23 | n_layers, 24 | activation, 25 | dropout): 26 | """Initiate model.""" 27 | super().__init__() 28 | self.layers = nn.ModuleList() 29 | # embedded layer 30 | self.embedding_h = nn.Linear(in_feats, n_hidden) 31 | 32 | # hidden layers 33 | for _ in range(n_layers - 2): 34 | self.layers.append(GraphConv(n_hidden, n_hidden, 35 | activation=activation)) 36 | # output layer 37 | self.layers.append(GraphConv(n_hidden, n_hidden)) 38 | self.dropout = nn.Dropout(p=dropout) 39 | 40 | # readout layer 41 | self.mlp_layer = MLPReadout(n_hidden, n_classes) 42 | 43 | def forward(self, g, features): 44 | """Forward.""" 45 | h = features 46 | h = self.embedding_h(h) 47 | for i, layer in enumerate(self.layers): 48 | if i != 0: 49 | h = self.dropout(h) 50 | h = layer(g, h) 51 | g.ndata["h"] = h 52 | hg = dgl.mean_nodes(g, "h") 53 | return self.mlp_layer(hg) 54 | -------------------------------------------------------------------------------- /benchmarks/GraphClassification/models/mlp_readout_layer.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | MLP Layer used after graph vector representation. 
4 | 5 | References: 6 | https://github.com/graphdeeplearning/benchmarking-gnns/blob/master/layers/mlp_readout_layer.py 7 | """ 8 | 9 | from torch import nn 10 | import torch.nn.functional as F 11 | 12 | 13 | class MLPReadout(nn.Module): 14 | """MLPReadout layer in GLI.""" 15 | 16 | def __init__(self, input_dim, output_dim, L=2): 17 | """Initiate layer, L=nb_hidden_layers.""" 18 | super().__init__() 19 | list_fc_layers = [nn.Linear(input_dim // 2 ** layer, input_dim // 2 ** 20 | (layer + 1), bias=True) for layer in range(L)] 21 | list_fc_layers.append(nn.Linear(input_dim // 2 ** L, output_dim, 22 | bias=True)) 23 | self.fc_layers = nn.ModuleList(list_fc_layers) 24 | self.n_layers = L 25 | 26 | def forward(self, x): 27 | """Forward.""" 28 | y = x 29 | for layer in range(self.n_layers): 30 | y = self.fc_layers[layer](y) 31 | y = F.relu(y) 32 | y = self.fc_layers[self.n_layers](y) 33 | return y 34 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/APPNP.yaml: -------------------------------------------------------------------------------- 1 | in_drop: .5 2 | edge_drop: .5 3 | hidden_sizes: [64] 4 | k: 10 5 | alpha: .1 6 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/GAT.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 8 3 | num_heads: 8 4 | num_out_heads: 2 5 | residual: False 6 | dropout: .6 7 | negative_slope: .2 8 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/GATv2.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 1 2 | num_hidden: 8 3 | num_heads: 8 4 | num_out_heads: 2 5 | residual: False 6 | feat_drop: .7 7 | attn_drop: .7 8 | negative_slope: .2 9 | 
-------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/GCNII.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 64 2 | num_layers: 64 3 | dropout: .5 4 | lambda_: .5 5 | alpha: .2 -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/GraphSAGE.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 8 3 | dropout: .6 4 | aggregator_type: gcn 5 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/LINKX.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 16 2 | num_layers: 1 3 | dropout: .5 4 | inner_activation: False 5 | inner_dropout: False 6 | init_layers_A: 1 7 | init_layers_X: 1 8 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/LINKX_train.yaml: -------------------------------------------------------------------------------- 1 | loss_fun: cross_entropy 2 | self_loop: False 3 | to_dense: False 4 | lr: .01 5 | weight_decay: 0.001 6 | max_epoch: 10000 7 | early_stopping: True 8 | seed: 0 9 | batch_size: 256 10 | to_undirected: False 11 | optimizer: "AdamW" 12 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/MixHop.yaml: -------------------------------------------------------------------------------- 1 | num_hidden: 8 2 | p: [0, 1, 2] 3 | num_layers: 2 4 | dropout: .5 5 | layer_dropout: 0.9 6 | batchnorm: False 7 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/MoNet.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 8 3 
| dropout: .6 4 | pseudo_dim: 2 5 | num_kernels: 3 6 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/SGC.yaml: -------------------------------------------------------------------------------- 1 | k: 2 -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/TAGCN.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 16 3 | k: 2 4 | dropout: .5 5 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/catboost.yaml: -------------------------------------------------------------------------------- 1 | hp: 2 | lr: 3 | - 0.01 4 | - 0.1 5 | depth: 6 | - 4 7 | - 6 8 | l2_leaf_reg: 9 | - null 10 | num_epochs: 1000 11 | patience: 100 12 | verbose: false 13 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/lightgbm.yaml: -------------------------------------------------------------------------------- 1 | hp: 2 | lr: 3 | - 0.01 4 | - 0.1 5 | num_leaves: 6 | - 15 7 | - 63 8 | lambda_l2: 9 | - 0.0 10 | boosting: 11 | - gbdt 12 | num_epochs: 1000 13 | patience: 100 14 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/model_default.yaml: -------------------------------------------------------------------------------- 1 | num_layers: 2 2 | num_hidden: 8 3 | dropout: .6 4 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/configs/train_default.yaml: -------------------------------------------------------------------------------- 1 | loss_fun: cross_entropy 2 | self_loop: True 3 | to_dense: False 4 | lr: .01 5 | weight_decay: 0.001 6 | max_epoch: 10000 7 | early_stopping: True 8 | seed: 0 9 | batch_size: 256 10 | 
num_hidden: [32, 64]
self.activation(layer(h)) 57 | h = self.layers[-1](self.feat_drop(h)) 58 | # propagation step 59 | h = self.propagate(self.g, h) 60 | return h 61 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/gat.py: -------------------------------------------------------------------------------- 1 | """ 2 | GAT model in GLI. 3 | 4 | References: 5 | https://github.com/dmlc/dgl/tree/master/examples/pytorch/gat 6 | """ 7 | 8 | from torch import nn 9 | from dgl.nn import GATConv 10 | 11 | 12 | class GAT(nn.Module): 13 | """GAT network.""" 14 | 15 | def __init__(self, 16 | g, 17 | num_layers, 18 | in_dim, 19 | num_hidden, 20 | num_classes, 21 | heads, 22 | activation, 23 | feat_drop, 24 | attn_drop, 25 | negative_slope, 26 | residual): 27 | """Initiate model.""" 28 | super().__init__() 29 | self.g = g 30 | self.num_layers = num_layers 31 | self.gat_layers = nn.ModuleList() 32 | self.activation = activation 33 | 34 | # input projection (no residual) 35 | self.gat_layers.append(GATConv( 36 | in_dim, num_hidden, heads[0], 37 | feat_drop, attn_drop, negative_slope, False, self.activation)) 38 | # hidden layers 39 | for layer in range(1, num_layers - 2): 40 | # due to multi-head, the in_dim = num_hidden * num_heads 41 | self.gat_layers.append(GATConv(num_hidden * heads[layer-1], 42 | num_hidden, heads[layer], 43 | feat_drop, attn_drop, 44 | negative_slope, residual, 45 | self.activation)) 46 | # output projection 47 | self.gat_layers.append(GATConv( 48 | num_hidden * heads[-2], num_classes, heads[-1], 49 | feat_drop, attn_drop, negative_slope, residual, None)) 50 | 51 | def forward(self, inputs): 52 | """Forward.""" 53 | h = inputs 54 | for layer in range(self.num_layers): 55 | h = self.gat_layers[layer](self.g, h) 56 | h = h.flatten(1) if layer != self.num_layers - 1 else h.mean(1) 57 | return h 58 | -------------------------------------------------------------------------------- 
/benchmarks/NodeClassification/models/gcn.py: -------------------------------------------------------------------------------- 1 | """ 2 | GCN model in GLI. 3 | 4 | References: 5 | https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn 6 | """ 7 | 8 | from torch import nn 9 | from dgl.nn.pytorch import GraphConv 10 | 11 | 12 | class GCN(nn.Module): 13 | """GCN network.""" 14 | 15 | def __init__(self, 16 | g, 17 | in_feats, 18 | n_hidden, 19 | n_classes, 20 | n_layers, 21 | activation, 22 | dropout): 23 | """Initiate model.""" 24 | super().__init__() 25 | self.g = g 26 | self.layers = nn.ModuleList() 27 | # input layer 28 | self.layers.append(GraphConv(in_feats, n_hidden, 29 | activation=activation)) 30 | # hidden layers 31 | for _ in range(n_layers - 2): 32 | self.layers.append(GraphConv(n_hidden, n_hidden, 33 | activation=activation)) 34 | # output layer 35 | self.layers.append(GraphConv(n_hidden, n_classes)) 36 | self.dropout = nn.Dropout(p=dropout) 37 | 38 | def forward(self, features): 39 | """Forward.""" 40 | h = features 41 | for i, layer in enumerate(self.layers): 42 | if i != 0: 43 | h = self.dropout(h) 44 | h = layer(self.g, h) 45 | return h 46 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/gcn_minibatch.py: -------------------------------------------------------------------------------- 1 | """ 2 | GCN model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn 6 | https://docs.dgl.ai/guide/minibatch-node.html?highlight=sampling 7 | """ 8 | 9 | from torch import nn 10 | from dgl.nn.pytorch import GraphConv 11 | 12 | 13 | class GCNminibatch(nn.Module): 14 | """GCN network.""" 15 | 16 | def __init__(self, 17 | in_feats, 18 | n_hidden, 19 | n_classes, 20 | n_layers, 21 | activation, 22 | dropout): 23 | """Initiate model.""" 24 | super().__init__() 25 | self.layers = nn.ModuleList() 26 | # input layer 27 | self.layers.append(GraphConv(in_feats, n_hidden, 28 | activation=activation, 29 | norm='none')) 30 | # hidden layers 31 | for _ in range(n_layers - 2): 32 | self.layers.append(GraphConv(n_hidden, n_hidden, 33 | activation=activation, 34 | norm='none')) 35 | # output layer 36 | self.layers.append(GraphConv(n_hidden, n_classes, 37 | norm='none')) 38 | self.dropout = nn.Dropout(p=dropout) 39 | 40 | def forward(self, blocks, features): 41 | """Forward.""" 42 | h = features 43 | for i, layer in enumerate(self.layers): 44 | if i != 0: 45 | h = self.dropout(h) 46 | h = layer(blocks[i], h) 47 | return h 48 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/graph_sage.py: -------------------------------------------------------------------------------- 1 | """ 2 | GraphSAGE model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/blob/master/examples/pytorch/graphsage/train_full.py 6 | """ 7 | 8 | from torch import nn 9 | from dgl.nn.pytorch.conv import SAGEConv 10 | 11 | 12 | class GraphSAGE(nn.Module): 13 | """GraphSAGE model.""" 14 | 15 | def __init__(self, 16 | g, 17 | in_feats, 18 | n_hidden, 19 | n_classes, 20 | n_layers, 21 | activation, 22 | dropout, 23 | aggregator_type): 24 | """Initiate model.""" 25 | super().__init__() 26 | self.g = g 27 | self.layers = nn.ModuleList() 28 | self.dropout = nn.Dropout(dropout) 29 | self.activation = activation 30 | 31 | # input layer 32 | self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type)) 33 | # hidden layers 34 | for _ in range(n_layers - 2): 35 | self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type)) 36 | # output layer 37 | self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type)) 38 | 39 | def forward(self, inputs): 40 | """Forward.""" 41 | h = self.dropout(inputs) 42 | for length, layer in enumerate(self.layers): 43 | h = layer(self.g, h) 44 | if length != len(self.layers) - 1: 45 | h = self.activation(h) 46 | h = self.dropout(h) 47 | return h 48 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/graph_sage_minibatch.py: -------------------------------------------------------------------------------- 1 | """ 2 | GraphSAGE model in GLI. 
3 | 4 | References: 5 | https://github.com/dmlc/dgl/blob/master/examples/pytorch/graphsage/train_full.py 6 | https://docs.dgl.ai/guide/minibatch-node.html?highlight=sampling 7 | """ 8 | 9 | from torch import nn 10 | from dgl.nn.pytorch.conv import SAGEConv 11 | 12 | 13 | class GraphSAGEminibatch(nn.Module): 14 | """GraphSAGE model.""" 15 | 16 | def __init__(self, 17 | in_feats, 18 | n_hidden, 19 | n_classes, 20 | n_layers, 21 | activation, 22 | dropout, 23 | aggregator_type): 24 | """Initiate model.""" 25 | super().__init__() 26 | self.layers = nn.ModuleList() 27 | self.dropout = nn.Dropout(dropout) 28 | self.activation = activation 29 | 30 | # input layer 31 | self.layers.append(SAGEConv(in_feats, n_hidden, 32 | aggregator_type)) 33 | # hidden layers 34 | for _ in range(n_layers - 2): 35 | self.layers.append(SAGEConv(n_hidden, n_hidden, 36 | aggregator_type)) 37 | # output layer 38 | self.layers.append(SAGEConv(n_hidden, n_classes, 39 | aggregator_type)) 40 | 41 | def forward(self, blocks, inputs): 42 | """Forward.""" 43 | h = self.dropout(inputs) 44 | for length, layer in enumerate(self.layers): 45 | h = layer(blocks[length], h) 46 | if length != len(self.layers) - 1: 47 | h = self.activation(h) 48 | h = self.dropout(h) 49 | return h 50 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/mlp.py: -------------------------------------------------------------------------------- 1 | """ 2 | MLP model in GLI. 
MoNet model in GLI.
3 | 4 | References: 5 | https://github.com/dmlc/dgl/blob/master/examples/pytorch/monet/citation.py 6 | """ 7 | 8 | import torch 9 | from torch import nn 10 | from dgl.nn.pytorch.conv import GMMConv 11 | 12 | 13 | class MoNet(nn.Module): 14 | """Monet model.""" 15 | 16 | def __init__(self, 17 | g, 18 | in_feats, 19 | n_hidden, 20 | out_feats, 21 | n_layers, 22 | dim, 23 | n_kernels, 24 | dropout): 25 | """Initiate model.""" 26 | super().__init__() 27 | self.g = g 28 | self.layers = nn.ModuleList() 29 | self.pseudo_proj = nn.ModuleList() 30 | 31 | # process pseudo 32 | us, vs = g.edges(order="eid") 33 | udeg, vdeg = 1 / torch.sqrt(g.in_degrees(us).float()), 1 / \ 34 | torch.sqrt(g.in_degrees(vs).float()) 35 | self.pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1) 36 | 37 | # Input layer 38 | self.layers.append( 39 | GMMConv(in_feats, n_hidden, dim, n_kernels)) 40 | self.pseudo_proj.append( 41 | nn.Sequential(nn.Linear(2, dim), nn.Tanh())) 42 | 43 | # Hidden layer 44 | for _ in range(n_layers - 2): 45 | self.layers.append(GMMConv(n_hidden, n_hidden, dim, n_kernels)) 46 | self.pseudo_proj.append( 47 | nn.Sequential(nn.Linear(2, dim), nn.Tanh())) 48 | 49 | # Output layer 50 | self.layers.append(GMMConv(n_hidden, out_feats, dim, n_kernels)) 51 | self.pseudo_proj.append( 52 | nn.Sequential(nn.Linear(2, dim), nn.Tanh())) 53 | self.dropout = nn.Dropout(dropout) 54 | 55 | def forward(self, feat): 56 | """Forward.""" 57 | h = feat 58 | for i in range(len(self.layers)): 59 | if i != 0: 60 | h = self.dropout(h) 61 | h = self.layers[i]( 62 | self.g, h, self.pseudo_proj[i](self.pseudo)) 63 | return h 64 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/sgc.py: -------------------------------------------------------------------------------- 1 | """ 2 | SGConv model in GLI. 
3 | 4 | References: 5 | https://docs.dgl.ai/generated/dgl.nn.pytorch.conv.SGConv.html 6 | https://github.com/dmlc/dgl/blob/master/examples/pytorch/sgc/sgc.py 7 | """ 8 | 9 | from torch import nn 10 | from dgl.nn.pytorch import SGConv 11 | 12 | 13 | class SGC(nn.Module): 14 | """SGC network.""" 15 | 16 | def __init__(self, 17 | g, 18 | in_feats, 19 | n_classes, 20 | k) -> None: 21 | """Initiate model.""" 22 | super().__init__() 23 | self.g = g 24 | self.layer = SGConv(in_feats, n_classes, k) 25 | 26 | def forward(self, features): 27 | """Forward.""" 28 | h = features 29 | h = self.layer(self.g, h) 30 | return h 31 | -------------------------------------------------------------------------------- /benchmarks/NodeClassification/models/tagcn.py: -------------------------------------------------------------------------------- 1 | """ 2 | TAGCN model in GLI. 3 | 4 | References: 5 | https://docs.dgl.ai/generated/dgl.nn.pytorch.conv.TAGConv.html 6 | """ 7 | 8 | from dgl.nn.pytorch.conv import TAGConv 9 | from torch import nn 10 | 11 | 12 | class TAGCN(nn.Module): 13 | """TAGCN network.""" 14 | 15 | def __init__( 16 | self, 17 | g, 18 | in_feats, 19 | n_hidden, 20 | n_classes, 21 | n_layers, 22 | k, 23 | activation, 24 | dropout 25 | ): 26 | """Initiate model.""" 27 | super().__init__() 28 | self.g = g 29 | self.layers = nn.ModuleList() 30 | # input layer 31 | self.layers.append(TAGConv(in_feats, 32 | n_hidden, 33 | k=k, 34 | activation=activation)) 35 | # hidden layers 36 | for _ in range(n_layers - 1): 37 | self.layers.append( 38 | TAGConv(n_hidden, n_hidden, activation=activation) 39 | ) 40 | # output layer 41 | self.layers.append(TAGConv(n_hidden, n_classes)) # activation=None 42 | self.dropout = nn.Dropout(p=dropout) 43 | 44 | def forward(self, features): 45 | """Forward.""" 46 | h = features 47 | for i, layer in enumerate(self.layers): 48 | if i != 0: 49 | h = self.dropout(h) 50 | h = layer(self.g, h) 51 | return h 52 | 
-------------------------------------------------------------------------------- /datasets/FB13/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/FB13/LICENSE -------------------------------------------------------------------------------- /datasets/FB13/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB13.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB13.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB13.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 13 18 | } 19 | -------------------------------------------------------------------------------- /datasets/FB13/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB13.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB13.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB13.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 13, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/FB15K/LICENSE: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/FB15K/LICENSE -------------------------------------------------------------------------------- /datasets/FB15K/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB15K.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB15K.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB15K.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 1345 18 | } 19 | -------------------------------------------------------------------------------- /datasets/FB15K/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB15K.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB15K.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB15K.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 1345, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/FB15K237/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/FB15K237/LICENSE -------------------------------------------------------------------------------- /datasets/FB15K237/task_kg_entity_prediction_1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB15K237.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB15K237.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB15K237.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 237 18 | } 19 | -------------------------------------------------------------------------------- /datasets/FB15K237/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "FB15K237.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "FB15K237.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "FB15K237.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 237, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/KGMicrobe/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2020, Lawrence Berkeley National Laboratory 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 
10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | * Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /datasets/KGMicrobe/README.md: -------------------------------------------------------------------------------- 1 | # KGMicrobe 2 | 3 | ## Dataset Description 4 | 5 | KG-Microbe is a microbe-centric Knowledge Graph (KG) to support tasks such as querying and graph link prediction in many use cases 6 | including microbiology, biomedicine, and the environment. 
@article{reese2021kg,
35 | 36 | ``` 37 | scipy==1.7.1 38 | ``` 39 | -------------------------------------------------------------------------------- /datasets/KGMicrobe/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "KGMicrobe dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in KGMicrobe dataset, strings.", 7 | "type": "string", 8 | "format": "Tensor", 9 | "file": "KGMicrobe.npz", 10 | "key": "node_name" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of KGMicrobe dataset, int ranged from 0 to 10.", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "KGMicrobe.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "KGMicrobe.npz", 23 | "key": "edge" 24 | }, 25 | "EdgeClass": { 26 | "description": "Relation type-id of the Edge in the KGMicrobe dataset, int ranged from 0 to 8.", 27 | "type": "int", 28 | "format": "Tensor", 29 | "file": "KGMicrobe.npz", 30 | "key": "edge_class" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "KGMicrobe.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "KGMicrobe.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@inproceedings{reese2021kg,\ntitle={KG-COVID-19: a framework to produce customized knowledge graphs for COVID-19 response},\nauthor={Reese, Justin T and Unni, Deepak and Callahan, Tiffany J and Cappelletti, Luca and Ravanmehr, Vida and Carbon, Seth and Shefchek, Kent A and Good, Benjamin M and Balhoff, James P and Fontana, Tommaso and others},\njournal={Patterns},\nvolume={2},\nnumber={1},\npages={100155},\nyear={2021},\npublisher={Elsevier}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/NELL-995/LICENSE: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/NELL-995/LICENSE -------------------------------------------------------------------------------- /datasets/NELL-995/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "NELL-995 dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in NELL-995 dataset, strings.", 7 | "type": "string", 8 | "format": "Tensor", 9 | "file": "NELL-995.npz", 10 | "key": "node_name" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "NELL-995.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeClass": { 19 | "description": "Relation type-id of the Edge in the NELL-995 dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "NELL-995.npz", 23 | "key": "edge_class" 24 | }, 25 | "EdgeName": { 26 | "description": "Relation name of the Edge in the NELL-995 dataset.", 27 | "type": "string", 28 | "format": "Tensor", 29 | "file": "NELL-995.npz", 30 | "key": "edge_name" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "NELL-995.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "NELL-995.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@article{xiong2017deeppath,\ntitle={Deeppath: A reinforcement learning method for knowledge graph reasoning},\nauthor={Xiong, Wenhan and Hoang, Thien and Wang, William Yang},\njournal={arXiv preprint arXiv:1707.06690},\nyear={2017}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/NELL-995/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 
| "train_triplet_set": { 6 | "file": "NELL-995.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "NELL-995.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "NELL-995.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 200 18 | } 19 | -------------------------------------------------------------------------------- /datasets/NELL-995/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "NELL-995.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "NELL-995.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "NELL-995.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 200, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/WN11/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/WN11/LICENSE -------------------------------------------------------------------------------- /datasets/WN11/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN11.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN11.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN11.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | 
"num_relations": 11 18 | } 19 | -------------------------------------------------------------------------------- /datasets/WN11/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN11.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN11.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN11.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 11, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/WN18/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/WN18/LICENSE -------------------------------------------------------------------------------- /datasets/WN18/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "WN18 dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in WN18 dataset, strings.", 7 | "type": "string", 8 | "format": "Tensor", 9 | "file": "WN18.npz", 10 | "key": "node_name" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "WN18.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeClass": { 19 | "description": "Relation type-id of the Edge in the WN18 dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "WN18.npz", 23 | "key": "edge_class" 24 | }, 25 | "EdgeName": { 26 | "description": "Relation name of the Edge in the WN18 dataset.", 27 | "type": "string", 28 | "format": "Tensor", 29 | "file": "WN18.npz", 30 | "key": "edge_name" 31 | } 32 | }, 33 | 
"Graph": { 34 | "_NodeList": { 35 | "file": "WN18.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "WN18.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@article{bordes2013translating,\ntitle={Translating embeddings for modeling multi-relational data},\nauthor={Bordes, Antoine and Usunier, Nicolas and Garcia-Duran, Alberto and Weston, Jason and Yakhnenko, Oksana},\njournal={Advances in neural information processing systems},\nvolume={26},\nyear={2013}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/WN18/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN18.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN18.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN18.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 18 18 | } 19 | -------------------------------------------------------------------------------- /datasets/WN18/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN18.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN18.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN18.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 18, 18 | "target": "Edge/EdgeClass" 19 | } 20 | 
-------------------------------------------------------------------------------- /datasets/WN18RR/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/WN18RR/LICENSE -------------------------------------------------------------------------------- /datasets/WN18RR/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "WN18RR dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in WN18RR dataset, strings.", 7 | "type": "string", 8 | "format": "Tensor", 9 | "file": "WN18RR.npz", 10 | "key": "node_name" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "WN18RR.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeClass": { 19 | "description": "Relation type-id of the Edge in the WN18RR dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "WN18RR.npz", 23 | "key": "edge_class" 24 | }, 25 | "EdgeName": { 26 | "description": "Relation name of the Edge in the WN18RR dataset.", 27 | "type": "string", 28 | "format": "Tensor", 29 | "file": "WN18RR.npz", 30 | "key": "edge_name" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "WN18RR.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "WN18RR.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@inproceedings{dettmers2018convolutional,\ntitle={Convolutional 2d knowledge graph embeddings},\nauthor={Dettmers, Tim and Minervini, Pasquale and Stenetorp, Pontus and Riedel, Sebastian},\nbooktitle={Proceedings of the AAAI Conference on Artificial Intelligence},\nvolume={32},\nnumber={1},\nyear={2018}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/WN18RR/task_kg_entity_prediction_1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN18RR.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN18RR.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN18RR.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 11 18 | } 19 | -------------------------------------------------------------------------------- /datasets/WN18RR/task_kg_relation_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "WN18RR.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "WN18RR.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "WN18RR.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 11, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/YAGO3-10/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/YAGO3-10/LICENSE -------------------------------------------------------------------------------- /datasets/YAGO3-10/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "YAGO3-10 dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeName": { 6 | "description": "Node (entity) names in YAGO3-10 dataset, strings.", 7 | "type": "string", 8 | "format": 
"Tensor", 9 | "file": "YAGO3-10.npz", 10 | "key": "node_name" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "YAGO3-10.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeClass": { 19 | "description": "Relation type-id of the Edge in the YAGO3-10 dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "YAGO3-10.npz", 23 | "key": "edge_class" 24 | }, 25 | "EdgeName": { 26 | "description": "Relation name of the Edge in the YAGO3-10 dataset.", 27 | "type": "string", 28 | "format": "Tensor", 29 | "file": "YAGO3-10.npz", 30 | "key": "edge_name" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "YAGO3-10.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "YAGO3-10.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@inproceedings{dettmers2018convolutional,\ntitle={Convolutional 2d knowledge graph embeddings},\nauthor={Dettmers, Tim and Minervini, Pasquale and Stenetorp, Pontus and Riedel, Sebastian},\nbooktitle={Proceedings of the AAAI Conference on Artificial Intelligence},\nvolume={32},\nnumber={1},\nyear={2018}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/YAGO3-10/task_kg_entity_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid entity given (head, relation) or (relation, tail) that form a fact triple.", 3 | "type": "KGEntityPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "YAGO3-10.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "YAGO3-10.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "YAGO3-10.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 37 18 | } 19 | -------------------------------------------------------------------------------- /datasets/YAGO3-10/task_kg_relation_prediction_1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict a valid relation given (head, tail) that form a fact triple.", 3 | "type": "KGRelationPrediction", 4 | "feature": null, 5 | "train_triplet_set": { 6 | "file": "YAGO3-10.npz", 7 | "key": "TrainEdge_id" 8 | }, 9 | "val_triplet_set": { 10 | "file": "YAGO3-10.npz", 11 | "key": "ValidEdge_id" 12 | }, 13 | "test_triplet_set": { 14 | "file": "YAGO3-10.npz", 15 | "key": "TestEdge_id" 16 | }, 17 | "num_relations": 37, 18 | "target": "Edge/EdgeClass" 19 | } 20 | -------------------------------------------------------------------------------- /datasets/actor/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/actor/LICENSE -------------------------------------------------------------------------------- /datasets/actor/README.md: -------------------------------------------------------------------------------- 1 | # Actor 2 | 3 | ## Dataset Description 4 | This dataset is the actor-only induced subgraph of the film-director-actor-writer network (Tang et al., 2009). Each node corresponds to an actor, and an edge between two nodes denotes co-occurrence on the same Wikipedia page. Node features correspond to some keywords in the Wikipedia pages. The nodes are classified into five categories. 
5 | 6 | 7 | Statistics: 8 | - Nodes: 7600 9 | - Edges: 30019 10 | - Number of Classes: 5 11 | 12 | #### Citation 13 | - Original Source 14 | + [Website](https://www.aminer.org/lab-datasets/soinf/) 15 | + LICENSE: missing 16 | ``` 17 | @inproceedings{tang2009social, 18 | title={Social influence analysis in large-scale networks}, 19 | author={Tang, Jie and Sun, Jimeng and Wang, Chi and Yang, Zi}, 20 | booktitle={Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining}, 21 | pages={807--816}, 22 | year={2009} 23 | } 24 | ``` 25 | - Current Version 26 | + [Website](https://github.com/graphdml-uiuc-jlu/geom-gcn) 27 | + LICENSE: missing 28 | ``` 29 | @article{pei2020geom, 30 | title={Geom-gcn: Geometric graph convolutional networks}, 31 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 32 | journal={arXiv preprint arXiv:2002.05287}, 33 | year={2020} 34 | } 35 | ``` 36 | 37 | ## Available Tasks 38 | 39 | ### Actor 40 | 41 | - Task type: `NodeClassification` 42 | 43 | 44 | #### Citation 45 | 46 | ``` 47 | @article{pei2020geom, 48 | title={Geom-gcn: Geometric graph convolutional networks}, 49 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 50 | journal={arXiv preprint arXiv:2002.05287}, 51 | year={2020} 52 | } 53 | ``` 54 | 55 | ## Preprocessing 56 | The data files and task config file in GLI format are transformed from the [torch_geometric.datasets](https://pytorch-geometric.readthedocs.io/en/latest/modules/datasets.html). Check `actor.ipynb` for the preprocessing. 57 | 58 | 59 | ### Requirements 60 | 61 | The preprocessing code requires the following packages. 
62 | 63 | ``` 64 | numpy==1.22.3 65 | scipy==1.7.3 66 | torch==1.11.0 67 | torch_geometric==2.0.4 68 | ``` 69 | -------------------------------------------------------------------------------- /datasets/actor/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Actor dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Actor dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "actor_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Actor dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "actor.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "actor.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "actor.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "actor.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@inproceedings{tang2009social,\ntitle={Social influence analysis in large-scale networks},\nauthor={Tang, Jie and Sun, Jimeng and Wang, Chi and Yang, Zi},\nbooktitle={Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining},\npages={807--816},\nyear={2009}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/actor/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Actor dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "actor_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "actor_task.npz", 16 | "key": 
"val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "actor_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/arxiv-year/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/arxiv-year/LICENSE -------------------------------------------------------------------------------- /datasets/arxiv-year/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "arXiv-year dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of arXiv-year dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "arxiv_year.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of arXiv-year dataset, int ranged from 0 to 4.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "arxiv_year.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "arxiv_year.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "arxiv_year.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "arxiv_year.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/arxiv-year/task_node_classification_1.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on arXiv-year dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 169343 13 | } 14 | -------------------------------------------------------------------------------- /datasets/chameleon/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/chameleon/LICENSE -------------------------------------------------------------------------------- /datasets/chameleon/README.md: -------------------------------------------------------------------------------- 1 | # Chameleon 2 | 3 | ## Dataset Description 4 | 5 | Chameleon is a page-page network on specific topics in Wikipedia. In this dataset, nodes represent web pages and edges are mutual links between pages. And node features correspond to several informative nouns in the Wikipedia pages. The nodes are classified into five categories. 
6 | 7 | 8 | Statistics: 9 | - Nodes: 2277 10 | - Edges: 36101 11 | - Number of Classes: 5 12 | 13 | #### Citation 14 | - Original Source 15 | + [Website](https://github.com/benedekrozemberczki/datasets#wikipedia-article-networks) 16 | + LICENSE: [MIT](https://github.com/benedekrozemberczki/datasets/blob/master/LICENSE) 17 | ``` 18 | @article{rozemberczki2021multi, 19 | title={Multi-scale attributed node embedding}, 20 | author={Rozemberczki, Benedek and Allen, Carl and Sarkar, Rik}, 21 | journal={Journal of Complex Networks}, 22 | volume={9}, 23 | number={2}, 24 | pages={cnab014}, 25 | year={2021}, 26 | publisher={Oxford University Press} 27 | } 28 | ``` 29 | - Current Version 30 | + [Website](https://github.com/graphdml-uiuc-jlu/geom-gcn) 31 | + LICENSE: missing 32 | ``` 33 | @article{pei2020geom, 34 | title={Geom-gcn: Geometric graph convolutional networks}, 35 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 36 | journal={arXiv preprint arXiv:2002.05287}, 37 | year={2020} 38 | } 39 | ``` 40 | 41 | ## Available Tasks 42 | 43 | ### MUSAE 44 | 45 | - Task type: `NodeClassification` 46 | 47 | This is a node classification task with fixed split from [MUSAE](https://github.com/benedekrozemberczki/MUSAE). 48 | 49 | #### Citation 50 | 51 | ``` 52 | @article{pei2020geom, 53 | title={Geom-gcn: Geometric graph convolutional networks}, 54 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 55 | journal={arXiv preprint arXiv:2002.05287}, 56 | year={2020} 57 | } 58 | ``` 59 | 60 | ## Preprocessing 61 | The data files and task config file in GLI format are transformed from the [torch_geometric.datasets](https://pytorch-geometric.readthedocs.io/en/latest/modules/datasets.html). Check `chameleon.ipynb` for the preprocessing. 62 | 63 | 64 | ### Requirements 65 | 66 | The preprocessing code requires the following packages. 
67 | 68 | ``` 69 | numpy==1.22.3 70 | scipy==1.7.3 71 | torch==1.11.0 72 | torch_geometric==2.0.4 73 | ``` 74 | -------------------------------------------------------------------------------- /datasets/chameleon/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Chameleon dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Chameleon dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "chameleon_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Chameleon dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "chameleon.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "chameleon.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "chameleon.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "chameleon.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{rozemberczki2021multi,\ntitle={Multi-scale attributed node embedding},\nauthor={Rozemberczki, Benedek and Allen, Carl and Sarkar, Rik},\njournal={Journal of Complex Networks},\nvolume={9},\nnumber={2},\npages={cnab014},\nyear={2021},\npublisher={Oxford University Press}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/chameleon/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Chameleon dataset. 
The split is introduced in the paper \"Multi-scale Attributed Node Embedding\", while the classification categories are introduced in the paper \"Geom-GCN: Geometric Graph Convolutional Networks\".", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "chameleon_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "chameleon_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "chameleon_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/cifar/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Vijay Prakash Dwivedi, Chaitanya K. Joshi, Anh Tuan Luu, Thomas Laurent, Yoshua Bengio, Xavier Bresson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /datasets/cifar/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "cifar dataset", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "cifar.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "cifar.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "SparseTensor", 22 | "file": "cifar.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "cifar_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "cifar.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@misc{https://doi.org/10.48550/arxiv.2003.00982,\ndoi = {10.48550/ARXIV.2003.00982},\nurl = {https://arxiv.org/abs/2003.00982},\nauthor = {Dwivedi, Vijay Prakash and Joshi, Chaitanya K. 
and Luu, Anh Tuan and Laurent, Thomas and Bengio, Yoshua and Bresson, Xavier},\nkeywords = {Machine Learning (cs.LG), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Computer and information sciences},\ntitle = {Benchmarking Graph Neural Networks},\npublisher = {arXiv},\nyear = {2020},\ncopyright = {arXiv.org perpetual, non-exclusive license}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/cifar/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to classify pictures into 10 graph classes including aeroplane, automobile, birds, cat, deer, dog, frog, horse, ship, truck.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 10, 10 | "train_set": { 11 | "file": "cifar_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "cifar_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "cifar_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/citeseer/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Zhilin Yang 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial 
portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /datasets/citeseer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "CITESEER dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Citeseer dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "citeseer__graph__Node_NodeFeature__48cffb6534f4b56a45196efa8b32cdac.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Citeseer dataset, int ranged from 1 to 6.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "citeseer__graph__aed93544b5c54381d05b5452603278fb.npz", 16 | "key": "Node_NodeLabel" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "citeseer__graph__aed93544b5c54381d05b5452603278fb.npz", 22 | "key": "Edge_Edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "citeseer__graph__Graph_NodeList__be3f84ead018cfb899bd6f98d0bb92db.sparse.npz" 28 | } 29 | } 30 | }, 31 | "citation": "@inproceedings{yang2016revisiting,\ntitle={Revisiting semi-supervised learning with graph embeddings},\nauthor={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan},\nbooktitle={International conference on machine learning},\npages={40--48},\nyear={2016},\norganization={PMLR}\n}", 32 | "is_heterogeneous": false 33 | } -------------------------------------------------------------------------------- 
/datasets/citeseer/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on CITESEER dataset. Planetoid split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 6, 9 | "train_set": { 10 | "file": "citeseer__task_node_classification_1__d0b7b5f7e7e7cb9b84e0b3e97354e16e.npz", 11 | "key": "train_set" 12 | }, 13 | "val_set": { 14 | "file": "citeseer__task_node_classification_1__d0b7b5f7e7e7cb9b84e0b3e97354e16e.npz", 15 | "key": "val_set" 16 | }, 17 | "test_set": { 18 | "file": "citeseer__task_node_classification_1__d0b7b5f7e7e7cb9b84e0b3e97354e16e.npz", 19 | "key": "test_set" 20 | } 21 | } -------------------------------------------------------------------------------- /datasets/cora/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 Zhilin Yang 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /datasets/cora/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "CORA dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Cora dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "cora__graph__Node_NodeFeature__7032c9c380d1889061dcbbcd76b8c427.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Cora dataset, int ranged from 1 to 7.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "cora__graph__6c912909fa18eff10797210ea5e485fe.npz", 16 | "key": "Node_NodeLabel" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "cora__graph__6c912909fa18eff10797210ea5e485fe.npz", 22 | "key": "Edge_Edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "cora__graph__Graph_NodeList__23bbef862fd6037395412eb03b4e1d9c.sparse.npz" 28 | } 29 | } 30 | }, 31 | "citation": "@inproceedings{yang2016revisiting,\ntitle={Revisiting semi-supervised learning with graph embeddings},\nauthor={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan},\nbooktitle={International conference on machine learning},\npages={40--48},\nyear={2016},\norganization={PMLR}\n}", 32 | "is_heterogeneous": false 33 | } -------------------------------------------------------------------------------- /datasets/cora/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on CORA dataset. 
Planetoid split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 7, 9 | "train_set": { 10 | "file": "cora__task_node_classification_1__41e167258678b585872679839ce9c40f.npz", 11 | "key": "train_set" 12 | }, 13 | "val_set": { 14 | "file": "cora__task_node_classification_1__41e167258678b585872679839ce9c40f.npz", 15 | "key": "val_set" 16 | }, 17 | "test_set": { 18 | "file": "cora__task_node_classification_1__41e167258678b585872679839ce9c40f.npz", 19 | "key": "test_set" 20 | } 21 | } -------------------------------------------------------------------------------- /datasets/cornell/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/cornell/LICENSE -------------------------------------------------------------------------------- /datasets/cornell/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Cornell dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Cornell dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "cornell_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Cornell dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "cornell.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "cornell.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "cornell.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "cornell.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{garcia2016using,\ntitle={Using fuzzy logic to leverage HTML markup for web page representation},\nauthor={Garcia-Plaza, 
Alberto P and Fresno, Victor and Unanue, Raquel Martinez and Zubiaga, Arkaitz},\njournal={IEEE Transactions on Fuzzy Systems},\nvolume={25},\nnumber={4},\npages={919--933},\nyear={2016},\npublisher={IEEE}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/cornell/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Cornell dataset. Webkb split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "cornell_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "cornell_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "cornell_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/genius/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/genius/LICENSE -------------------------------------------------------------------------------- /datasets/genius/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "genius dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of genius dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "genius.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of genius dataset, 1/0-valued vectors.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "genius.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "genius.npz", 
23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "genius.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "genius.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/genius/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on genius dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 2, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 421961 13 | } 14 | -------------------------------------------------------------------------------- /datasets/mnist/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Vijay Prakash Dwivedi, Chaitanya K. 
Joshi, Anh Tuan Luu, Thomas Laurent, Yoshua Bengio, Xavier Bresson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /datasets/mnist/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "mnist dataset", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. 
This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "mnist.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "mnist.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "SparseTensor", 22 | "file": "mnist.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "mnist_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "mnist.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@misc{https://doi.org/10.48550/arxiv.2003.00982,\ndoi = {10.48550/ARXIV.2003.00982},\nurl = {https://arxiv.org/abs/2003.00982},\nauthor = {Dwivedi, Vijay Prakash and Joshi, Chaitanya K. 
and Luu, Anh Tuan and Laurent, Thomas and Bengio, Yoshua and Bresson, Xavier},\nkeywords = {Machine Learning (cs.LG), Machine Learning (stat.ML), FOS: Computer and information sciences, FOS: Computer and information sciences},\ntitle = {Benchmarking Graph Neural Networks},\npublisher = {arXiv},\nyear = {2020},\ncopyright = {arXiv.org perpetual, non-exclusive license}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/mnist/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to classify each handwritten image into one of the ten digit classes (0-9).", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 10, 10 | "train_set": { 11 | "file": "mnist_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "mnist_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "mnist_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molbace/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /datasets/ogbg-molbace/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molbace dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molbace.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molbace.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. 
This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molbace.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molbace_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molbace.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molbace/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molbace_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molbace_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molbace_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molclintox/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of 
this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /datasets/ogbg-molclintox/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molclintox dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molclintox.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molclintox.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. 
This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molclintox.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molclintox_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molclintox.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molclintox/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molclintox_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molclintox_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molclintox_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molfreesolv/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /datasets/ogbg-molfreesolv/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molfreesolv dataset", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. 
This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molfreesolv.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molfreesolv.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molfreesolv.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molfreesolv_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molfreesolv.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molfreesolv/task_graph_regression_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphRegression", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "train_set": { 10 | "file": "ogbg-molfreesolv_task.npz", 11 | "key": "train" 12 | }, 13 | "val_set": { 14 | "file": "ogbg-molfreesolv_task.npz", 
15 | "key": "val" 16 | }, 17 | "test_set": { 18 | "file": "ogbg-molfreesolv_task.npz", 19 | "key": "test" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /datasets/ogbg-molhiv/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/ogbg-molhiv/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "OGBg-molhiv dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of ogbg-molhiv dataset.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molhiv.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molhiv.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Node features of ogbg-molhiv dataset.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molhiv.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molhiv_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molhiv.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molhiv/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not. 
Note that some datasets (e.g., ogbg-molpcba) can have multiple tasks, and can contain nan that indicates the corresponding label is not assigned to the molecule.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molhiv_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molhiv_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molhiv_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molmuv/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/ogbg-molmuv/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molmuv dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molmuv.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molmuv.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molmuv.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molmuv_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molmuv.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh S Pappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513--520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molmuv/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the target 
molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molmuv_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molmuv_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molmuv_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molpcba/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/ogbg-molpcba/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molpcba dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molpcba.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molpcba.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molpcba.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molpcba_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molpcba.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molpcba/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the 
target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molpcba_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molpcba_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molpcba_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbg-molsider/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 snap-stanford 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /datasets/ogbg-molsider/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbg-molsider dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbg-molsider.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbg-molsider.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeFeature": { 19 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row represents the feature of i-th edge. This can be None if no input edge features are available.", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbg-molsider.npz", 23 | "key": "edge_feats" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "ogbg-molsider_node_list.sparse.npz" 29 | }, 30 | "GraphLabel": { 31 | "file": "ogbg-molsider.npz", 32 | "type": "int", 33 | "format": "Tensor", 34 | "key": "graph_class" 35 | } 36 | } 37 | }, 38 | "citation": "@inproceedings{Wu2018Stanford,\ntitle={Moleculenet: a benchmark for molecular machine learning},\nauthor={Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh SPappu, Karl Leswing, and Vijay Pande},\nbooktitle={Chemical Science},\npages={513=520},\nyear={2018}\n}", 39 | "is_heterogeneous": false 40 | } 41 | -------------------------------------------------------------------------------- /datasets/ogbg-molsider/task_graph_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to 
predict the target molecular properties as accurately as possible, where the molecular properties are cast as binary labels, e.g, whether a molecule inhibits HIV virus replication or not.", 3 | "type": "GraphClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Graph/GraphLabel", 9 | "num_classes": 2, 10 | "train_set": { 11 | "file": "ogbg-molsider_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbg-molsider_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbg-molsider_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbl-collab/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "OGBL-COLLAB dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of ogbl-collab dataset.", 7 | "type": "float", 8 | "format": "Tensor", 9 | "file": "ogbl-collab.npz", 10 | "key": "node_feats" 11 | } 12 | }, 13 | "Edge": { 14 | "_Edge": { 15 | "file": "ogbl-collab.npz", 16 | "key": "edge" 17 | }, 18 | "EdgeWeight": { 19 | "description": "Number of co-authored papers published in that year", 20 | "type": "int", 21 | "format": "Tensor", 22 | "file": "ogbl-collab.npz", 23 | "key": "edge_weight" 24 | }, 25 | "EdgeYear": { 26 | "description": "Year of the collaboration represented by the Edge", 27 | "type": "int", 28 | "format": "Tensor", 29 | "file": "ogbl-collab.npz", 30 | "key": "edge_year" 31 | } 32 | }, 33 | "Graph": { 34 | "_NodeList": { 35 | "file": "ogbl-collab.npz", 36 | "key": "node_list" 37 | }, 38 | "_EdgeList": { 39 | "file": "ogbl-collab.npz", 40 | "key": "edge_list" 41 | } 42 | } 43 | }, 44 | "citation": "@inproceedings{wang2020microsoft,\ntitle={Microsoft academic graph: When experts are not enough},\nauthor={Wang, Kuansan and Shen, Zhihong and Huang, Chiyuan and Wu, Chieh-Han and Dong, 
Yuxiao and Kanakia, Anshul},\nbooktitle={Quantitative Science Studies},\npages={396--413},\nyear={2020}\n}", 45 | "is_heterogeneous": false 46 | } 47 | -------------------------------------------------------------------------------- /datasets/ogbl-collab/task_time_dependent_link_prediction_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the future author collaboration relationships given the past collaborations. The goal is to rank true collaborations higher than false collaborations. Specifically, we rank each true collaboration among a set of 100,000 randomly-sampled negative collaborations, and count the ratio of positive edges that are ranked at K-place or above (Hits@K). We found K = 50 to be a good threshold in our preliminary experiments.", 3 | "type": "TimeDependentLinkPrediction", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeWeight" 7 | ], 8 | "time": "Edge/EdgeYear", 9 | "train_time_window": [1963, 2018], 10 | "val_time_window": [2018, 2019], 11 | "test_time_window": [2019, 2020] 12 | } 13 | -------------------------------------------------------------------------------- /datasets/ogbl-collab/task_time_dependent_link_prediction_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the future author collaboration relationships given the past collaborations. The goal is to rank true collaborations higher than false collaborations. Specifically, we rank each true collaboration among a set of 100,000 randomly-sampled negative collaborations, and count the ratio of positive edges that are ranked at K-place or above (Hits@K). 
We found K = 50 to be a good threshold in our preliminary experiments.", 3 | "type": "TimeDependentLinkPrediction", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeWeight" 7 | ], 8 | "time": "Edge/EdgeYear", 9 | "val_neg": { 10 | "file": "ogbl-collab_task_prestore_neg.npz", 11 | "key": "val_neg" 12 | }, 13 | "test_neg": { 14 | "file": "ogbl-collab_task_prestore_neg.npz", 15 | "key": "test_neg" 16 | }, 17 | "train_time_window": [1963, 2018], 18 | "val_time_window": [2018, 2019], 19 | "test_time_window": [2019, 2020] 20 | } 21 | -------------------------------------------------------------------------------- /datasets/ogbn-arxiv/README.md: -------------------------------------------------------------------------------- 1 | # Ogbn-arxiv 2 | ## Data Description 3 | 4 | The **ogbn-arxiv** dataset is a directed graph, representing the citation network between all Computer Science (CS) arXiv papers indexed by MAG. Each node is an arXiv paper and each directed edge indicates that one paper cites another one. Each paper comes with a 128-dimensional feature vector obtained by averaging the embeddings of words in its title and abstract. 5 | 6 | Statistics: 7 | 1. Nodes: 169343 8 | 2. Edges: 1166243 9 | 10 | 11 | #### Citation 12 | - Original Source 13 | - [Website](https://direct.mit.edu/qss/article/1/1/396/15572/Microsoft-Academic-Graph-When-experts-are-not) 14 | - LICENSE: Missing 15 | ``` 16 | @inproceedings{Wu2018Stanford, 17 | title={Microsoft academic graph: When experts are not enough. 
}, 18 | author={Kuansan Wang, Zhihong Shen, Chiyuan Huang, Chieh-Han Wu, Yuxiao Dong, and Anshul Kanakia}, 19 | booktitle={Quantitative Science Studies}, 20 | pages={396=413}, 21 | year={2020} 22 | } 23 | ``` 24 | - Current Version 25 | - [Website](https://ogb.stanford.edu/docs/linkprop/) 26 | - LICENSE: [ODC-BY](https://ogb.stanford.edu/docs/linkprop/) 27 | ``` 28 | @article{hu2022stanford, 29 | title={Open Graph Benchmark: Datasets for Machine Learning on Graphs}, 30 | author={Hu, Weihua and Fey, Matthias and Zitnik, Marinka and Dong, Yuxiao and Ren, Hongyu and Liu, Bowen and Catasta, Michele and Leskovec, Jure}, 31 | year={2021} 32 | } 33 | ``` 34 | 35 | ## Available Tasks 36 | ### [OGB](https://ogb.stanford.edu/docs/nodeprop/) 37 | - Task type: `NodeClassification` 38 | 39 | #### Citation 40 | ``` 41 | @inproceedings{ 42 | title={Distributed representationsof words and phrases and their compositionality}, 43 | author={Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg S Corrado, and Jeff Dean}, 44 | booktitle={In Advances in Neural Information Processing Systems (NeurIPS)}, 45 | pages={3111=3119}, 46 | year={2013} 47 | } 48 | ``` 49 | 50 | ## Preprocessing 51 | The data files and task config file in GLI format are transformed from the OGB implementation. 52 | 53 | ### Requirements 54 | The preprocessing code requires the following package. 55 | ``` 56 | ogb >= 1.1.1 57 | numpy 58 | torch 59 | ``` 60 | -------------------------------------------------------------------------------- /datasets/ogbn-arxiv/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbn-arxiv dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. 
This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbn-arxiv.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeYear": { 13 | "description": "Year of the arxiv paper represented by the node", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "ogbn-arxiv.npz", 17 | "key": "node_year" 18 | }, 19 | "NodeLabel": { 20 | "description": "Label of the arxiv paper represented by the node", 21 | "type": "int", 22 | "format": "Tensor", 23 | "file": "ogbn-arxiv.npz", 24 | "key": "node_label" 25 | } 26 | }, 27 | "Edge": { 28 | "_Edge": { 29 | "file": "ogbn-arxiv.npz", 30 | "key": "edge" 31 | }, 32 | "ID": { 33 | "description": "ID of the edge, range from 0 to 1166243", 34 | "file": "ogbn-arxiv.npz", 35 | "key": "edge_id" 36 | } 37 | }, 38 | "Graph": { 39 | "_NodeList": { 40 | "file": "ogbn-arxiv.npz", 41 | "key": "node_list" 42 | }, 43 | "_EdgeList": { 44 | "file": "ogbn-arxiv.npz", 45 | "key": "edge_list" 46 | } 47 | } 48 | }, 49 | "citation": "@inproceedings{\ntitle={Microsoft academic graph: When experts are not enough. 
},\nauthor={Kuansan Wang, Zhihong Shen, Chiyuan Huang, Chieh-Han Wu, Yuxiao Dong, and Anshul Kanakia.},\nbooktitle={Quantitative Science Studies},\npages={396=413},\nyear={2020}\n}", 50 | "is_heterogeneous": false 51 | } 52 | -------------------------------------------------------------------------------- /datasets/ogbn-arxiv/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the 40 subject areas of arXiv CS papers, e.g., cs.AI, cs.LG, and cs.OS, which are manually determined (i.e., labeled) by the paper’s authors and arXiv moderators", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Node/NodeLabel", 9 | "num_classes":40, 10 | "train_set": { 11 | "file": "ogbn-arxiv_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbn-arxiv_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbn-arxiv_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbn-mag/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The ogbn-mag dataset is a heterogeneous network composed of a subset of the Microsoft Academic Graph (MAG) [1]. It contains four types of entities\u2014papers (736,389 nodes), authors (1,134,649 nodes), institutions (8,740 nodes), and fields of study (59,965 nodes)\u2014as well as four types of directed relations connecting two types of entities\u2014an author is \u201caffiliated with\u201d an institution, an author \u201cwrites\u201d a paper, a paper \u201ccites\u201d a paper, and a paper \u201chas a topic of\u201d a field of study. 
Similar to ogbn-arxiv, each paper
You may not (a) link or associate content in the Reviews Library with any personal information (including Amazon customer accounts), or (b) attempt to determine the identity of the author of any content in the Reviews Library. If you violate any of the foregoing conditions, your license to access and use the Reviews Library will automatically terminate without prejudice to any of the other rights or remedies Amazon may have. 6 | -------------------------------------------------------------------------------- /datasets/ogbn-products/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbn-products dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "numpy ndarray of shape (num_nodes, nodefeat_dim), where nodefeat_dim is the dimensionality of node features and i-th row represents the feature of i-th node. This can be None if no input node features are available.", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbn-products.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "47 Labels of each node", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "ogbn-products.npz", 17 | "key": "node_label" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "ogbn-products.npz", 23 | "key": "edge" 24 | }, 25 | "ID": { 26 | "description": "ID of the edge, range from 0 to 1166243", 27 | "file": "ogbn-products.npz", 28 | "key": "edge_id" 29 | } 30 | }, 31 | "Graph": { 32 | "_NodeList": { 33 | "file": "ogbn-products.npz", 34 | "key": "node_list" 35 | }, 36 | "_EdgeList": { 37 | "file": "ogbn-products.npz", 38 | "key": "edge_list" 39 | } 40 | } 41 | }, 42 | "citation": "@Misc{Bhatia16,\nauthor = {Bhatia, K. and Dahiya, K. and Jain, H. and Kar, P. and Mittal, A. and Prabhu, Y. 
and Varma, M.},\ntitle = {The extreme classification repository: Multi-label datasets and code},\nurl = {http://manikvarma.org/downloads/XC/XMLRepository.html},\nyear = {2016}}", 43 | "is_heterogeneous": false 44 | } 45 | -------------------------------------------------------------------------------- /datasets/ogbn-products/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the category of a product in a multi-class classification setup, where the 47 top-level categories are used for target labels.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Node/NodeLabel", 9 | "num_classes":47, 10 | "train_set": { 11 | "file": "ogbn-products_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbn-products_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbn-products_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/ogbn-proteins/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "ogbn-proteins dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeSpecies": { 6 | "description": "Species of the proteins represented by the node", 7 | "type": "int", 8 | "format": "Tensor", 9 | "file": "ogbn-proteins.npz", 10 | "key": "node_species" 11 | }, 12 | "NodeLabel": { 13 | "description": "Labels of the proteins represented by the node", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "ogbn-proteins.npz", 17 | "key": "node_label" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "ogbn-proteins.npz", 23 | "key": "edge" 24 | }, 25 | "EdgeFeature": { 26 | "description": "Numpy ndarray of shape (num_edges, edgefeat_dim), where edgefeat_dim is the dimensionality of edge features and i-th row 
represents the feature of i-th edge. This can be None if no input edge features are available.", 27 | "type": "int", 28 | "format": "Tensor", 29 | "file": "ogbn-proteins.npz", 30 | "key": "edge_feats" 31 | }, 32 | "ID": { 33 | "description": "ID of the edge, range from 0 to 30387995", 34 | "file": "ogbn-proteins.npz", 35 | "key": "edge_id" 36 | } 37 | }, 38 | "Graph": { 39 | "_NodeList": { 40 | "file": "ogbn-proteins.npz", 41 | "key": "node_list" 42 | }, 43 | "_EdgeList": { 44 | "file": "ogbn-proteins.npz", 45 | "key": "edge_list" 46 | } 47 | } 48 | }, 49 | "citation": "@inproceedings{title={STRING v11: protein–protein association networks with increased coverage, supporting functional discovery in genome-wide experimental datasets.},\nauthor={Damian Szklarczyk, Annika L Gable, David Lyon, Alexander Junge, Stefan Wyder, Jaime Huerta- Cepas, Milan Simonovic, Nadezhda T Doncheva, John H Morris, Peer Bork, et al.},\nbooktitle={Nucleic Acids Research},\npages={607=613},\nyear={2029}}", 50 | "is_heterogeneous": false 51 | } 52 | -------------------------------------------------------------------------------- /datasets/ogbn-proteins/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "The task is to predict the presence of protein functions in a multi-label binary classification setup, where there are 112 kinds of labels to predict in total. 
", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeSpecies", 6 | "Edge/EdgeFeature" 7 | ], 8 | "target": "Node/NodeLabel", 9 | "num_classes":2, 10 | "train_set": { 11 | "file": "ogbn-proteins_task.npz", 12 | "key": "train" 13 | }, 14 | "val_set": { 15 | "file": "ogbn-proteins_task.npz", 16 | "key": "val" 17 | }, 18 | "test_set": { 19 | "file": "ogbn-proteins_task.npz", 20 | "key": "test" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/penn94/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/penn94/LICENSE -------------------------------------------------------------------------------- /datasets/penn94/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Penn94 dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Penn94 dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "penn94.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of Penn94 dataset, 1/0-valued vectors.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "penn94.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "penn94.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "penn94.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "penn94.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information 
Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/penn94/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Penn94 dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 2, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 41554 13 | } 14 | -------------------------------------------------------------------------------- /datasets/pokec/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/pokec/LICENSE -------------------------------------------------------------------------------- /datasets/pokec/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "pokec dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of pokec dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "pokec.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of pokec dataset, 1/0-valued vectors.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "pokec.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "pokec.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "pokec.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "pokec.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks 
and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/pokec/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Pokec dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 2, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 1632803 13 | } 14 | -------------------------------------------------------------------------------- /datasets/pubmed/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/pubmed/LICENSE -------------------------------------------------------------------------------- /datasets/pubmed/README.md: -------------------------------------------------------------------------------- 1 | # PUBMED 2 | 3 | ## Dataset Description 4 | 5 | The PUBMED dataset contains a citation network with with documents as nodes and citations as edges. Each node has bag-of-words features of the document and a class label represents the research area this document belongs to. 
6 | 7 | Statistics: 8 | - Nodes: 19717 9 | - Edges: 88651 10 | - Number of Classes: 3 11 | 12 | #### Citation 13 | - Original Source 14 | + [Website](https://linqs.org/datasets/#pubmed-diabetes) 15 | + LICENSE: missing 16 | ``` 17 | @inproceedings{namata:mlg12, 18 | title = {Query-Driven Active Surveying for Collective Classification}, 19 | author = {Galileo Mark Namata and Ben London and Lise Getoor and Bert Huang}, 20 | booktitle = {International Workshop on Mining and Learning with Graphs (MLG)}, 21 | year = {2012}, 22 | _publisher = {MLG}, 23 | address = {Edinburgh, Scotland}, 24 | } 25 | ``` 26 | - Current Version 27 | + [Website](https://github.com/kimiyoung/planetoid) 28 | + LICENSE: [MIT](https://github.com/kimiyoung/planetoid/blob/master/LICENSE) 29 | ``` 30 | @inproceedings{yang2016revisiting, 31 | title={Revisiting semi-supervised learning with graph embeddings}, 32 | author={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan}, 33 | booktitle={International conference on machine learning}, 34 | pages={40--48}, 35 | year={2016}, 36 | organization={PMLR} 37 | } 38 | ``` 39 | 40 | ## Available Tasks 41 | 42 | ### Planetoid 43 | 44 | - Task type: `NodeClassification` 45 | 46 | This is a node classification task with fixed split from [planetoid](https://github.com/kimiyoung/planetoid). 47 | 48 | #### Citation 49 | 50 | ``` 51 | @inproceedings{yang2016revisiting, 52 | title={Revisiting semi-supervised learning with graph embeddings}, 53 | author={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan}, 54 | booktitle={International conference on machine learning}, 55 | pages={40--48}, 56 | year={2016}, 57 | organization={PMLR} 58 | } 59 | ``` 60 | 61 | ## Preprocessing 62 | 63 | The data files and task config file in GLI format are transformed from the [DGL](https://www.dgl.ai) implementation. Check `pubmed.ipynb` for the preprocessing. 64 | 65 | 66 | ### Requirements 67 | 68 | The preprocessing code requires the following packages. 
69 | 70 | ``` 71 | scipy==1.7.1 72 | dgl-cuda11.3==0.7.2 73 | ``` 74 | -------------------------------------------------------------------------------- /datasets/pubmed/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "PUBMED dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Pubmed dataset.", 7 | "type": "float", 8 | "format": "SparseTensor", 9 | "file": "pubmed_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Pubmed dataset, int ranged from 1 to 3.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "pubmed.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "pubmed.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "pubmed.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "pubmed.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@inproceedings{yang2016revisiting,\ntitle={Revisiting semi-supervised learning with graph embeddings},\nauthor={Yang, Zhilin and Cohen, William and Salakhudinov, Ruslan},\nbooktitle={International conference on machine learning},\npages={40--48},\nyear={2016},\norganization={PMLR}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/pubmed/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on PUBMED dataset. 
Planetoid split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 6, 9 | "train_set": { 10 | "file": "pubmed_task.npz", 11 | "key": "train" 12 | }, 13 | "val_set": { 14 | "file": "pubmed_task.npz", 15 | "key": "val" 16 | }, 17 | "test_set": { 18 | "file": "pubmed_task.npz", 19 | "key": "test" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /datasets/reddit/README.md: -------------------------------------------------------------------------------- 1 | # Reddit 2 | 3 | ## Dataset Description 4 | 5 | The Reddit dataset is a graph dataset from Reddit posts made in the month of September, 2014. The node label in this case is the community that a post belongs to. 50 large communities have been sampled to build a post-to-post graph, connecting posts if the same user comments on both. 6 | 7 | Statistics: 8 | - Nodes: 232965 9 | - Edges: 114615892 10 | - Number of Classes: 41 11 | 12 | #### Citation 13 | - Original Source 14 | + [Website](http://snap.stanford.edu/graphsage/) 15 | + LICENSE: [MIT](https://github.com/williamleif/GraphSAGE/blob/master/LICENSE.txt) 16 | ``` 17 | @article{hamilton2017inductive, 18 | title={Inductive representation learning on large graphs}, 19 | author={Hamilton, Will and Ying, Zhitao and Leskovec, Jure}, 20 | journal={Advances in neural information processing systems}, 21 | volume={30}, 22 | year={2017} 23 | } 24 | ``` 25 | 26 | - Current Version 27 | + [Website](http://snap.stanford.edu/graphsage/) 28 | + LICENSE: [MIT](https://github.com/williamleif/GraphSAGE/blob/master/LICENSE.txt) 29 | ``` 30 | @article{hamilton2017inductive, 31 | title={Inductive representation learning on large graphs}, 32 | author={Hamilton, Will and Ying, Zhitao and Leskovec, Jure}, 33 | journal={Advances in neural information processing systems}, 34 | volume={30}, 35 | year={2017} 36 | } 37 | ``` 38 | 39 | - Previous Version 40 | 41 
| 42 | 43 | 44 | ## Available Tasks 45 | 46 | - Task type: `NodeClassification` 47 | 48 | 49 | #### Citation 50 | 51 | ``` 52 | @article{hamilton2017inductive, 53 | title={Inductive representation learning on large graphs}, 54 | author={Hamilton, Will and Ying, Zhitao and Leskovec, Jure}, 55 | journal={Advances in neural information processing systems}, 56 | volume={30}, 57 | year={2017} 58 | } 59 | ``` 60 | 61 | ## Preprocessing 62 | 63 | The data files and task config file in GLI format are transformed from the [DGL](https://www.dgl.ai) implementation (check docs for [Reddit Dataset](https://docs.dgl.ai/en/0.9.x/generated/dgl.data.RedditDataset.html?highlight=reddit#dgl.data.RedditDataset)). Check `reddit.ipynb` for the preprocessing. 64 | 65 | 66 | ### Requirements 67 | 68 | The preprocessing code requires the following packages. 69 | 70 | ``` 71 | numpy 72 | torch 73 | dgl==1.1.2 74 | ``` 75 | -------------------------------------------------------------------------------- /datasets/reddit/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Reddit dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Reddit dataset, incorporating pretrained GloVe CommonCrawl word embeddings.", 7 | "type": "float", 8 | "format": "Tensor", 9 | "file": "reddit__graph__bfb7717c1f9b72842adc4af257467122.npz", 10 | "key": "Node_NodeFeature" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of Reddit dataset, int ranged from 0 to 40.", 14 | "type": "int", 15 | "format": "Tensor", 16 | "file": "reddit__graph__bfb7717c1f9b72842adc4af257467122.npz", 17 | "key": "Node_NodeLabel" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "reddit__graph__bfb7717c1f9b72842adc4af257467122.npz", 23 | "key": "Edge_Edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "reddit__graph__Graph_NodeList__e4f77fbbcc4906feaf9f51e8d2a6da98.sparse.npz" 29 | } 30 | } 
31 | }, 32 | "citation": "@article{hamilton2017inductive,\ntitle={Inductive representation learning on large graphs},\nauthor={Hamilton, Will and Ying, Zhitao and Leskovec, Jure},\njournal={Advances in neural information processing systems},\nvolume={30},\nyear={2017}}", 33 | "is_heterogeneous": false 34 | } -------------------------------------------------------------------------------- /datasets/reddit/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Reddit dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 41, 9 | "train_set": { 10 | "file": "reddit__task_node_classification_1__f966ab3b42876ca118130cd1ea52237f.npz", 11 | "key": "train_set" 12 | }, 13 | "val_set": { 14 | "file": "reddit__task_node_classification_1__f966ab3b42876ca118130cd1ea52237f.npz", 15 | "key": "val_set" 16 | }, 17 | "test_set": { 18 | "file": "reddit__task_node_classification_1__f966ab3b42876ca118130cd1ea52237f.npz", 19 | "key": "test_set" 20 | } 21 | } -------------------------------------------------------------------------------- /datasets/snap-patents/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/snap-patents/LICENSE -------------------------------------------------------------------------------- /datasets/snap-patents/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "snap-patents dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of snap-patents dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "snap_patents.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node 
labels of snap-patents dataset, int ranged from 0 to 4.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "snap_patents.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "snap_patents.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "snap_patents.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "snap_patents.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/snap-patents/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on snap-patents dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 2923922 13 | } 14 | -------------------------------------------------------------------------------- /datasets/squirrel/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/squirrel/LICENSE -------------------------------------------------------------------------------- /datasets/squirrel/README.md: -------------------------------------------------------------------------------- 1 | # Squirrel 2 | 3 | ## Dataset 
Description 4 | 5 | Squirrel is a page-page network on specific topics in Wikipedia. In this dataset, nodes represent web pages and edges are mutual links between pages. And node features correspond to several informative nouns in the Wikipedia pages. The nodes are classified into five categories. 6 | 7 | Statistics: 8 | - Nodes: 5201 9 | - Edges: 217073 10 | - Number of Classes: 5 11 | 12 | #### Citation 13 | - Original Source 14 | + [Website](https://github.com/benedekrozemberczki/datasets#wikipedia-article-networks) 15 | + LICENSE: [MIT](https://github.com/benedekrozemberczki/datasets/blob/master/LICENSE) 16 | ``` 17 | @article{rozemberczki2021multi, 18 | title={Multi-scale attributed node embedding}, 19 | author={Rozemberczki, Benedek and Allen, Carl and Sarkar, Rik}, 20 | journal={Journal of Complex Networks}, 21 | volume={9}, 22 | number={2}, 23 | pages={cnab014}, 24 | year={2021}, 25 | publisher={Oxford University Press} 26 | } 27 | ``` 28 | - Current Version 29 | + [Website](https://github.com/graphdml-uiuc-jlu/geom-gcn) 30 | + LICENSE: missing 31 | ``` 32 | @article{pei2020geom, 33 | title={Geom-gcn: Geometric graph convolutional networks}, 34 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 35 | journal={arXiv preprint arXiv:2002.05287}, 36 | year={2020} 37 | } 38 | ``` 39 | 40 | ## Available Tasks 41 | 42 | ### MUSAE 43 | 44 | - Task type: `NodeClassification` 45 | 46 | This is a node classification task with fixed split from [MUSAE](https://github.com/benedekrozemberczki/MUSAE). 
47 | 48 | #### Citation 49 | 50 | ``` 51 | @article{pei2020geom, 52 | title={Geom-gcn: Geometric graph convolutional networks}, 53 | author={Pei, Hongbin and Wei, Bingzhe and Chang, Kevin Chen-Chuan and Lei, Yu and Yang, Bo}, 54 | journal={arXiv preprint arXiv:2002.05287}, 55 | year={2020} 56 | } 57 | ``` 58 | 59 | ## Preprocessing 60 | The data files and task config file in GLI format are transformed from the [torch_geometric.datasets](https://pytorch-geometric.readthedocs.io/en/latest/modules/datasets.html). Check `squirrel.ipynb` for the preprocessing. 61 | 62 | 63 | ### Requirements 64 | 65 | The preprocessing code requires the following packages. 66 | 67 | ``` 68 | numpy==1.22.3 69 | scipy==1.7.3 70 | torch==1.11.0 71 | torch_geometric==2.0.4 72 | ``` 73 | -------------------------------------------------------------------------------- /datasets/squirrel/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Squirrel dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Squirrel dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "squirrel_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Squirrel dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "squirrel.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "squirrel.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "squirrel.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "squirrel.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{rozemberczki2021multi,\ntitle={Multi-scale attributed node embedding},\nauthor={Rozemberczki, Benedek and Allen, Carl and Sarkar, Rik},\njournal={Journal of Complex 
Networks},\nvolume={9},\nnumber={2},\npages={cnab014},\nyear={2021},\npublisher={Oxford University Press}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/squirrel/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Squirrel dataset. The split is introduced in the paper \"Multi-scale Attributed Node Embedding\", while the classification categories are introduced in the paper \"Geom-GCN: Geometric Graph Convolutional Networks\".", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "squirrel_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "squirrel_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "squirrel_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/texas/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/texas/LICENSE -------------------------------------------------------------------------------- /datasets/texas/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Texas dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Texas dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "texas_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of texas dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "texas.npz", 
16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "texas.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "texas.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "texas.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{garcia2016using,\ntitle={Using fuzzy logic to leverage HTML markup for web page representation},\nauthor={Garcia-Plaza, Alberto P and Fresno, Victor and Unanue, Raquel Martinez and Zubiaga, Arkaitz},\njournal={IEEE Transactions on Fuzzy Systems},\nvolume={25},\nnumber={4},\npages={919--933},\nyear={2016},\npublisher={IEEE}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/texas/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Texas dataset. Webkb split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "texas_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "texas_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "texas_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /datasets/twitch-gamers/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/twitch-gamers/LICENSE -------------------------------------------------------------------------------- /datasets/twitch-gamers/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "twitch-gamers 
dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of twitch-gamers dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "twitch_gamers.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of twitch-gamers dataset, int ranged from 0 to 1.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "twitch_gamers.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "twitch_gamers.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "twitch_gamers.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "twitch_gamers.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/twitch-gamers/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on twitch-gamers dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 2, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 168114 13 | } 14 | -------------------------------------------------------------------------------- /datasets/wiki/LICENSE: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/wiki/LICENSE -------------------------------------------------------------------------------- /datasets/wiki/README.md: -------------------------------------------------------------------------------- 1 | # wiki 2 | 3 | ## Dataset Description 4 | Wiki is a dataset of Wikipedia articles, where nodes represent pages and edges represent links between them. This dataset was collected by Lim et al. [1]. Node features are constructed using averaged title and abstract GloVe embeddings. Labels represent total page views over 60 days, which are partitioned into quintiles to make five classes. 5 | 6 | Statistics: 7 | - Nodes: 1925342 8 | - Edges: 303434860 9 | - Number of Classes: 5 10 | 11 | [1]: Lim, Derek, Felix Hohne, Xiuyu Li, Sijia Linda Huang, Vaishnavi Gupta, Omkar Bhalerao, and Ser Nam Lim. "Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods." Advances in Neural Information Processing Systems 34 (2021): 20887-20902. 
12 | 13 | 14 | #### Citation 15 | - Original Source 16 | 17 | - [Website](https://github.com/CUAI/Non-Homophily-Large-Scale) 18 | - LICENSE: missing 19 | ``` 20 | @article{lim2021large, 21 | title={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods}, 22 | author={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam}, 23 | journal={Advances in Neural Information Processing Systems}, 24 | volume={34}, 25 | pages={20887--20902}, 26 | year={2021} 27 | } 28 | ``` 29 | ## Available Tasks 30 | 31 | ### wiki 32 | 33 | - Task type: `NodeClassification` 34 | 35 | 36 | #### Citation 37 | 38 | ``` 39 | @article{lim2021large, 40 | title={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods}, 41 | author={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam}, 42 | journal={Advances in Neural Information Processing Systems}, 43 | volume={34}, 44 | pages={20887--20902}, 45 | year={2021} 46 | } 47 | ``` 48 | 49 | ## Preprocessing 50 | The data file in GLI format is transformed from the [CUAI](https://github.com/CUAI/Non-Homophily-Large-Scale). Check [Non-homo-datasets](https://github.com/GreatSnoopyMe/Non-homo-datasets) for the preprocessing. 51 | 52 | 53 | ### Requirements 54 | 55 | The preprocessing code requires the following packages. 
56 | 57 | ``` 58 | dataset==1.5.2 59 | numpy==1.22.3 60 | scipy==1.7.3 61 | torch==1.11.0 62 | ``` 63 | -------------------------------------------------------------------------------- /datasets/wiki/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "wiki dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of wiki dataset.", 7 | "type": "float32", 8 | "format": "Tensor", 9 | "file": "wiki.npz", 10 | "key": "node_feats" 11 | }, 12 | "NodeLabel": { 13 | "description": "Node labels of wiki dataset, int ranged from 0 to 4.", 14 | "type": "int64", 15 | "format": "Tensor", 16 | "file": "wiki.npz", 17 | "key": "node_class" 18 | } 19 | }, 20 | "Edge": { 21 | "_Edge": { 22 | "file": "wiki.npz", 23 | "key": "edge" 24 | } 25 | }, 26 | "Graph": { 27 | "_NodeList": { 28 | "file": "wiki.npz", 29 | "key": "node_list" 30 | }, 31 | "_EdgeList": { 32 | "file": "wiki.npz", 33 | "key": "edge_list" 34 | } 35 | } 36 | }, 37 | "citation": "@article{lim2021large,\ntitle={Large scale learning on non-homophilous graphs: New benchmarks and strong simple methods},\nauthor={Lim, Derek and Hohne, Felix and Li, Xiuyu and Huang, Sijia Linda and Gupta, Vaishnavi and Bhalerao, Omkar and Lim, Ser Nam},\njournal={Advances in Neural Information Processing Systems},\nvolume={34},\npages={20887--20902},\nyear={2021}\n}", 38 | "is_heterogeneous": false 39 | } 40 | -------------------------------------------------------------------------------- /datasets/wiki/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on wiki dataset.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "train_ratio": 0.5, 10 | "val_ratio": 0.25, 11 | "test_ratio": 0.25, 12 | "num_samples": 1925342 13 | } 14 | 
-------------------------------------------------------------------------------- /datasets/wisconsin/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/datasets/wisconsin/LICENSE -------------------------------------------------------------------------------- /datasets/wisconsin/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Wisconsin dataset.", 3 | "data": { 4 | "Node": { 5 | "NodeFeature": { 6 | "description": "Node features of Wisconsin dataset, 1/0-valued vectors.", 7 | "type": "int", 8 | "format": "SparseTensor", 9 | "file": "wisconsin_node_feats.sparse.npz" 10 | }, 11 | "NodeLabel": { 12 | "description": "Node labels of Wisconsin dataset, int ranged from 0 to 4.", 13 | "type": "int", 14 | "format": "Tensor", 15 | "file": "wisconsin.npz", 16 | "key": "node_class" 17 | } 18 | }, 19 | "Edge": { 20 | "_Edge": { 21 | "file": "wisconsin.npz", 22 | "key": "edge" 23 | } 24 | }, 25 | "Graph": { 26 | "_NodeList": { 27 | "file": "wisconsin.npz", 28 | "key": "node_list" 29 | }, 30 | "_EdgeList": { 31 | "file": "wisconsin.npz", 32 | "key": "edge_list" 33 | } 34 | } 35 | }, 36 | "citation": "@article{garcia2016using,\ntitle={Using fuzzy logic to leverage HTML markup for web page representation},\nauthor={Garcia-Plaza, Alberto P and Fresno, Victor and Unanue, Raquel Martinez and Zubiaga, Arkaitz},\njournal={IEEE Transactions on Fuzzy Systems},\nvolume={25},\nnumber={4},\npages={919--933},\nyear={2016},\npublisher={IEEE}\n}", 37 | "is_heterogeneous": false 38 | } 39 | -------------------------------------------------------------------------------- /datasets/wisconsin/task_node_classification_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "Node classification on Wisconsin dataset. 
Webkb split.", 3 | "type": "NodeClassification", 4 | "feature": [ 5 | "Node/NodeFeature" 6 | ], 7 | "target": "Node/NodeLabel", 8 | "num_classes": 5, 9 | "num_splits": 10, 10 | "train_set": { 11 | "file": "wisconsin_task.npz", 12 | "key": "train_FOLD" 13 | }, 14 | "val_set": { 15 | "file": "wisconsin_task.npz", 16 | "key": "val_FOLD" 17 | }, 18 | "test_set": { 19 | "file": "wisconsin_task.npz", 20 | "key": "test_FOLD" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /docs/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/source/conf.py 17 | 18 | # We recommend specifying your dependencies to enable reproducible builds: 19 | # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 20 | python: 21 | install: 22 | - requirements: docs/requirements.txt 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-rtd-theme 2 | sphinx-copybutton 3 | numpy>=1.19 4 | scipy>=1.5 5 | torch>=1.10 6 | dgl>=0.6 7 | -------------------------------------------------------------------------------- /docs/source/_templates/classtemplate.rst: -------------------------------------------------------------------------------- 1 | 2 | .. 
role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: {{ module }} 6 | 7 | 8 | {{ name | underline}} 9 | 10 | .. autoclass:: {{ name }} 11 | :show-inheritance: 12 | :members: 13 | 14 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/_templates/functemplate.rst: -------------------------------------------------------------------------------- 1 | 2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: {{ module }} 6 | 7 | 8 | {{ name | underline}} 9 | 10 | .. autofunction:: {{ name }} -------------------------------------------------------------------------------- /docs/source/api/dataset.rst: -------------------------------------------------------------------------------- 1 | .. _dataset: 2 | 3 | gli.dataset 4 | =========== 5 | 6 | .. currentmodule:: gli.dataset 7 | .. automodule:: gli.dataset 8 | -------------------------------------------------------------------------------- /docs/source/api/gli.rst: -------------------------------------------------------------------------------- 1 | .. _dataloading: 2 | 3 | gli.dataloading 4 | =============== 5 | 6 | .. currentmodule:: gli.dataloading 7 | .. automodule:: gli.dataloading 8 | 9 | Utility Function 10 | ---------------- 11 | 12 | .. autosummary:: 13 | :toctree: ../generated/ 14 | :nosignatures: 15 | :template: functemplate.rst 16 | 17 | get_gli_dataset 18 | get_gli_graph 19 | get_gli_task 20 | combine_graph_and_task -------------------------------------------------------------------------------- /docs/source/api/graph.rst: -------------------------------------------------------------------------------- 1 | .. _graph: 2 | 3 | gli.graph 4 | ========= 5 | 6 | .. currentmodule:: gli.graph 7 | .. automodule:: gli.graph 8 | 9 | Utility functions 10 | ----------------- 11 | 12 | .. 
autosummary:: 13 | :toctree: ../generated/ 14 | :nosignatures: 15 | :template: functemplate.rst 16 | 17 | read_gli_graph -------------------------------------------------------------------------------- /docs/source/api/io.rst: -------------------------------------------------------------------------------- 1 | .. _io: 2 | 3 | gli.io 4 | ====== 5 | 6 | .. currentmodule:: gli.io 7 | .. automodule:: gli.io 8 | 9 | Attribute Base Class 10 | -------------------- 11 | .. autosummary:: 12 | :toctree: ../generated/ 13 | :nosignatures: 14 | :template: classtemplate.rst 15 | 16 | Attribute 17 | 18 | Utility functions 19 | ----------------- 20 | 21 | .. autosummary:: 22 | :toctree: ../generated/ 23 | :nosignatures: 24 | :template: functemplate.rst 25 | 26 | save_homograph 27 | save_heterograph 28 | save_task_node_regression 29 | save_task_node_classification -------------------------------------------------------------------------------- /docs/source/api/task.rst: -------------------------------------------------------------------------------- 1 | .. _task: 2 | 3 | gli.task 4 | ======== 5 | 6 | .. currentmodule:: gli.task 7 | .. automodule:: gli.task 8 | 9 | Base Class 10 | ---------- 11 | 12 | .. autosummary:: 13 | :toctree: ../generated/ 14 | :nosignatures: 15 | :template: classtemplate.rst 16 | 17 | GLITask 18 | 19 | Utility Function 20 | ---------------- 21 | 22 | .. autosummary:: 23 | :toctree: ../generated/ 24 | :nosignatures: 25 | :template: functemplate.rst 26 | 27 | read_gli_task 28 | 29 | Available Tasks 30 | --------------- 31 | 32 | .. 
autosummary:: 33 | :toctree: ../generated/ 34 | :nosignatures: 35 | :template: classtemplate.rst 36 | 37 | NodeClassificationTask 38 | NodeRegressionTask 39 | GraphClassificationTask 40 | GraphRegressionTask 41 | LinkPredictionTask 42 | TimeDependentLinkPredictionTask 43 | KGEntityPredictionTask 44 | KGRelationPredictionTask -------------------------------------------------------------------------------- /docs/source/api/utils.rst: -------------------------------------------------------------------------------- 1 | .. _utils: 2 | 3 | gli.utils 4 | ========= 5 | 6 | .. currentmodule:: gli.utils 7 | .. automodule:: gli.utils -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | """Configuration file for the Sphinx documentation builder. 2 | 3 | For the full list of built-in configuration values, see the documentation: 4 | https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | """ 6 | # import sphinx_rtd_theme 7 | import os 8 | import sys 9 | 10 | sys.path.insert(0, os.path.abspath('../..')) 11 | # -- Project information ----------------------------------------------------- 12 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 13 | 14 | project = 'GLI' 15 | copyright = '2022, GLI Team' # pylint: disable=redefined-builtin 16 | author = 'GLI Team' 17 | release = '0.1' 18 | 19 | # -- General configuration --------------------------------------------------- 20 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 21 | 22 | extensions = [ 23 | 'sphinx.ext.autodoc', 24 | 'sphinx.ext.autosummary', 25 | 'sphinx.ext.napoleon', 26 | 'sphinx_copybutton' 27 | ] 28 | 29 | templates_path = ['_templates'] 30 | exclude_patterns = [] 31 | 32 | autosummary_generate = True # Turn on sphinx.ext.autosummary 33 | 34 | # -- Options for HTML output 
------------------------------------------------- 35 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 36 | 37 | html_theme = 'sphinx_rtd_theme' 38 | html_static_path = [] 39 | -------------------------------------------------------------------------------- /docs/source/format/citation.rst: -------------------------------------------------------------------------------- 1 | .. _citation: 2 | 3 | Citation and License 4 | ==================== -------------------------------------------------------------------------------- /docs/source/generated/gli.dataloading.combine_graph_and_task.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.dataloading 6 | 7 | 8 | combine_graph_and_task 9 | ====================== 10 | 11 | .. autofunction:: combine_graph_and_task -------------------------------------------------------------------------------- /docs/source/generated/gli.dataloading.get_gli_dataset.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.dataloading 6 | 7 | 8 | get_gli_dataset 9 | =============== 10 | 11 | .. autofunction:: get_gli_dataset -------------------------------------------------------------------------------- /docs/source/generated/gli.dataloading.get_gli_graph.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.dataloading 6 | 7 | 8 | get_gli_graph 9 | ============= 10 | 11 | .. autofunction:: get_gli_graph -------------------------------------------------------------------------------- /docs/source/generated/gli.dataloading.get_gli_task.rst: -------------------------------------------------------------------------------- 1 |  2 | .. 
role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.dataloading 6 | 7 | 8 | get_gli_task 9 | ============ 10 | 11 | .. autofunction:: get_gli_task -------------------------------------------------------------------------------- /docs/source/generated/gli.graph.read_gli_graph.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.graph 6 | 7 | 8 | read_gli_graph 9 | ============== 10 | 11 | .. autofunction:: read_gli_graph -------------------------------------------------------------------------------- /docs/source/generated/gli.io.Attribute.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | Attribute 9 | ========= 10 | 11 | .. autoclass:: Attribute 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.io.save_heterograph.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | save_heterograph 9 | ================ 10 | 11 | .. autofunction:: save_heterograph -------------------------------------------------------------------------------- /docs/source/generated/gli.io.save_homograph.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | save_homograph 9 | ============== 10 | 11 | .. 
autofunction:: save_homograph -------------------------------------------------------------------------------- /docs/source/generated/gli.io.save_task_node_classification.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | save_task_node_classification 9 | ============================= 10 | 11 | .. autofunction:: save_task_node_classification -------------------------------------------------------------------------------- /docs/source/generated/gli.io.save_task_node_regression.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.io 6 | 7 | 8 | save_task_node_regression 9 | ========================= 10 | 11 | .. autofunction:: save_task_node_regression -------------------------------------------------------------------------------- /docs/source/generated/gli.task.GLITask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | GLITask 9 | ======= 10 | 11 | .. autoclass:: GLITask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.GraphClassificationTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | GraphClassificationTask 9 | ======================= 10 | 11 | .. autoclass:: GraphClassificationTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. 
automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.GraphRegressionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | GraphRegressionTask 9 | =================== 10 | 11 | .. autoclass:: GraphRegressionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.KGEntityPredictionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | KGEntityPredictionTask 9 | ====================== 10 | 11 | .. autoclass:: KGEntityPredictionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.KGRelationPredictionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | KGRelationPredictionTask 9 | ======================== 10 | 11 | .. autoclass:: KGRelationPredictionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.LinkPredictionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | LinkPredictionTask 9 | ================== 10 | 11 | .. autoclass:: LinkPredictionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. 
automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.NodeClassificationTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | NodeClassificationTask 9 | ====================== 10 | 11 | .. autoclass:: NodeClassificationTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.NodeRegressionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | NodeRegressionTask 9 | ================== 10 | 11 | .. autoclass:: NodeRegressionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.TimeDependentLinkPredictionTask.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | TimeDependentLinkPredictionTask 9 | =============================== 10 | 11 | .. autoclass:: TimeDependentLinkPredictionTask 12 | :show-inheritance: 13 | :members: 14 | 15 | .. automethod:: __init__ -------------------------------------------------------------------------------- /docs/source/generated/gli.task.read_gli_task.rst: -------------------------------------------------------------------------------- 1 |  2 | .. role:: hidden 3 | :class: hidden-section 4 | 5 | .. currentmodule:: gli.task 6 | 7 | 8 | read_gli_task 9 | ============= 10 | 11 | .. 
autofunction:: read_gli_task -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. GLI documentation master file, created by 2 | sphinx-quickstart on Sun Oct 30 13:29:10 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to GLI's Tutorial and Documentation! 7 | ============================================ 8 | 9 | GLI is an easy-to-use graph learning platform with unique features that can better serve the dataset contributors, in comparison to existing graph learning libraries. It aims to ease and incentivize the creation and curation of datasets. 10 | 11 | Highlighted Features 12 | -------------------- 13 | 14 | Standard Data Format 15 | ~~~~~~~~~~~~~~~~~~~~ 16 | 17 | GLI defines a standard data format that has efficient storage and access to graphs. It unifies the storage for graphs of different scales and heterogeneity and is thus flexible to accommodate various graph-structured data. 18 | 19 | Explicit Separation of Data Storage and Task Configuration 20 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 21 | 22 | GLI makes an explicit separation between the data storage and the task configuration for graph learning. i.e., Multiple tasks can be performed on the same dataset, or the same task can be performed on different datasets. The separation between graphs and tasks further allows users to use general datasets bound to every type of task that can be applied to every graph dataset. 23 | 24 | .. toctree:: 25 | :maxdepth: 1 26 | :caption: Get Started 27 | :hidden: 28 | :glob: 29 | 30 | 31 | start/install 32 | start/tutorial 33 | start/contribute 34 | 35 | 36 | .. 
toctree:: 37 | :maxdepth: 1 38 | :caption: API Reference 39 | :hidden: 40 | :glob: 41 | :titlesonly: 42 | 43 | api/gli 44 | api/task 45 | api/graph 46 | api/dataset 47 | api/utils 48 | api/io 49 | 50 | 51 | .. toctree:: 52 | :maxdepth: 2 53 | :caption: File Format 54 | :hidden: 55 | :glob: 56 | 57 | 58 | format/file 59 | format/citation -------------------------------------------------------------------------------- /docs/source/start/install.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | =============================== 3 | 4 | Currently, we support installation from the source. 5 | 6 | .. code:: bash 7 | 8 | git clone https://github.com/Graph-Learning-Benchmarks/gli.git 9 | cd gli 10 | pip install -e . # basic requirements 11 | pip install -e ".[test]" # test-related requirements 12 | pip install -e ".[doc]" # doc-related requirements 13 | pip install -e ".[full]" # all requirements 14 | 15 | To test the installation, run the following command: 16 | 17 | .. code:: bash 18 | 19 | python example.py --graph cora --task NodeClassification 20 | 21 | The output should be like this: 22 | 23 | :: 24 | 25 | > Graph(s) loading takes 0.0196 seconds and uses 0.9788 MB. 26 | > Task loading takes 0.0016 seconds and uses 0.1218 MB. 27 | > Combining(s) graph and task takes 0.0037 seconds and uses 0.0116 MB. 28 | Dataset("CORA dataset. NodeClassification", num_graphs=1, save_path=/Users/jimmy/.dgl/CORA dataset. NodeClassification)** -------------------------------------------------------------------------------- /gli/__init__.py: -------------------------------------------------------------------------------- 1 | """Root entry.""" 2 | from .config import * 3 | from . import dataloading 4 | from . import dataset 5 | from . import graph 6 | from . import task 7 | from . 
"""Configuration file."""
from os.path import realpath, dirname, expanduser, join

# Repository root (two levels up from this module).
ROOT_PATH = dirname(dirname(realpath(__file__)))
# Warn before densifying arrays larger than this many elements.
WARNING_DENSE_SIZE = 1e9
# Local cache directory for downloaded datasets.
# NOTE: components are passed separately so ``join`` emits the correct
# separator on every platform (".gli/datasets" as a single component would
# yield mixed separators on Windows).
DATASET_PATH = join(expanduser("~"), ".gli", "datasets")
# Index of download URLs for all hosted datasets.
GLOBAL_FILE_URL = "https://jiaqima.github.io/gli/global_urls.json"
# Fallback data-hosting server.
SERVER_IP = "http://34.211.28.138"
-------------------------------------------------------------------------------- /img/flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/img/flowchart.png -------------------------------------------------------------------------------- /img/gli-banner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/img/gli-banner.jpg -------------------------------------------------------------------------------- /img/gli-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Graph-Learning-Benchmarks/gli/8f2065396d59e6b4aaa371e997c3a43f91448429/img/gli-banner.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "gl-indexer" 7 | version = "0.1.0" 8 | authors = [ 9 | { name = "Jiaqi Ma", email = "jiaqima@umich.edu" }, 10 | { name = "Xingjian Zhang", email = "jimmyzxj@umich.edu" }, 11 | ] 12 | description = "Contributor-friendly and metadata-rich platform for graph learning benchmarks." 
13 | readme = "README.md" 14 | requires-python = ">=3.6" 15 | classifiers = [ 16 | "Programming Language :: Python :: 3", 17 | "License :: OSI Approved :: MIT License", 18 | "Operating System :: OS Independent", 19 | ] 20 | dependencies = ["numpy>=1.19", "scipy>=1.5", "torch>=1.10", "dgl>=0.6"] 21 | optional-dependencies = { test = [ 22 | "pytest", 23 | "pydocstyle", 24 | "pycodestyle", 25 | "pylint", 26 | "pyyaml", 27 | "pre-commit", 28 | ], doc = [ 29 | "sphinx", 30 | "sphinx-rtd-theme", 31 | "sphinx_copybutton" 32 | ], tag = [ 33 | "powerlaw", 34 | ] } 35 | 36 | [project.urls] 37 | "Homepage" = "https://github.com/pypa/sampleproject" 38 | 39 | [tool.setuptools.packages.find] 40 | where = ["."] 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19 2 | scipy>=1.5 3 | torch>=1.10 4 | dgl>=0.6 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup script for the package.""" 2 | from setuptools import setup 3 | 4 | setup() 5 | -------------------------------------------------------------------------------- /templates/dataset-folder/LICENSE: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /templates/dataset-folder/README.md: -------------------------------------------------------------------------------- 1 | # 2 | 3 | 4 | 5 | ## Dataset Description 6 | 7 | 8 | 9 | #### Citation 10 | 11 | - Original Source 12 | + [Website]() 13 | + LICENSE: []() 14 | 15 | 16 | 17 | ``` 18 | 19 | ``` 20 | 21 | - Current Version 22 | + [Website]() 23 | + LICENSE: []() 24 | 25 | 26 | 27 | ``` 28 | 29 | ``` 30 | 31 | - Previous Version 32 | + [Website]() 33 | + LICENSE: []() 34 | 35 | 36 | 37 | 38 | 
``` 39 | 40 | ``` 41 | 42 | 43 | 44 | ## Available Tasks 45 | 46 | 47 | 48 | ### 49 | 50 | 51 | 52 | - Task type: `` 53 | 54 | 55 | 56 | #### Citation 57 | 58 | ``` 59 | 60 | ``` 61 | 62 | 63 | 64 | ## Preprocessing 65 | 66 | 67 | 68 | ### Requirements 69 | 70 | ``` 71 | 72 | ``` 73 | 74 | 75 | -------------------------------------------------------------------------------- /templates/dataset-folder/preprocess.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Please use this file to write codes that transform the data files and configuration files from the raw data. Typically this script should include following parts:\n", 8 | "1. Download raw data\n", 9 | "2. Process raw data\n", 10 | "3. Convert the raw data into gli format\n", 11 | "4. save the gli format data into\n", 12 | " 1. configuration files (e.g., metadata.json, ...)\n", 13 | " 2. data files (e.g., cora.npz, ...)\n", 14 | "\n", 15 | "_You can use a Python script (*.py) to preprocess, too._" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [] 22 | } 23 | ], 24 | "metadata": { 25 | "kernelspec": { 26 | "display_name": "Python 3 (ipykernel)", 27 | "language": "python", 28 | "name": "python3" 29 | }, 30 | "language_info": { 31 | "codemirror_mode": { 32 | "name": "ipython", 33 | "version": 3 34 | }, 35 | "file_extension": ".py", 36 | "mimetype": "text/x-python", 37 | "name": "python", 38 | "nbconvert_exporter": "python", 39 | "pygments_lexer": "ipython3", 40 | "version": "3.8.13" 41 | } 42 | }, 43 | "nbformat": 4, 44 | "nbformat_minor": 2 45 | } 46 | -------------------------------------------------------------------------------- /tests/config.yaml: -------------------------------------------------------------------------------- 1 | large_dataset_to_skip: ["wiki", "ogbg-code2"] 2 | 
"""Preprocess before tests."""
import os
import shutil
import subprocess

DATAFILES_URL = ""
NUM_TESTS_THRESHOLD = 999999999999999  # no need to preprocess yet


def _prepare_data_files():
    """Download the combined data archive and distribute its ``.npz`` files.

    Runs only when ``temp/changed_datasets`` exists and lists at least
    ``NUM_TESTS_THRESHOLD`` datasets; otherwise the download is skipped.
    """
    # Guard clauses: nothing to do unless enough datasets changed.
    if not os.path.exists("temp/changed_datasets"):
        return
    with open("temp/changed_datasets", encoding="utf-8") as handle:
        changed_datasets = handle.read().split()
    if len(changed_datasets) < NUM_TESTS_THRESHOLD:
        # do not download the combined data files if # of tests is small
        return

    # Fetch and unpack the combined archive, then delete it.
    archive = "datafiles.tar"
    subprocess.run(["wget", "-q", "-O", archive, DATAFILES_URL], check=True)
    shutil.unpack_archive(archive)
    os.remove(archive)

    # Move every .npz payload next to its dataset's metadata.
    for dataset in os.listdir("datafiles/"):
        src_dir = os.path.join("datafiles/", dataset)
        dst_dir = os.path.join("datasets/", dataset)
        if not os.path.isdir(src_dir):
            continue
        for file_name in os.listdir(src_dir):
            if os.path.splitext(file_name)[-1] == ".npz":
                shutil.move(os.path.join(src_dir, file_name),
                            os.path.join(dst_dir, file_name))
    shutil.rmtree("datafiles/")


if __name__ == "__main__":
    _prepare_data_files()
def check_essential_keys_task_json(dic):
    """Return the list of required keys missing from a task JSON dict.

    The "feature" key only needs to be present (its value may be null);
    every other required key must be present with a non-null value.
    """
    if "type" not in dic:
        return ["type"]
    missing_keys = []
    for key in SUPPORTED_TASK_REQUIRED_KEYS_HASH[dic["type"]]:
        if key == "feature":
            is_missing = key not in dic
        else:
            is_missing = dic.get(key) is None
        if is_missing:
            missing_keys.append(key)
    return missing_keys
"""Functions used in test_training."""
import os
import fnmatch
import json


def get_cfg(dataset):
    """Return fixed (args, model_cfg, train_cfg) dicts for test_training."""
    args = {
        "model": "GCN",
        "dataset": dataset,
        "task": "NodeClassification",
        "gpu": -1
    }

    model_cfg = {
        "num_layers": 2,
        "num_hidden": 8,
        "dropout": .6
    }

    train_cfg = {
        "loss_fun": "cross_entropy",
        "dataset": {
            "self_loop": True,
            "to_dense": True
        },
        "optim": {
            "lr": .005,
            "weight_decay": 0.0005
        },
        "num_trials": 1,
        "max_epoch": 3
    }
    return args, model_cfg, train_cfg


def check_multiple_split_v2(dataset):
    """Return 1 if any task of the dataset has multiple splits, else 0.

    Fixes two defects of the previous version: it returned after
    inspecting only the first matching task file (so a later task*.json
    with multiple splits was never seen), and it implicitly returned
    ``None`` when no task file existed.
    """
    dataset_directory = os.getcwd() + "/datasets/" + dataset
    for file in os.listdir(dataset_directory):
        if fnmatch.fnmatch(file, "task*.json"):
            with open(dataset_directory + "/" + file, encoding="utf-8") as f:
                task_dict = json.load(f)
            # Keep scanning: any task file may declare multiple splits.
            if task_dict.get("num_splits", 1) > 1:
                return 1
    return 0


def check_dataset_task(dataset, target_task):
    """Check whether the dataset supports target_task."""
    directory = os.getcwd() + "/datasets/" + dataset
    for file in os.listdir(directory):
        if fnmatch.fnmatch(file, "task*.json"):
            with open(directory + "/" + file, encoding="utf-8") as f:
                task_dict = json.load(f)
            if task_dict["type"] == target_task:
                return True
    return False


def get_label_number(labels):
    """Return the number of label columns (1 for a 1-D label array)."""
    if len(labels.shape) > 1:
        return labels.shape[1]
    return 1
--------------------------------------------------------------------------------