├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── documentation.md │ ├── feature-request.md │ ├── installation.md │ └── question-help.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── README_CN.md ├── cogdl ├── __init__.py ├── configs.py ├── data │ ├── __init__.py │ ├── batch.py │ ├── data.py │ ├── dataloader.py │ ├── dataset.py │ └── sampler.py ├── datasets │ ├── README.md │ ├── __init__.py │ ├── customized_data.py │ ├── gatne.py │ ├── gcc_data.py │ ├── geom_data.py │ ├── grb_data.py │ ├── gtn_data.py │ ├── han_data.py │ ├── kg_data.py │ ├── matlab_matrix.py │ ├── oagbert_data.py │ ├── ogb.py │ ├── planetoid_data.py │ ├── rd2cd_data.py │ ├── rec_data.py │ ├── saint_data.py │ ├── stgat_data.py │ ├── stgcn_data.py │ └── tu_data.py ├── experiments.py ├── layers │ ├── __init__.py │ ├── actgcn_layer.py │ ├── actgcnii_layer.py │ ├── actlinear_layer.py │ ├── actmlp_layer.py │ ├── actsage_layer.py │ ├── base_layer.py │ ├── deepergcn_layer.py │ ├── disengcn_layer.py │ ├── gat_layer.py │ ├── gat_layerii.py │ ├── gcn_layer.py │ ├── gcn_layerii.py │ ├── gcnii_layer.py │ ├── gin_layer.py │ ├── gine_layer.py │ ├── han_layer.py │ ├── jittor │ │ ├── __init__.py │ │ └── gcn_layer.py │ ├── mixhop_layer.py │ ├── mlp_layer.py │ ├── pprgo_layer.py │ ├── reversible_layer.py │ ├── rgcn_layer.py │ ├── sage_layer.py │ ├── saint_layer.py │ ├── se_layer.py │ ├── set2set.py │ ├── sgc_layer.py │ ├── stgat_layer.py │ └── stgcn_layer.py ├── loggers │ ├── __init__.py │ ├── base_logger.py │ ├── tensorboard_logger.py │ └── wandb_logger.py ├── models │ ├── README.md │ ├── __init__.py │ ├── base_model.py │ ├── emb │ │ ├── __init__.py │ │ ├── complex.py │ │ ├── deepwalk.py │ │ ├── dgk.py │ │ ├── distmult.py │ │ ├── dngr.py │ │ ├── gatne.py │ │ ├── graph2vec.py │ │ ├── grarep.py │ │ ├── hin2vec.py │ │ ├── hope.py │ │ ├── knowledge_base.py │ │ ├── line.py │ │ 
├── metapath2vec.py │ │ ├── netmf.py │ │ ├── netsmf.py │ │ ├── node2vec.py │ │ ├── prone.py │ │ ├── pronepp.py │ │ ├── pte.py │ │ ├── rotate.py │ │ ├── sdne.py │ │ ├── spectral.py │ │ └── transe.py │ └── nn │ │ ├── __init__.py │ │ ├── actgcn.py │ │ ├── agc.py │ │ ├── autognn.py │ │ ├── compgcn.py │ │ ├── correct_smooth.py │ │ ├── daegc.py │ │ ├── deepergcn.py │ │ ├── dgi.py │ │ ├── diffpool.py │ │ ├── disengcn.py │ │ ├── drgat.py │ │ ├── drgcn.py │ │ ├── dropedge_gcn.py │ │ ├── gae.py │ │ ├── gat.py │ │ ├── gcc_model.py │ │ ├── gcn.py │ │ ├── gcnii.py │ │ ├── gcnmix.py │ │ ├── gdc_gcn.py │ │ ├── gin.py │ │ ├── grace.py │ │ ├── grand.py │ │ ├── graph_unet.py │ │ ├── graphsage.py │ │ ├── graphsaint.py │ │ ├── gtn.py │ │ ├── han.py │ │ ├── infograph.py │ │ ├── m3s.py │ │ ├── mixhop.py │ │ ├── mlp.py │ │ ├── moe_gcn.py │ │ ├── mvgrl.py │ │ ├── patchy_san.py │ │ ├── ppnp.py │ │ ├── pprgo.py │ │ ├── revgcn.py │ │ ├── rgcn.py │ │ ├── sagn.py │ │ ├── sgc.py │ │ ├── sign.py │ │ ├── sortpool.py │ │ ├── srgcn.py │ │ ├── stgat.py │ │ └── stgcn.py ├── oag │ ├── README.md │ ├── __init__.py │ ├── bert_model.py │ ├── dual_position_bert_model.py │ ├── oagbert.py │ ├── oagbert_metainfo.py │ └── utils.py ├── operators │ ├── __init__.py │ ├── edge_softmax.py │ ├── edge_softmax │ │ ├── edge_softmax.cc │ │ └── edge_softmax.cu │ ├── fused_gat.py │ ├── jt_spmm.py │ ├── linear.py │ ├── mhspmm.py │ ├── ops.py │ ├── sample.py │ ├── sample │ │ └── sample.cpp │ ├── scatter_max.py │ ├── scatter_max │ │ ├── scatter_max.cc │ │ └── scatter_max.cu │ ├── spmm.py │ └── spmm │ │ ├── computeUtil.h │ │ ├── mhTranspose.cpp │ │ ├── mhTranspose.cu │ │ ├── multiheadSddmm.cpp │ │ ├── multiheadSddmm.cu │ │ ├── multiheadSpmm.cpp │ │ ├── multiheadSpmm.cu │ │ ├── sddmm.cpp │ │ ├── sddmm_kernel.cu │ │ ├── spmm.cpp │ │ ├── spmm_cpu.cpp │ │ └── spmm_kernel.cu ├── options.py ├── pipelines.py ├── trainer │ ├── __init__.py │ ├── controller │ │ ├── __init__.py │ │ ├── data_controller.py │ │ └── training_controller.py │ 
├── embed_trainer.py │ ├── trainer.py │ └── trainer_utils.py ├── utils │ ├── __init__.py │ ├── evaluator.py │ ├── graph_utils.py │ ├── grb_utils.py │ ├── index.py │ ├── link_prediction_utils.py │ ├── optimizer.py │ ├── ppr_utils.py │ ├── prone_utils.py │ ├── rwalk │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── rwalk.c │ │ └── rwalk.h │ ├── sampling.py │ ├── spmm_utils.py │ ├── srgcn_utils.py │ ├── transform.py │ └── utils.py └── wrappers │ ├── __init__.py │ ├── data_wrapper │ ├── __init__.py │ ├── base_data_wrapper.py │ ├── graph_classification │ │ ├── __init__.py │ │ ├── graph_classification_dw.py │ │ ├── graph_embedding_dw.py │ │ ├── infograph_dw.py │ │ └── patchy_san_dw.py │ ├── heterogeneous │ │ ├── __init__.py │ │ ├── heterogeneous_embedding_dw.py │ │ ├── heterogeneous_gnn_dw.py │ │ └── multiplex_embedding_dw.py │ ├── link_prediction │ │ ├── __init__.py │ │ ├── embedding_link_prediction_dw.py │ │ ├── gnn_kg_link_prediction_dw.py │ │ ├── gnn_link_prediction_dw.py │ │ └── triple_link_prediction_dw.py │ ├── node_classification │ │ ├── __init__.py │ │ ├── cluster_dw.py │ │ ├── graphsage_dw.py │ │ ├── m3s_dw.py │ │ ├── network_embedding_dw.py │ │ ├── node_classification_dw.py │ │ ├── pprgo_dw.py │ │ ├── sagn_dw.py │ │ └── unsup_graphsage_dw.py │ ├── pretraining │ │ ├── __init__.py │ │ └── gcc_dw.py │ └── traffic_prediction │ │ ├── __init__.py │ │ ├── stgat_dw.py │ │ └── stgcn_dw.py │ ├── default_match.py │ ├── model_wrapper │ ├── __init__.py │ ├── base_model_wrapper.py │ ├── clustering │ │ ├── __init__.py │ │ ├── agc_mw.py │ │ ├── daegc_mw.py │ │ └── gae_mw.py │ ├── graph_classification │ │ ├── __init__.py │ │ ├── graph_classification_mw.py │ │ ├── graph_embedding_mw.py │ │ └── infograph_mw.py │ ├── heterogeneous │ │ ├── __init__.py │ │ ├── heterogeneous_embedding_mw.py │ │ ├── heterogeneous_gnn_mw.py │ │ └── multiplex_embedding_mw.py │ ├── link_prediction │ │ ├── __init__.py │ │ ├── embedding_link_prediction_mw.py │ │ ├── gnn_kg_link_prediction_mw.py │ │ ├── 
gnn_link_prediction_mw.py │ │ └── triple_link_prediction_mw.py │ ├── node_classification │ │ ├── __init__.py │ │ ├── correct_smooth_mw.py │ │ ├── dgi_mw.py │ │ ├── gcnmix_mw.py │ │ ├── grace_mw.py │ │ ├── grand_mw.py │ │ ├── graphsage_mw.py │ │ ├── m3s_mw.py │ │ ├── mvgrl_mw.py │ │ ├── network_embedding_mw.py │ │ ├── node_classification_mw.py │ │ ├── pprgo_mw.py │ │ ├── sagn_mw.py │ │ ├── self_auxiliary_mw.py │ │ └── unsup_graphsage_mw.py │ ├── pretraining │ │ ├── __init__.py │ │ └── gcc_mw.py │ └── traffic_prediction │ │ ├── __init__.py │ │ ├── stgat_mw.py │ │ └── stgcn_mw.py │ └── tools │ ├── __init__.py │ ├── memory_moco.py │ └── wrapper_utils.py ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _static │ ├── cogdl-logo.png │ ├── cogdl-training.png │ ├── coo.png │ ├── csr.png │ └── graph.jpg │ ├── api │ ├── data.rst │ ├── data_wrappers.rst │ ├── datasets.rst │ ├── experiments.rst │ ├── layers.rst │ ├── model_wrappers.rst │ ├── models.rst │ ├── options.rst │ ├── pipelines.rst │ └── utils.rst │ ├── conf.py │ ├── examples │ ├── 1graph.py │ ├── 2training.py │ ├── 3custom_dataset.py │ ├── 4custom_gnn.py │ └── README.rst │ ├── index.rst │ ├── install.rst │ ├── quickstart.rst │ ├── tutorial │ ├── custom_dataset.rst │ ├── custom_gnn.rst │ ├── graph.rst │ ├── model.rst │ ├── results.rst │ └── training.rst │ └── tutorial_cn │ ├── custom_dataset_cn.rst │ ├── custom_gnn_cn.rst │ ├── examples │ ├── 1graph_cn.py │ ├── 2training_cn.py │ ├── 3custom_dataset_cn.py │ ├── 4custom_gnn.py │ └── README.rst │ ├── graph_cn.rst │ ├── index.rst │ ├── install_cn.rst │ ├── model_cn.rst │ ├── quickstart_cn.rst │ └── training_cn.rst ├── examples ├── GRB │ ├── README_GRB.md │ ├── __init__.py │ ├── adversarial_training.ipynb │ ├── attack │ │ ├── __init__.py │ │ ├── base.py │ │ ├── injection │ │ │ ├── __init__.py │ │ │ ├── fgsm.py │ │ │ ├── pgd.py │ │ │ ├── rand.py │ │ │ ├── speit.py │ │ │ └── tdgia.py │ │ └── modification │ │ │ ├── __init__.py │ │ │ ├── dice.py │ │ │ ├── 
fga.py │ │ │ ├── flip.py │ │ │ ├── nea.py │ │ │ ├── pgd.py │ │ │ ├── prbcd.py │ │ │ ├── rand.py │ │ │ └── stack.py │ ├── defense │ │ ├── __init__.py │ │ ├── gcnsvd.py │ │ ├── gnnguard.py │ │ └── robustgcn.py │ ├── defense_model.ipynb │ ├── injection_attack.ipynb │ ├── modification_attack.ipynb │ ├── test_adv.py │ ├── test_attack_defense.py │ ├── test_defense.py │ ├── test_injection.py │ └── test_modification.py ├── VRGCN │ ├── README.md │ ├── VRGCN.py │ ├── dataloder.py │ └── main.py ├── bgrl │ ├── README.md │ ├── data.py │ ├── models.py │ ├── train.py │ └── utils.py ├── dgraph │ ├── README.md │ ├── dataset │ │ └── ReadMe.txt │ ├── gnn.py │ ├── logger.py │ ├── models │ │ ├── __init__.py │ │ ├── dgi.py │ │ ├── gat.py │ │ ├── gcn.py │ │ ├── gin.py │ │ ├── grand.py │ │ ├── graphsage.py │ │ ├── mixhop.py │ │ ├── mlp.py │ │ ├── sgc.py │ │ └── sign.py │ └── utils │ │ ├── __init__.py │ │ ├── dgraphfin.py │ │ ├── evaluator.py │ │ └── utils.py ├── gcc │ ├── README.md │ └── run_gcc.py ├── graphmae │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── configs.yml │ ├── graphmae │ │ ├── __init__.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ └── data_util.py │ │ ├── evaluation.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── edcoder.py │ │ │ ├── gat.py │ │ │ ├── gcn.py │ │ │ ├── gin.py │ │ │ └── loss_func.py │ │ └── utils.py │ ├── imgs │ │ ├── ablation.jpg │ │ ├── compare.png │ │ └── fig.png │ ├── main_graph.py │ ├── main_inductive.py │ ├── main_transductive.py │ ├── requirements.txt │ └── scripts │ │ ├── run_graph.sh │ │ ├── run_inductive.sh │ │ └── run_transductive.sh ├── graphmae2 │ ├── README.md │ ├── asserts │ │ └── overview.png │ ├── configs │ │ ├── citeseer.yaml │ │ ├── cora.yaml │ │ ├── mag-scholar-f.yaml │ │ ├── ogbn-arxiv.yaml │ │ ├── ogbn-papers100M.yaml │ │ ├── ogbn-products.yaml │ │ └── pubmed.yaml │ ├── datasets │ │ ├── __init__.py │ │ ├── data_proc.py │ │ ├── lc_sampler.py │ │ └── localclustering.py │ ├── main_full_batch.py │ ├── main_large.py │ ├── models │ │ 
├── __init__.py │ │ ├── edcoder.py │ │ ├── finetune.py │ │ ├── gat.py │ │ ├── gcn.py │ │ └── loss_func.py │ ├── run_fullbatch.sh │ ├── run_minibatch.sh │ └── utils.py ├── jittor │ └── gcn.py ├── legacy │ ├── custom_dataset.py │ ├── custom_gcn.py │ ├── custom_triple_dataset.py │ ├── cv_search.py │ ├── generate_emb.py │ ├── notebooks │ │ ├── build_gnn_applications.ipynb │ │ ├── quickstart.ipynb │ │ └── write_your_first_gcn.ipynb │ ├── pipeline.py │ ├── quick_start.py │ └── recommendation.py ├── oagbert │ ├── calculate_paper_similarity.py │ ├── generate_title.py │ ├── oagbert.py │ ├── oagbert_encode_paper.py │ ├── oagbert_metainfo.py │ ├── oagbert_metainfo_zh.py │ └── oagbert_metainfo_zh_similarity.py ├── ogb │ ├── arxiv │ │ ├── README.md │ │ └── gnn.py │ └── products │ │ ├── README.md │ │ └── gnn.py ├── pyg │ ├── README.md │ ├── chebnet.py │ ├── dgcnn.py │ ├── gat.py │ ├── gcn.py │ └── unet.py ├── simple_hgn │ ├── README.md │ ├── conv.py │ └── run.py └── simple_trafficPre │ ├── __init__.py │ └── example.py ├── gnn_papers.md ├── pyproject.toml ├── results.md ├── scripts ├── display_data.py ├── download.py ├── installation │ ├── gcc.sh │ └── metis.sh └── train.py ├── setup.py ├── tests ├── datasets │ ├── test_customized_data.py │ ├── test_data.py │ ├── test_gcc_data.py │ ├── test_geom_data.py │ ├── test_kg_data.py │ ├── test_matlab_data.py │ ├── test_oagbert_data.py │ ├── test_ogb.py │ ├── test_planetoid.py │ ├── test_rd2cd_data.py │ ├── test_rec_data.py │ └── test_saint_data.py ├── models │ ├── emb │ │ └── test_deepwalk.py │ └── ssl │ │ ├── test_contrastive_models.py │ │ └── test_generative_models.py ├── tasks │ ├── test_attributed_graph_clustering.py │ ├── test_encode_paper.py │ ├── test_graph_classification.py │ ├── test_heterogeneous_node_classification.py │ ├── test_link_prediction.py │ ├── test_multiplex_link_prediction.py │ ├── test_node_classification.py │ ├── test_triple_link_prediction.py │ ├── test_unsupervised_graph_classification.py │ └── 
test_unsupervised_node_classification.py ├── test_args.py ├── test_experiments.py ├── test_layers.py ├── test_oag.py ├── test_ops.py ├── test_options.py ├── test_pipelines.py └── test_utils.py └── third_party └── README.md /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E203, E266, E501, W503, F403, F401, W291 3 | max-line-length = 120 4 | max-complexity = 18 5 | select = B,C,E,F,W,T4,B9 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F41B Bug Report" 3 | about: Submit a bug report to help us improve CogDL 4 | 5 | --- 6 | 7 | ## 🐛 Bug 8 | 9 | 10 | 11 | ## To Reproduce 12 | 13 | Steps to reproduce the behavior: 14 | 15 | 1. 16 | 1. 17 | 1. 18 | 19 | 20 | 21 | ## Expected behavior 22 | 23 | 24 | 25 | ## Environment 26 | 27 | - CogDL version: 28 | - OS (e.g., Linux): 29 | - Python version: 30 | - PyTorch version: 31 | - CUDA/cuDNN version (if applicable): 32 | - Any other relevant information: 33 | 34 | ## Additional context 35 | 36 | 37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4DA Documentation" 3 | about: Report an issue related to cogdl.readthedocs.io 4 | 5 | --- 6 | 7 | ## 📚 Documentation 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F680 Feature Request" 3 | about: Submit a proposal/request for a new CogDL feature 4 | --- 5 | 6 | ## 🚀 Feature 7 | 8 | 9 | 10 | ## Motivation 11 | 12 | 13 | 14 | ## Additional context 15 | 16 | 17 | 
-------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "\U0001F4DA Installation" 3 | about: Report an installation problem with CogDL 4 | --- 5 | 6 | ## 📚 Installation 7 | 8 | 9 | 10 | ## Environment 11 | 12 | * OS: 13 | * Python version: 14 | * PyTorch version: 15 | * CUDA/cuDNN version: 16 | * How did you try to install CogDL (wheel, source): 17 | * Any other relevant information: 18 | 19 | ## Checklist 20 | 21 | - [ ] I followed the [installation guide](https://github.com/thudm/cogdl#requirements-and-installation). 22 | - [ ] I set up CUDA correctly. 23 | - [ ] I do have multiple CUDA versions on my machine. 24 | 25 | ## Additional context 26 | 27 | 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question-help.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓Questions & Help" 3 | about: Start a general discussion related to CogDL 4 | --- 5 | 6 | ## ❓ Questions & Help 7 | 8 | 9 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | 4 | ## Checklist 5 | Please feel free to remove inapplicable items for your PR. 6 | - [ ] The PR title starts with [$CATEGORY] (such as [Model], [Doc], [Feature], [Bugfix]) 7 | - [ ] Changes are complete (i.e. 
I finished coding on this PR) 8 | - [ ] All changes have test coverage 9 | - [ ] Code is well-documented 10 | - [ ] To my best knowledge, examples are either not affected by this change, 11 | or have been fixed to be compatible with this change 12 | - [ ] Related issue is referred in this PR 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.DS_Store 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | .vscode/ 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | data/ 107 | *.npy 108 | checkpoints/ 109 | saved/ 110 | *.png 111 | *.cluster 112 | *.tar 113 | docs/source/_build/ 114 | metis* 115 | *.dict 116 | *.csv 117 | *.sql 118 | *.pt 119 | *.npz 120 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "third_party/dgNN"] 2 | ignore = dirty 3 | path = third_party/dgNN 4 | url = ../../dgSPARSE/dgNN 5 | branch = main 6 | [submodule "third_party/actnn"] 7 | ignore = dirty 8 | path = third_party/actnn 9 | url = ../../ucbrise/actnn 10 | branch = main 11 | [submodule "third_party/fastmoe"] 12 | ignore = dirty 13 | path = third_party/fastmoe 14 | url = ../../laekov/fastmoe 15 | branch = master 16 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 20.8b1 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | args: [--line-length=120] # this should be in pyproject.toml not sure why its not picked up 8 | - repo: https://gitlab.com/pycqa/flake8 9 | rev: 3.8.4 10 | hooks: 11 | - id: flake8 12 | additional_dependencies: [flake8-typing-imports==1.9.0] 13 | args: ['--config=.flake8'] 14 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # Required 2 | version: 2 3 | 4 | # Build documentation in the docs/ directory with Sphinx 5 | sphinx: 6 | configuration: docs/source/conf.py 7 | 8 | # Optionally build your docs in additional formats such as PDF and ePub 9 | formats: all 10 | 11 | # Optionally set the version of Python and requirements required to build your docs 12 | python: 13 | version: 3.7 14 | install: 15 | - requirements: docs/requirements.txt 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.7" 5 | 6 | install: 7 | - pip install https://download.pytorch.org/whl/cpu/torch-1.7.1%2Bcpu-cp37-cp37m-linux_x86_64.whl 8 | - pip install https://pytorch-geometric.com/whl/torch-1.7.0+cpu/torch_scatter-2.0.7-cp37-cp37m-linux_x86_64.whl 9 | - pip install https://pytorch-geometric.com/whl/torch-1.7.0+cpu/torch_sparse-0.6.9-cp37-cp37m-linux_x86_64.whl 10 | - pip install packaging==20.9 11 | - pip install setuptools==60.9.0 12 | - bash ./scripts/installation/metis.sh 13 | - source ./scripts/installation/gcc.sh 14 | - pip install metis 15 | - pip install -r 
docs/requirements.txt 16 | - pip install -e . 17 | 18 | before_script: 19 | - black cogdl 20 | - flake8 cogdl 21 | 22 | script: 23 | - export METIS_DLL=~/.local/lib/libmetis.so 24 | - coverage run --source=cogdl -m pytest tests 25 | - cd docs && make clean && make html && cd .. 26 | 27 | after_success: 28 | - coveralls 29 | 30 | notifications: 31 | email: false 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 CogDL Team, KEG 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
try:
    from typing import GenericMeta  # python 3.6
except ImportError:
    # In python >= 3.7 ``GenericMeta`` no longer exists, but it is only
    # needed here to keep the metaclass MRO consistent with torch's
    # generic DataLoader, so a plain ``type`` stand-in is sufficient.
    class GenericMeta(type):
        pass


class RecordParameters(ABCMeta):
    # Metaclass that intercepts instance creation and stashes the
    # constructor arguments on the new object via ``record_parameters``,
    # so an equivalent loader can later be re-created from
    # ``get_parameters()``.
    def __call__(cls, *args, **kwargs):
        obj = type.__call__(cls, *args, **kwargs)
        obj.record_parameters([args, kwargs])
        return obj


class GenericRecordParameters(GenericMeta, RecordParameters):
    # Combined metaclass: ``GenericMeta`` for compatibility with the
    # generic ``torch.utils.data.DataLoader`` base class, plus the
    # parameter-recording behavior above.
    pass


class DataLoader(torch.utils.data.DataLoader, metaclass=GenericRecordParameters):
    r"""Data loader which merges data objects from a
    :class:`cogdl.data.dataset` to a mini-batch.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Fall back to the graph-aware collate function unless the caller
        # supplied an explicit one.
        if "collate_fn" not in kwargs or kwargs["collate_fn"] is None:
            kwargs["collate_fn"] = self.collate_fn

        super(DataLoader, self).__init__(
            dataset,
            batch_size,
            shuffle,
            **kwargs,
        )

    @staticmethod
    def collate_fn(batch):
        """Merge a list of samples into one batched object.

        ``Graph`` samples are merged with :meth:`Batch.from_data_list`;
        tensors use torch's default collation; floats become a single
        float tensor. Any other element type raises ``TypeError``.
        """
        item = batch[0]
        if isinstance(item, Graph):
            return Batch.from_data_list(batch)
        elif isinstance(item, torch.Tensor):
            return default_collate(batch)
        elif isinstance(item, float):
            return torch.tensor(batch, dtype=torch.float)

        raise TypeError("DataLoader found invalid type: {}".format(type(item)))

    def get_parameters(self):
        # The ``[args, kwargs]`` list captured at construction time by the
        # ``RecordParameters`` metaclass.
        return self.default_kwargs

    def record_parameters(self, params):
        # Called by ``RecordParameters.__call__`` right after __init__.
        self.default_kwargs = params
class ActGCNLayer(nn.Module):
    """Activation-compressed GCN layer (actnn variant of ``GCNLayer``).

    Implements the graph convolution of https://arxiv.org/abs/1609.02907
    using actnn's quantized sub-modules. After the sparse aggregation it
    optionally applies batch normalization, ReLU, a residual linear
    projection of the input, and dropout — in that order.
    """

    def __init__(
        self, in_features, out_features, dropout=0.0, activation=None, residual=False, norm=None, bias=True, rp_ratio=1
    ):
        super(ActGCNLayer, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.linear = QLinear(in_features, out_features, bias=bias, rp_ratio=rp_ratio)
        # Optional components are held as None when disabled.
        self.dropout = QDropout(dropout) if dropout > 0 else None
        self.residual = QLinear(in_features, out_features, rp_ratio=rp_ratio) if residual else None
        # actnn only provides a quantized ReLU, so any requested activation
        # maps to QReLU.
        self.act = QReLU() if activation is not None else None
        if norm is None:
            self.norm = None
        elif norm == "batchnorm":
            self.norm = QBatchNorm1d(out_features)
        else:
            raise NotImplementedError
        self.reset_parameters()

    def reset_parameters(self):
        # Uniform init in [-1/sqrt(out_features), 1/sqrt(out_features)],
        # matching the dense GCNLayer.
        bound = 1.0 / math.sqrt(self.out_features)
        torch.nn.init.uniform_(self.linear.weight, -bound, bound)

    def forward(self, graph, x):
        h = spmm(graph, self.linear(x), actnn=True)
        if self.norm is not None:
            h = self.norm(h)
        if self.act is not None:
            h = self.act(h)
        if self.residual is not None:
            h = h + self.residual(x)
        if self.dropout is not None:
            h = self.dropout(h)
        return h
class QLinear(nn.Linear):
    """Linear layer with activation-compressed backward pass (actnn).

    During training the forward/backward pass is delegated to the custom
    ``linear`` autograd function with an optional per-layer quantization
    scheme; outside of training it behaves exactly like ``nn.Linear``.
    """

    num_layers = 0

    def __init__(self, input_features, output_features, bias=True, group=0, rp_ratio=2):
        super(QLinear, self).__init__(input_features, output_features, bias)
        # Attach a quantization scheme only when actnn's adaptive
        # convolution scheme is enabled in its global config.
        self.scheme = QScheme(self, group=group) if config.adaptive_conv_scheme else None
        self.rp_ratio = rp_ratio

    def forward(self, input):
        # Outside of training, fall back to the dense nn.Linear path.
        if not config.training:
            return super(QLinear, self).forward(input)
        return linear.apply(input, self.weight, self.bias, self.scheme, self.rp_ratio)
self.dropout = dropout 31 | if aggr == "mean": 32 | self.aggr = MeanAggregator() 33 | elif aggr == "sum": 34 | self.aggr = SumAggregator() 35 | else: 36 | raise NotImplementedError 37 | 38 | if dropout > 0: 39 | self.dropout = QDropout(dropout) 40 | else: 41 | self.dropout = None 42 | 43 | if activation is not None: 44 | self.act = QReLU() 45 | else: 46 | self.act = None 47 | 48 | if norm is not None: 49 | if norm == "batchnorm": 50 | self.norm = QBatchNorm1d(out_feats) 51 | else: 52 | raise NotImplementedError 53 | else: 54 | self.norm = None 55 | 56 | def forward(self, graph, x): 57 | out = self.aggr(graph, x) 58 | out = torch.cat([x, out], dim=-1) 59 | out = self.fc(out) 60 | if self.normalize: 61 | out = F.normalize(out, p=2.0, dim=-1) 62 | 63 | if self.norm is not None: 64 | out = self.norm(out) 65 | if self.act is not None: 66 | out = self.act(out) 67 | 68 | if self.dropout is not None: 69 | out = self.dropout(out) 70 | return out 71 | -------------------------------------------------------------------------------- /cogdl/layers/base_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class BaseLayer(nn.Module): 6 | def __init__(self, **kwargs) -> None: 7 | super().__init__(**kwargs) 8 | 9 | def forward(self, graph, x): 10 | m = self.message(x[graph.edge_index[0]]) 11 | return self.aggregate(graph, m) 12 | 13 | def message(self, x): 14 | return x 15 | 16 | def aggregate(self, graph, x): 17 | result = torch.zeros(graph.num_nodes, x.shape[1], dtype=x.dtype).to(x.device) 18 | result.scatter_add_(0, graph.edge_index[1].unsqueeze(1).expand(-1, x.shape[1]), x) 19 | return result 20 | -------------------------------------------------------------------------------- /cogdl/layers/gcn_layer.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from cogdl.utils import spmm, 
get_activation 7 | 8 | 9 | class GCNLayer(nn.Module): 10 | """ 11 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 12 | """ 13 | 14 | def __init__( 15 | self, in_features, out_features, dropout=0.0, activation=None, residual=False, norm=None, bias=True, **kwargs 16 | ): 17 | super(GCNLayer, self).__init__() 18 | self.in_features = in_features 19 | self.out_features = out_features 20 | self.linear = nn.Linear(in_features, out_features, bias=bias) 21 | if dropout > 0: 22 | self.dropout = nn.Dropout(dropout) 23 | else: 24 | self.dropout = None 25 | if residual: 26 | self.residual = nn.Linear(in_features, out_features) 27 | else: 28 | self.residual = None 29 | 30 | if activation is not None: 31 | self.act = get_activation(activation, inplace=True) 32 | else: 33 | self.act = None 34 | 35 | if norm is not None: 36 | if norm == "batchnorm": 37 | self.norm = nn.BatchNorm1d(out_features) 38 | elif norm == "layernorm": 39 | self.norm = nn.LayerNorm(out_features) 40 | else: 41 | raise NotImplementedError 42 | else: 43 | self.norm = None 44 | 45 | self.reset_parameters() 46 | 47 | def reset_parameters(self): 48 | stdv = 1.0 / math.sqrt(self.out_features) 49 | torch.nn.init.uniform_(self.linear.weight, -stdv, stdv) 50 | 51 | def forward(self, graph, x): 52 | support = self.linear(x) 53 | out = spmm(graph, support) 54 | 55 | if self.norm is not None: 56 | out = self.norm(out) 57 | if self.act is not None: 58 | out = self.act(out) 59 | 60 | if self.residual is not None: 61 | out = out + self.residual(x) 62 | if self.dropout is not None: 63 | out = self.dropout(out) 64 | return out 65 | -------------------------------------------------------------------------------- /cogdl/layers/gcn_layerii.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from cogdl.utils import spmm_scatter, get_activation 5 | 6 | 7 | class GCNLayerST(nn.Module): 8 | def __init__( 9 | self, 
in_features, out_features, dropout=0.0, activation='relu', residual=False, norm=None, bias=True, **kwargs 10 | ): 11 | super(GCNLayerST, self).__init__() 12 | self.in_features = in_features 13 | self.out_features = out_features 14 | self.linear = nn.Linear(in_features, out_features, bias=bias) 15 | if dropout > 0: 16 | self.dropout = nn.Dropout(dropout) 17 | else: 18 | self.dropout = None 19 | if residual: 20 | self.residual = nn.Linear(in_features, out_features) 21 | else: 22 | self.residual = None 23 | 24 | if activation is not None: 25 | self.act = get_activation(activation, inplace=True) 26 | else: 27 | self.act = None 28 | 29 | if norm is not None: 30 | if norm == "batchnorm": 31 | self.norm = nn.BatchNorm1d(out_features) 32 | elif norm == "layernorm": 33 | self.norm = nn.LayerNorm(out_features) 34 | else: 35 | raise NotImplementedError 36 | else: 37 | self.norm = None 38 | 39 | self.reset_parameters() 40 | 41 | def reset_parameters(self): 42 | stdv = 1.0 / math.sqrt(self.out_features) 43 | torch.nn.init.uniform_(self.linear.weight, -stdv, stdv) 44 | 45 | def forward(self, x, edge_index, edge_weight): 46 | support = self.linear(x) 47 | row, col = edge_index 48 | out = spmm_scatter(row, col, edge_weight, support) 49 | 50 | if self.norm is not None: 51 | out = self.norm(out) 52 | if self.act is not None: 53 | out = self.act(out) 54 | 55 | if self.residual is not None: 56 | out = out + self.residual(x) 57 | if self.dropout is not None: 58 | out = self.dropout(out) 59 | return out 60 | -------------------------------------------------------------------------------- /cogdl/layers/gcnii_layer.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from cogdl.utils import spmm 7 | 8 | 9 | class GCNIILayer(nn.Module): 10 | def __init__(self, n_channels, alpha=0.1, beta=1, residual=False): 11 | super(GCNIILayer, self).__init__() 12 | self.n_channels = n_channels 13 | 
self.alpha = alpha 14 | self.beta = beta 15 | self.residual = residual 16 | self.linear = nn.Linear(n_channels, n_channels) 17 | self.reset_parameters() 18 | 19 | def reset_parameters(self): 20 | stdv = 1.0 / math.sqrt(self.n_channels) 21 | torch.nn.init.uniform_(self.linear.weight, -stdv, stdv) 22 | 23 | def forward(self, graph, x, init_x): 24 | """Symmetric normalization""" 25 | hidden = spmm(graph, x) 26 | hidden = (1 - self.alpha) * hidden + self.alpha * init_x 27 | h = self.beta * self.linear(hidden) + (1 - self.beta) * hidden 28 | if self.residual: 29 | h = h + x 30 | return h 31 | -------------------------------------------------------------------------------- /cogdl/layers/gin_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from cogdl.utils import spmm 5 | 6 | 7 | class GINLayer(nn.Module): 8 | r"""Graph Isomorphism Network layer from paper `"How Powerful are Graph 9 | Neural Networks?" `__. 10 | 11 | .. math:: 12 | h_i^{(l+1)} = f_\Theta \left((1 + \epsilon) h_i^{l} + 13 | \mathrm{sum}\left(\left\{h_j^{l}, j\in\mathcal{N}(i) 14 | \right\}\right)\right) 15 | 16 | Parameters 17 | ---------- 18 | apply_func : callable layer function) 19 | layer or function applied to update node feature 20 | eps : float32, optional 21 | Initial `\epsilon` value. 22 | train_eps : bool, optional 23 | If True, `\epsilon` will be a learnable parameter. 
24 | """ 25 | 26 | def __init__(self, apply_func=None, eps=0, train_eps=True): 27 | super(GINLayer, self).__init__() 28 | if train_eps: 29 | self.eps = torch.nn.Parameter(torch.FloatTensor([eps])) 30 | else: 31 | self.register_buffer("eps", torch.FloatTensor([eps])) 32 | self.apply_func = apply_func 33 | 34 | def forward(self, graph, x): 35 | out = (1 + self.eps) * x + spmm(graph, x) 36 | if self.apply_func is not None: 37 | out = self.apply_func(out) 38 | return out 39 | -------------------------------------------------------------------------------- /cogdl/layers/gine_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from cogdl.utils import spmm 4 | 5 | from . import BaseLayer 6 | 7 | 8 | class GINELayer(BaseLayer): 9 | r"""The modified GINConv operator from the `"Graph convolutions that can finally model local structure" paper 10 | `__. 11 | 12 | Parameters 13 | ---------- 14 | apply_func : callable layer function) 15 | layer or function applied to update node feature 16 | eps : float32, optional 17 | Initial `\epsilon` value. 18 | train_eps : bool, optional 19 | If True, `\epsilon` will be a learnable parameter. 
20 | """ 21 | 22 | def __init__(self, apply_func=None, eps=0, train_eps=True): 23 | super(GINELayer, self).__init__() 24 | if train_eps: 25 | self.eps = torch.nn.Parameter(torch.FloatTensor([eps])) 26 | else: 27 | self.register_buffer("eps", torch.FloatTensor([eps])) 28 | self.apply_func = apply_func 29 | 30 | def forward(self, graph, x): 31 | # m = self.message(x[graph.edge_index[0]], graph.edge_attr) 32 | # out = self.aggregate(graph, m) 33 | out = spmm(graph, x) 34 | out += (1 + self.eps) * x 35 | if self.apply_func is not None: 36 | out = self.apply_func(out) 37 | return out 38 | 39 | def message(self, x, attr): 40 | return F.relu(x + attr) 41 | -------------------------------------------------------------------------------- /cogdl/layers/han_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .gat_layer import GATLayer 5 | 6 | 7 | class AttentionLayer(nn.Module): 8 | def __init__(self, num_features): 9 | super(AttentionLayer, self).__init__() 10 | self.linear = nn.Linear(num_features, 1) 11 | 12 | def forward(self, x): 13 | att = self.linear(x).view(-1, 1, x.shape[1]) 14 | return torch.matmul(att, x).squeeze(1) 15 | 16 | 17 | class HANLayer(nn.Module): 18 | def __init__(self, num_edge, w_in, w_out): 19 | super(HANLayer, self).__init__() 20 | self.gat_layer = nn.ModuleList() 21 | for _ in range(num_edge): 22 | self.gat_layer.append(GATLayer(w_in, w_out // 8, 8)) 23 | self.att_layer = AttentionLayer(w_out) 24 | 25 | def forward(self, graph, x): 26 | adj = graph.adj 27 | output = [] 28 | with graph.local_graph(): 29 | for i, edge in enumerate(adj): 30 | graph.edge_index = edge[0] 31 | output.append(self.gat_layer[i](graph, x)) 32 | output = torch.stack(output, dim=1) 33 | 34 | return self.att_layer(output) 35 | -------------------------------------------------------------------------------- /cogdl/layers/jittor/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .gcn_layer import GCNLayer 2 | -------------------------------------------------------------------------------- /cogdl/layers/jittor/gcn_layer.py: -------------------------------------------------------------------------------- 1 | import jittor as jt 2 | from jittor import nn, Module, init 3 | 4 | from cogdl.operators.jt_spmm import spmm 5 | 6 | 7 | class GCNLayer(Module): 8 | def __init__( 9 | self, in_features, out_features, dropout=0.0, activation=None, residual=False, norm=None, bias=True, **kwargs 10 | ): 11 | super(GCNLayer, self).__init__() 12 | self.in_features = in_features 13 | self.out_features = out_features 14 | self.linear = nn.Linear(in_features, out_features, bias=bias) 15 | if dropout > 0: 16 | self.dropout = nn.Dropout(dropout) 17 | else: 18 | self.dropout = None 19 | if residual: 20 | self.residual = nn.Linear(in_features, out_features) 21 | else: 22 | self.residual = None 23 | 24 | if activation is not None and activation == "relu": 25 | self.act = nn.ReLU() 26 | else: 27 | self.act = None 28 | 29 | if norm is not None: 30 | if norm == "batchnorm": 31 | self.norm = nn.BatchNorm1d(out_features) 32 | elif norm == "layernorm": 33 | self.norm = nn.LayerNorm(out_features) 34 | else: 35 | raise NotImplementedError 36 | else: 37 | self.norm = None 38 | 39 | self.reset_parameters() 40 | 41 | def reset_parameters(self): 42 | init.xavier_uniform_(self.linear.weight) 43 | 44 | def execute(self, graph, x): 45 | support = self.linear(x) 46 | out = spmm(graph, support) 47 | 48 | if self.norm is not None: 49 | out = self.norm(out) 50 | if self.act is not None: 51 | out = self.act(out) 52 | 53 | if self.residual is not None: 54 | out = out + self.residual(x) 55 | if self.dropout is not None: 56 | out = self.dropout(out) 57 | return out 58 | 59 | -------------------------------------------------------------------------------- /cogdl/layers/mixhop_layer.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from cogdl.utils import spmm 4 | 5 | 6 | class MixHopLayer(nn.Module): 7 | def __init__(self, num_features, adj_pows, dim_per_pow): 8 | super(MixHopLayer, self).__init__() 9 | self.num_features = num_features 10 | self.adj_pows = adj_pows 11 | self.dim_per_pow = dim_per_pow 12 | self.total_dim = 0 13 | self.linears = torch.nn.ModuleList() 14 | for dim in dim_per_pow: 15 | self.linears.append(nn.Linear(num_features, dim)) 16 | self.total_dim += dim 17 | # self.reset_parameters() 18 | 19 | def reset_parameters(self): 20 | for linear in self.linears: 21 | linear.reset_parameters() 22 | 23 | def adj_pow_x(self, graph, x, p): 24 | for _ in range(p): 25 | x = spmm(graph, x) 26 | return x 27 | 28 | def forward(self, graph, x): 29 | graph.sym_norm() 30 | output_list = [] 31 | for p, linear in zip(self.adj_pows, self.linears): 32 | output = linear(self.adj_pow_x(graph, x, p)) 33 | output_list.append(output) 34 | 35 | return torch.cat(output_list, dim=1) 36 | -------------------------------------------------------------------------------- /cogdl/layers/pprgo_layer.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from cogdl.utils import get_activation 7 | 8 | 9 | class LinearLayer(nn.Module): 10 | def __init__(self, in_features, out_features, bias=True): 11 | super(LinearLayer, self).__init__() 12 | self.in_features = in_features 13 | self.out_features = out_features 14 | self.weight = nn.Parameter(torch.Tensor(out_features, in_features)) 15 | if bias: 16 | self.bias = nn.Parameter(torch.Tensor(out_features)) 17 | else: 18 | self.register_parameter("bias", None) 19 | self.reset_parameters() 20 | 21 | def reset_parameters(self): 22 | nn.init.kaiming_uniform_(self.weight, mode="fan_out", a=math.sqrt(5)) 23 | if self.bias is not None: 24 | fan_in, 
_ = nn.init._calculate_fan_in_and_fan_out(self.weight) 25 | bound = 1 / nn.math.sqrt(fan_in) 26 | nn.init.uniform_(self.bias, -bound, bound) 27 | 28 | def forward(self, input): 29 | return torch.nn.functional.linear(input, self.weight, self.bias) 30 | 31 | 32 | class PPRGoLayer(nn.Module): 33 | def __init__(self, in_feats, hidden_size, out_feats, num_layers, dropout, activation="relu"): 34 | super(PPRGoLayer, self).__init__() 35 | self.dropout = dropout 36 | self.nlayers = num_layers 37 | shapes = [hidden_size] * (num_layers - 1) + [out_feats] 38 | self.layers = nn.ModuleList() 39 | self.layers.append(LinearLayer(in_feats, hidden_size, bias=False)) 40 | for i in range(num_layers - 1): 41 | self.layers.append(nn.Linear(shapes[i], shapes[i + 1], bias=False)) 42 | self.activation = get_activation(activation) 43 | 44 | def forward(self, x): 45 | h = x 46 | for i, layer in enumerate(self.layers): 47 | h = nn.functional.dropout(h, p=self.dropout, training=self.training) 48 | h = layer(h) 49 | if i != self.nlayers - 1: 50 | h = self.activation(h) 51 | return h 52 | -------------------------------------------------------------------------------- /cogdl/layers/se_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SELayer(nn.Module): 6 | """Squeeze-and-excitation networks""" 7 | 8 | def __init__(self, in_channels, se_channels): 9 | super(SELayer, self).__init__() 10 | 11 | self.in_channels = in_channels 12 | self.se_channels = se_channels 13 | 14 | self.encoder_decoder = nn.Sequential( 15 | nn.Linear(in_channels, se_channels), nn.ELU(), nn.Linear(se_channels, in_channels), nn.Sigmoid(), 16 | ) 17 | 18 | # self.reset_parameters() 19 | 20 | def forward(self, x): 21 | """""" 22 | # Aggregate input representation 23 | x_global = torch.mean(x, dim=0) 24 | # Compute reweighting vector s 25 | s = self.encoder_decoder(x_global) 26 | 27 | return x * s 28 | 
-------------------------------------------------------------------------------- /cogdl/layers/sgc_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from cogdl.utils import spmm 4 | 5 | 6 | class SGCLayer(nn.Module): 7 | def __init__(self, in_features, out_features, order=3): 8 | super(SGCLayer, self).__init__() 9 | self.in_features = in_features 10 | self.out_features = out_features 11 | self.order = order 12 | self.W = nn.Linear(in_features, out_features) 13 | 14 | def forward(self, graph, x): 15 | output = self.W(x) 16 | for _ in range(self.order): 17 | output = spmm(graph, output) 18 | return output 19 | -------------------------------------------------------------------------------- /cogdl/layers/stgat_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from cogdl.layers.gat_layerii import GATLayerST 4 | 5 | 6 | 7 | class STGATConvLayer(nn.Module): 8 | def __init__( 9 | self, 10 | in_channels, 11 | out_channels, 12 | heads, 13 | dropout=0, 14 | concat=False 15 | ): 16 | super(STGATConvLayer, self).__init__() 17 | self.in_channels = in_channels 18 | self.out_channels = out_channels 19 | self.heads = heads 20 | self.dropout = dropout 21 | self.concat = concat 22 | self._graph_conv = GATLayerST(in_channels, out_channels, nhead=1, alpha=0.2, attn_drop=0.5, activation=None, residual=False, norm=None) 23 | 24 | def forward( 25 | self, 26 | X: torch.FloatTensor, 27 | edge_index: torch.LongTensor, 28 | edge_weight: torch.FloatTensor = None, 29 | ) -> torch.FloatTensor: 30 | 31 | x = self._graph_conv(X ,edge_index, edge_weight) 32 | 33 | return x 34 | 35 | 36 | -------------------------------------------------------------------------------- /cogdl/loggers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_logger import Logger 2 | 3 | 4 | def 
build_logger(logger, log_path="./runs", project="cogdl-exp"): 5 | if logger == "wandb": 6 | from .wandb_logger import WandbLogger 7 | 8 | return WandbLogger(log_path, project) 9 | elif logger == "tensorboard": 10 | from .tensorboard_logger import TBLogger 11 | 12 | return TBLogger(log_path) 13 | else: 14 | return Logger(log_path) 15 | -------------------------------------------------------------------------------- /cogdl/loggers/base_logger.py: -------------------------------------------------------------------------------- 1 | class Logger: 2 | def __init__(self, log_path): 3 | self.log_path = log_path 4 | 5 | def start(self): 6 | pass 7 | 8 | def note(self, metrics, step=None): 9 | pass 10 | 11 | def finish(self): 12 | pass 13 | -------------------------------------------------------------------------------- /cogdl/loggers/tensorboard_logger.py: -------------------------------------------------------------------------------- 1 | from tensorboardX import SummaryWriter 2 | 3 | from . import Logger 4 | 5 | 6 | class TBLogger(Logger): 7 | def __init__(self, log_path): 8 | super(TBLogger, self).__init__(log_path) 9 | self.last_step = 0 10 | 11 | def start(self): 12 | self.writer = SummaryWriter(logdir=self.log_path) 13 | 14 | def note(self, metrics, step=None): 15 | if not hasattr(self, "writer"): 16 | self.start() 17 | if step is None: 18 | step = self.last_step 19 | for key, value in metrics.items(): 20 | self.writer.add_scalar(key, value, step) 21 | self.last_step = step 22 | 23 | def finish(self): 24 | self.writer.close() 25 | -------------------------------------------------------------------------------- /cogdl/loggers/wandb_logger.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from . 
import Logger 3 | 4 | try: 5 | import wandb 6 | except Exception: 7 | warnings.warn("Please install wandb first") 8 | 9 | 10 | class WandbLogger(Logger): 11 | def __init__(self, log_path, project=None): 12 | super(WandbLogger, self).__init__(log_path) 13 | self.last_step = 0 14 | self.project = project 15 | 16 | def start(self): 17 | self.run = wandb.init(reinit=True, dir=self.log_path, project=self.project) 18 | 19 | def note(self, metrics, step=None): 20 | if not hasattr(self, "run"): 21 | self.start() 22 | if step is None: 23 | step = self.last_step 24 | self.run.log(metrics, step=step) 25 | self.last_step = step 26 | 27 | def finish(self): 28 | self.run.finish() 29 | -------------------------------------------------------------------------------- /cogdl/models/base_model.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Type, Any 2 | import torch.nn as nn 3 | 4 | 5 | class BaseModel(nn.Module): 6 | @staticmethod 7 | def add_args(parser): 8 | """Add model-specific arguments to the parser.""" 9 | pass 10 | 11 | @classmethod 12 | def build_model_from_args(cls, args): 13 | """Build a new model instance.""" 14 | raise NotImplementedError("Models must implement the build_model_from_args method") 15 | 16 | def __init__(self): 17 | super(BaseModel, self).__init__() 18 | self.model_name = self.__class__.__name__ 19 | self.loss_fn = None 20 | self.evaluator = None 21 | 22 | def _forward_unimplemented(self, *input: Any) -> None: # abc warning 23 | pass 24 | 25 | def forward(self, *args): 26 | raise NotImplementedError 27 | 28 | def predict(self, data): 29 | return self.forward(data) 30 | 31 | @property 32 | def device(self): 33 | return next(self.parameters()).device 34 | 35 | def set_loss_fn(self, loss_fn): 36 | self.loss_fn = loss_fn 37 | -------------------------------------------------------------------------------- /cogdl/models/emb/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/cogdl/models/emb/__init__.py -------------------------------------------------------------------------------- /cogdl/models/emb/complex.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from .. import BaseModel 7 | from .knowledge_base import KGEModel 8 | 9 | 10 | class ComplEx(KGEModel): 11 | r""" 12 | the implementation of ComplEx model from the paper `"Complex Embeddings for Simple Link Prediction"` 13 | borrowed from `KnowledgeGraphEmbedding` 14 | """ 15 | @staticmethod 16 | def add_args(parser): 17 | """Add model-specific arguments to the parser.""" 18 | parser.add_argument("--embedding_size", type=int, default=500, help="Dimensionality of embedded vectors") 19 | parser.add_argument("--gamma", type=float,default=12.0, help="Hyperparameter for embedding") 20 | parser.add_argument("--double_entity_embedding", default=True) 21 | parser.add_argument("--double_relation_embedding", default=True) 22 | 23 | def __init__( 24 | self, nentity, nrelation, hidden_dim, gamma, double_entity_embedding,double_relation_embedding 25 | ): 26 | super(ComplEx, self).__init__(nentity, nrelation, hidden_dim, gamma, double_entity_embedding, double_relation_embedding) 27 | 28 | 29 | def score(self, head, relation, tail, mode): 30 | re_head, im_head = torch.chunk(head, 2, dim=2) 31 | re_relation, im_relation = torch.chunk(relation, 2, dim=2) 32 | re_tail, im_tail = torch.chunk(tail, 2, dim=2) 33 | 34 | if mode == "head-batch": 35 | re_score = re_relation * re_tail + im_relation * im_tail 36 | im_score = re_relation * im_tail - im_relation * re_tail 37 | score = re_head * re_score + im_head * im_score 38 | else: 39 | re_score = re_head * re_relation - im_head * im_relation 
40 | im_score = re_head * im_relation + im_head * re_relation 41 | score = re_score * re_tail + im_score * im_tail 42 | 43 | score = score.sum(dim=2) 44 | return score 45 | -------------------------------------------------------------------------------- /cogdl/models/emb/distmult.py: -------------------------------------------------------------------------------- 1 | from .. import BaseModel 2 | from .knowledge_base import KGEModel 3 | 4 | 5 | class DistMult(KGEModel): 6 | r"""The DistMult model from the ICLR 2015 paper `"EMBEDDING ENTITIES AND RELATIONS FOR LEARNING AND INFERENCE IN KNOWLEDGE BASES" 7 | ` 8 | borrowed from `KnowledgeGraphEmbedding` 9 | """ 10 | 11 | def __init__( 12 | self, nentity, nrelation, hidden_dim, gamma, double_entity_embedding, double_relation_embedding 13 | ): 14 | super(DistMult, self).__init__( 15 | nentity, nrelation, hidden_dim, gamma, double_entity_embedding, double_relation_embedding 16 | ) 17 | 18 | def score(self, head, relation, tail, mode): 19 | if mode == "head-batch": 20 | score = head * (relation * tail) 21 | else: 22 | score = (head * relation) * tail 23 | 24 | score = score.sum(dim=2) 25 | return score 26 | -------------------------------------------------------------------------------- /cogdl/models/emb/spectral.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | import scipy.sparse as sp 4 | from sklearn import preprocessing 5 | from .. import BaseModel 6 | 7 | 8 | class Spectral(BaseModel): 9 | r"""The Spectral clustering model from the `"Leveraging social media networks for classification" 10 | `_ paper 11 | 12 | Args: 13 | hidden_size (int) : The dimension of node representation. 
14 | """ 15 | 16 | @staticmethod 17 | def add_args(parser): 18 | """Add model-specific arguments to the parser.""" 19 | # fmt: off 20 | parser.add_argument("--hidden-size", type=int, default=128) 21 | # fmt: on 22 | 23 | @classmethod 24 | def build_model_from_args(cls, args): 25 | return cls(args.hidden_size) 26 | 27 | def __init__(self, hidden_size): 28 | super(Spectral, self).__init__() 29 | self.dimension = hidden_size 30 | 31 | def forward(self, graph, return_dict=False): 32 | nx_g = graph.to_networkx() 33 | matrix = nx.normalized_laplacian_matrix(nx_g).todense() 34 | matrix = np.eye(matrix.shape[0]) - np.asarray(matrix) 35 | ut, s, _ = sp.linalg.svds(matrix, self.dimension) 36 | emb_matrix = ut * np.sqrt(s) 37 | embeddings = preprocessing.normalize(emb_matrix, "l2") 38 | 39 | if return_dict: 40 | features_matrix = dict() 41 | for vid, node in enumerate(nx_g.nodes()): 42 | features_matrix[node] = embeddings[vid] 43 | else: 44 | features_matrix = np.zeros((graph.num_nodes, embeddings.shape[1])) 45 | nx_nodes = nx_g.nodes() 46 | features_matrix[nx_nodes] = embeddings[np.arange(graph.num_nodes)] 47 | return features_matrix 48 | -------------------------------------------------------------------------------- /cogdl/models/emb/transe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .knowledge_base import KGEModel 6 | 7 | 8 | 9 | class TransE(KGEModel): 10 | r"""The TransE model from paper `"Translating Embeddings for Modeling Multi-relational Data" 11 | ` 12 | borrowed from `KnowledgeGraphEmbedding` 13 | """ 14 | 15 | def __init__( 16 | self, nentity, nrelation, hidden_dim, gamma, double_entity_embedding, double_relation_embedding 17 | ): 18 | super(TransE, self).__init__(nentity, nrelation, hidden_dim, gamma, double_entity_embedding, double_relation_embedding) 19 | 20 | 21 | 22 | def score(self, head, relation, 
tail, mode): 23 | if mode == "head-batch": 24 | score = head + (relation - tail) 25 | else: 26 | score = (head + relation) - tail 27 | 28 | score = self.gamma.item() - torch.norm(score, p=1, dim=2) 29 | return score -------------------------------------------------------------------------------- /cogdl/models/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from .compgcn import CompGCN 2 | from .dgi import DGIModel 3 | from .disengcn import DisenGCN 4 | from .gcn import GCN 5 | from .gcnii import GCNII 6 | from .gdc_gcn import GDC_GCN 7 | from .grace import GRACE 8 | from .graphsage import Graphsage, SAGE 9 | from .mvgrl import MVGRL 10 | from .patchy_san import PatchySAN 11 | from .ppnp import PPNP 12 | from .rgcn import RGCN 13 | from .sgc import sgc 14 | from .revgcn import RevGCN, RevGEN, RevGAT 15 | from .deepergcn import DeeperGCN, ResGNNLayer 16 | from .stgcn import STGCN 17 | from .stgat import STGAT 18 | 19 | __all__ = [ 20 | "CompGCN", 21 | "DGIModel", 22 | "DisenGCN", 23 | "GCN", 24 | "GCNII", 25 | "GDC_GCN", 26 | "GRACE", 27 | "Graphsage", 28 | "MVGRL", 29 | "PatchySAN", 30 | "PPNP", 31 | "RGCN", 32 | "sgc", 33 | "RevGCN", 34 | "RevGAT", 35 | "RevGEN", 36 | "DeeperGCN", 37 | "ResGNNLayer", 38 | "SAGE", 39 | "STGCN", 40 | "STGAT", 41 | ] 42 | -------------------------------------------------------------------------------- /cogdl/models/nn/drgat.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn.functional as F 3 | 4 | from cogdl.layers import SELayer, GATLayer 5 | 6 | from .. 
import BaseModel 7 | 8 | 9 | class DrGAT(BaseModel): 10 | @staticmethod 11 | def add_args(parser): 12 | """Add model-specific arguments to the parser.""" 13 | # fmt: off 14 | parser.add_argument("--num-features", type=int) 15 | parser.add_argument("--num-classes", type=int) 16 | parser.add_argument("--hidden-size", type=int, default=8) 17 | parser.add_argument("--nhead", type=int, default=8) 18 | parser.add_argument("--dropout", type=float, default=0.6) 19 | # fmt: on 20 | 21 | @classmethod 22 | def build_model_from_args(cls, args): 23 | return cls(args.num_features, args.num_classes, args.hidden_size, args.nhead, args.dropout,) 24 | 25 | def __init__(self, num_features, num_classes, hidden_size, num_heads, dropout): 26 | super(DrGAT, self).__init__() 27 | self.num_features = num_features 28 | self.num_classes = num_classes 29 | self.hidden_size = hidden_size 30 | self.num_heads = num_heads 31 | self.dropout = dropout 32 | self.conv1 = GATLayer(num_features, hidden_size, nhead=num_heads, attn_drop=dropout) 33 | self.conv2 = GATLayer(hidden_size * num_heads, num_classes, nhead=1, attn_drop=dropout) 34 | self.se1 = SELayer(num_features, se_channels=int(np.sqrt(num_features))) 35 | self.se2 = SELayer(hidden_size * num_heads, se_channels=int(np.sqrt(hidden_size * num_heads))) 36 | 37 | def forward(self, graph): 38 | x = graph.x 39 | x = F.dropout(x, p=self.dropout, training=self.training) 40 | x = self.se1(x) 41 | x = F.elu(self.conv1(graph, x)) 42 | x = F.dropout(x, p=self.dropout, training=self.training) 43 | x = self.se2(x) 44 | x = F.elu(self.conv2(graph, x)) 45 | return x 46 | -------------------------------------------------------------------------------- /cogdl/models/nn/han.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from cogdl.layers import HANLayer 4 | 5 | from .. 
import BaseModel 6 | 7 | 8 | class HAN(BaseModel): 9 | @staticmethod 10 | def add_args(parser): 11 | """Add model-specific arguments to the parser.""" 12 | # fmt: off 13 | parser.add_argument("--num-features", type=int) 14 | parser.add_argument("--num-classes", type=int) 15 | parser.add_argument("--num-nodes", type=int) 16 | parser.add_argument("--hidden-size", type=int, default=64) 17 | parser.add_argument("--num-layers", type=int, default=2) 18 | parser.add_argument("--num-edge", type=int, default=2) 19 | # fmt: on 20 | 21 | @classmethod 22 | def build_model_from_args(cls, args): 23 | return cls( 24 | args.num_edge, args.num_features, args.hidden_size, args.num_classes, args.num_nodes, args.num_layers, 25 | ) 26 | 27 | def __init__(self, num_edge, w_in, w_out, num_class, num_nodes, num_layers): 28 | super(HAN, self).__init__() 29 | self.num_edge = num_edge 30 | self.num_nodes = num_nodes 31 | self.w_in = w_in 32 | self.w_out = w_out 33 | self.num_class = num_class 34 | self.num_layers = num_layers 35 | layers = [] 36 | for i in range(num_layers): 37 | if i == 0: 38 | layers.append(HANLayer(num_edge, w_in, w_out)) 39 | else: 40 | layers.append(HANLayer(num_edge, w_out, w_out)) 41 | 42 | self.layers = nn.ModuleList(layers) 43 | self.cross_entropy_loss = nn.CrossEntropyLoss() 44 | self.linear = nn.Linear(self.w_out, self.num_class) 45 | 46 | def forward(self, graph): 47 | X = graph.x 48 | for i in range(self.num_layers): 49 | X = self.layers[i](graph, X) 50 | 51 | out = self.linear(X) 52 | return out 53 | -------------------------------------------------------------------------------- /cogdl/models/nn/m3s.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | from cogdl.layers import GCNLayer 3 | 4 | from .. 
class M3S(BaseModel):
    """Two-layer GCN backbone used by the M3S training pipeline."""

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # fmt: off
        parser.add_argument("--num-features", type=int)
        parser.add_argument("--num-classes", type=int)
        parser.add_argument("--hidden-size", type=int, default=64)
        parser.add_argument("--dropout", type=float, default=0)
        parser.add_argument("--num-clusters", type=int, default=50)
        parser.add_argument("--num-stages", type=int, default=10)
        parser.add_argument("--epochs-per-stage", type=int, default=50)
        parser.add_argument("--label-rate", type=float, default=1)
        parser.add_argument("--num-new-labels", type=int, default=2)
        parser.add_argument("--alpha", type=float, default=1)
        parser.add_argument("--approximate", action="store_true")
        # fmt: on

    @classmethod
    def build_model_from_args(cls, args):
        return cls(args.num_features, args.hidden_size, args.num_classes, args.dropout,)

    def __init__(self, num_features, hidden_size, num_classes, dropout):
        super(M3S, self).__init__()
        self.dropout = dropout
        self.gcn1 = GCNLayer(num_features, hidden_size)
        self.gcn2 = GCNLayer(hidden_size, num_classes)

    def embed(self, graph):
        """Return first-layer activations as a detached NumPy array.

        NOTE(review): this applies dropout *before* ReLU, the opposite order
        from forward() — preserved as-is, confirm it is intentional.
        """
        graph.sym_norm()
        hidden = self.gcn1(graph, graph.x)
        hidden = F.relu(F.dropout(hidden, self.dropout, training=self.training))
        return hidden.detach().cpu().numpy()

    def forward(self, graph):
        # Symmetrically normalize the adjacency before propagation.
        graph.sym_norm()
        hidden = self.gcn1(graph, graph.x)
        hidden = F.dropout(F.relu(hidden), self.dropout, training=self.training)
        return self.gcn2(graph, hidden)

    def predict(self, data):
        return self.forward(data)
class MixHop(BaseModel):
    """MixHop model: each layer mixes adjacency powers [0, 1, 2] and
    concatenates their outputs, followed by a linear classifier.

    Fix over the original: ``self.dropout`` was assigned twice in
    ``__init__``; the redundant duplicate assignment is removed.
    """

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # fmt: off
        parser.add_argument("--num-features", type=int)
        parser.add_argument("--num-classes", type=int)
        parser.add_argument("--dropout", type=float, default=0.7)
        parser.add_argument("--layer1-pows", type=int, nargs="+", default=[200, 200, 200])
        parser.add_argument("--layer2-pows", type=int, nargs="+", default=[20, 20, 20])
        # fmt: on

    @classmethod
    def build_model_from_args(cls, args):
        return cls(args.num_features, args.num_classes, args.dropout, args.layer1_pows, args.layer2_pows,)

    def __init__(self, num_features, num_classes, dropout, layer1_pows, layer2_pows):
        """
        Args:
            num_features: input feature dimension.
            num_classes: number of output classes.
            dropout: dropout probability applied after each MixHop layer.
            layer1_pows / layer2_pows: per-power output widths for the first
                and second MixHop layer respectively.
        """
        super(MixHop, self).__init__()
        self.num_features = num_features
        self.num_classes = num_classes
        self.dropout = dropout

        layer_pows = [layer1_pows, layer2_pows]
        # The input width of layer i is the summed output width of layer i-1.
        shapes = [num_features] + [sum(layer1_pows), sum(layer2_pows)]

        self.mixhops = nn.ModuleList(
            [MixHopLayer(shapes[layer], [0, 1, 2], layer_pows[layer]) for layer in range(len(layer_pows))]
        )
        self.fc = nn.Linear(shapes[-1], num_classes)

    def forward(self, graph):
        x = graph.x
        for mixhop in self.mixhops:
            x = F.relu(mixhop(graph, x))
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc(x)
        return x

    def predict(self, data):
        return self.forward(data)
class MLP(BaseModel):
    """Graph-agnostic multi-layer perceptron baseline.

    forward() accepts either a raw feature tensor or a Graph, in which case
    its ``x`` attribute is used.
    """

    @staticmethod
    def add_args(parser):
        """Add model-specific arguments to the parser."""
        # fmt: off
        parser.add_argument("--num-features", type=int)
        parser.add_argument("--num-classes", type=int)
        parser.add_argument("--hidden-size", type=int, default=16)
        parser.add_argument("--num-layers", type=int, default=2)
        parser.add_argument("--dropout", type=float, default=0.5)
        parser.add_argument("--norm", type=str, default=None)
        parser.add_argument("--activation", type=str, default="relu")
        # fmt: on

    @classmethod
    def build_model_from_args(cls, args):
        # `act_first` is optional on the args namespace.
        act_first = getattr(args, "act_first", False)
        return cls(
            args.num_features,
            args.num_classes,
            args.hidden_size,
            args.num_layers,
            args.dropout,
            args.activation,
            args.norm,
            act_first,
        )

    def __init__(
        self,
        in_feats,
        out_feats,
        hidden_size,
        num_layers,
        dropout=0.0,
        activation="relu",
        norm=None,
        act_first=False,
        bias=True,
    ):
        super(MLP, self).__init__()
        # All real work is delegated to the shared MLP layer implementation.
        self.nn = MLPLayer(in_feats, out_feats, hidden_size, num_layers, dropout, activation, norm, act_first, bias)

    def forward(self, x):
        # Accept a Graph for call-site compatibility with graph models.
        inputs = x.x if isinstance(x, Graph) else x
        return self.nn(inputs)

    def predict(self, data):
        return self.forward(data.x)
class sgc(BaseModel):
    """Simplified graph convolution: a single SGC layer applied to the
    symmetrically-normalized graph."""

    @staticmethod
    def add_args(parser):
        parser.add_argument("--num-features", type=int)
        parser.add_argument("--num-classes", type=int)

    @classmethod
    def build_model_from_args(cls, args):
        return cls(in_feats=args.num_features, out_feats=args.num_classes)

    def __init__(self, in_feats, out_feats):
        super(sgc, self).__init__()
        self.nn = SGCLayer(in_feats, out_feats)
        # NOTE(review): not referenced anywhere in this class; kept in case
        # external code relies on the attribute existing.
        self.cache = dict()

    def forward(self, graph):
        # Normalize the adjacency before propagation.
        graph.sym_norm()
        return self.nn(graph, graph.x)

    def predict(self, data):
        return self.forward(data)
class EdgeSoftmaxFunction(torch.autograd.Function):
    """Autograd wrapper around the CUDA edge-softmax kernels.

    The CSR row pointer delimits the per-row segments over which the
    softmax is taken (presumably one segment per node's edges — confirm
    against the kernel source).
    """

    @staticmethod
    def forward(ctx, rowptr, h):
        softmax_out = edge_softmax.edge_softmax(rowptr, h)
        # The backward kernel consumes the softmax *output*, not the input.
        ctx.save_for_backward(rowptr, softmax_out)
        return softmax_out

    @staticmethod
    def backward(ctx, grad_out):
        rowptr, softmax_out = ctx.saved_tensors
        grad_h = edge_softmax.edge_softmax_backward(rowptr, softmax_out, grad_out.contiguous())
        # rowptr is integer graph structure: no gradient.
        return None, grad_h
// Forward edge-softmax over per-edge scores stored in CSR order.
//   rowptr: int32 CSR row pointer (CUDA, contiguous).
//   weight: float32 per-edge scores (CUDA, contiguous).
// Returns the softmax-normalized scores, one per edge.
torch::Tensor edge_softmax(
    torch::Tensor rowptr,
    torch::Tensor weight)
{
    // Kernel preconditions. These are plain asserts and therefore
    // compiled out in NDEBUG builds.
    assert(rowptr.device().type() == torch::kCUDA);
    assert(weight.device().type() == torch::kCUDA);
    assert(rowptr.is_contiguous());
    assert(weight.is_contiguous());
    assert(rowptr.dtype() == torch::kInt32);
    assert(weight.dtype() == torch::kFloat32);
    // Ensure the kernel launches on the device holding the tensors.
    const at::cuda::OptionalCUDAGuard device_guard1(device_of(rowptr));
    const at::cuda::OptionalCUDAGuard device_guard2(device_of(weight));
    return edge_softmax_cuda(rowptr, weight);
}

// Backward pass: given the softmax output saved from forward and the
// upstream gradient, compute the gradient w.r.t. the pre-softmax scores.
torch::Tensor edge_softmax_backward(
    torch::Tensor rowptr,
    torch::Tensor softmax,
    torch::Tensor grad)
{
    assert(rowptr.device().type() == torch::kCUDA);
    assert(softmax.device().type() == torch::kCUDA);
    assert(grad.device().type() == torch::kCUDA);
    assert(rowptr.is_contiguous());
    assert(softmax.is_contiguous());
    assert(grad.is_contiguous());
    assert(rowptr.dtype() == torch::kInt32);
    assert(softmax.dtype() == torch::kFloat32);
    assert(grad.dtype() == torch::kFloat32);
    const at::cuda::OptionalCUDAGuard device_guard1(device_of(rowptr));
    const at::cuda::OptionalCUDAGuard device_guard2(device_of(softmax));
    const at::cuda::OptionalCUDAGuard device_guard3(device_of(grad));
    return edge_softmax_backward_cuda(rowptr, softmax, grad);
}
class FusedGATFunction(torch.autograd.Function):
    """Autograd wrapper for the fused GAT CUDA kernels (``fused_gatconv``).

    Fix over the original: ``forward`` takes 8 inputs after ``ctx``, but
    ``backward`` returned 9 values (a trailing extra ``None``). PyTorch
    requires exactly one gradient per forward input, so the extra entry is
    removed.
    """

    @staticmethod
    def forward(ctx, attn_row, attn_col, row_ptr, col_ind, col_ptr, row_ind, negative_slope, in_feat):
        out_feat, edge_max, edge_sum = fused_gatconv.gat_forward(
            attn_row, attn_col, row_ptr, col_ind, negative_slope, in_feat
        )
        # Save everything the backward kernel needs, including the softmax
        # statistics (edge_max / edge_sum) produced by the forward kernel.
        ctx.save_for_backward(row_ptr, col_ind, col_ptr, row_ind, edge_max, edge_sum, in_feat, attn_row, attn_col)
        ctx.negative_slope = negative_slope
        return out_feat

    @staticmethod
    def backward(ctx, grad_out):
        row_ptr, col_ind, col_ptr, row_ind, edge_max, edge_sum, in_feat, attn_row, attn_col = ctx.saved_tensors
        grad_out = grad_out.contiguous()
        grad_feat, grad_attn_row, grad_attn_col = fused_gatconv.gat_backward(
            ctx.negative_slope,
            row_ptr,
            col_ind,
            col_ptr,
            row_ind,
            edge_max,
            edge_sum,
            in_feat,
            attn_row,
            attn_col,
            grad_out,
        )
        # One gradient per forward input, in order:
        #   attn_row, attn_col, row_ptr, col_ind, col_ptr, row_ind,
        #   negative_slope, in_feat.
        # Graph structure and the scalar slope receive no gradient.
        return grad_attn_row, grad_attn_col, None, None, None, None, None, grad_feat
class SPMM(Function):
    # Jittor autograd function computing a CSR sparse-times-dense product:
    # out = A @ feat, where A is described by (rowptr, colind, edge_weight_csr).
    def execute(self, rowptr, colind, feat, edge_weight_csr=None):
        # Lazily compile/load the CUDA spmm op on first use.
        init_spmm_ops()
        self.csr_spmm = cached_op["csr_spmm"]

        out = self.csr_spmm(rowptr, colind, edge_weight_csr, feat)
        # Keep the CSR structure around for the backward pass.
        self.backward_csc = (rowptr, colind, edge_weight_csr)
        return out

    def grad(self, grad_out):
        # Backward of spmm is spmm with the transposed matrix.
        # NOTE(review): the CSR arrays are reused unchanged as if they were
        # the CSC of the same matrix. That is only equivalent when the
        # adjacency is symmetric (undirected graph, symmetric weights) —
        # confirm callers guarantee this.
        rowptr, colind, edge_weight_csr = self.backward_csc
        colptr, rowind, edge_weight_csc = rowptr, colind, edge_weight_csr
        grad_feat = self.csr_spmm(colptr, rowind, edge_weight_csc, grad_out)

        # Gradients flow only to `feat`; structure and weights get none.
        return None, None, grad_feat, None
class ScatterMaxFunction(torch.autograd.Function):
    """Autograd wrapper for the CUDA scatter-max kernels over a CSR graph."""

    @staticmethod
    def forward(ctx, rowptr, colind, feat):
        result, argmax = spmm_max.scatter_max_fp(rowptr, colind, feat)
        # Only the argmax indices are needed to route gradients back.
        ctx.save_for_backward(argmax)
        return result

    @staticmethod
    def backward(ctx, grad):
        (argmax,) = ctx.saved_tensors
        grad_feat = spmm_max.scatter_max_bp(grad.contiguous(), argmax)
        # No gradients for the integer CSR structure.
        return None, None, grad_feat
// Forward scatter-max over a CSR graph. Returns two tensors: the reduced
// values and the argmax bookkeeping consumed by scatter_max_bp (the Python
// wrapper unpacks them as `out, max_id`).
std::vector<torch::Tensor> scatter_max(torch::Tensor rowptr,
                                       torch::Tensor colind,
                                       torch::Tensor node_feature) {
  // Kernel preconditions: contiguous CUDA tensors of the expected dtypes.
  assertTensor(rowptr, torch::kInt32);
  assertTensor(colind, torch::kInt32);
  assertTensor(node_feature, torch::kFloat32);
  // Ensure kernels launch on the tensors' device.
  const at::cuda::OptionalCUDAGuard device_guard1(device_of(rowptr));
  const at::cuda::OptionalCUDAGuard device_guard2(device_of(colind));
  const at::cuda::OptionalCUDAGuard device_guard3(device_of(node_feature));
  return scatter_max_fp_cuda(rowptr, colind, node_feature);
}

// Backward pass: scatter the upstream gradient back according to the
// argmax positions (max_mask) recorded by the forward pass.
torch::Tensor scatter_max_bp(torch::Tensor node_feature,
                             torch::Tensor max_mask) {
  assertTensor(node_feature, torch::kFloat32);
  assertTensor(max_mask, torch::kInt32);
  const at::cuda::OptionalCUDAGuard device_guard1(device_of(node_feature));
  const at::cuda::OptionalCUDAGuard device_guard2(device_of(max_mask));
  return scatter_max_bp_cuda(node_feature, max_mask);
}
// Convert a CSR matrix (rowptr, colind, csr_data) to CSC on the GPU.
// NOTE(review): csr_data is asserted to be int32, not float — in this file
// the "values" carried through the transpose appear to be permutation
// indices rather than edge weights. Confirm before passing float data.
std::vector<torch::Tensor> csr2csc(
    torch::Tensor rowptr,
    torch::Tensor colind,
    torch::Tensor csr_data)
{
    // Kernel preconditions (plain asserts; compiled out under NDEBUG).
    assert(rowptr.device().type() == torch::kCUDA);
    assert(colind.device().type() == torch::kCUDA);
    assert(csr_data.device().type() == torch::kCUDA);
    assert(rowptr.is_contiguous());
    assert(colind.is_contiguous());
    assert(csr_data.is_contiguous());
    assert(rowptr.dtype() == torch::kInt32);
    assert(colind.dtype() == torch::kInt32);
    assert(csr_data.dtype() == torch::kInt32);
    const at::cuda::OptionalCUDAGuard device_guard1(device_of(rowptr));
    const at::cuda::OptionalCUDAGuard device_guard2(device_of(colind));
    const at::cuda::OptionalCUDAGuard device_guard3(device_of(csr_data));
    return csr2csc_cuda(rowptr, colind, csr_data);
}
// Multi-head SDDMM wrapper. Both dense operands are laid out V * H * F
// (nodes x heads x features) per the author's comments; the sparse pattern
// is the CSR graph (rowptr, colind). Presumably produces one value per
// edge per head — confirm against mhsddmm_cuda.
torch::Tensor mhsddmm(
    torch::Tensor rowptr,
    torch::Tensor colind,
    torch::Tensor grad, // V * H * F
    torch::Tensor feature // V * H * F
)
{
    // Kernel preconditions (plain asserts; compiled out under NDEBUG).
    assert(rowptr.device().type() == torch::kCUDA);
    assert(colind.device().type() == torch::kCUDA);
    assert(grad.device().type() == torch::kCUDA);
    assert(feature.device().type() == torch::kCUDA);
    assert(rowptr.is_contiguous());
    assert(colind.is_contiguous());
    assert(grad.is_contiguous());
    assert(feature.is_contiguous());
    assert(rowptr.dtype() == torch::kInt32);
    assert(colind.dtype() == torch::kInt32);
    assert(grad.dtype() == torch::kFloat32);
    assert(feature.dtype() == torch::kFloat32);
    // Ensure the kernel launches on the tensors' device.
    const at::cuda::OptionalCUDAGuard device_guard1(device_of(rowptr));
    const at::cuda::OptionalCUDAGuard device_guard2(device_of(colind));
    const at::cuda::OptionalCUDAGuard device_guard3(device_of(grad));
    const at::cuda::OptionalCUDAGuard device_guard4(device_of(feature));
    return mhsddmm_cuda(rowptr, colind, grad, feature);
}
// Multi-head SpMM wrapper: aggregates `infeat` over the CSR graph
// (rowptr, colind), weighting each edge by the per-edge, per-head
// `attention` values.
torch::Tensor mhspmm(
    torch::Tensor rowptr,
    torch::Tensor colind,
    torch::Tensor attention,
    torch::Tensor infeat)
{
    // Kernel preconditions (plain asserts; compiled out under NDEBUG).
    assert(rowptr.device().type() == torch::kCUDA);
    assert(colind.device().type() == torch::kCUDA);
    assert(attention.device().type() == torch::kCUDA);
    assert(infeat.device().type() == torch::kCUDA);
    assert(rowptr.is_contiguous());
    assert(colind.is_contiguous());
    assert(attention.is_contiguous());
    assert(infeat.is_contiguous());
    assert(rowptr.dtype() == torch::kInt32);
    assert(colind.dtype() == torch::kInt32);
    assert(attention.dtype() == torch::kFloat32);
    assert(infeat.dtype() == torch::kFloat32);
    // Ensure the kernel launches on the tensors' device.
    const at::cuda::OptionalCUDAGuard device_guard1(device_of(rowptr));
    const at::cuda::OptionalCUDAGuard device_guard2(device_of(colind));
    const at::cuda::OptionalCUDAGuard device_guard3(device_of(attention));
    const at::cuda::OptionalCUDAGuard device_guard4(device_of(infeat));
    return mhspmm_cuda(rowptr, colind, attention, infeat);
}
@torch.no_grad()
def homo_index(g, x):
    """Mean feature distance between each node and its aggregated neighbors,
    with isolated nodes excluded.

    NOTE(review): after reducing the last dim with ``norm``, ``mean(1)`` is
    applied — this assumes x has at least three dimensions; confirm the
    callers' feature layout.
    """
    with g.local_graph():
        # Self-loops would mix a node's own features into its neighborhood.
        g.remove_self_loops()
        aggregated = spmm(g, x)
        degrees = g.degrees()
        isolated = degrees == 0
        dist = (x - aggregated).norm(2, dim=-1).mean(1)
        return torch.mean(dist[~isolated])
def random_walk(nodes, ptr, neighs, num_walks=1, num_steps=1, nthread=-1, seed=111413, restart_prob=0.0):
    """Run random walks through the native librwalk library.

    Args:
        nodes: int32 array of start nodes.
        ptr, neighs: int32 CSR adjacency (row pointer / flattened neighbors).
        num_walks: number of walks started from each node.
        num_steps: steps taken per walk.
        nthread: OpenMP thread count passed to the library (-1 = default).
        seed: base RNG seed.
        restart_prob: probability of jumping back to the walk's start node.

    Returns:
        int32 array of shape (nodes.size * num_walks, num_steps + 1); each
        row holds one walk, starting at its start node.
    """
    # The C routine requires contiguous int32 buffers.
    assert ptr.flags["C_CONTIGUOUS"]
    assert neighs.flags["C_CONTIGUOUS"]
    assert ptr.dtype == np.int32
    assert neighs.dtype == np.int32
    assert nodes.dtype == np.int32
    num_starts = nodes.size
    # Pre-fill with -1; the library writes the walks in place through the
    # flat view passed below (reshape of a contiguous array is a view).
    out = -np.ones((num_starts * num_walks, (num_steps + 1)), dtype=np.int32, order="C")
    assert out.flags["C_CONTIGUOUS"]

    librwalk.random_walk(
        nodes,
        ptr,
        neighs,
        num_starts,
        num_walks,
        num_steps,
        seed,
        nthread,
        restart_prob,
        np.reshape(out, (out.size,), order="C"),
    )

    return out
void random_walk(int const* starts, int const* ptr, int const* neighs, int n, int num_walks, 2 | int num_steps, int seed, int nthread, float restart_prop, int* walks); -------------------------------------------------------------------------------- /cogdl/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_wrapper import register_data_wrapper, fetch_data_wrapper 2 | from .model_wrapper import ( 3 | register_model_wrapper, 4 | fetch_model_wrapper, 5 | ModelWrapper, 6 | EmbeddingModelWrapper, 7 | ) 8 | -------------------------------------------------------------------------------- /cogdl/wrappers/data_wrapper/graph_classification/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph_classification_dw import GraphClassificationDataWrapper 2 | from .graph_embedding_dw import GraphEmbeddingDataWrapper 3 | from .infograph_dw import InfoGraphDataWrapper 4 | from .patchy_san_dw import PATCHY_SAN_DataWrapper 5 | -------------------------------------------------------------------------------- /cogdl/wrappers/data_wrapper/graph_classification/graph_classification_dw.py: -------------------------------------------------------------------------------- 1 | from .. 
class GraphClassificationDataWrapper(DataWrapper):
    """Data wrapper for supervised graph classification.

    Splits the dataset into train/val/test subsets and serves each split
    through a ``DataLoader`` of mini-batched graphs.
    """

    @staticmethod
    def add_args(parser):
        # fmt: off
        parser.add_argument("--degree-node-features", action="store_true",
                            help="Use one-hot degree vector as input node features")
        # parser.add_argument("--kfold", action="store_true", help="Use 10-fold cross-validation")
        parser.add_argument("--train-ratio", type=float, default=0.5)
        parser.add_argument("--test-ratio", type=float, default=0.3)
        parser.add_argument("--batch-size", type=int, default=16)
        # fmt: on

    def __init__(self, dataset, degree_node_features=False, batch_size=32, train_ratio=0.5, test_ratio=0.3):
        # NOTE(review): the programmatic default batch size (32) differs from
        # the CLI default (16) declared in ``add_args`` -- confirm which is intended.
        super(GraphClassificationDataWrapper, self).__init__(dataset)
        self.dataset = dataset
        self.degree_node_features = degree_node_features
        self.batch_size = batch_size
        self.train_ratio = train_ratio
        self.test_ratio = test_ratio
        self.split_idx = None

        self.setup_node_features()

    def train_wrapper(self):
        train_split = self.dataset[self.split_idx[0]]
        return DataLoader(train_split, batch_size=self.batch_size, shuffle=True, num_workers=4)

    def val_wrapper(self):
        # Returns None implicitly when the dataset has no validation split.
        val_idx = self.split_idx[1]
        if val_idx is not None:
            return DataLoader(self.dataset[val_idx], batch_size=self.batch_size, shuffle=False, num_workers=4)

    def test_wrapper(self):
        test_split = self.dataset[self.split_idx[2]]
        return DataLoader(test_split, batch_size=self.batch_size, shuffle=False, num_workers=4)

    def setup_node_features(self):
        # Fall back to one-hot degree features when requested, or when the
        # graphs carry no node attributes at all.
        if self.degree_node_features or self.dataset.data[0].x is None:
            self.dataset.data = node_degree_as_feature(self.dataset.data)
        train_idx, val_idx, test_idx = split_dataset(len(self.dataset), self.train_ratio, self.test_ratio)
        self.split_idx = [train_idx, val_idx, test_idx]
class GraphEmbeddingDataWrapper(DataWrapper):
    """Data wrapper for unsupervised whole-graph embedding methods.

    Training consumes the raw dataset; evaluation returns the per-graph
    labels as a numpy array for the downstream classifier.
    """

    @staticmethod
    def add_args(parser):
        # fmt: off
        parser.add_argument("--degree-node-features", action="store_true",
                            help="Use one-hot degree vector as input node features")
        # fmt: on

    def __init__(self, dataset, degree_node_features=False):
        super(GraphEmbeddingDataWrapper, self).__init__(dataset)
        self.degree_node_features = degree_node_features
        self.dataset = dataset

    def train_wrapper(self):
        return self.dataset

    def test_wrapper(self):
        # Multi-label graphs keep their full label vector; single-label
        # graphs collapse to scalars.
        multi_label = self.dataset[0].y.shape[0] > 1
        if multi_label:
            labels = [g.y.numpy() for g in self.dataset]
        else:
            labels = [g.y.item() for g in self.dataset]
        return np.array(labels)

    def pre_transform(self):
        if self.degree_node_features:
            self.dataset = node_degree_as_feature(self.dataset)
class PATCHY_SAN_DataWrapper(GraphClassificationDataWrapper):
    """Graph-classification data wrapper for PATCHY-SAN.

    Replaces each graph's node features with the receptive-field features
    produced by ``get_single_feature`` before training starts.
    """

    @staticmethod
    def add_args(parser):
        GraphClassificationDataWrapper.add_args(parser)
        parser.add_argument("--num-sample", default=30, type=int, help="Number of chosen vertexes")
        parser.add_argument("--num-neighbor", default=10, type=int, help="Number of neighbor in constructing features")
        parser.add_argument("--stride", default=1, type=int, help="Stride of chosen vertexes")

    def __init__(self, dataset, num_sample, num_neighbor, stride, *args, **kwargs):
        super(PATCHY_SAN_DataWrapper, self).__init__(dataset, *args, **kwargs)
        self.dataset = dataset
        self.sample = num_sample
        self.neighbor = num_neighbor
        self.stride = stride

    def pre_transform(self):
        dataset = self.dataset
        n_feats, n_classes = dataset.num_features, dataset.num_classes
        # Rewrite every graph's feature matrix in place.
        for idx, _graph in enumerate(dataset):
            feature = get_single_feature(
                dataset[idx], n_feats, n_classes, self.sample, self.neighbor, self.stride
            )
            dataset[idx].x = torch.from_numpy(feature)
        self.dataset = dataset
        super(PATCHY_SAN_DataWrapper, self).pre_transform()
class HeterogeneousEmbeddingDataWrapper(DataWrapper):
    """Serves the full heterogeneous graph for shallow embedding methods.

    Both training and testing operate on the complete ``dataset.data`` object.
    """

    def __init__(self, dataset):
        # NOTE(review): the dataset is not forwarded to DataWrapper.__init__
        # here, unlike HeterogeneousGNNDataWrapper -- confirm this is intended.
        super(HeterogeneousEmbeddingDataWrapper, self).__init__()
        self.dataset = dataset

    def train_wrapper(self):
        return self.dataset.data

    def test_wrapper(self):
        return self.dataset.data
class MultiplexEmbeddingDataWrapper(DataWrapper):
    """Data wrapper for multiplex-network embedding.

    Exposes the dataset's pre-split ``train_data`` / ``test_data`` views.
    """

    def __init__(self, dataset):
        # NOTE(review): dataset is not passed to the DataWrapper base here,
        # unlike most sibling wrappers -- confirm this is intended.
        super(MultiplexEmbeddingDataWrapper, self).__init__()
        self.dataset = dataset

    def train_wrapper(self):
        return self.dataset.data.train_data

    def test_wrapper(self):
        return self.dataset.data.test_data
class GNNKGLinkPredictionDataWrapper(DataWrapper):
    """Full-graph data wrapper for GNN-based knowledge-graph link prediction.

    All three phases run over the same full graph object.
    """

    def __init__(self, dataset):
        super(GNNKGLinkPredictionDataWrapper, self).__init__(dataset)
        self.dataset = dataset
        self.edge_set = None  # not used inside this class; presumably filled by a consumer

    def train_wrapper(self):
        return self.dataset.data

    def val_wrapper(self):
        return self.dataset.data

    def test_wrapper(self):
        return self.dataset.data
class ClusterWrapper(DataWrapper):
    """Cluster-GCN style data wrapper.

    Partitions the graph into ``n_cluster`` parts and serves mini-batches of
    clusters during training; validation and testing use the full graph.
    """

    @staticmethod
    def add_args(parser):
        # fmt: off
        parser.add_argument("--batch-size", type=int, default=20)
        parser.add_argument("--n-cluster", type=int, default=100)
        parser.add_argument("--method", type=str, default="metis")
        # fmt: on

    def __init__(self, dataset, method="metis", batch_size=20, n_cluster=100):
        super(ClusterWrapper, self).__init__(dataset)
        self.dataset = dataset
        self.batch_size = batch_size
        self.n_cluster = n_cluster
        self.method = method
        self.cluster_dataset = ClusteredDataset(dataset, n_cluster=n_cluster, batch_size=batch_size)

    def train_wrapper(self):
        # Switch the graph into training mode before sampling clusters.
        self.dataset.data.train()
        return ClusteredLoader(
            self.cluster_dataset,
            method=self.method,
            batch_size=self.batch_size,
            shuffle=True,
            n_cluster=self.n_cluster,
            # persistent_workers=True,
            num_workers=0,
        )

    def get_train_dataset(self):
        return self.cluster_dataset

    def val_wrapper(self):
        self.dataset.data.eval()
        return self.dataset.data

    def test_wrapper(self):
        self.dataset.data.eval()
        return self.dataset.data
class NetworkEmbeddingDataWrapper(DataWrapper):
    """Data wrapper for shallow network-embedding methods.

    Precomputes a dense one-hot (or multi-hot) label matrix that the
    downstream node-classification evaluation consumes.
    """

    def __init__(self, dataset):
        # NOTE(review): dataset is not forwarded to DataWrapper.__init__ --
        # confirm against sibling wrappers.
        super(NetworkEmbeddingDataWrapper, self).__init__()
        self.dataset = dataset
        data = dataset[0]

        num_nodes = data.num_nodes
        num_classes = dataset.num_classes
        if len(data.y.shape) > 1:
            # Already a (num_nodes, num_classes) multi-label matrix.
            self.label_matrix = data.y
        else:
            # Expand class indices into a one-hot matrix.
            one_hot = np.zeros((num_nodes, num_classes), dtype=int)
            one_hot[range(num_nodes), data.y.numpy()] = 1
            self.label_matrix = one_hot

    def train_wrapper(self):
        return self.dataset.data

    def test_wrapper(self):
        return self.label_matrix
class AGCModelWrapper(EmbeddingModelWrapper):
    """Model wrapper for AGC graph clustering.

    Training produces node embeddings; testing clusters them and reports
    clustering quality.
    """

    @staticmethod
    def add_args(parser):
        # fmt: off
        parser.add_argument("--num-clusters", type=int, default=7)
        parser.add_argument("--cluster-method", type=str, default="kmeans", help="option: kmeans or spectral")
        parser.add_argument("--evaluation", type=str, default="full", help="option: full or NMI")
        # fmt: on

    def __init__(self, model, optimizer_cfg, num_clusters, cluster_method="kmeans", evaluation="full", max_iter=5):
        # NOTE(review): ``max_iter`` is accepted but never stored or used.
        super(AGCModelWrapper, self).__init__()
        self.model = model
        self.optimizer_cfg = optimizer_cfg
        self.num_clusters = num_clusters
        self.cluster_method = cluster_method
        self.full = evaluation == "full"

    def train_step(self, graph):
        return self.model.forward(graph)

    def test_step(self, batch):
        features_matrix, graph = batch
        return evaluate_clustering(
            features_matrix, graph.y, self.cluster_method, self.num_clusters, graph.num_nodes, self.full
        )
class GAEModelWrapper(ModelWrapper):
    """Model wrapper for graph auto-encoder clustering.

    The reconstruction target is a dense adjacency matrix attached to the
    graph in ``pre_stage``; testing clusters the learned embeddings.
    """

    @staticmethod
    def add_args(parser):
        # fmt: off
        parser.add_argument("--num-clusters", type=int, default=7)
        parser.add_argument("--cluster-method", type=str, default="kmeans", help="option: kmeans or spectral")
        parser.add_argument("--evaluation", type=str, default="full", help="option: full or NMI")
        # fmt: on

    def __init__(self, model, optimizer_cfg, num_clusters, cluster_method="kmeans", evaluation="full"):
        super(GAEModelWrapper, self).__init__()
        self.model = model
        self.optimizer_cfg = optimizer_cfg
        self.num_clusters = num_clusters
        self.cluster_method = cluster_method
        self.full = evaluation == "full"

    def train_step(self, subgraph):
        return self.model.make_loss(subgraph, subgraph.adj_mx)

    def test_step(self, subgraph):
        embeddings = self.model(subgraph).detach().cpu().numpy()
        return evaluate_clustering(
            embeddings, subgraph.y, self.cluster_method, self.num_clusters, subgraph.num_nodes, self.full
        )

    def pre_stage(self, stage, data_w):
        # Before the first stage, cache a dense adjacency matrix on the data
        # object as the reconstruction target. NOTE(review): densifying is
        # O(N^2) memory -- verify graph sizes stay small for this path.
        if stage == 0:
            data = data_w.get_dataset().data
            num_nodes = data.x.shape[0]
            adj_mx = torch.sparse_coo_tensor(
                torch.stack(data.edge_index),
                torch.ones(data.edge_index[0].shape[0]),
                torch.Size([num_nodes, num_nodes]),
            ).to_dense()
            data.adj_mx = adj_mx

    def setup_optimizer(self):
        lr, wd = self.optimizer_cfg["lr"], self.optimizer_cfg["weight_decay"]
        return torch.optim.Adam(self.parameters(), lr=lr, weight_decay=wd)
class GraphClassificationModelWrapper(ModelWrapper):
    """Standard supervised model wrapper for graph classification."""

    def __init__(self, model, optimizer_cfg):
        super(GraphClassificationModelWrapper, self).__init__()
        self.model = model
        self.optimizer_cfg = optimizer_cfg

    def train_step(self, batch):
        return self.default_loss_fn(self.model(batch), batch.y)

    def val_step(self, batch):
        pred = self.model(batch)
        y = batch.y
        val_loss = self.default_loss_fn(pred, y)
        metric = self.evaluate(pred, y, metric="auto")
        self.note("val_loss", val_loss)
        self.note("val_metric", metric)

    def test_step(self, batch):
        pred = self.model(batch)
        y = batch.y
        test_loss = self.default_loss_fn(pred, y)
        metric = self.evaluate(pred, y, metric="auto")
        self.note("test_loss", test_loss)
        self.note("test_metric", metric)

    def setup_optimizer(self):
        cfg = self.optimizer_cfg
        return torch.optim.Adam(self.model.parameters(), lr=cfg["lr"], weight_decay=cfg["weight_decay"])
class GraphEmbeddingModelWrapper(EmbeddingModelWrapper):
    """Model wrapper for unsupervised whole-graph embedding.

    ``train_step`` returns graph embeddings produced by the model;
    ``test_step`` scores them with an SVM against the provided labels.
    """

    def __init__(self, model):
        super(GraphEmbeddingModelWrapper, self).__init__()
        self.model = model

    def train_step(self, batch):
        # Materialize loader/dataset inputs into a plain list of graphs;
        # any other input is forwarded to the model unchanged.
        # (Idiom fix: single isinstance with a tuple instead of two
        # isinstance calls joined with ``or``.)
        if isinstance(batch, (DataLoader, MultiGraphDataset)):
            graphs = [x for x in batch]
        else:
            graphs = batch
        emb = self.model(graphs)
        return emb

    def test_step(self, batch):
        x, y = batch
        return evaluate_graph_embeddings_using_svm(x, y)
class HeterogeneousGNNModelWrapper(ModelWrapper):
    """Supervised model wrapper for heterogeneous-graph node classification.

    Splits follow the heterogeneous convention: ``train_node`` /
    ``valid_node`` / ``test_node`` index the labeled nodes of each split.
    """

    def __init__(self, model, optimizer_cfg):
        super(HeterogeneousGNNModelWrapper, self).__init__()
        self.optimizer_cfg = optimizer_cfg
        self.model = model

    def train_step(self, batch):
        graph = batch.data
        pred = self.model(graph)
        mask = graph.train_node
        return self.default_loss_fn(pred[mask], graph.y[mask])

    def val_step(self, batch):
        graph = batch.data
        pred = self.model(graph)
        mask = graph.valid_node
        loss = self.default_loss_fn(pred[mask], graph.y[mask])
        metric = self.evaluate(pred[mask], graph.y[mask], metric="auto")
        self.note("val_loss", loss.item())
        self.note("val_metric", metric)

    def test_step(self, batch):
        graph = batch.data
        pred = self.model(graph)
        mask = graph.test_node
        loss = self.default_loss_fn(pred[mask], graph.y[mask])
        metric = self.evaluate(pred[mask], graph.y[mask], metric="auto")
        self.note("test_loss", loss.item())
        self.note("test_metric", metric)

    def setup_optimizer(self):
        cfg = self.optimizer_cfg
        # Let the model supply its own optimizer when it defines one.
        if hasattr(self.model, "get_optimizer"):
            model_optim = self.model.get_optimizer(cfg)
            if model_optim is not None:
                return model_optim
        return torch.optim.Adam(self.model.parameters(), lr=cfg["lr"], weight_decay=cfg["weight_decay"])
def get_score(embs, node1, node2, eps=1e-5):
    """Cosine similarity between the embeddings of two nodes.

    ``eps`` keeps the denominator nonzero when an embedding has zero norm.
    """
    u = embs[int(node1)]
    v = embs[int(node2)]
    denom = np.linalg.norm(u) * np.linalg.norm(v) + eps
    return np.dot(u, v) / denom
class CorrectSmoothModelWrapper(NodeClfModelWrapper):
    """Node-classification wrapper that applies Correct-and-Smooth.

    Identical to ``NodeClfModelWrapper`` except that predictions are passed
    through ``model.postprocess`` before evaluation.
    """

    @staticmethod
    def add_args(parser):
        pass

    def __init__(self, model, optimizer_cfg):
        super(CorrectSmoothModelWrapper, self).__init__(model, optimizer_cfg)
        self.model = model
        self.optimizer_cfg = optimizer_cfg

    def val_step(self, subgraph):
        graph = subgraph
        pred = self.model.postprocess(graph, self.model(graph))
        mask = graph.val_mask
        y = graph.y
        loss = self.default_loss_fn(pred[mask], y[mask])
        metric = self.evaluate(pred[mask], y[mask], metric="auto")
        self.note("val_loss", loss.item())
        self.note("val_metric", metric)

    def test_step(self, batch):
        graph = batch
        pred = self.model.postprocess(graph, self.model(graph))
        mask = graph.test_mask
        loss = self.default_loss_fn(pred[mask], graph.y[mask])
        metric = self.evaluate(pred[mask], graph.y[mask], metric="auto")
        self.note("test_loss", loss.item())
        self.note("test_metric", metric)
class MVGRLModelWrapper(UnsupervisedModelWrapper):
    """Contrastive (MVGRL) model wrapper.

    The model scores positive/negative pairs jointly; the first half of the
    logit columns are positives, trained with binary cross-entropy.
    """

    def __init__(self, model, optimizer_cfg):
        super(MVGRLModelWrapper, self).__init__()
        self.model = model
        self.optimizer_cfg = optimizer_cfg
        self.loss_f = nn.BCEWithLogitsLoss()

    def train_step(self, subgraph):
        logits = self.model(subgraph)
        # Positives occupy the first half of the logit columns.
        labels = torch.zeros_like(logits)
        labels[:, : logits.shape[1] // 2] = 1
        return self.loss_f(logits, labels)

    def test_step(self, graph):
        with torch.no_grad():
            pred = self.model(graph)
        result = evaluate_node_embeddings_using_logreg(pred, graph.y, graph.train_mask, graph.test_mask)
        self.note("test_acc", result)

    def setup_optimizer(self):
        cfg = self.optimizer_cfg
        return torch.optim.Adam(self.parameters(), lr=cfg["lr"], weight_decay=cfg["weight_decay"])
class PPRGoModelWrapper(ModelWrapper):
    """Model wrapper for PPRGo node classification.

    Training always receives pre-sampled mini-batches
    ``(x, targets, ppr_scores, y)``. Validation/testing may receive either
    such a mini-batch or a full graph; the full-graph path uses
    ``model.predict`` and the split masks.
    """

    def __init__(self, model, optimizer_cfg):
        super(PPRGoModelWrapper, self).__init__()
        self.optimizer_cfg = optimizer_cfg
        self.model = model

    def train_step(self, batch):
        x, targets, ppr_scores, y = batch
        pred = self.model(x, targets, ppr_scores)
        loss = self.default_loss_fn(pred, y)
        return loss

    def val_step(self, batch):
        if isinstance(batch, list):
            # Mini-batch path: labels come with the batch, so no masking is
            # needed. (Fix: previously this branch fell through to
            # ``graph.y[graph.val_mask]`` with ``graph`` being the list,
            # raising AttributeError.)
            x, targets, ppr_scores, y = batch
            pred = self.model(x, targets, ppr_scores)
        else:
            graph = batch
            pred = self.model.predict(graph)
            y = graph.y[graph.val_mask]
            pred = pred[graph.val_mask]

        loss = self.default_loss_fn(pred, y)
        metric = self.evaluate(pred, y, metric="auto")
        self.note("val_loss", loss.item())
        self.note("val_metric", metric)

    def test_step(self, batch):
        if isinstance(batch, list):
            # Same fix as val_step: a list batch has no ``test_mask``/``y``.
            x, targets, ppr_scores, y = batch
            pred = self.model(x, targets, ppr_scores)
        else:
            graph = batch
            pred = self.model.predict(graph)
            test_mask = graph.test_mask
            pred = pred[test_mask]
            y = graph.y[test_mask]

        loss = self.default_loss_fn(pred, y)
        self.note("test_loss", loss.item())
        # metric="auto" added for consistency with val_step, which passed it
        # explicitly while test_step relied on the default.
        self.note("test_metric", self.evaluate(pred, y, metric="auto"))

    def setup_optimizer(self):
        cfg = self.optimizer_cfg
        return torch.optim.Adam(self.model.parameters(), lr=cfg["lr"], weight_decay=cfg["weight_decay"])
import torch

from .. import ModelWrapper


class SAGNModelWrapper(ModelWrapper):
    """Train/val/test wrapper for the SAGN model.

    Every batch is a triple ``(features, label_embedding, labels)``; the
    label embedding may be ``None``.
    """

    def __init__(self, model, optimizer_cfg):
        super(SAGNModelWrapper, self).__init__()
        self.model = model
        self.optimizer_cfg = optimizer_cfg

    def train_step(self, batch):
        feats, y_emb, labels = batch
        out = self.model(feats, y_emb)
        return self.default_loss_fn(out, labels)

    def val_step(self, batch):
        feats, y_emb, labels = batch
        # print(feats.device, y_emb.device, labels.device, next(self.parameters()).device)
        out = self.model(feats, y_emb)

        self.note("val_loss", self.default_loss_fn(out, labels))
        self.note("val_metric", self.evaluate(out, labels, metric="auto"))

    def test_step(self, batch):
        feats, y_emb, labels = batch
        out = self.model(feats, y_emb)

        self.note("test_loss", self.default_loss_fn(out, labels))
        self.note("test_metric", self.evaluate(out, labels, metric="auto"))

    def pre_stage(self, stage, data_w):
        """Before each self-training stage after the first, run inference over
        the post-stage loader and return the concatenated CPU predictions."""
        device = next(self.model.parameters()).device
        if stage == 0:
            # Nothing to bootstrap from on the very first stage.
            return None

        self.model.eval()
        collected = []
        loader = data_w.post_stage_wrapper()
        with torch.no_grad():
            for batch in loader:
                feats, y_emb, _ = data_w.pre_stage_transform(batch)
                feats = feats.to(device)
                if y_emb is not None:
                    y_emb = y_emb.to(device)
                out = self.model(feats, y_emb)
                collected.append(out.to("cpu"))
        return torch.cat(collected, dim=0)

    def setup_optimizer(self):
        cfg = self.optimizer_cfg
        return torch.optim.Adam(self.model.parameters(), lr=cfg["lr"], weight_decay=cfg["weight_decay"])
-------------------------------------------------------------------------------- 1 | from .gcc_mw import GCCModelWrapper 2 | -------------------------------------------------------------------------------- /cogdl/wrappers/model_wrapper/traffic_prediction/__init__.py: -------------------------------------------------------------------------------- 1 | from .stgcn_mw import STGCNModelWrapper 2 | from .stgat_mw import STGATModelWrapper -------------------------------------------------------------------------------- /cogdl/wrappers/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/cogdl/wrappers/tools/__init__.py -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Cogdl 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==4.2.0 2 | sphinx_rtd_theme==1.0.0 3 | markdown==3.3.4 4 | sphinx-markdown-tables==0.0.15 5 | recommonmark==0.7.1 6 | importlib-metadata<5.0 7 | networkx 8 | matplotlib 9 | tqdm 10 | numpy>=1.21 11 | scipy 12 | gensim>=4.0 13 | grave 14 | scikit_learn==0.24.2 15 | tabulate 16 | optuna==2.4.0 17 | ogb 18 | black 19 | pytest 20 | coveralls 21 | https://download.pytorch.org/whl/cpu/torch-1.7.1%2Bcpu-cp37-cp37m-linux_x86_64.whl 22 | https://pytorch-geometric.com/whl/torch-1.7.0+cpu/torch_scatter-2.0.7-cp37-cp37m-linux_x86_64.whl 23 | 
https://pytorch-geometric.com/whl/torch-1.7.0+cpu/torch_sparse-0.6.9-cp37-cp37m-linux_x86_64.whl 24 | numba 25 | transformers 26 | sentencepiece 27 | sphinx-gallery==0.10.0 28 | geopy==2.2.0 -------------------------------------------------------------------------------- /docs/source/_static/cogdl-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/docs/source/_static/cogdl-logo.png -------------------------------------------------------------------------------- /docs/source/_static/cogdl-training.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/docs/source/_static/cogdl-training.png -------------------------------------------------------------------------------- /docs/source/_static/coo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/docs/source/_static/coo.png -------------------------------------------------------------------------------- /docs/source/_static/csr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/docs/source/_static/csr.png -------------------------------------------------------------------------------- /docs/source/_static/graph.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/docs/source/_static/graph.jpg -------------------------------------------------------------------------------- /docs/source/api/data.rst: -------------------------------------------------------------------------------- 1 | data 2 | ==== 3 | 4 | .. 
automodule:: cogdl.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/datasets.rst: -------------------------------------------------------------------------------- 1 | datasets 2 | ======== 3 | 4 | GATNE dataset 5 | ------------------------------- 6 | 7 | .. automodule:: cogdl.datasets.gatne 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | GCC dataset 13 | ------------------------------- 14 | 15 | .. automodule:: cogdl.datasets.gcc_data 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | 20 | GTN dataset 21 | ------------------------------- 22 | 23 | .. automodule:: cogdl.datasets.gtn_data 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | 28 | HAN dataset 29 | ------------------------------- 30 | 31 | .. automodule:: cogdl.datasets.han_data 32 | :members: 33 | :undoc-members: 34 | :show-inheritance: 35 | 36 | KG dataset 37 | ------------------------------- 38 | 39 | .. automodule:: cogdl.datasets.kg_data 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | Matlab matrix dataset 45 | ------------------------------ 46 | 47 | .. automodule:: cogdl.datasets.matlab_matrix 48 | :members: 49 | :undoc-members: 50 | :show-inheritance: 51 | 52 | OGB dataset 53 | ------------------------------- 54 | 55 | .. automodule:: cogdl.datasets.ogb 56 | :members: 57 | :undoc-members: 58 | :show-inheritance: 59 | 60 | TU dataset 61 | ------------------------------- 62 | 63 | .. automodule:: cogdl.datasets.tu_data 64 | :members: 65 | :undoc-members: 66 | :show-inheritance: 67 | 68 | 69 | Module contents 70 | ------------------------------- 71 | 72 | .. 
automodule:: cogdl.datasets 73 | :members: 74 | :undoc-members: 75 | :show-inheritance: 76 | -------------------------------------------------------------------------------- /docs/source/api/experiments.rst: -------------------------------------------------------------------------------- 1 | experiments 2 | =========== 3 | 4 | .. automodule:: cogdl.experiments 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/layers.rst: -------------------------------------------------------------------------------- 1 | layers 2 | ====== 3 | 4 | .. automodule:: cogdl.layers.gcn_layer 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. automodule:: cogdl.layers.gat_layer 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | .. automodule:: cogdl.layers.sage_layer 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | .. automodule:: cogdl.layers.gin_layer 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | .. automodule:: cogdl.layers.gcnii_layer 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | .. automodule:: cogdl.layers.deepergcn_layer 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | .. automodule:: cogdl.layers.disengcn_layer 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | .. automodule:: cogdl.layers.han_layer 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | 44 | .. automodule:: cogdl.layers.mlp_layer 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: 48 | 49 | .. automodule:: cogdl.layers.pprgo_layer 50 | :members: 51 | :undoc-members: 52 | :show-inheritance: 53 | 54 | .. automodule:: cogdl.layers.rgcn_layer 55 | :members: 56 | :undoc-members: 57 | :show-inheritance: 58 | 59 | .. automodule:: cogdl.layers.saint_layer 60 | :members: 61 | :undoc-members: 62 | :show-inheritance: 63 | 64 | .. 
automodule:: cogdl.layers.sgc_layer 65 | :members: 66 | :undoc-members: 67 | :show-inheritance: 68 | 69 | .. automodule:: cogdl.layers.mixhop_layer 70 | :members: 71 | :undoc-members: 72 | :show-inheritance: 73 | 74 | .. automodule:: cogdl.layers.se_layer 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | -------------------------------------------------------------------------------- /docs/source/api/options.rst: -------------------------------------------------------------------------------- 1 | options 2 | ======= 3 | 4 | .. automodule:: cogdl.options 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/pipelines.rst: -------------------------------------------------------------------------------- 1 | pipelines 2 | ========= 3 | 4 | .. automodule:: cogdl.pipelines 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/api/utils.rst: -------------------------------------------------------------------------------- 1 | utils 2 | ===== 3 | 4 | .. automodule:: cogdl.utils.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | .. automodule:: cogdl.utils.evaluator 10 | :members: 11 | :undoc-members: 12 | :show-inheritance: 13 | 14 | .. automodule:: cogdl.utils.sampling 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | 19 | .. automodule:: cogdl.utils.graph_utils 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | .. automodule:: cogdl.utils.link_prediction_utils 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | 29 | .. automodule:: cogdl.utils.ppr_utils 30 | :members: 31 | :undoc-members: 32 | :show-inheritance: 33 | 34 | .. automodule:: cogdl.utils.prone_utils 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | .. 
automodule:: cogdl.utils.srgcn_utils 40 | :members: 41 | :undoc-members: 42 | :show-inheritance: 43 | -------------------------------------------------------------------------------- /docs/source/examples/README.rst: -------------------------------------------------------------------------------- 1 | Code Gallery 2 | ================== 3 | 4 | Below is a code of examples -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | Install 2 | ======= 3 | 4 | - Python version >= 3.7 5 | - PyTorch version >= 1.7.1 6 | 7 | Please follow the instructions here to install PyTorch (https://github.com/pytorch/pytorch#installation). 8 | 9 | When PyTorch has been installed, cogdl can be installed using pip as follows: 10 | 11 | .. code-block:: bash 12 | 13 | pip install cogdl 14 | 15 | 16 | Install from source via: 17 | 18 | .. code-block:: bash 19 | 20 | pip install git+https://github.com/thudm/cogdl.git 21 | 22 | 23 | Or clone the repository and install with the following commands: 24 | 25 | .. code-block:: bash 26 | 27 | git clone git@github.com:THUDM/cogdl.git 28 | cd cogdl 29 | pip install -e . 30 | 31 | 32 | If you want to use the modules from PyTorch Geometric (PyG), 33 | you can follow the instructions to install PyTorch Geometric (https://github.com/rusty1s/pytorch_geometric/#installation). 34 | -------------------------------------------------------------------------------- /docs/source/tutorial_cn/examples/README.rst: -------------------------------------------------------------------------------- 1 | 示例代码 2 | ================== 3 | 4 | Below is a code of examples -------------------------------------------------------------------------------- /docs/source/tutorial_cn/index.rst: -------------------------------------------------------------------------------- 1 | .. 
CogDL documentation master file 2 | 3 | 中文教程 4 | ======================== 5 | .. toctree:: 6 | :maxdepth: 2 7 | :caption: 快速开始 8 | 9 | install_cn 10 | quickstart_cn 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | :caption: 教程 15 | 16 | graph_cn 17 | model_cn 18 | training_cn 19 | custom_dataset_cn 20 | custom_gnn_cn 21 | results_cn 22 | auto_examples/index 23 | 24 | 25 | -------------------------------------------------------------------------------- /docs/source/tutorial_cn/install_cn.rst: -------------------------------------------------------------------------------- 1 | 安装 2 | ======= 3 | 4 | - Python version >= 3.7 5 | - PyTorch version >= 1.7.1 6 | 7 | 请按照此处的说明安装 `PyTorch `_ 8 | 9 | 安装 PyTorch 后,可以使用 pip命令安装 cogdl,如下所示: 10 | 11 | .. code-block:: bash 12 | 13 | pip install cogdl 14 | 15 | 或者Install from source via: 16 | 17 | .. code-block:: bash 18 | 19 | pip install git+https://github.com/thudm/cogdl.git 20 | 21 | 或者clone仓库并使用以下命令进行安装: 22 | 23 | .. code-block:: bash 24 | 25 | git clone git@github.com:THUDM/cogdl.git 26 | cd cogdl 27 | pip install -e . 
28 | 29 | 如果您想使用 PyTorch Geometric (PyG) 中的模块,您可以按照此处的说明安装 `PyTorch Geometric `_ 31 | 32 | 33 | -------------------------------------------------------------------------------- /examples/GRB/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/examples/GRB/__init__.py -------------------------------------------------------------------------------- /examples/GRB/attack/__init__.py: -------------------------------------------------------------------------------- 1 | """Attack Module for implementation of graph adversarial attacks""" 2 | from .base import Attack, InjectionAttack, ModificationAttack 3 | -------------------------------------------------------------------------------- /examples/GRB/attack/injection/__init__.py: -------------------------------------------------------------------------------- 1 | """Graph injection attacks""" 2 | from .fgsm import FGSM 3 | from .pgd import PGD 4 | from .rand import RAND 5 | from .speit import SPEIT 6 | from .tdgia import TDGIA 7 | -------------------------------------------------------------------------------- /examples/GRB/attack/modification/__init__.py: -------------------------------------------------------------------------------- 1 | from .dice import DICE 2 | from .fga import FGA 3 | from .flip import FLIP 4 | from .nea import NEA 5 | from .rand import RAND 6 | from .stack import STACK 7 | from .pgd import PGD 8 | from .prbcd import PRBCD 9 | -------------------------------------------------------------------------------- /examples/GRB/defense/__init__.py: -------------------------------------------------------------------------------- 1 | from .gcnsvd import GCNSVD 2 | from .robustgcn import RobustGCN 3 | from .gnnguard import GCNGuard, GATGuard 4 | 5 | __all__ = ["GCNSVD", "RobustGCN", "GCNGuard", "GATGuard"] 6 | 
-------------------------------------------------------------------------------- /examples/VRGCN/README.md: -------------------------------------------------------------------------------- 1 | # CogDL examples for ogbn-arxiv 2 | 3 | CogDL implementation of VRGCN for [ogbn-arxiv](https://ogb.stanford.edu/docs/nodeprop/#ogbn-arxiv): 4 | 5 | > Jianfei Chen, Jun Zhu, Le Song. Stochastic Training of Graph Convolutional Networks with Variance Reduction. [Paper in arXiv](https://arxiv.org/abs/1710.10568). In ICML'2018. 6 | 7 | Requires CogDL 0.5-alpha0 or later versions. 8 | 9 | 10 | ## Training & Evaluation 11 | 12 | ``` 13 | # Run with model with default config 14 | python main.py 15 | ``` 16 | For more hyper-parameters, please find them in the `main.py`. 17 | 18 | ## Results 19 | 20 | Here are the results over 10 runs which are comparable with OGB official results reported in the leaderboard. 21 | 22 | | Method | Test Accuracy | Validation Accuracy | #Parameters | 23 | |:-------------------------------:|:---------------:|:-------------------:|:-----------:| 24 | | VRGCN | 0.7224 ± 0.0042 | 0.7260 ± 0.0030 | 44,328 | 25 | -------------------------------------------------------------------------------- /examples/bgrl/README.md: -------------------------------------------------------------------------------- 1 | # Large-Scale Representation Learning on Graphs via Bootstrapping (BGRL) with CogDL 2 | This is an attempt to implement BGRL with CogDL for graph representation. The authors' implementation can be found [here](https://github.com/nerdslab/bgrl). Another version of the implementation from [Namkyeong](https://github.com/Namkyeong/BGRL_Pytorch) can also be used as a reference. 3 | 4 | ## Hyperparameters 5 | Some optional parameters are allowed to be added to the training process. 6 | 7 | `layers`: the dimension for each layer of GNN. 8 | 9 | `pred_hid`: the hidden dimension of the predict moudle. 10 | 11 | `aug_params`: the ratio of pollution for graph augmentation. 
12 | 13 | ## Usage 14 | You can find their datasets [here](https://pan.baidu.com/s/15RyvXD2G-xwGM9jrT7IDLQ?pwd=85vv) and put them in the path `./data`. Experiments on their datasets with given hyperparameters can be achieved by the following commands. 15 | 16 | ### Wiki-CS 17 | ``` 18 | python train.py --name WikiCS --aug_params 0.2 0.1 0.2 0.3 --layers 512 256 --pred_hid 512 --lr 0.0001 -epochs 10000 -cs 250 19 | ``` 20 | ### Amazon Computers 21 | ``` 22 | python train.py --name computers --aug_params 0.2 0.1 0.5 0.4 --layers 256 128 --pred_hid 512 --lr 0.0005 --epochs 10000 -cs 250 23 | ``` 24 | ### Amazon Photo 25 | ``` 26 | python train.py --name photo --aug_params 0.1 0.2 0.4 0.1 --layers 512 256 --pred_hid 512 --lr 0.0001 --epochs 10000 -cs 250 27 | ``` 28 | ### Coauthor CS 29 | ``` 30 | python train.py --name cs --aug_params 0.3 0.4 0.3 0.2 --layers 512 256 --pred_hid 512 --lr 0.00001 --epochs 10000 -cs 250 31 | ``` 32 | ### Coauthor Physics 33 | ``` 34 | python train.py --name physics --aug_params 0.1 0.4 0.4 0.1 --layers 256 128 --pred_hid 512 --lr 0.00001 --epochs 10000 -cs 250 35 | ``` 36 | 37 | ## Performance 38 | The results on five datasets shown on the table. 
class GCN(BaseModel):
    r"""The GCN model from the `"Semi-Supervised Classification with Graph Convolutional Networks"
    <https://arxiv.org/abs/1609.02907>`_ paper

    Args:
        in_feats (int) : Number of input features.
        hidden_size (int) : The dimension of node representation.
        out_feats (int) : Number of classes.
        num_layers (int) : Number of GCN layers.
        dropout (float) : Dropout rate for model training.
        activation (str) : Activation used after every layer except the last.
        residual (bool) : Whether hidden layers use residual connections.
        norm : Normalization applied in hidden layers (forwarded to GCNLayer).
    """

    def __init__(
        self,
        in_feats,
        hidden_size,
        out_feats,
        num_layers,
        dropout=0.0,
        activation="relu",
        residual=False,
        norm=None,
    ):
        super(GCN, self).__init__()
        # Layer i maps shapes[i] -> shapes[i + 1]; the last layer drops
        # dropout/residual/norm/activation so it emits raw class scores.
        shapes = [in_feats] + [hidden_size] * (num_layers - 1) + [out_feats]
        self.layers = nn.ModuleList(
            [
                GCNLayer(
                    shapes[i],
                    shapes[i + 1],
                    dropout=dropout if i != num_layers - 1 else 0,
                    residual=residual if i != num_layers - 1 else None,
                    norm=norm if i != num_layers - 1 else None,
                    activation=activation if i != num_layers - 1 else None,
                )
                for i in range(num_layers)
            ]
        )
        self.num_layers = num_layers

    def reset_parameters(self):
        """Re-initialize every GCN layer."""
        for layer in self.layers:
            layer.reset_parameters()

    def embed(self, graph):
        """Return node embeddings from all but the final (classification) layer."""
        graph.sym_norm()
        h = graph.x
        for i in range(self.num_layers - 1):
            h = self.layers[i](graph, h)
        return h

    def forward(self, graph):
        """Run the full stack and return log-softmax class scores per node."""
        graph.sym_norm()
        h = graph.x
        for i in range(self.num_layers):
            h = self.layers[i](graph, h)
        return F.log_softmax(h, dim=-1)
class MLP(BaseModel):
    """BaseModel adapter around cogdl's MLP layer.

    Accepts either a ``Graph`` (its node feature matrix ``x`` is used) or a
    plain feature tensor, and returns log-softmax class scores.

    Args:
        in_feats (int): Number of input features.
        out_feats (int): Number of output classes.
        hidden_size (int): Hidden dimension (forwarded to MLPLayer).
        num_layers (int): Number of layers (forwarded to MLPLayer).
        dropout (float): Dropout rate (forwarded to MLPLayer).
        activation (str): Activation name (forwarded to MLPLayer).
        norm: Normalization setting (forwarded to MLPLayer).
        act_first (bool): Forwarded to MLPLayer.
        bias (bool): Whether linear layers use a bias term.
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        hidden_size,
        num_layers,
        dropout=0.0,
        activation="relu",
        norm=None,
        act_first=False,
        bias=True,
    ):
        super(MLP, self).__init__()
        # All configuration is delegated to the underlying cogdl MLP layer.
        self.nn = MLPLayer(in_feats, out_feats, hidden_size, num_layers, dropout, activation, norm, act_first, bias)

    def reset_parameters(self):
        self.nn.reset_parameters()

    def forward(self, x):
        # A whole Graph may be passed; only its feature matrix is consumed.
        if isinstance(x, Graph):
            x = x.x
        return F.log_softmax(self.nn(x), dim=-1)

    def predict(self, data):
        return self.forward(data.x)
def mask_change(id_mask, node_size):
    """Convert a collection of node indices into a boolean mask.

    Args:
        id_mask: 1-D tensor (or sequence) of node indices to mark True.
        node_size (int): Total number of nodes, i.e. the mask length.

    Returns:
        torch.Tensor of dtype bool and shape (node_size,), True at the
        given indices and False elsewhere.
    """
    mask = torch.zeros(node_size, dtype=torch.bool)
    # Vectorized index assignment instead of a per-index Python loop.
    mask[id_mask] = True
    return mask
def prepare_folder(name, model_name):
    """Create and return an empty results directory for one dataset/model run.

    Any previous contents at ./model_results/{name}/{model_name} are wiped,
    so every run starts from a clean folder.
    """
    result_dir = f'./model_results/{name}/{model_name}'
    if os.path.exists(result_dir):
        # Drop stale results left over from an earlier run.
        shutil.rmtree(result_dir)
    os.makedirs(result_dir)
    return result_dir
clip_grad_norm=1., 13 | beta1=0.9, 14 | beta2=0.999, 15 | n_warmup_steps=0.1, 16 | devices=[0], 17 | epochs=100, 18 | pretrain=True, # must 19 | no_test=True, # must 20 | unsup=True, # must 21 | do_valid=False, # must 22 | do_test=False # must 23 | ) 24 | 25 | # freeze 26 | # experiment(dataset="usa-airport", 27 | # model="gcc", 28 | # epochs=0, # must 29 | # freeze=True, # must 30 | # load_model_path="./saved/already_trained_model/gcc_pretrain.pt", # must 31 | # # load_model_path="./saved/Pretrain_academic_dblp-netrep_dblp-snap_facebook_imdb_livejournal_gin_layer_5_lr_0.005_decay_1e-05_bsz_32_hid_64_samples_2000_nce_t_0.07_nce_k_16384_rw_hops_256_restart_prob_0.8_aug_rwr_ft_False_deg_16_pos_32_momentum_0.999/gcc_pretrain.pt" 32 | # ) 33 | 34 | # finetune 35 | # experiment(dataset="usa-airport", 36 | # model="gcc", 37 | # epochs=30, # must 38 | # finetune=True, # must 39 | # # load_model_path="./saved/Pretrain_academic_dblp-netrep_dblp-snap_facebook_imdb_livejournal_gin_layer_5_lr_0.005_decay_1e-05_bsz_32_hid_64_samples_2000_nce_t_0.07_nce_k_16384_rw_hops_256_restart_prob_0.8_aug_rwr_ft_False_deg_16_pos_32_momentum_0.999/gcc_pretrain.pt" 40 | # load_model_path="./saved/already_trained_model/gcc_pretrain.pt" #must 41 | # ) 42 | -------------------------------------------------------------------------------- /examples/graphmae/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Zhenyu Hou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this 
permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/graphmae/graphmae/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/examples/graphmae/graphmae/__init__.py -------------------------------------------------------------------------------- /examples/graphmae/graphmae/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/examples/graphmae/graphmae/datasets/__init__.py -------------------------------------------------------------------------------- /examples/graphmae/graphmae/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .edcoder import PreModel 2 | 3 | 4 | def build_model(args): 5 | num_heads = args.num_heads 6 | num_out_heads = args.num_out_heads 7 | num_hidden = args.num_hidden 8 | num_layers = args.num_layers 9 | residual = args.residual 10 | attn_drop = args.attn_drop 11 | in_drop = args.in_drop 12 | norm = args.norm 13 | negative_slope = args.negative_slope 14 | encoder_type = args.encoder 15 | decoder_type = args.decoder 16 | mask_rate = args.mask_rate 17 | drop_edge_rate = args.drop_edge_rate 18 | 
import torch
import torch.nn.functional as F


def sce_loss(x, y, alpha=3):
    """Scaled cosine error between row-wise L2-normalized x and y.

    Computes mean_i (1 - cos(x_i, y_i)) ** alpha over the last dimension.
    """
    x_hat = F.normalize(x, p=2, dim=-1)
    y_hat = F.normalize(y, p=2, dim=-1)

    cos_sim = (x_hat * y_hat).sum(dim=-1)
    return (1 - cos_sim).pow(alpha).mean()


def sig_loss(x, y):
    """Sigmoid loss on cosine similarity: mean_i sigmoid(-cos(x_i, y_i))."""
    x_hat = F.normalize(x, p=2, dim=-1)
    y_hat = F.normalize(y, p=2, dim=-1)

    cos_sim = (x_hat * y_hat).sum(1)
    return torch.sigmoid(-cos_sim).mean()
/examples/graphmae/imgs/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/examples/graphmae/imgs/compare.png -------------------------------------------------------------------------------- /examples/graphmae/imgs/fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/examples/graphmae/imgs/fig.png -------------------------------------------------------------------------------- /examples/graphmae/requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | tqdm 3 | tensorboardX 4 | scikit-learn 5 | ogb -------------------------------------------------------------------------------- /examples/graphmae/scripts/run_graph.sh: -------------------------------------------------------------------------------- 1 | dataset=$1 2 | device=$2 3 | 4 | [ -z "${dataset}" ] && dataset="mutag" 5 | [ -z "${device}" ] && device=-1 6 | 7 | python main_graph.py \ 8 | --device $device \ 9 | --dataset $dataset \ 10 | --mask_rate 0.5 \ 11 | --encoder "gin" \ 12 | --decoder "gin" \ 13 | --in_drop 0.2 \ 14 | --attn_drop 0.1 \ 15 | --num_layers 2 \ 16 | --num_hidden 512 \ 17 | --num_heads 2 \ 18 | --max_epoch 100 \ 19 | --max_epoch_f 0 \ 20 | --lr 0.00015 \ 21 | --weight_decay 0.0 \ 22 | --activation prelu \ 23 | --optimizer adam \ 24 | --drop_edge_rate 0.0 \ 25 | --loss_fn "sce" \ 26 | --seeds 0 1 2 3 4 \ 27 | --linear_prob \ 28 | --use_cfg \ 29 | -------------------------------------------------------------------------------- /examples/graphmae/scripts/run_inductive.sh: -------------------------------------------------------------------------------- 1 | dataset=$1 2 | device=$2 3 | 4 | [ -z "${dataset}" ] && dataset="ppi" 5 | [ -z "${device}" ] && device=-1 6 | 7 | 8 | python 
main_inductive.py \ 9 | --device $device \ 10 | --dataset $dataset \ 11 | --mask_rate 0.5 \ 12 | --encoder "gat" \ 13 | --decoder "gat" \ 14 | --in_drop 0.2 \ 15 | --attn_drop 0.1 \ 16 | --num_layers 3 \ 17 | --num_hidden 2048 \ 18 | --num_heads 4 \ 19 | --max_epoch 1000 \ 20 | --max_epoch_f 500 \ 21 | --lr 0.001 \ 22 | --weight_decay 0 \ 23 | --lr_f 0.005 \ 24 | --weight_decay_f 0 \ 25 | --activation prelu \ 26 | --optimizer adam \ 27 | --drop_edge_rate 0.0 \ 28 | --loss_fn "sce" \ 29 | --seeds 0 1 2 3 4 \ 30 | --replace_rate 0.0 \ 31 | --alpha_l 3 \ 32 | --linear_prob \ 33 | --use_cfg \ 34 | -------------------------------------------------------------------------------- /examples/graphmae/scripts/run_transductive.sh: -------------------------------------------------------------------------------- 1 | dataset=$1 2 | device=$2 3 | 4 | [ -z "${dataset}" ] && dataset="cora" 5 | [ -z "${device}" ] && device=-1 6 | 7 | 8 | python main_transductive.py \ 9 | --device $device \ 10 | --dataset $dataset \ 11 | --mask_rate 0.5 \ 12 | --encoder "gat" \ 13 | --decoder "gat" \ 14 | --in_drop 0.2 \ 15 | --attn_drop 0.1 \ 16 | --num_layers 2 \ 17 | --num_hidden 512 \ 18 | --num_heads 4 \ 19 | --max_epoch 1500 \ 20 | --max_epoch_f 300 \ 21 | --lr 0.001 \ 22 | --weight_decay 0 \ 23 | --lr_f 0.01 \ 24 | --weight_decay_f 1e-4 \ 25 | --activation prelu \ 26 | --optimizer adam \ 27 | --drop_edge_rate 0.0 \ 28 | --loss_fn "sce" \ 29 | --seeds 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 \ 30 | --replace_rate 0.05 \ 31 | --alpha_l 3 \ 32 | --linear_prob \ 33 | --scheduler \ 34 | --use_cfg \ 35 | -------------------------------------------------------------------------------- /examples/graphmae2/asserts/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/THUDM/CogDL/281f47424d58844b167ccbe41d9829c1f77689f8/examples/graphmae2/asserts/overview.png 
# GraphMAE2 hyper-parameters for Cora (full-batch transductive training).
# NOTE: the original file listed `num_layers: 2` twice; YAML mappings must
# have unique keys, so the duplicate has been removed (same value, 2).
lr: 0.001
lr_f: 0.025            # learning rate for the linear-probe stage
num_hidden: 1024
num_heads: 8
num_out_heads: 1
num_layers: 2
weight_decay: 2e-4
weight_decay_f: 1e-4   # weight decay for the linear-probe stage
max_epoch: 2000
max_epoch_f: 300
mask_rate: 0.5
encoder: gat
decoder: gat
activation: prelu
attn_drop: 0.1
linear_prob: True
in_drop: 0.2
loss_fn: sce
drop_edge_rate: 0.0
optimizer: adam
replace_rate: 0.1
alpha_l: 4
scheduler: True
remask_method: fixed
momentum: 0
lam: 0.1
# GraphMAE2 hyper-parameters for ogbn-arxiv (mini-batch training).
# NOTE: the original file listed `num_layers: 4` twice; YAML mappings must
# have unique keys, so the duplicate has been removed (same value, 4).
lr: 0.0025
lr_f: 0.005            # learning rate for the linear-probe stage
num_hidden: 1024
num_heads: 8
num_out_heads: 1
num_layers: 4
weight_decay: 0.06
weight_decay_f: 1e-4   # weight decay for the linear-probe stage
max_epoch: 60
max_epoch_f: 1000
batch_size: 512
batch_size_f: 256      # batch size for the linear-probe stage
mask_rate: 0.5
encoder: gat
decoder: gat
activation: prelu
attn_drop: 0.1
linear_prob: True
in_drop: 0.2
loss_fn: sce
drop_edge_rate: 0.5
optimizer: adamw
alpha_l: 6
scheduler: True
remask_method: random
momentum: 0.996
lam: 10.0
delayed_ema_epoch: 40
num_remasking: 3
# GraphMAE2 hyper-parameters for ogbn-products (mini-batch training).
# NOTE: the original file listed `num_layers: 4` twice; YAML mappings must
# have unique keys, so the duplicate has been removed (same value, 4).
lr: 0.002
lr_f: 0.001            # learning rate for the linear-probe stage
num_hidden: 1024
num_heads: 4
num_out_heads: 1
num_layers: 4
weight_decay: 0.04
weight_decay_f: 0      # weight decay for the linear-probe stage
max_epoch: 20
max_epoch_f: 1000
batch_size: 512
batch_size_f: 256      # batch size for the linear-probe stage
mask_rate: 0.5
encoder: gat
decoder: gat
activation: prelu
attn_drop: 0.2
linear_prob: True
in_drop: 0.2
loss_fn: sce
drop_edge_rate: 0.5
optimizer: adamw
alpha_l: 3
scheduler: True
remask_method: random
momentum: 0.996
lam: 5.0
delayed_ema_epoch: 0
num_remasking: 3
import logging

import torch

from cogdl.utils import to_undirected
from cogdl.datasets import build_dataset_from_path
from sklearn.preprocessing import StandardScaler

# NOTE: the original file carried accidental auto-inserted imports
# (`from typing import Counter`, `from xml.sax.handler import
# feature_string_interning`, unused numpy/scipy/sklearn helpers);
# the whole module is visible here and none of them were used, so
# they have been removed.


def load_small_dataset(data_dir, dataset_name):
    """Load a small benchmark graph dataset.

    Parameters:
        data_dir: root directory the dataset is stored in / downloaded to.
        dataset_name: CogDL dataset identifier (e.g. "cora", "ogbn-arxiv").

    Returns:
        (graph, (num_features, num_classes)) where graph is the first
        graph of the dataset.
    """
    dataset = build_dataset_from_path(data_dir, dataset=dataset_name)
    graph = dataset[0]
    if dataset_name == "ogbn-arxiv":
        # ogbn-arxiv gets standardized features; no self-loops added here.
        graph.x = scale_feats(graph.x)
    else:
        graph.add_remaining_self_loops()

    num_features = graph.x.shape[1]
    num_classes = dataset.num_classes
    return graph, (num_features, num_classes)


def preprocess(graph):
    """Symmetrize the edge index and add remaining self-loops, in place.

    Returns the same (mutated) graph for call chaining.
    """
    feat = graph.x
    edge_index = graph.edge_index
    edge_index = to_undirected(edge_index, num_nodes=graph.num_nodes)
    graph.edge_index = edge_index
    graph.x = feat

    graph.add_remaining_self_loops()
    return graph


def scale_feats(x):
    """Standardize node features column-wise to zero mean / unit variance.

    Accepts a CPU torch tensor, runs sklearn's StandardScaler on it and
    returns a new float32 tensor.
    """
    logging.info("### scaling features ###")
    scaler = StandardScaler()
    feats = x.numpy()
    scaler.fit(feats)
    feats = torch.from_numpy(scaler.transform(feats)).float()
    return feats
# Run GraphMAE2 full-batch pretraining/evaluation.
# Usage: bash run_fullbatch.sh [dataset] [device]
#   dataset - dataset name (default: "cora")
#   device  - GPU index (default: 0)
dataset=$1
device=$2

# Fall back to defaults when positional arguments are omitted.
[ -z "${dataset}" ] && dataset="cora"
[ -z "${device}" ] && device=0

# The requested GPU is pinned via CUDA_VISIBLE_DEVICES, so inside the
# process the (only) visible device is always index 0.
CUDA_VISIBLE_DEVICES=$device \
python main_full_batch.py \
    --device 0 \
    --dataset $dataset \
    --mask_method "random" \
    --remask_method "fixed" \
    --mask_rate 0.5 \
    --in_drop 0.2 \
    --attn_drop 0.1 \
    --num_layers 2 \
    --num_dec_layers 1 \
    --num_hidden 256 \
    --num_heads 4 \
    --num_out_heads 1 \
    --encoder "gat" \
    --decoder "gat" \
    --max_epoch 1000 \
    --max_epoch_f 300 \
    --lr 0.001 \
    --weight_decay 0.04 \
    --lr_f 0.005 \
    --weight_decay_f 1e-4 \
    --activation "prelu" \
    --loss_fn "sce" \
    --alpha_l 3 \
    --scheduler \
    --seeds 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 \
    --lam 0.5 \
    --linear_prob \
    --data_dir "./dataset" \
    --use_cfg
import jittor as jt
jt.flags.use_cuda = 1

from jittor import nn, Module

from cogdl.layers.jittor import GCNLayer
from cogdl.datasets.planetoid_data import CoraDataset


def tensor2jit(x):
    """Convert a torch tensor to a jittor array via a CPU numpy round-trip."""
    return jt.array(x.cpu().numpy())


class GCN(Module):
    """Two-layer GCN for node classification, built on CogDL's jittor GCNLayer."""

    def __init__(self, in_feats, hidden_size, out_feats, dropout=0.5):
        super(GCN, self).__init__()
        self.in_feats = in_feats
        # First layer applies dropout + ReLU internally; second layer emits logits.
        self.conv1 = GCNLayer(in_feats, hidden_size, dropout=dropout, activation="relu")
        self.conv2 = GCNLayer(hidden_size, out_feats)

    def execute(self, graph):
        # Symmetric normalization of the adjacency before message passing.
        graph.sym_norm()
        x = tensor2jit(graph.x)
        out = self.conv1(graph, x)
        out = self.conv2(graph, out)
        return out


def train(model, dataset):
    """Full-batch training loop with per-epoch evaluation on all splits."""
    graph = dataset[0]

    optimizer = nn.AdamW(model.parameters(), lr=0.01)
    loss_function = nn.CrossEntropyLoss()

    train_mask = tensor2jit(graph.train_mask)
    test_mask = tensor2jit(graph.test_mask)
    val_mask = tensor2jit(graph.val_mask)
    labels = tensor2jit(graph.y)

    for epoch in range(100):
        model.train()
        output = model(graph)
        loss = loss_function(output[train_mask], labels[train_mask])
        # jittor optimizers take the loss directly (backward is implicit).
        optimizer.step(loss)

        model.eval()
        with jt.no_grad():
            output = model(graph)
            # jittor argmax returns (indices, values); [0] keeps the indices.
            pred = output.argmax(1)[0]
            train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
            val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
            test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

        # Fix: train_acc was computed every epoch but never reported.
        print(
            f"Epoch:{epoch}, loss:{loss:.3f}, train_acc:{train_acc:.3f}, "
            f"val_acc:{val_acc:.3f}, test_acc:{test_acc:.3f}"
        )


if __name__ == "__main__":
    dataset = CoraDataset()
    model = GCN(in_feats=dataset.num_features, hidden_size=64, out_feats=dataset.num_classes, dropout=0.5)

    train(model, dataset)
import torch.nn as nn
import torch.nn.functional as F

from cogdl import experiment
from cogdl.layers import GCNLayer
from cogdl.models import BaseModel
from cogdl.datasets.planetoid_data import CoraDataset


class GCN(BaseModel):
    """Two-layer GCN with dropout applied before each convolution."""

    def __init__(self, in_feats, hidden_size, out_feats, dropout):
        super(GCN, self).__init__()
        self.conv1 = GCNLayer(in_feats, hidden_size)
        self.conv2 = GCNLayer(hidden_size, out_feats)
        self.dropout = nn.Dropout(dropout)

    def forward(self, graph):
        # Symmetrically normalize the adjacency before propagation.
        graph.sym_norm()
        x = graph.x
        hidden = self.conv1(graph, self.dropout(x))
        hidden = F.relu(hidden)
        logits = self.conv2(graph, self.dropout(hidden))
        return logits


if __name__ == "__main__":
    dataset = CoraDataset()
    model = GCN(in_feats=dataset.num_features, hidden_size=64, out_feats=dataset.num_classes, dropout=0.1)
    experiment(dataset="cora", model=model, dw="node_classification_dw", mw="node_classification_mw")
import numpy as np
from cogdl import pipeline

# Create an embedding-generation pipeline; the model name and its
# hyper-parameters are passed straight to this API.
emb_generator = pipeline("generate-emb", model="prone")

# Embed an unweighted graph given as an (num_edges, 2) edge list.
edge_index = np.array([[0, 1], [0, 2], [0, 3], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]])
embeddings = emb_generator(edge_index)
print(embeddings)

# Embed the same graph with per-edge weights.
edge_weight = np.array([0.1, 0.3, 1.0, 0.8, 0.5, 0.2, 0.1, 0.5, 2.0])
embeddings = emb_generator(edge_index, edge_weight)
print(embeddings)

# Embedding generation with an unsupervised GNN: the model name and
# num_features (plus hyper-parameters) are required up front, and node
# features are supplied at call time.
emb_generator = pipeline("generate-emb", model="mvgrl", return_model=True, num_features=8, hidden_size=4)
embeddings = emb_generator(edge_index, x=np.random.randn(8, 8))
print(embeddings)
from cogdl import experiment

# basic usage: train the default GCN configuration on Cora
experiment(dataset="cora", model="gcn")

# set other hyper-parameters as keyword arguments
experiment(dataset="cora", model="gcn", hidden_size=32, epochs=200)

# run over multiple models on different seeds (all combinations are run)
experiment(dataset="cora", model=["gcn", "gat"], seed=[0, 1])

# run on different splits
experiment(dataset="chameleon", model="gcn", seed=[0, 1], split=[0, 1])


def search_space(trial):
    """Hyper-parameter search space consumed by ``experiment``.

    ``trial`` is presumably an optuna ``Trial`` (its ``suggest_*`` API
    matches) -- TODO confirm against CogDL's search backend.  Returns a
    dict mapping hyper-parameter names to sampled values.
    """
    return {
        "lr": trial.suggest_categorical("lr", [1e-3, 5e-3, 1e-2]),
        "hidden_size": trial.suggest_categorical("hidden_size", [32, 64, 128]),
        "dropout": trial.suggest_uniform("dropout", 0.5, 0.8),
    }


# hyper-parameter search: n_trials configurations sampled from search_space
experiment(dataset="cora", model="gcn", seed=[1, 2], search_space=search_space, n_trials=3)
abstract="To enrich language models with domain knowledge is crucial but difficult. Based on the world's largest public academic graph Open Academic Graph (OAG), we pre-train an academic language model, namely OAG-BERT, which integrates massive heterogeneous entities including paper, author, concept, venue, and affiliation. To better endow OAG-BERT with the ability to capture entity information, we develop novel pre-training strategies including heterogeneous entity type embedding, entity-aware 2D positional encoding, and span-aware entity masking. For zero-shot inference, we design a special decoding strategy to allow OAG-BERT to generate entity names from scratch. We evaluate the OAG-BERT on various downstream academic tasks, including NLP benchmarks, zero-shot entity inference, heterogeneous graph link prediction, and author name disambiguation. Results demonstrate the effectiveness of the proposed pre-training approach to both comprehending academic texts and modeling knowledge from heterogeneous entities. OAG-BERT has been deployed to multiple real-world applications, such as reviewer recommendations for NSFC (National Nature Science Foundation of China) and paper tagging in the AMiner system. It is also available to the public through the CogDL package." 
import torch
from cogdl.oag import oagbert

# Load the pretrained OAG-BERT tokenizer and encoder, then switch the
# encoder to inference mode.
tokenizer, bert_model = oagbert()
bert_model.eval()

sentences = ["CogDL is developed by KEG, Tsinghua.", "OAGBert is developed by KEG, Tsinghua."]
# Tokenize the batch into padded PyTorch tensors.
batch = tokenizer(sentences, return_tensors="pt", padding=True)

# Encode without tracking gradients and show the first output tensor.
with torch.no_grad():
    model_out = bert_model(**batch)

print(model_out[0])
6 | authors = ["Jacob Devlin", "Ming-Wei Chang", "Kenton Lee", "Kristina Toutanova"] 7 | venue = "north american chapter of the association for computational linguistics" 8 | affiliations = ["Google"] 9 | concepts = ["language model", "natural language inference", "question answering"] 10 | # encode paper 11 | paper_info = model.encode_paper( 12 | title=title, 13 | abstract=abstract, 14 | venue=venue, 15 | authors=authors, 16 | concepts=concepts, 17 | affiliations=affiliations, 18 | reduction="max", 19 | ) 20 | 21 | for name, content in paper_info.items(): 22 | print(name) 23 | print(content) 24 | -------------------------------------------------------------------------------- /examples/ogb/arxiv/README.md: -------------------------------------------------------------------------------- 1 | # CogDL examples for ogbn-arxiv 2 | 3 | CogDL implementation of GCN and SAGE for [ogbn-arxiv](https://ogb.stanford.edu/docs/nodeprop/#ogbn-arxiv). 4 | 5 | Requires CogDL 0.5-alpha0 or later versions. 6 | 7 | 8 | ## Training & Evaluation 9 | 10 | ``` 11 | # Run with gcn model with default config 12 | python gnn.py 13 | 14 | # Run with sage model with default config 15 | python gnn.py --model sage 16 | ``` 17 | For more hyper-parameters, please find them in the `gnn.py`. 18 | 19 | ## Results 20 | 21 | Here are the results over 10 runs which are comparable with OGB official results reported in the leaderboard. 
22 | 23 | | Method | Test Accuracy | Validation Accuracy | #Parameters | 24 | |:-------------------------------:|:---------------:|:-------------------:|:-----------:| 25 | | GCN | 0.7168 ± 0.0030 | 0.7274 ± 0.0018 | 110,120 | 26 | | GraphSAGE | 0.7224 ± 0.0014 | 0.7336 ± 0.0011 | 218,664 | 27 | | GAT | 0.7231 ± 0.0013 | 0.7337 ± 0.0008 | 1,414,736 | 28 | -------------------------------------------------------------------------------- /examples/ogb/products/README.md: -------------------------------------------------------------------------------- 1 | # CogDL examples for ogbn-products 2 | 3 | CogDL implementation of ClusterGCN (SAGE aggr) for [ogbn-products](https://ogb.stanford.edu/docs/nodeprop/#ogbn-products). 4 | 5 | Requires CogDL 0.5.1 or later versions. 6 | 7 | 8 | ## Training & Evaluation 9 | 10 | ``` 11 | # Run with sage model with default config 12 | python gnn.py 13 | 14 | # Run with sage model with custom config 15 | python gnn.py --hidden-size 128 16 | ``` 17 | For more hyper-parameters, please find them in the `gnn.py`. 18 | 19 | ## Results 20 | 21 | Here are the results over 10 runs which are comparable with OGB official results reported in the leaderboard. 22 | 23 | | Method | Test Accuracy | Validation Accuracy | #Parameters | 24 | |:-------------------------------:|:---------------:|:-------------------:|:-----------:| 25 | | ClusterGCN (SAGE aggr) | 0.7906 ± 0.0032 | 0.9168 ± 0.0006 | 207,919 | 26 | -------------------------------------------------------------------------------- /examples/pyg/README.md: -------------------------------------------------------------------------------- 1 | # Running experiments with PyG modules 2 | 3 | If you are familiar with other popular graph libraries, you can implement your own model in CogDL using modules from PyTorch Geometric (PyG). 4 | 5 | ## Installation 6 | For the installation of PyG, you can follow the instructions from PyG (https://github.com/rusty1s/pytorch_geometric/#installation). 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn.conv import ChebConv

from cogdl import experiment
from cogdl.models import BaseModel
from cogdl.datasets.planetoid_data import CoraDataset


class ChebyNet(BaseModel):
    """Chebyshev spectral GCN built from stacked PyG ChebConv layers."""

    def __init__(self, in_feats, hidden_size, out_feats, num_layers, dropout, filter_size):
        super(ChebyNet, self).__init__()

        self.num_features = in_feats
        self.num_classes = out_feats
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.filter_size = filter_size
        # Layer widths: in_feats -> hidden_size x (num_layers - 1) -> out_feats.
        dims = [in_feats] + [hidden_size] * (num_layers - 1) + [out_feats]
        self.convs = nn.ModuleList(ChebConv(d_in, d_out, filter_size) for d_in, d_out in zip(dims, dims[1:]))

    def forward(self, graph):
        h = graph.x
        edge_index = torch.stack(graph.edge_index)
        # ReLU + dropout between hidden layers; the final conv emits raw scores.
        for conv in self.convs[:-1]:
            h = F.dropout(F.relu(conv(h, edge_index)), p=self.dropout, training=self.training)
        return self.convs[-1](h, edge_index)


if __name__ == "__main__":
    cora = CoraDataset()
    model = ChebyNet(
        in_feats=cora.num_features,
        hidden_size=64,
        out_feats=cora.num_classes,
        num_layers=2,
        dropout=0.5,
        filter_size=5,
    )
    ret = experiment(dataset=cora, model=model)
import torch
import torch.nn.functional as F
from torch_geometric.nn.conv import GATConv

from cogdl import experiment
from cogdl.models import BaseModel
from cogdl.datasets.planetoid_data import CoraDataset


class GAT(BaseModel):
    """Two-layer graph attention network built on PyG GATConv."""

    def __init__(self, in_feats, hidden_size, out_feats, num_heads, dropout):
        super(GAT, self).__init__()
        self.in_feats = in_feats
        self.out_feats = out_feats
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.dropout = dropout
        # The first layer concatenates num_heads attention heads, so the
        # second layer consumes hidden_size * num_heads input features.
        self.conv1 = GATConv(in_feats, hidden_size, heads=num_heads, dropout=dropout)
        self.conv2 = GATConv(hidden_size * num_heads, out_feats, dropout=dropout)

    def forward(self, graph):
        edge_index = torch.stack(graph.edge_index)
        h = F.dropout(graph.x, p=self.dropout, training=self.training)
        h = F.elu(self.conv1(h, edge_index))
        h = F.dropout(h, p=self.dropout, training=self.training)
        return F.elu(self.conv2(h, edge_index))


if __name__ == "__main__":
    cora = CoraDataset()
    model = GAT(in_feats=cora.num_features, hidden_size=64, out_feats=cora.num_classes, num_heads=2, dropout=0.1)
    ret = experiment(dataset=cora, model=model, dw="node_classification_dw", mw="node_classification_mw")
import torch
import torch.nn.functional as F
from torch_geometric.nn import GraphUNet
from torch_geometric.utils import dropout_adj

from cogdl import experiment
from cogdl.models import BaseModel
from cogdl.datasets.planetoid_data import CoraDataset


class UNet(BaseModel):
    """Graph U-Net (PyG GraphUNet) with edge dropout during training."""

    def __init__(self, in_feats, hidden_size, out_feats, num_layers, dropout, num_nodes):
        super(UNet, self).__init__()

        self.in_feats = in_feats
        self.out_feats = out_feats
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout

        # Depth-3 U-Net; the first pooling keeps roughly 2000 nodes, the
        # deeper ones keep half of the remaining nodes.
        self.unet = GraphUNet(
            self.in_feats, self.hidden_size, self.out_feats, depth=3, pool_ratios=[2000 / num_nodes, 0.5], act=F.elu
        )

    def forward(self, graph):
        edge_index = torch.stack(graph.edge_index)
        # Randomly drop 20% of edges (kept symmetric) while training.
        edge_index, _ = dropout_adj(
            edge_index, p=0.2, force_undirected=True, num_nodes=graph.x.shape[0], training=self.training
        )
        h = F.dropout(graph.x, p=self.dropout, training=self.training)
        return self.unet(h, edge_index)


if __name__ == "__main__":
    cora = CoraDataset()
    model = UNet(
        in_feats=cora.num_features,
        hidden_size=64,
        out_feats=cora.num_classes,
        num_layers=2,
        dropout=0.1,
        num_nodes=cora.num_nodes,
    )
    ret = experiment(dataset=cora, model=model)
#!/bin/bash
# Download, build, and install METIS 5.1.0 as a shared library into ~/.local.
set -e

file="metis-5.1.0"

# Quote the URL: it contains '?', which an unquoted shell word treats as a glob.
wget "https://cloud.tsinghua.edu.cn/f/fd30d629a08645bbbdb8/?dl=1" -O "${file}.tar.gz"
gunzip "${file}.tar.gz"
tar -xvf "${file}.tar"

cd "${file}"
# BUG FIX: tilde expansion does not happen inside double quotes, so the old
# prefix="~/.local/" installed into a literal './~/.local/' directory.
# Use $HOME, which does expand within double quotes.
make config shared=1 prefix="${HOME}/.local/"
make install
import numpy as np
import torch

from cogdl.datasets import build_dataset_from_name
from cogdl.utils import get_degrees


class Test_Data(object):
    """Sanity checks for Graph sampling and CSR utilities on the Cora dataset."""

    def setup_class(self):
        # Shared fixture: load Cora once for every test in this class.
        self.dataset = build_dataset_from_name("cora")
        self.data = self.dataset[0]
        self.num_nodes = self.data.num_nodes
        self.num_edges = self.data.num_edges
        self.num_features = self.data.num_features
        print("Call Setup")

    def test_subgraph_sampling(self):
        # An induced node subgraph keeps one feature row per unique sampled node.
        picked = np.unique(np.random.randint(0, self.num_nodes, (100,)))
        sub = self.data.subgraph(picked)
        assert sub.x.shape[0] == len(set(picked))
        assert sub.x.shape[1] == self.data.x.shape[1]

    def test_edge_subgraph_sampling(self):
        # An edge subgraph keeps exactly the sampled edges (duplicates included).
        picked = np.random.randint(0, self.num_edges, (200,))
        sub = self.data.edge_subgraph(picked, require_idx=False)
        row, col = sub.edge_index
        assert row.shape[0] == col.shape[0]
        assert row.shape[0] == len(picked)

    def test_adj_sampling(self):
        seeds = np.arange(0, 10)
        edges = torch.stack(self.data.edge_index).t().cpu().numpy()
        edges = [tuple(e) for e in edges]
        print(np.array(edges).shape)
        # fanout -1 keeps all neighbours; sampled node ids must contain the seeds.
        for fanout in [5, -1]:
            node_idx, sampled_edge_index = self.data.sample_adj(seeds, fanout)
            node_idx = node_idx.cpu().numpy()
            assert (set(node_idx) & set(seeds)) == set(seeds)

    def test_to_csr(self):
        # Converting to CSR must preserve symmetry and per-node degrees.
        self.data._adj._to_csr()
        symmetric = self.data.is_symmetric()
        assert symmetric is True
        degs = self.data.degrees()
        row, col = self.data.edge_index
        assert (degs == get_degrees(row, col)).all()
from cogdl.datasets import build_dataset
from cogdl.utils import build_args_from_dict


def _check_kg_dataset(name, num_triples):
    """Build the knowledge-graph dataset *name* and verify its triple count."""
    args = build_args_from_dict({"dataset": name})
    assert args.dataset == name
    dataset = build_dataset(args)
    data = dataset.data
    # edge_index holds (head, tail) id arrays; edge_attr holds relation ids.
    assert len(data.edge_index) == 2
    assert data.edge_index[0].shape[0] == num_triples
    assert data.edge_attr.shape[0] == num_triples


def test_fb13():
    _check_kg_dataset("fb13", 345873)


def test_fb15k():
    _check_kg_dataset("fb15k", 592213)


def test_fb15k237():
    _check_kg_dataset("fb15k237", 310116)


if __name__ == "__main__":
    test_fb13()
    test_fb15k()
    test_fb15k237()
from cogdl.datasets import build_dataset
from cogdl.utils import build_args_from_dict


def test_citeseer():
    """Citeseer should load with its published node/feature/class counts."""
    dataset = build_dataset(build_args_from_dict({"dataset": "citeseer"}))
    assert dataset.data.num_nodes == 3327
    assert dataset.num_features == 3703
    assert dataset.num_classes == 6


if __name__ == "__main__":
    test_citeseer()
def build_test_data():
    """Create a tiny synthetic GraphSAINT-format dataset under ./data/test_saint/raw.

    Writes the five files the SAINT loader expects for 100 nodes with
    10 features and 10 label dimensions:
      - adj_train.npz / adj_full.npz: CSR adjacency (train edges are a
        subset of the full edge set),
      - feats.npy: dense node-feature matrix,
      - class_map.json: node id (as str) -> multilabel float vector,
      - role.json: train/validation/test node-id splits.
    """
    os.makedirs("data", exist_ok=True)
    os.makedirs("./data/test_saint", exist_ok=True)
    os.makedirs("./data/test_saint/raw", exist_ok=True)
    prefix = "./data/test_saint/raw"

    def join(x):
        # Resolve a filename inside the raw/ directory.
        return os.path.join(prefix, x)

    num_nodes = 100
    train_edge = np.random.randint(0, num_nodes, (2, 200))
    all_edge = np.random.randint(0, num_nodes, (2, 200))
    # Full graph = training edges plus 200 extra random edges.
    all_edge = np.concatenate([train_edge, all_edge], axis=1)
    adj_train = sp.csr_matrix((np.ones(200), (train_edge[0], train_edge[1])), shape=(num_nodes, num_nodes))
    adj_full = sp.csr_matrix((np.ones(400), (all_edge[0], all_edge[1])), shape=(num_nodes, num_nodes))
    sp.save_npz(join("adj_train.npz"), adj_train)
    sp.save_npz(join("adj_full.npz"), adj_full)

    feats = np.random.rand(100, 10)
    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented replacement (and is exactly what
    # the old alias resolved to). randint(0, 1) intentionally yields zeros
    # here - the fixture only needs well-formed label vectors.
    class_map = [(str(i), np.random.randint(0, 1, 10).astype(float).tolist()) for i in range(num_nodes)]
    class_map = dict(class_map)
    roles = {"tr": list(range(40)), "va": list(range(40, 80)), "te": list(range(80, 100))}
    np.save(join("feats.npy"), feats)
    with open(join("class_map.json"), "w") as f:
        json.dump(class_map, f)
    with open(join("role.json"), "w") as f:
        json.dump(roles, f)
from cogdl.options import get_default_args
from cogdl.experiments import train


# Shared hyper-parameters for the knowledge-graph link-prediction smoke tests:
# two CPU epochs with evaluation disabled, so each test only verifies that
# training runs end to end.
default_dict_kg = {
    "epochs": 2,
    "batch_size": 1024,
    "cpu": True,
    "lr": 0.001,
    "negative_ratio": 3,
    "checkpoint": False,
    "save_dir": ".",
    "device_id": [0],
    "actnn": False,
    "do_test": False,
    "do_valid": False,
    "eval_step": 3,
}


def get_default_args_kg(dataset, model, dw="triple_link_prediction_dw", mw="triple_link_prediction_mw"):
    """Build default args for a KG embedding model, overridden with the smoke-test config."""
    args = get_default_args(dataset=dataset, model=model, dw=dw, mw=mw)
    for key, value in default_dict_kg.items():
        setattr(args, key, value)
    return args


def test_transe_fb15k():
    args = get_default_args_kg(dataset="fb15k", model="transe")
    ret = train(args)
    # assert 0 <= ret["mrr"] <= 1


def test_complex_fb15k():
    args = get_default_args_kg(dataset="fb15k", model="complex")
    args.double_entity_embedding = True
    args.double_relation_embedding = True
    ret = train(args)
    # assert 0 <= ret["mrr"] <= 1


def test_distmult_wn18():
    args = get_default_args_kg(dataset="wn18", model="distmult")
    ret = train(args)
    # assert 0 <= ret["mrr"] <= 1


def test_rotate_wn18():
    args = get_default_args_kg(dataset="wn18", model="rotate")
    args.double_entity_embedding = True
    ret = train(args)
    # assert 0 <= ret["mrr"] <= 1


if __name__ == "__main__":
    test_transe_fb15k()
    test_complex_fb15k()
    test_distmult_wn18()
    test_rotate_wn18()
"-m", "gcn", "-dt", "cora"] 7 | parser = options.get_training_parser() 8 | args, _ = parser.parse_known_args() 9 | args = options.parse_args_and_arch(parser, args) 10 | print(args) 11 | 12 | assert args.model[0] == "gcn" 13 | assert args.dataset[0] == "cora" 14 | 15 | 16 | def test_display_options(): 17 | sys.argv = [sys.argv[0], "-dt", "cora"] 18 | parser = options.get_display_data_parser() 19 | args = parser.parse_args() 20 | print(args) 21 | 22 | assert args.dataset[0] == "cora" 23 | assert args.depth > 0 24 | 25 | 26 | def test_download_options(): 27 | sys.argv = [sys.argv[0], "-dt", "cora"] 28 | parser = options.get_download_data_parser() 29 | args = parser.parse_args() 30 | print(args) 31 | 32 | assert args.dataset[0] == "cora" 33 | 34 | 35 | def test_get_default_args(): 36 | args = options.get_default_args(dataset=["cora", "citeseer"], model=["gcn", "gat"], hidden_size=128) 37 | 38 | assert args.model[0] == "gcn" 39 | assert args.model[1] == "gat" 40 | assert args.dataset[0] == "cora" 41 | assert args.dataset[1] == "citeseer" 42 | assert args.hidden_size == 128 43 | 44 | 45 | if __name__ == "__main__": 46 | test_training_options() 47 | test_display_options() 48 | test_download_options() 49 | test_get_default_args() 50 | -------------------------------------------------------------------------------- /tests/test_pipelines.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from cogdl import pipeline 3 | 4 | 5 | def test_dataset_stats(): 6 | stats = pipeline("dataset-stats") 7 | outputs = stats("cora") 8 | outputs = outputs[0] 9 | 10 | assert len(outputs) == 6 11 | assert tuple(outputs) == ("cora", 2708, 10556, 1433, 7, 140) 12 | 13 | 14 | def test_dataset_visual(): 15 | visual = pipeline("dataset-visual") 16 | outputs = visual("cora", seed=0, depth=3) 17 | 18 | assert len(outputs) == 72 19 | 20 | 21 | def test_oagbert(): 22 | oagbert = pipeline("oagbert", model="oagbert-test", load_weights=False) 23 
| outputs = oagbert("CogDL is developed by KEG, Tsinghua.") 24 | 25 | assert len(outputs) == 2 26 | assert tuple(outputs[0].shape) == (1, 14, 32) 27 | assert tuple(outputs[1].shape) == (1, 32) 28 | 29 | 30 | def test_gen_emb(): 31 | generator = pipeline("generate-emb", model="prone") 32 | 33 | edge_index = np.array([[0, 1], [0, 2], [0, 3], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7]]) 34 | outputs = generator(edge_index) 35 | assert tuple(outputs.shape) == (8, 8) 36 | 37 | generator = pipeline( 38 | "generate-emb", 39 | model="mvgrl", 40 | return_model=True, 41 | num_features=8, 42 | hidden_size=10, 43 | sample_size=2, 44 | epochs=2, 45 | cpu=True, 46 | ) 47 | outputs = generator(edge_index, x=np.random.randn(8, 8)) 48 | assert tuple(outputs.shape) == (8, 10) 49 | 50 | 51 | # def test_recommendation(): 52 | # data = np.array([[0, 0], [0, 1], [0, 2], [1, 1], [1, 3], [1, 4], [2, 4], [2, 5], [2, 6]]) 53 | # rec = pipeline("recommendation", model="lightgcn", data=data, epochs=2, evaluate_interval=1000, cpu=True) 54 | # ret = rec([0], topk=3) 55 | # assert len(ret[0]) == 3 56 | 57 | 58 | if __name__ == "__main__": 59 | test_dataset_stats() 60 | test_dataset_visual() 61 | test_oagbert() 62 | test_gen_emb() 63 | # test_recommendation() 64 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import f1_score 2 | import torch 3 | from cogdl.utils import build_args_from_dict 4 | from cogdl.utils import accuracy, multiclass_f1, multilabel_f1, bce_with_logits_loss, cross_entropy_loss 5 | 6 | 7 | def test_build_args_from_dict(): 8 | dic = {"arg1": "value1", "arg2": 2, "arg3": 0.3} 9 | args = build_args_from_dict(dic) 10 | 11 | assert args.arg1 == "value1" 12 | assert args.arg2 == 2 13 | assert args.arg3 == 0.3 14 | 15 | 16 | def test_evaluator(): 17 | pred = torch.randn(20, 5) 18 | target_one = torch.randint(0, 5, (20,)) 
19 | target_mult = torch.randint(0, 2, (20, 5)).float() 20 | 21 | def f(x): 22 | return round(float(x), 5) 23 | 24 | _ = cross_entropy_loss(pred, target_one) 25 | _pred = torch.nn.functional.log_softmax(pred, dim=-1) 26 | acc = _pred.max(1)[1].eq(target_one).double().sum() / len(_pred) 27 | assert f(acc) == f(accuracy(_pred, target_one)) 28 | f1 = f1_score(target_one, _pred.max(1)[1], average="micro") 29 | assert f(f1) == f(multiclass_f1(_pred, target_one)) 30 | _ = bce_with_logits_loss(pred, target_mult) 31 | _pred = torch.zeros_like(pred) 32 | _pred[pred > 0] = 1 33 | f1 = f1_score(target_mult, _pred, average="micro") 34 | assert f(f1) == f(multilabel_f1(pred, target_mult)) 35 | 36 | 37 | if __name__ == "__main__": 38 | test_build_args_from_dict() 39 | -------------------------------------------------------------------------------- /third_party/README.md: -------------------------------------------------------------------------------- 1 | # Third-party libraries 2 | 3 | [dgNN](https://github.com/dgSPARSE/dgNN) is currently used for fast GAT training with much less GPU memory. 4 | 5 | [ActNN](https://github.com/ucbrise/actnn) can reduce the training memory footprint by compressing the saved activations. 6 | 7 | [FastMoE](https://github.com/laekov/fastmoe) can be used for GNN models wtih the Mixture of Experts (MoE). 8 | 9 | ## Installation 10 | 11 | For dgNN, 12 | ```bash 13 | cd dgNN 14 | python setup.py install 15 | ``` 16 | 17 | For ActNN, 18 | ```bash 19 | cd actnn/actnn 20 | pip install -v -e . 21 | ``` 22 | 23 | For FastMoE, 24 | ```bash 25 | cd fastmoe 26 | python setup.py install 27 | ``` 28 | --------------------------------------------------------------------------------