├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CONTRIBUTORS.md ├── Jenkinsfile ├── LICENSE ├── NOTICE ├── README.md ├── conda └── dgllife │ ├── README.md │ ├── build.sh │ ├── conda_build_config.yaml │ └── meta.yaml ├── docker ├── Dockerfile.ci_cpu ├── Dockerfile.ci_gpu ├── README.md └── install │ ├── conda_env │ ├── torch_cpu.yml │ └── torch_gpu.yml │ ├── ubuntu_install_build.sh │ ├── ubuntu_install_conda.sh │ └── ubuntu_install_core.sh ├── docs ├── Makefile ├── README.md ├── clean.sh └── source │ ├── api │ ├── data.rst │ ├── model.gnn.rst │ ├── model.pretrain.rst │ ├── model.readout.rst │ ├── model.zoo.rst │ ├── utils.complexes.rst │ ├── utils.mols.rst │ ├── utils.pipeline.rst │ └── utils.splitters.rst │ ├── cli.rst │ ├── conf.py │ ├── index.rst │ └── install │ └── index.rst ├── examples ├── README.md ├── binding_affinity_prediction │ ├── README.md │ ├── configure.py │ ├── main.py │ └── utils.py ├── generative_models │ ├── dgmg │ │ ├── README.md │ │ ├── eval.py │ │ ├── sascorer.py │ │ ├── train.py │ │ └── utils.py │ └── jtvae │ │ ├── README.md │ │ ├── pretrain.py │ │ ├── reconstruct.py │ │ ├── utils.py │ │ └── vaetrain.py ├── link_prediction │ ├── README.md │ └── ogbl-ppa │ │ ├── README.md │ │ ├── full_graph_link_predictor.py │ │ └── logger.py ├── molecule_embeddings │ ├── README.md │ └── main.py ├── property_prediction │ ├── MTL │ │ ├── README.md │ │ ├── configure.py │ │ ├── main.py │ │ ├── model │ │ │ ├── __init__.py │ │ │ ├── attentivefp.py │ │ │ ├── blocks.py │ │ │ ├── gat.py │ │ │ ├── gcn.py │ │ │ ├── mpnn.py │ │ │ └── regressor.py │ │ ├── run.py │ │ └── utils.py │ ├── README.md │ ├── alchemy │ │ ├── README.md │ │ ├── configure.py │ │ ├── main.py │ │ └── utils.py │ ├── csv_data_configuration │ │ ├── README.md │ │ ├── analysis.py │ │ ├── classification_inference.py │ │ ├── classification_train.py │ │ ├── hyper.py │ │ ├── model_configures │ │ │ ├── AttentiveFP.json │ │ │ ├── GAT.json │ │ │ ├── GCN.json │ │ │ ├── MPNN.json │ │ │ ├── NF.json │ │ │ 
├── README.md │ │ │ ├── Weave.json │ │ │ ├── gin_supervised_contextpred.json │ │ │ ├── gin_supervised_edgepred.json │ │ │ ├── gin_supervised_infomax.json │ │ │ └── gin_supervised_masking.json │ │ ├── regression_inference.py │ │ ├── regression_train.py │ │ └── utils.py │ ├── moleculenet │ │ ├── README.md │ │ ├── classification.py │ │ ├── configures │ │ │ ├── BACE │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── NF_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ ├── BBBP │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── NF_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ ├── ClinTox │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ └── Weave_canonical.json │ │ │ ├── ESOL │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── 
GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ ├── FreeSolv │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ ├── HIV │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── NF_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ ├── Lipophilicity │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ ├── MUV │ │ │ │ ├── 
AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ ├── PCBA │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ ├── SIDER │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── NF_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ ├── Tox21 │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── NF_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── 
gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ │ └── ToxCast │ │ │ │ ├── AttentiveFP_attentivefp.json │ │ │ │ ├── AttentiveFP_canonical.json │ │ │ │ ├── GAT_attentivefp.json │ │ │ │ ├── GAT_canonical.json │ │ │ │ ├── GCN_attentivefp.json │ │ │ │ ├── GCN_canonical.json │ │ │ │ ├── MPNN_attentivefp.json │ │ │ │ ├── MPNN_canonical.json │ │ │ │ ├── NF_canonical.json │ │ │ │ ├── Weave_attentivefp.json │ │ │ │ ├── Weave_canonical.json │ │ │ │ ├── gin_supervised_contextpred.json │ │ │ │ ├── gin_supervised_edgepred.json │ │ │ │ ├── gin_supervised_infomax.json │ │ │ │ └── gin_supervised_masking.json │ │ ├── regression.py │ │ └── utils.py │ ├── ogbg_ppa │ │ ├── README.md │ │ └── main.py │ ├── pretrain_gnns │ │ └── chem │ │ │ ├── README.md │ │ │ ├── classification.py │ │ │ ├── pretrain_masking.py │ │ │ ├── pretrain_supervised.py │ │ │ └── utils.py │ └── pubchem_aromaticity │ │ ├── README.md │ │ ├── configure.py │ │ ├── main.py │ │ └── utils.py └── reaction_prediction │ └── rexgen_direct │ ├── README.md │ ├── candidate_ranking_eval.py │ ├── candidate_ranking_train.py │ ├── clean.sh │ ├── configure.py │ ├── find_reaction_center_eval.py │ ├── find_reaction_center_train.py │ └── utils.py ├── python ├── __init__.py ├── dgllife │ ├── __init__.py │ ├── data │ │ ├── __init__.py │ │ ├── alchemy.py │ │ ├── astrazeneca_chembl_solubility.py │ │ ├── bace.py │ │ ├── bbbp.py │ │ ├── clintox.py │ │ ├── csv_dataset.py │ │ ├── esol.py │ │ ├── freesolv.py │ │ ├── hiv.py │ │ ├── jtvae.py │ │ ├── lipophilicity.py │ │ ├── muv.py │ │ ├── pcba.py │ │ ├── pdbbind.py │ │ ├── pubchem_aromaticity.py │ │ ├── sider.py │ │ ├── smiles_inference.py │ │ ├── tox21.py │ │ ├── toxcast.py │ │ └── uspto.py │ ├── libinfo.py │ ├── model │ │ ├── __init__.py │ │ ├── gnn │ │ │ ├── __init__.py │ │ │ ├── attentivefp.py │ │ │ ├── gat.py │ │ │ ├── gatv2.py │ │ │ ├── gcn.py │ │ │ ├── gin.py │ │ │ ├── gnn_ogb.py │ │ 
│ ├── graphsage.py │ │ │ ├── mgcn.py │ │ │ ├── mpnn.py │ │ │ ├── nf.py │ │ │ ├── pagtn.py │ │ │ ├── schnet.py │ │ │ ├── weave.py │ │ │ └── wln.py │ │ ├── model_zoo │ │ │ ├── __init__.py │ │ │ ├── acnn.py │ │ │ ├── attentivefp_predictor.py │ │ │ ├── dgmg.py │ │ │ ├── gat_predictor.py │ │ │ ├── gatv2_predictor.py │ │ │ ├── gcn_predictor.py │ │ │ ├── gin_predictor.py │ │ │ ├── gnn_ogb_predictor.py │ │ │ ├── hadamard_link_predictor.py │ │ │ ├── jtvae.py │ │ │ ├── mgcn_predictor.py │ │ │ ├── mlp_predictor.py │ │ │ ├── mpnn_predictor.py │ │ │ ├── nf_predictor.py │ │ │ ├── pagtn_predictor.py │ │ │ ├── potentialnet.py │ │ │ ├── schnet_predictor.py │ │ │ ├── weave_predictor.py │ │ │ ├── wln_reaction_center.py │ │ │ └── wln_reaction_ranking.py │ │ ├── pretrain │ │ │ ├── __init__.py │ │ │ ├── generative_models.py │ │ │ ├── moleculenet │ │ │ │ ├── __init__.py │ │ │ │ ├── bace.py │ │ │ │ ├── bbbp.py │ │ │ │ ├── clintox.py │ │ │ │ ├── esol.py │ │ │ │ ├── freesolv.py │ │ │ │ ├── hiv.py │ │ │ │ ├── lipophilicity.py │ │ │ │ ├── muv.py │ │ │ │ ├── pcba.py │ │ │ │ ├── sider.py │ │ │ │ ├── tox21.py │ │ │ │ └── toxcast.py │ │ │ ├── property_prediction.py │ │ │ └── reaction.py │ │ └── readout │ │ │ ├── __init__.py │ │ │ ├── attentivefp_readout.py │ │ │ ├── mlp_readout.py │ │ │ ├── sum_and_max.py │ │ │ ├── weave_readout.py │ │ │ └── weighted_sum_and_max.py │ └── utils │ │ ├── __init__.py │ │ ├── analysis.py │ │ ├── complex_to_graph.py │ │ ├── early_stop.py │ │ ├── eval.py │ │ ├── featurizers.py │ │ ├── io.py │ │ ├── jtvae │ │ ├── __init__.py │ │ ├── chemutils.py │ │ ├── mol_tree.py │ │ └── vocab.py │ │ ├── mol_to_graph.py │ │ └── splitters.py └── setup.py ├── readthedocs.yml └── tests ├── data ├── test_datasets.py └── test_new_dataset.py ├── lint └── pylintrc ├── model ├── test_binding_affinity.py ├── test_generative_models.py ├── test_gnn.py ├── test_link_prediction.py ├── test_pretrain.py ├── test_property_prediction.py ├── test_reaction_prediction.py └── test_readout.py ├── scripts 
├── build.sh ├── task_lint.sh └── task_unit_test.sh └── utils ├── test_analysis.py ├── test_complex_to_graph.py ├── test_early_stop.py ├── test_eval.py ├── test_featurizers.py ├── test_io_utils.py ├── test_jtvae.py ├── test_mol_to_graph.py └── test_splitters.py /.gitignore: -------------------------------------------------------------------------------- 1 | # IDE 2 | .idea 3 | .vscode 4 | 5 | # Byte-compiled 6 | __pycache__/ 7 | 8 | # Distribution / packaging 9 | dgllife.egg-info/ 10 | 11 | # Docs 12 | docs/build 13 | docs/source/tutorials 14 | docs/source/generated 15 | 16 | # .DS_Store 17 | # examples/.DS_Store 18 | # python/.DS_Store 19 | # python/dgllife/.DS_Store 20 | *.DS_Store 21 | .vscode/ 22 | 23 | # Installation from source 24 | python/build/ 25 | python/dist/ 26 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 
8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. 
As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Contributing to DGL-LifeSci 2 | 3 | Contribution is always welcome. All contributions must go through pull requests 4 | and code review. 5 | 6 | Below is a list of community contributors for this project. 
7 | 8 | Contributors 9 | ------------ 10 | * [Chengqiang Lu](https://github.com/geekinglcq): Alchemy dataset; MPNN, MGCN and SchNet 11 | * [Jiajing Hu](https://github.com/jjhu94): Weave 12 | * [Zhaoqiang Chen](https://github.com/autodataming): Update RDkit channel for conda installation; Suggestions and improvement for rexgen_direct 13 | * [Yue Zhong](https://github.com/YueZhong-bio): HadamardLinkPredictor, link prediction for ogbl-ppa 14 | * [Yangkang Zhang](https://github.com/yangkang98): GNNOGBPredictor, graph property prediction for ogbg-ppa, support for GuacaMol in JTVAE 15 | * [Sooheon Kim](https://github.com/sooheon): Parallel processing for dataset construction from a CSV file 16 | * [Krishna Sirumalla](https://github.com/skrsna): Fix for regression_inference.py and classification_inference.py; support non-ring systems for ScaffoldSplitter 17 | * [Joshua Meyers](https://github.com/JoshuaMeyers): Fix for load_molecule 18 | * [Pavol Drotar](https://github.com/padr31): Fix for import 19 | * [Nanxuan Zhou](https://github.com/xnuohz): Fix for doc 20 | * [mar-volk](https://github.com/mar-volk): Fix for rexgen_direct 21 | * [Wenxuan Fan](https://github.com/wenx00): Strategies for Pre-training Graph Neural Networks 22 | * [Vignesh Venkataraman](https://github.com/VIGNESHinZONE): PAGTN 23 | * [Eric O. 
Korman](https://github.com/ekorman): Fix for ogbg_ppa 24 | * [Marcos Leal](https://github.com/marcossilva): Change default number of processes to 1 for rexgen 25 | * [Raymond Gasper](https://github.com/rgasper): GATv2 26 | * [Andrew Stolman](https://github.com/astolman): Add allow_zero_in_degree option for GAT and GCN 27 | * [In-Ho Yi](https://github.com/chajath): WeaveAtomFeaturizer performance fix -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env groovy 2 | // Adapted from github.com/dmlc/dgl/Jenkinsfile 3 | 4 | app = "dgllife" 5 | 6 | def init_git() { 7 | sh "rm -rf *" 8 | checkout scm 9 | sh "git submodule update --recursive --init" 10 | } 11 | 12 | def build_linux(dev) { 13 | init_git() 14 | sh "bash tests/scripts/build.sh ${dev}" 15 | } 16 | 17 | def unit_test_linux(backend, dev) { 18 | timeout(time: 10, unit: 'MINUTES') { 19 | sh "bash tests/scripts/task_unit_test.sh ${backend} ${dev}" 20 | } 21 | } 22 | 23 | pipeline { 24 | agent any 25 | stages { 26 | stage("Lint Check") { 27 | agent { 28 | docker { 29 | label "linux-c52x-node" 30 | image "dgllib/dgl-ci-lint" 31 | } 32 | } 33 | steps { 34 | init_git() 35 | sh "bash tests/scripts/task_lint.sh" 36 | } 37 | post { 38 | always { 39 | cleanWs disableDeferredWipeout: true, deleteDirs: true 40 | } 41 | } 42 | } 43 | stage("Build") { 44 | parallel { 45 | stage("CPU Build") { 46 | agent { 47 | docker { 48 | label "linux-c52x-node" 49 | image "dgllib/${app}-ci-cpu" 50 | alwaysPull true 51 | } 52 | } 53 | steps { 54 | build_linux("cpu") 55 | } 56 | post { 57 | always { 58 | cleanWs disableDeferredWipeout: true, deleteDirs: true 59 | } 60 | } 61 | } 62 | stage("GPU Build") { 63 | agent { 64 | docker { 65 | label "linux-c52x-node" 66 | image "dgllib/${app}-ci-gpu:latest" 67 | args "-u root" 68 | alwaysPull true 69 | } 70 | } 71 | steps { 72 | build_linux("gpu") 73 | } 74 | 
post { 75 | always { 76 | cleanWs disableDeferredWipeout: true, deleteDirs: true 77 | } 78 | } 79 | } 80 | } 81 | } 82 | stage("Test") { 83 | parallel { 84 | stage("Torch CPU") { 85 | agent { 86 | docker { 87 | label "linux-c52x-node" 88 | image "dgllib/${app}-ci-cpu:latest" 89 | } 90 | } 91 | stages { 92 | stage("Unit test") { 93 | steps { 94 | unit_test_linux("pytorch", "cpu") 95 | } 96 | } 97 | } 98 | post { 99 | always { 100 | cleanWs disableDeferredWipeout: true, deleteDirs: true 101 | } 102 | } 103 | } 104 | stage("Torch GPU") { 105 | agent { 106 | docker { 107 | label "linux-gpu-node" 108 | image "dgllib/${app}-ci-gpu:latest" 109 | args "--runtime nvidia" 110 | } 111 | } 112 | stages { 113 | stage("Unit test") { 114 | steps { 115 | sh "nvidia-smi" 116 | unit_test_linux("pytorch", "gpu") 117 | } 118 | } 119 | } 120 | post { 121 | always { 122 | cleanWs disableDeferredWipeout: true, deleteDirs: true 123 | } 124 | } 125 | } 126 | } 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /conda/dgllife/README.md: -------------------------------------------------------------------------------- 1 | # Conda Recipe 2 | 3 | Build the package with `conda build .` -------------------------------------------------------------------------------- /conda/dgllife/build.sh: -------------------------------------------------------------------------------- 1 | $PYTHON setup.py install --single-version-externally-managed --record=record.txt # Python command to install the script. 
-------------------------------------------------------------------------------- /conda/dgllife/conda_build_config.yaml: -------------------------------------------------------------------------------- 1 | python: 2 | - 3.6 3 | - 3.7 -------------------------------------------------------------------------------- /conda/dgllife/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: dgllife{{ environ.get('APP_PACKAGE_SUFFIX', '') }} 3 | version: "0.2.7" 4 | 5 | source: 6 | url: https://files.pythonhosted.org/packages/04/f1/08bed2f6e9f869d8cd4a101978c5ec202769554d8baf44e1c39f80a40cf5/dgllife-0.2.7.tar.gz 7 | sha256: acd23be4c3810c2dbfd4ec2bade58d552f6b81452a89e1ffe35a34e846327bd2 8 | 9 | channels: 10 | - defaults 11 | - conda-forge 12 | 13 | requirements: 14 | build: 15 | - python {{ python }} 16 | - setuptools 17 | - cmake 18 | - git 19 | - cython 20 | run: 21 | - python 22 | - requests 23 | - scikit-learn 24 | - pandas 25 | - tqdm 26 | - numpy 27 | - scipy 28 | - networkx 29 | 30 | about: 31 | license: Apache -------------------------------------------------------------------------------- /docker/Dockerfile.ci_cpu: -------------------------------------------------------------------------------- 1 | # CI docker CPU env 2 | # Adapted from github.com/dmlc/tvm/docker/Dockerfile.ci_cpu 3 | FROM ubuntu:16.04 4 | 5 | RUN apt-get update --fix-missing 6 | 7 | COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh 8 | RUN bash /install/ubuntu_install_core.sh 9 | 10 | COPY install/ubuntu_install_build.sh /install/ubuntu_install_build.sh 11 | RUN bash /install/ubuntu_install_build.sh 12 | 13 | # python 14 | COPY install/ubuntu_install_conda.sh /install/ubuntu_install_conda.sh 15 | RUN bash /install/ubuntu_install_conda.sh 16 | 17 | ENV CONDA_ALWAYS_YES="true" 18 | 19 | COPY install/conda_env/torch_cpu.yml /install/conda_env/torch_cpu.yml 20 | RUN ["/bin/bash", "-i", "-c", "conda env create -f 
/install/conda_env/torch_cpu.yml"] 21 | 22 | ENV CONDA_ALWAYS_YES= -------------------------------------------------------------------------------- /docker/Dockerfile.ci_gpu: -------------------------------------------------------------------------------- 1 | # CI docker GPU env 2 | FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu16.04 3 | 4 | RUN apt-get update --fix-missing 5 | 6 | COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh 7 | RUN bash /install/ubuntu_install_core.sh 8 | 9 | COPY install/ubuntu_install_build.sh /install/ubuntu_install_build.sh 10 | RUN bash /install/ubuntu_install_build.sh 11 | 12 | # python 13 | COPY install/ubuntu_install_conda.sh /install/ubuntu_install_conda.sh 14 | RUN bash /install/ubuntu_install_conda.sh 15 | 16 | ENV CONDA_ALWAYS_YES="true" 17 | 18 | COPY install/conda_env/torch_gpu.yml /install/conda_env/torch_gpu.yml 19 | RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/torch_gpu.yml"] 20 | 21 | ENV CONDA_ALWAYS_YES= 22 | 23 | # Environment variables 24 | ENV PATH=/usr/local/nvidia/bin:${PATH} 25 | ENV PATH=/usr/local/cuda/bin:${PATH} 26 | ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH} 27 | ENV C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH} 28 | ENV LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LIBRARY_PATH} 29 | ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH} 30 | ENV CUDA_VISIBLE_DEVICES=0 31 | ENV TF_FORCE_GPU_ALLOW_GROWTH=true -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | # Build Docker Image for CI 2 | 3 | Docker images are used by the CI and release script. Make sure to install necessary requirements in it. 4 | 5 | ## To build 6 | 7 | ```bash 8 | docker build -t dgllib/dgllife-ci-cpu:latest -f Dockerfile.ci_cpu . 
9 | ``` 10 | 11 | ```bash 12 | docker build -t dgllib/dgllife-ci-gpu:latest -f Dockerfile.ci_gpu . 13 | ``` 14 | 15 | ## To push 16 | 17 | ```bash 18 | docker push dgllib/dgllife-ci-cpu:latest 19 | ``` 20 | 21 | ```bash 22 | docker push dgllib/dgllife-ci-gpu:latest 23 | ``` 24 | -------------------------------------------------------------------------------- /docker/install/conda_env/torch_cpu.yml: -------------------------------------------------------------------------------- 1 | name: pytorch-ci 2 | channels: 3 | - defaults 4 | - rdkit 5 | dependencies: 6 | - python=3.6.9 7 | - pip 8 | - pip: 9 | - torch 10 | - torchvision 11 | - pytest 12 | - nose 13 | - numpy 14 | - cython 15 | - scipy 16 | - networkx 17 | - matplotlib 18 | - nltk 19 | - requests[security] 20 | - tqdm 21 | - scikit-learn 22 | - pandas 23 | - rdkit==2018.09.3 24 | -------------------------------------------------------------------------------- /docker/install/conda_env/torch_gpu.yml: -------------------------------------------------------------------------------- 1 | name: pytorch-ci 2 | channels: 3 | - defaults 4 | - rdkit 5 | dependencies: 6 | - python=3.6.9 7 | - pip 8 | - pip: 9 | - torch 10 | - torchvision 11 | - pytest 12 | - nose 13 | - numpy 14 | - cython 15 | - scipy 16 | - networkx 17 | - matplotlib 18 | - nltk 19 | - requests[security] 20 | - tqdm 21 | - scikit-learn 22 | - pandas 23 | - rdkit==2018.09.3 24 | -------------------------------------------------------------------------------- /docker/install/ubuntu_install_build.sh: -------------------------------------------------------------------------------- 1 | # install cmake 3.15, cmake>=3.12 is required for CUDA 10.1 2 | version=3.15 3 | build=5 4 | mkdir ~/temp 5 | cd ~/temp 6 | wget https://cmake.org/files/v$version/cmake-$version.$build-Linux-x86_64.sh 7 | sudo mkdir /opt/cmake 8 | sudo sh cmake-$version.$build-Linux-x86_64.sh --prefix=/opt/cmake --skip-license 9 | sudo ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake 10 | cd ~ 
11 | rm -rf ~/temp -------------------------------------------------------------------------------- /docker/install/ubuntu_install_conda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export LANG=C.UTF-8 LC_ALL=C.UTF-8 3 | export PATH=/opt/conda/bin:$PATH 4 | 5 | apt-get update --fix-missing && \ 6 | apt-get install -y wget bzip2 ca-certificates curl git && \ 7 | apt-get clean && \ 8 | rm -rf /var/lib/apt/lists/* 9 | 10 | wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh && \ 11 | /bin/bash ~/miniconda.sh -b -p /opt/conda && \ 12 | rm ~/miniconda.sh && \ 13 | /opt/conda/bin/conda clean -tipsy && \ 14 | ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ 15 | echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ 16 | echo "conda activate base" >> ~/.bashrc 17 | 18 | export TINI_VERSION=v0.16.1 19 | source ~/.bashrc -------------------------------------------------------------------------------- /docker/install/ubuntu_install_core.sh: -------------------------------------------------------------------------------- 1 | # install libraries for building c++ core on ubuntu 2 | apt update && apt install -y --no-install-recommends --force-yes \ 3 | apt-utils git build-essential make wget unzip sudo \ 4 | libz-dev libxml2-dev libopenblas-dev libopencv-dev \ 5 | graphviz graphviz-dev libgraphviz-dev ca-certificates -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 
11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | pytorch: 17 | @echo "##################################################################" 18 | @echo "# #" 19 | @echo "# Step 1: Building PyTorch tutorials #" 20 | @echo "# #" 21 | @echo "##################################################################" 22 | @DGLBACKEND=pytorch $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 23 | 24 | html-noexec: 25 | $(SPHINXBUILD) -D plot_gallery=0 -b html "$(SOURCEDIR)" "$(BUILDDIR)/html" 26 | @echo 27 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 28 | 29 | html: Makefile pytorch 30 | 31 | # Catch-all target: route all unknown targets to Sphinx using the new 32 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 33 | %: Makefile 34 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 35 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | Documentation and Tutorials 2 | === 3 | 4 | Requirements 5 | ------------ 6 | * sphinx 7 | * sphinx-gallery 8 | * sphinx_rtd_theme 9 | 10 | Build documents 11 | --------------- 12 | First, clean up existing files: 13 | ``` 14 | ./clean.sh 15 | ``` 16 | 17 | Then build: 18 | ``` 19 | make html 20 | ``` 21 | 22 | Render locally 23 | -------------- 24 | ``` 25 | cd build/html 26 | python3 -m http.server 8000 27 | ``` 28 | -------------------------------------------------------------------------------- /docs/clean.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | make clean 4 | rm -rf build 5 | rm -rf source/tutorials 6 | rm -rf source/generated 7 | -------------------------------------------------------------------------------- /docs/source/api/model.gnn.rst: 
-------------------------------------------------------------------------------- 1 | .. _apimodelgnn: 2 | 3 | Graph Neural Networks for Updating Node/Edge Representations 4 | ============================================================ 5 | 6 | All models based on graph neural networks start with updating node/edge representations. 7 | We introduce various GNN models implemented in DGL-LifeSci for representation update. 8 | 9 | .. contents:: Contents 10 | :local: 11 | 12 | AttentiveFP 13 | ----------- 14 | .. automodule:: dgllife.model.gnn.attentivefp 15 | :members: 16 | 17 | GAT 18 | --- 19 | .. automodule:: dgllife.model.gnn.gat 20 | :members: 21 | 22 | GATv2 23 | --- 24 | .. automodule:: dgllife.model.gnn.gatv2 25 | :members: 26 | 27 | GCN 28 | --- 29 | .. automodule:: dgllife.model.gnn.gcn 30 | :members: 31 | 32 | MGCN 33 | ---- 34 | .. automodule:: dgllife.model.gnn.mgcn 35 | :members: 36 | 37 | MPNN 38 | ---- 39 | .. automodule:: dgllife.model.gnn.mpnn 40 | :members: 41 | 42 | SchNet 43 | ------ 44 | .. automodule:: dgllife.model.gnn.schnet 45 | :members: 46 | 47 | Weave 48 | ----- 49 | .. automodule:: dgllife.model.gnn.weave 50 | :members: 51 | 52 | GIN 53 | --- 54 | .. automodule:: dgllife.model.gnn.gin 55 | :members: 56 | 57 | GraphSAGE 58 | --------- 59 | .. automodule:: dgllife.model.gnn.graphsage 60 | :members: 61 | 62 | WLN 63 | --- 64 | .. automodule:: dgllife.model.gnn.wln 65 | :members: 66 | 67 | GNNOGB 68 | ------ 69 | .. automodule:: dgllife.model.gnn.gnn_ogb 70 | :members: 71 | 72 | NF 73 | -- 74 | .. automodule:: dgllife.model.gnn.nf 75 | :members: 76 | 77 | PAGTN 78 | ----- 79 | .. automodule:: dgllife.model.gnn.pagtn 80 | :members: 81 | -------------------------------------------------------------------------------- /docs/source/api/model.pretrain.rst: -------------------------------------------------------------------------------- 1 | .. 
_apimodelpretrain: 2 | 3 | Pre-trained Models 4 | ================== 5 | 6 | We provide multiple pre-trained models for users to use without the need of training from scratch. 7 | 8 | Example Usage 9 | ------------- 10 | 11 | Property Prediction 12 | ``````````````````` 13 | 14 | .. code-block:: python 15 | 16 | from dgllife.data import Tox21 17 | from dgllife.model import load_pretrained 18 | from dgllife.utils import smiles_to_bigraph, CanonicalAtomFeaturizer 19 | 20 | dataset = Tox21(smiles_to_bigraph, CanonicalAtomFeaturizer()) 21 | model = load_pretrained('GCN_Tox21') # Pretrained model loaded 22 | model.eval() 23 | 24 | smiles, g, label, mask = dataset[0] 25 | feats = g.ndata.pop('h') 26 | label_pred = model(g, feats) 27 | print(smiles) # CCOc1ccc2nc(S(N)(=O)=O)sc2c1 28 | print(label_pred[:, mask != 0]) # Mask non-existing labels 29 | # tensor([[ 1.4190, -0.1820, 1.2974, 1.4416, 0.6914, 30 | # 2.0957, 0.5919, 0.7715, 1.7273, 0.2070]]) 31 | 32 | Generative Models 33 | 34 | .. code-block:: python 35 | 36 | from dgllife.model import load_pretrained 37 | 38 | model = load_pretrained('DGMG_ZINC_canonical') 39 | model.eval() 40 | smiles = [] 41 | for i in range(4): 42 | smiles.append(model(rdkit_mol=True)) 43 | 44 | print(smiles) 45 | # ['CC1CCC2C(CCC3C2C(NC2=CC(Cl)=CC=C2N)S3(=O)=O)O1', 46 | # 'O=C1SC2N=CN=C(NC(SC3=CC=CC=N3)C1=CC=CO)C=2C1=CCCC1', 47 | # 'CC1C=CC(=CC=1)C(=O)NN=C(C)C1=CC=CC2=CC=CC=C21', 48 | # 'CCN(CC1=CC=CC=C1F)CC1CCCN(C)C1'] 49 | 50 | If you are running the code block above in Jupyter notebook, you can also visualize the molecules generated with 51 | 52 | .. code-block:: python 53 | 54 | from IPython.display import SVG 55 | from rdkit import Chem 56 | from rdkit.Chem import Draw 57 | 58 | mols = [Chem.MolFromSmiles(s) for s in smiles] 59 | SVG(Draw.MolsToGridImage(mols, molsPerRow=4, subImgSize=(180, 150), useSVG=True)) 60 | 61 | .. image:: https://data.dgl.ai/dgllife/dgmg/dgmg_model_zoo_example2.png 62 | 63 | API 64 | --- 65 | 66 | .. 
autofunction:: dgllife.model.load_pretrained 67 | -------------------------------------------------------------------------------- /docs/source/api/model.readout.rst: -------------------------------------------------------------------------------- 1 | .. _apimodelreadout: 2 | 3 | Readout for Computing Graph Representations 4 | =========================================== 5 | 6 | After updating node/edge representations with graph neural networks (GNNs), a common operation is to compute 7 | graph representations out of updated node/edge representations. For example, we need to compute molecular 8 | representations out of atom/bond representations in molecular property prediction. We call the various modules 9 | for computing graph-level representations **readout** as in Neural Message Passing for Quantum Chemistry and this 10 | section lists the readout modules implemented in DGL-LifeSci. 11 | 12 | .. contents:: Contents 13 | :local: 14 | 15 | AttentiveFP Readout 16 | ------------------- 17 | .. automodule:: dgllife.model.readout.attentivefp_readout 18 | :members: 19 | 20 | MLP Readout 21 | ----------- 22 | .. automodule:: dgllife.model.readout.mlp_readout 23 | :members: 24 | 25 | Weighted Sum and Max Readout 26 | ---------------------------- 27 | .. automodule:: dgllife.model.readout.weighted_sum_and_max 28 | :members: 29 | 30 | Weave Readout 31 | ------------- 32 | .. automodule:: dgllife.model.readout.weave_readout 33 | :members: 34 | -------------------------------------------------------------------------------- /docs/source/api/model.zoo.rst: -------------------------------------------------------------------------------- 1 | .. _apimodelzoo: 2 | 3 | Model Zoo 4 | ========= 5 | 6 | This section introduces complete models for various downstream tasks. 7 | 8 | .. contents:: Contents 9 | :local: 10 | 11 | Building Blocks 12 | --------------- 13 | 14 | MLP Predictor 15 | ````````````` 16 | .. 
automodule:: dgllife.model.model_zoo.mlp_predictor 17 | :members: 18 | 19 | Hadamard Link Predictor 20 | ``````````````````````` 21 | .. automodule:: dgllife.model.model_zoo.hadamard_link_predictor 22 | :members: 23 | 24 | Molecular Property Prediction 25 | ----------------------------- 26 | 27 | AttentiveFP Predictor 28 | ````````````````````` 29 | .. automodule:: dgllife.model.model_zoo.attentivefp_predictor 30 | :members: 31 | 32 | GAT Predictor 33 | ````````````` 34 | .. automodule:: dgllife.model.model_zoo.gat_predictor 35 | :members: 36 | 37 | GATv2 Predictor 38 | ````````````` 39 | .. automodule:: dgllife.model.model_zoo.gatv2_predictor 40 | :members: 41 | 42 | GCN Predictor 43 | ````````````` 44 | .. automodule:: dgllife.model.model_zoo.gcn_predictor 45 | :members: 46 | 47 | MGCN Predictor 48 | `````````````` 49 | .. automodule:: dgllife.model.model_zoo.mgcn_predictor 50 | :members: 51 | 52 | MPNN Predictor 53 | `````````````` 54 | .. automodule:: dgllife.model.model_zoo.mpnn_predictor 55 | :members: 56 | 57 | SchNet Predictor 58 | ```````````````` 59 | .. automodule:: dgllife.model.model_zoo.schnet_predictor 60 | :members: 61 | 62 | Weave Predictor 63 | ``````````````` 64 | .. automodule:: dgllife.model.model_zoo.weave_predictor 65 | :members: 66 | 67 | GIN Predictor 68 | ````````````` 69 | .. automodule:: dgllife.model.model_zoo.gin_predictor 70 | :members: 71 | 72 | GNN OGB Predictor 73 | ````````````````` 74 | .. automodule:: dgllife.model.model_zoo.gnn_ogb_predictor 75 | :members: 76 | 77 | Neural Fingerprint Predictor 78 | ```````````````````````````` 79 | .. automodule:: dgllife.model.model_zoo.nf_predictor 80 | :members: 81 | 82 | Path-Augmented Graph Transformer Predictor 83 | `````````````````````````````````````````` 84 | .. automodule:: dgllife.model.model_zoo.pagtn_predictor 85 | :members: 86 | 87 | Generative Models 88 | ----------------- 89 | 90 | DGMG 91 | ```` 92 | .. 
automodule:: dgllife.model.model_zoo.dgmg 93 | :members: 94 | 95 | JTNNVAE 96 | ``````` 97 | .. automodule:: dgllife.model.model_zoo.jtvae 98 | :members: 99 | 100 | Reaction Prediction 101 | 102 | WLN for Reaction Center Prediction 103 | `````````````````````````````````` 104 | .. automodule:: dgllife.model.model_zoo.wln_reaction_center 105 | :members: 106 | 107 | WLN for Ranking Candidate Products 108 | `````````````````````````````````` 109 | .. automodule:: dgllife.model.model_zoo.wln_reaction_ranking 110 | :members: 111 | 112 | Protein-Ligand Binding Affinity Prediction 113 | 114 | ACNN 115 | ```` 116 | .. automodule:: dgllife.model.model_zoo.acnn 117 | :members: 118 | 119 | PotentialNet 120 | ```````````` 121 | .. automodule:: dgllife.model.model_zoo.potentialnet 122 | :members: 123 | -------------------------------------------------------------------------------- /docs/source/api/utils.complexes.rst: -------------------------------------------------------------------------------- 1 | .. _apiutilscomplexes: 2 | 3 | Utils for protein-ligand complexes 4 | ================================== 5 | 6 | Utilities in DGL-LifeSci for working with protein-ligand complexes. 7 | 8 | .. autosummary:: 9 | :toctree: ../generated/ 10 | 11 | dgllife.utils.ACNN_graph_construction_and_featurization 12 | dgllife.utils.PN_graph_construction_and_featurization 13 | -------------------------------------------------------------------------------- /docs/source/api/utils.pipeline.rst: -------------------------------------------------------------------------------- 1 | .. _apiutilspipeline: 2 | 3 | Model Development Pipeline 4 | ========================== 5 | 6 | .. contents:: Contents 7 | :local: 8 | 9 | Model Evaluation 10 | ---------------- 11 | 12 | A utility class for evaluating model performance on (multi-label) supervised learning. 13 | 14 | .. 
autoclass:: dgllife.utils.Meter 15 | :members: update, compute_metric 16 | 17 | Early Stopping 18 | -------------- 19 | 20 | Early stopping is a standard practice for preventing models from overfitting and we provide a utility 21 | class for handling it. 22 | 23 | .. autoclass:: dgllife.utils.EarlyStopping 24 | :members: step 25 | -------------------------------------------------------------------------------- /docs/source/api/utils.splitters.rst: -------------------------------------------------------------------------------- 1 | .. _apiutilssplitters: 2 | 3 | Splitting Datasets 4 | ================== 5 | 6 | We provide multiple splitting methods for datasets. 7 | 8 | .. contents:: Contents 9 | :local: 10 | 11 | ConsecutiveSplitter 12 | ------------------- 13 | 14 | .. autoclass:: dgllife.utils.ConsecutiveSplitter 15 | :members: train_val_test_split, k_fold_split 16 | 17 | RandomSplitter 18 | -------------- 19 | 20 | .. autoclass:: dgllife.utils.RandomSplitter 21 | :members: train_val_test_split, k_fold_split 22 | 23 | MolecularWeightSplitter 24 | ----------------------- 25 | 26 | .. autoclass:: dgllife.utils.MolecularWeightSplitter 27 | :members: train_val_test_split, k_fold_split 28 | 29 | ScaffoldSplitter 30 | ---------------- 31 | 32 | .. autoclass:: dgllife.utils.ScaffoldSplitter 33 | :members: train_val_test_split, k_fold_split 34 | 35 | SingleTaskStratifiedSplitter 36 | ---------------------------- 37 | 38 | .. autoclass:: dgllife.utils.SingleTaskStratifiedSplitter 39 | :members: train_val_test_split, k_fold_split 40 | -------------------------------------------------------------------------------- /docs/source/cli.rst: -------------------------------------------------------------------------------- 1 | .. _cli: 2 | 3 | Command Line Interface 4 | ====================== 5 | 6 | DGL-LifeSci provides command line interfaces that allow users 7 | to perform modeling without any background in programming and 8 | deep learning. 
In addition to installation, you will need to 9 | clone the github repo with 10 | 11 | .. code:: bash 12 | 13 | git clone https://github.com/awslabs/dgl-lifesci.git 14 | 15 | Molecular Property Prediction 16 | ----------------------------- 17 | 18 | Go to the directory below with 19 | 20 | .. code:: bash 21 | 22 | cd dgl-lifesci/examples/property_prediction/csv_data_configuration/ 23 | 24 | and then follow the README file. 25 | 26 | Reaction Prediction 27 | ------------------- 28 | 29 | Go to the directory below with 30 | 31 | .. code:: bash 32 | 33 | cd dgl-lifesci/examples/reaction_prediction/rexgen_direct/ 34 | 35 | and then follow the README file. 36 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | DGL-LifeSci: Bringing Graph Neural Networks to Chemistry and Biology 2 | =========================================================================================== 3 | 4 | DGL-LifeSci is a python package for applying graph neural networks to various tasks in chemistry 5 | and biology, on top of PyTorch, DGL, and RDKit. It covers various applications, including: 6 | 7 | * Molecular property prediction 8 | * Generative models 9 | * Reaction prediction 10 | * Protein-ligand binding affinity prediction 11 | 12 | .. toctree:: 13 | :maxdepth: 1 14 | :caption: Installation 15 | :hidden: 16 | :glob: 17 | 18 | install/index 19 | cli 20 | 21 | .. toctree:: 22 | :maxdepth: 2 23 | :caption: API Reference 24 | :hidden: 25 | :glob: 26 | 27 | api/utils.mols 28 | api/utils.splitters 29 | api/utils.pipeline 30 | api/utils.complexes 31 | api/data 32 | api/model.pretrain 33 | api/model.gnn 34 | api/model.readout 35 | api/model.zoo 36 | 37 | Free software 38 | ------------- 39 | DGL-LifeSci is free software; you can redistribute it and/or modify it under the terms 40 | of the Apache License 2.0. We welcome contributions. Join us on `GitHub `_. 
41 | -------------------------------------------------------------------------------- /docs/source/install/index.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | System requirements 5 | ------------------- 6 | DGL-LifeSci should work on: 7 | 8 | * Ubuntu 16.04 9 | * macOS X 10 | * Windows 10 11 | 12 | DGL-LifeSci requires: 13 | 14 | * Python 3.6 or later 15 | * `DGL 0.4.3 or later `_ 16 | * `PyTorch 1.2.0 or later `_ 17 | 18 | Additionally, we require **RDKit 2018.09.3** for cheminformatics. We recommend installing it with 19 | 20 | .. code:: bash 21 | 22 | conda install -c conda-forge rdkit==2018.09.3 23 | 24 | Other verions of RDKit are not tested. 25 | 26 | Install from pip 27 | ---------------- 28 | 29 | .. code:: bash 30 | 31 | pip install dgllife 32 | 33 | .. _install-from-source: 34 | 35 | Install from source 36 | ------------------- 37 | 38 | To use the latest experimental features, 39 | 40 | .. code:: bash 41 | 42 | git clone https://github.com/awslabs/dgl-lifesci.git 43 | cd dgl-lifesci/python 44 | python setup.py install 45 | -------------------------------------------------------------------------------- /examples/generative_models/jtvae/README.md: -------------------------------------------------------------------------------- 1 | # Junction Tree Variational Autoencoder for Molecular Graph Generation 2 | 3 | - [paper](https://arxiv.org/abs/1802.04364) 4 | - [authors' code](https://github.com/wengong-jin/icml18-jtnn/tree/master/molvae) 5 | 6 | ## Training 7 | 8 | We trained VAE model in two phases: 9 | 10 | 1. We first train a model without KL regularization term. The model checkpoints will be saved as `pre_model/model.iter-x`, where `x + 1` is the number of training epochs. 11 | 2. We then train the model with KL regularization by passing the path to a saved model checkpoint. The model checkpoints will be saved as `vae_model/model.iter-x`. 
import rdkit
import torch

from dgllife.data import JTVAEZINC, JTVAEDataset, JTVAECollator
from dgllife.utils import JTVAEVocab
from dgllife.model import JTNNVAE, load_pretrained
from torch.utils.data import DataLoader

def main(args):
    """Measure SMILES reconstruction accuracy of a JTVAE model.

    Iterates over the test molecules one at a time, decodes each molecule
    back from its latent representation and counts exact SMILES matches.

    Parameters
    ----------
    args : argparse.Namespace
        Command-line arguments; see the parser in ``__main__`` below.
    """
    # Silence RDKit warnings emitted while parsing molecules.
    lg = rdkit.RDLogger.logger()
    lg.setLevel(rdkit.RDLogger.CRITICAL)

    if args.use_cpu or not torch.cuda.is_available():
        device = torch.device('cpu')
    else:
        device = torch.device('cuda:0')

    vocab = JTVAEVocab(file_path=args.train_path)
    if args.test_path is None:
        dataset = JTVAEZINC('test', vocab)
    else:
        dataset = JTVAEDataset(args.test_path, vocab, training=False)
    # Reconstruction is evaluated one molecule at a time.
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            collate_fn=JTVAECollator(training=False))

    if args.model_path is None:
        model = load_pretrained('JTVAE_ZINC_no_kl')
    else:
        model = JTNNVAE(vocab, args.hidden_size, args.latent_size, args.depth)
        model.load_state_dict(torch.load(args.model_path, map_location='cpu'))
    model = model.to(device)
    # Fix: switch to eval mode so training-only behavior (e.g. dropout)
    # is disabled during reconstruction, matching the pretrained-model
    # usage shown in the documentation.
    model.eval()

    acc = 0.0
    # Fix: initialize tot so an empty dataset does not raise NameError
    # at the final print below.
    tot = 0
    for it, (tree, tree_graph, mol_graph) in enumerate(dataloader):
        tot = it + 1
        smiles = tree.smiles
        tree_graph = tree_graph.to(device)
        mol_graph = mol_graph.to(device)
        dec_smiles = model.reconstruct(tree_graph, mol_graph)
        if dec_smiles == smiles:
            acc += 1
        if tot % args.print_iter == 0:
            print('Iter {:d}/{:d} | Acc {:.4f}'.format(
                tot // args.print_iter, len(dataloader) // args.print_iter, acc / tot))
    # Guard the division for the empty-dataset case.
    if tot > 0:
        print('Final acc: {:.4f}'.format(acc / tot))

if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('-tr', '--train-path', type=str,
                        help='Path to the training molecules, with one SMILES string a line')
    parser.add_argument('-te', '--test-path', type=str,
                        help='Path to the test molecules, with one SMILES string a line')
    parser.add_argument('-m', '--model-path', type=str,
                        help='Path to pre-trained model checkpoint')
    parser.add_argument('-w', '--hidden-size', type=int, default=450,
                        help='Hidden size')
    parser.add_argument('-l', '--latent-size', type=int, default=56,
                        help='Latent size')
    parser.add_argument('-d', '--depth', type=int, default=3,
                        help='Number of GNN layers')
    parser.add_argument('-pi', '--print-iter', type=int, default=20,
                        help='Frequency for printing evaluation metrics')
    parser.add_argument('-cpu', '--use-cpu', action='store_true',
                        help='By default, the script uses GPU whenever available. '
                             'This flag enforces the use of CPU.')
    args = parser.parse_args()

    main(args)
# SPDX-License-Identifier: Apache-2.0
#
# pylint: disable= no-member, arguments-differ, invalid-name
#
# Utils for JTVAE

import errno
import os

def mkdir_p(path):
    """Create the directory ``path``, including missing parents.

    Mirrors ``mkdir -p``: an already-existing directory is reported,
    not treated as an error.

    Parameters
    ----------
    path: str
        Folder to create
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        # Re-raise anything other than "the directory already exists".
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
        print('Directory {} already exists.'.format(path))
    else:
        print('Created directory {}'.format(path))
21 | -------------------------------------------------------------------------------- /examples/link_prediction/ogbl-ppa/README.md: -------------------------------------------------------------------------------- 1 | # Link Prediction for ogbl-ppa 2 | 3 | For a detailed description of the dataset, see [the OGB website](https://ogb.stanford.edu/docs/linkprop/). 4 | 5 | ## Models 6 | 7 | - **Graph Convolutional Networks (GCN)** [1] 8 | - **GraphSAGE** [2] 9 | 10 | ## Dependencies 11 | 12 | - **OGB v1.1.1**, which can be installed with ```pip install ogb``` 13 | - **DGL v0.4.3** 14 | 15 | ## Usage 16 | 17 | To run with default options, simply do 18 | 19 | ```bash 20 | python full_graph_link_predictor.py 21 | ``` 22 | 23 | By default, we use CPU for computation as the graph is too large for a GPU with normal size. 24 | 25 | The optional arguments are as follows: 26 | 27 | ``` 28 | --use_gpu, use gpu for computation 29 | --use_sage, use GraphSAGE rather than GCN 30 | --num_layers, number of GNN layers to use as well as linear layers for final link prediction (default=3) 31 | --hidden_feats, size for hidden representations (default=256) 32 | --dropout, (default=0.0) 33 | --batch_size, batch size to use for link prediction (default=64 * 1024) 34 | --lr, learning rate (default=0.01) 35 | --epochs, number of epochs for training (default=20) 36 | --eval_steps, evaluate hits@100 every {eval_steps} epochs (default=1) 37 | --runs, number of random experiments to perform (default=1) 38 | ``` 39 | 40 | ## Performance 41 | 42 | For model evaluation, we consider hits@100 -- ranking each true link against 3,000,000 randomly-sampled 43 | negative edges, and counting the ratio of positive edges that are ranked at 100-th place or above. 44 | 45 | Using the default parameters, the performance of 10 random runs is as follows. 
# -*- coding: utf-8 -*-
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import torch

class Logger(object):
    """Track (train, valid, test) metrics across several runs and summarize them.

    Parameters
    ----------
    runs : int
        Number of independent runs to track.
    info : optional
        Arbitrary metadata to attach to this logger.
    """
    def __init__(self, runs, info=None):
        self.info = info
        self.results = [[] for _ in range(runs)]

    def add_result(self, run, result):
        """Append a (train, valid, test) metric triple for the given run."""
        assert len(result) == 3
        assert 0 <= run < len(self.results)
        self.results[run].append(result)

    def print_statistics(self, run=None):
        """Print summary statistics for one run, or aggregated over all runs."""
        if run is not None:
            # Metrics are stored as fractions; report percentages.
            result = 100 * torch.tensor(self.results[run])
            # Pick the epoch with the best validation score.
            argmax = result[:, 1].argmax().item()
            print(f'Run {run + 1:02d}:')
            print(f'Highest Train: {result[:, 0].max():.2f}')
            print(f'Highest Valid: {result[:, 1].max():.2f}')
            print(f' Final Train: {result[argmax, 0]:.2f}')
            print(f' Final Test: {result[argmax, 2]:.2f}')
            return

        result = 100 * torch.tensor(self.results)

        # For each run: best train, best valid, and train/test at the
        # best-validation epoch.
        per_run = []
        for r in result:
            best_epoch = r[:, 1].argmax()
            per_run.append((r[:, 0].max().item(),
                            r[:, 1].max().item(),
                            r[best_epoch, 0].item(),
                            r[best_epoch, 2].item()))
        summary = torch.tensor(per_run)

        print(f'All runs:')
        labels = ('Highest Train', 'Highest Valid', ' Final Train', ' Final Test')
        for name, col in zip(labels, summary.t()):
            # Mean ± standard deviation across runs.
            print(f'{name}: {col.mean():.2f} ± {col.std():.2f}')
18 | 19 | ## Data Preparation 20 | 21 | You can either prepare a `.txt` file where each line corresponds to the SMILES string for a molecule or 22 | a `.csv` file where a column contains the SMILES strings for all molecules. 23 | 24 | ## Usage 25 | 26 | To compute molecule embeddings by providing a `.txt` file, do 27 | 28 | ```bash 29 | python main.py -fi A -m B 30 | ``` 31 | 32 | To compute molecule embeddings by providing a `.csv` file, do 33 | 34 | ```bash 35 | python main.py -fi A -m B -fo csv -sc C 36 | ``` 37 | 38 | where: 39 | - `A` specifies the path to the `.txt` file or the `.csv` file 40 | - `B` specifies the pre-trained model to use, which can be `gin_supervised_contextpred`, 41 | `gin_supervised_infomax`, `gin_supervised_edgepred`, `gin_supervised_masking`. 42 | - `C` specifies the header for SMILES column in the `.csv` file 43 | 44 | Other optional arguments include: 45 | - **batch_size**: `-b D` can be used to specify the batch size for computation. 46 | By default we use `256`. 47 | - **out_dir**: `-o E` can be used to specify the directory for storing the computation results. 48 | By default we use `results`. 49 | 50 | ## Results 51 | 52 | We store two files in the output directory: 53 | - `mol_parsed.npy`: Since we may not be able to parse some SMILES strings with RDKit, we use a 54 | bool numpy array `a` where `a[i]` is True if the i-th SMILES string can be parsed by RDKit. 55 | - `mol_emb.npy`: We use a numpy array `b` for storing the computed molecule embeddings where `b[i]` 56 | gives the molecule embedding of the i-th molecule which can be parsed by RDKit. 
57 | -------------------------------------------------------------------------------- /examples/property_prediction/MTL/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .gcn import GCNRegressor, GCNRegressorBypass 2 | from .gat import GATRegressor, GATRegressorBypass 3 | from .mpnn import MPNNRegressor, MPNNRegressorBypass 4 | from .attentivefp import AttentiveFPRegressor, AttentiveFPRegressorBypass -------------------------------------------------------------------------------- /examples/property_prediction/MTL/model/attentivefp.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from dgllife.model import AttentiveFPGNN, AttentiveFPReadout 4 | 5 | from .regressor import BaseGNNRegressor, BaseGNNRegressorBypass 6 | 7 | class AttentiveFPRegressor(BaseGNNRegressor): 8 | """AttentiveFP-based model for multitask molecular property prediction. 9 | We assume all tasks are regression problems. 10 | 11 | Parameters 12 | ---------- 13 | in_node_feats : int 14 | Number of input node features 15 | in_edge_feats : int 16 | Number of input edge features 17 | gnn_out_feats : int 18 | The GNN output size 19 | num_layers : int 20 | Number of GNN layers 21 | num_timesteps : int 22 | Number of timesteps for updating molecular representations with GRU during readout 23 | n_tasks : int 24 | Number of prediction tasks 25 | regressor_hidden_feats : int 26 | Hidden size in MLP regressor 27 | dropout : float 28 | The probability for dropout. Default to 0, i.e. no dropout is performed. 
29 | """ 30 | def __init__(self, in_node_feats, in_edge_feats, gnn_out_feats, num_layers, num_timesteps, 31 | n_tasks, regressor_hidden_feats=128, dropout=0.): 32 | super(AttentiveFPRegressor, self).__init__(readout_feats=gnn_out_feats, 33 | n_tasks=n_tasks, 34 | regressor_hidden_feats=regressor_hidden_feats, 35 | dropout=dropout) 36 | self.gnn = AttentiveFPGNN(in_node_feats, in_edge_feats, num_layers, 37 | gnn_out_feats, dropout) 38 | self.readout = AttentiveFPReadout(gnn_out_feats, num_timesteps, dropout) 39 | 40 | class AttentiveFPRegressorBypass(BaseGNNRegressorBypass): 41 | """AttentiveFP-based model for bypass multitask molecular property prediction. 42 | We assume all tasks are regression problems. 43 | 44 | Parameters 45 | ---------- 46 | in_node_feats : int 47 | Number of input node features 48 | in_edge_feats : int 49 | Number of input edge features 50 | gnn_out_feats : int 51 | The GNN output size 52 | num_layers : int 53 | Number of GNN layers 54 | num_timesteps : int 55 | Number of timesteps for updating molecular representations with GRU during readout 56 | n_tasks : int 57 | Number of prediction tasks 58 | regressor_hidden_feats : int 59 | Hidden size in MLP regressor 60 | dropout : float 61 | The probability for dropout. Default to 0, i.e. no dropout is performed. 
62 | """ 63 | def __init__(self, in_node_feats, in_edge_feats, gnn_out_feats, num_layers, num_timesteps, 64 | n_tasks, regressor_hidden_feats=128, dropout=0.): 65 | super(AttentiveFPRegressorBypass, self).__init__( 66 | readout_feats= 2 * gnn_out_feats, n_tasks=n_tasks, 67 | regressor_hidden_feats=regressor_hidden_feats, 68 | dropout=dropout) 69 | self.shared_gnn = AttentiveFPGNN(in_node_feats, in_edge_feats, num_layers, 70 | gnn_out_feats, dropout) 71 | for _ in range(n_tasks): 72 | self.task_gnns.append(AttentiveFPGNN(in_node_feats, in_edge_feats, num_layers, 73 | gnn_out_feats, dropout)) 74 | self.readouts.append(AttentiveFPReadout(2 * gnn_out_feats, num_timesteps, dropout)) 75 | -------------------------------------------------------------------------------- /examples/property_prediction/MTL/model/blocks.py: -------------------------------------------------------------------------------- 1 | import dgl.function as fn 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | class InputInitializer(nn.Module): 7 | """Initializde edge representations based on input node and edge features 8 | 9 | Parameters 10 | ---------- 11 | in_node_feats : int 12 | Number of input node features 13 | in_edge_feats : int 14 | Number of input edge features 15 | """ 16 | def __init__(self, in_node_feats, in_edge_feats): 17 | super(InputInitializer, self).__init__() 18 | 19 | self.project_nodes = nn.Linear(in_node_feats, in_node_feats) 20 | self.project_edges = nn.Linear(in_edge_feats, in_edge_feats) 21 | 22 | def forward(self, bg, node_feats, edge_feats): 23 | """Initialize input representations. 24 | 25 | Project the node/edge features and then concatenate the edge representations with the 26 | representations of their source nodes. 
27 | """ 28 | node_feats = self.project_nodes(node_feats) 29 | edge_feats = self.project_edges(edge_feats) 30 | 31 | bg = bg.local_var() 32 | bg.ndata['hv'] = node_feats 33 | bg.apply_edges(fn.copy_u('hv', 'he')) 34 | return torch.cat([bg.edata['he'], edge_feats], dim=1) 35 | 36 | class EdgeGraphConv(nn.Module): 37 | """Apply graph convolution over an input edge signal. 38 | 39 | Parameters 40 | ---------- 41 | in_feats : int 42 | Input feature size. 43 | out_feats : int 44 | Output feature size. 45 | activation : callable activation function/layer or None, optional 46 | If not None, applies an activation function to the updated node features. 47 | """ 48 | def __init__(self, in_feats, out_feats, activation=F.relu): 49 | super(EdgeGraphConv, self).__init__() 50 | self.in_feats = in_feats 51 | self.out_feats = out_feats 52 | self.linear = nn.Linear(in_feats, out_feats) 53 | self.activation = activation 54 | 55 | def forward(self, graph, feat): 56 | """Compute graph convolution. 57 | 58 | Parameters 59 | ---------- 60 | graph : DGLGraph 61 | The graph. 62 | feat : torch.Tensor 63 | The input edge features. 64 | 65 | Returns 66 | ------- 67 | torch.Tensor 68 | The output features. 69 | """ 70 | graph = graph.local_var() 71 | 72 | if self.in_feats > self.out_feats: 73 | # multiply by W first to reduce the feature size for aggregation. 
74 | feat = self.linear(feat) 75 | graph.edata['h'] = feat 76 | graph.update_all(fn.copy_e('h', 'm'), fn.sum('m', 'h')) 77 | rst = graph.ndata['h'] 78 | else: 79 | # aggregate first then multiply by W 80 | graph.edata['h'] = feat 81 | graph.update_all(fn.copy_e('h', 'm'), fn.sum('m', 'h')) 82 | rst = graph.ndata['h'] 83 | rst = self.linear(rst) 84 | 85 | if self.activation is not None: 86 | rst = self.activation(rst) 87 | 88 | return rst 89 | -------------------------------------------------------------------------------- /examples/property_prediction/README.md: -------------------------------------------------------------------------------- 1 | # Graph Property Prediction 2 | 3 | GNN-based graph property prediction typically consists of the following steps: 4 | 1. Construct graphs 5 | 2. Prepare initial node (and edge) features for graphs 6 | 3. Use GNNs to update node representations of graphs 7 | 4. Compute graph representations out of updated node representations 8 | 5. Pass the graph representations to an MLP for final prediction 9 | 10 | For those who are familiar with molecular fingerprints, GNN-based molecular representations 11 | can be viewed as learnable molecular fingerprints as first introduced in [1]. 12 | 13 | **To develop a GNN-based molecular property prediction model for your own dataset, see `csv_data_configuration`.** 14 | 15 | ## References 16 | 17 | [1] Duvenaud et al. (2015) Convolutional networks on graphs for learning molecular fingerprints. *Advances in neural 18 | information processing systems (NeurIPS)*, 2224-2232. 
19 | -------------------------------------------------------------------------------- /examples/property_prediction/alchemy/README.md: -------------------------------------------------------------------------------- 1 | # Alchemy 2 | 3 | ## Dataset Introduction 4 | 5 | [1] introduced a dataset comprised of 12 quantum mechanical properties of 119,487 organic molecules with up to 14 6 | heavy atoms, sampled from the GDB MedChem database. 7 | 8 | ## Modeling 9 | 10 | ### Pre-processing 11 | 12 | [1] performed a stratified split of the dataset so that each of the training, validation and test set covers the 13 | full range of provided labels. By default, we construct a complete graph for each molecule, i.e. each pair of atoms 14 | is connected. The details for node/edge featurization can be found 15 | [here](https://lifesci.dgl.ai/api/data.html#alchemy-for-quantum-chemistry). 16 | 17 | ### Training and Evaluation 18 | 19 | We support three models for this dataset -- SchNet [2], MPNN [3], MGCN [4]. To train from scratch, run 20 | 21 | ```bash 22 | python main.py -m X 23 | ``` 24 | 25 | where `X` can be `SchNet`, `MPNN`, or `MGCN`. 26 | 27 | ## References 28 | 29 | [1] Chen et al. (2019) Alchemy: A Quantum Chemistry Dataset for Benchmarking AI Models. 30 | 31 | [2] Schütt et al. (2017) SchNet: A continuous-filter convolutional neural network for modeling quantum interactions. 32 | *Advances in Neural Information Processing Systems (NeurIPS)*, 992-1002. 33 | 34 | [3] Gilmer et al. (2017) Neural Message Passing for Quantum Chemistry. *Proceedings of the 34th International Conference on 35 | Machine Learning*, JMLR. 1263-1272. 36 | 37 | [4] Lu et al. (2019) Molecular Property Prediction: A Multilevel Quantum Interactions Modeling Perspective. 38 | *The 33rd AAAI Conference on Artificial Intelligence*. 
39 | -------------------------------------------------------------------------------- /examples/property_prediction/alchemy/configure.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | MPNN_Alchemy = { 7 | 'random_seed': 0, 8 | 'batch_size': 16, 9 | 'node_in_feats': 15, 10 | 'node_out_feats': 64, 11 | 'edge_in_feats': 5, 12 | 'edge_hidden_feats': 128, 13 | 'n_tasks': 12, 14 | 'lr': 0.0001, 15 | 'patience': 50, 16 | 'metric_name': 'mae', 17 | 'mode': 'lower', 18 | 'weight_decay': 0 19 | } 20 | 21 | SchNet_Alchemy = { 22 | 'random_seed': 0, 23 | 'batch_size': 16, 24 | 'node_feats': 64, 25 | 'hidden_feats': [64, 64, 64], 26 | 'predictor_hidden_feats': 64, 27 | 'n_tasks': 12, 28 | 'lr': 0.0001, 29 | 'patience': 50, 30 | 'metric_name': 'mae', 31 | 'mode': 'lower', 32 | 'weight_decay': 0 33 | } 34 | 35 | MGCN_Alchemy = { 36 | 'random_seed': 0, 37 | 'batch_size': 16, 38 | 'feats': 128, 39 | 'n_layers': 3, 40 | 'predictor_hidden_feats': 64, 41 | 'n_tasks': 12, 42 | 'lr': 0.0001, 43 | 'patience': 50, 44 | 'metric_name': 'mae', 45 | 'mode': 'lower', 46 | 'weight_decay': 0 47 | } 48 | 49 | experiment_configures = { 50 | 'MPNN_Alchemy': MPNN_Alchemy, 51 | 'SchNet_Alchemy': SchNet_Alchemy, 52 | 'MGCN_Alchemy': MGCN_Alchemy 53 | } 54 | def get_exp_configure(exp_name): 55 | return experiment_configures[exp_name] 56 | -------------------------------------------------------------------------------- /examples/property_prediction/alchemy/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import dgl 7 | import numpy as np 8 | import random 9 | import torch 10 | 11 | def set_random_seed(seed=0): 12 | """Set random seed. 13 | 14 | Parameters 15 | ---------- 16 | seed : int 17 | Random seed to use. Default to 0. 18 | """ 19 | random.seed(seed) 20 | np.random.seed(seed) 21 | torch.manual_seed(seed) 22 | if torch.cuda.is_available(): 23 | torch.cuda.manual_seed(seed) 24 | 25 | def collate_molgraphs(data): 26 | """Batching a list of datapoints for dataloader. 27 | 28 | Parameters 29 | ---------- 30 | data : list of 3-tuples. 31 | Each tuple is for a single datapoint, consisting of 32 | a SMILES string, a DGLGraph and a tensor of 33 | all-task labels. 34 | 35 | Returns 36 | ------- 37 | smiles : list 38 | List of smiles 39 | bg : DGLGraph 40 | The batched DGLGraph. 41 | labels : Tensor of dtype float32 and shape (B, T) 42 | Batched datapoint labels. B is len(data) and 43 | T is the number of total tasks. 
44 | """ 45 | smiles, graphs, labels = map(list, zip(*data)) 46 | 47 | bg = dgl.batch(graphs) 48 | bg.set_n_initializer(dgl.init.zero_initializer) 49 | bg.set_e_initializer(dgl.init.zero_initializer) 50 | labels = torch.stack(labels, dim=0) 51 | 52 | return smiles, bg, labels 53 | 54 | def load_model(args): 55 | if args['model'] == 'SchNet': 56 | from dgllife.model import SchNetPredictor 57 | model = SchNetPredictor(node_feats=args['node_feats'], 58 | hidden_feats=args['hidden_feats'], 59 | predictor_hidden_feats=args['predictor_hidden_feats'], 60 | n_tasks=args['n_tasks']) 61 | 62 | if args['model'] == 'MGCN': 63 | from dgllife.model import MGCNPredictor 64 | model = MGCNPredictor(feats=args['feats'], 65 | n_layers=args['n_layers'], 66 | predictor_hidden_feats=args['predictor_hidden_feats'], 67 | n_tasks=args['n_tasks']) 68 | 69 | if args['model'] == 'MPNN': 70 | from dgllife.model import MPNNPredictor 71 | model = MPNNPredictor(node_in_feats=args['node_in_feats'], 72 | edge_in_feats=args['edge_in_feats'], 73 | node_out_feats=args['node_out_feats'], 74 | edge_hidden_feats=args['edge_hidden_feats'], 75 | n_tasks=args['n_tasks']) 76 | 77 | return model 78 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/analysis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | if __name__ == '__main__': 7 | import pandas 8 | 9 | from argparse import ArgumentParser 10 | from dgllife.utils import analyze_mols 11 | 12 | from utils import mkdir_p 13 | 14 | parser = ArgumentParser('Dataset analysis') 15 | parser.add_argument('-c', '--csv-path', type=str, required=True, 16 | help='Path to a csv file for loading a dataset') 17 | parser.add_argument('-sc', '--smiles-column', type=str, required=True, 18 | help='Header for the SMILES column in the CSV file') 19 | parser.add_argument('-np', '--num-processes', type=int, default=1, 20 | help='Number of processes to use for analysis') 21 | parser.add_argument('-p', '--path', type=str, default='analysis_results', 22 | help='Path to export analysis results') 23 | args = parser.parse_args().__dict__ 24 | 25 | mkdir_p(args['path']) 26 | 27 | df = pandas.read_csv(args['csv_path']) 28 | analyze_mols(smiles=df[args['smiles_column']].tolist(), 29 | num_processes=args['num_processes'], 30 | path_to_export=args['path']) 31 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/AttentiveFP.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 3e-4, 3 | "weight_decay": 0, 4 | "patience": 30, 5 | "batch_size": 128, 6 | "num_layers": 2, 7 | "num_timesteps": 2, 8 | "graph_feat_size": 200, 9 | "dropout": 0 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/GAT.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 3e-4, 3 | "weight_decay": 0, 4 | "patience": 30, 5 | "batch_size": 128, 6 | "dropout": 0.05, 7 | "gnn_hidden_feats": 64, 8 | "num_heads": 8, 9 | "alpha": 0.06, 10 | "predictor_hidden_feats": 128, 11 | "num_gnn_layers": 5, 12 | "residual": true 13 | } 14 | 
-------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/GCN.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 2e-2, 3 | "weight_decay": 0, 4 | "patience": 30, 5 | "batch_size": 128, 6 | "dropout": 0.05, 7 | "gnn_hidden_feats": 256, 8 | "predictor_hidden_feats": 128, 9 | "num_gnn_layers": 2, 10 | "residual": true, 11 | "batchnorm": false 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/MPNN.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 3e-4, 3 | "weight_decay": 0, 4 | "patience": 30, 5 | "batch_size": 128, 6 | "node_out_feats": 64, 7 | "edge_hidden_feats": 128, 8 | "num_step_message_passing": 6, 9 | "num_step_set2set": 6, 10 | "num_layer_set2set": 3 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/NF.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 1e-2, 3 | "batch_size": 512, 4 | "batchnorm": false, 5 | "dropout": 0.15, 6 | "gnn_hidden_feats": 32, 7 | "num_gnn_layers": 2, 8 | "patience": 30, 9 | "predictor_hidden_feats": 32, 10 | "weight_decay": 1e-3 11 | } -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/README.md: -------------------------------------------------------------------------------- 1 | # Hyperparameter Configuration 2 | 3 | To manually set the hyperparameters for a model, modify the corresponding json file. 
4 | 5 | ## Common Hyperparameters 6 | 7 | - `lr`: (float) Learning rate for updating model parameters 8 | - `weight_decay`: (float) Strength for L2 penalty in the objective function 9 | - `patience`: (int) Number of epochs to wait before early stopping when validation performance no longer improves 10 | - `batch_size`: (int) Batch size for mini-batch training 11 | 12 | ## GCN 13 | 14 | - `gnn_hidden_feats`: (int) Hidden size for GNN layers 15 | - `predictor_hidden_feats`: (int) Hidden size for the MLP predictor 16 | - `num_gnn_layers`: (int) Number of GCN layers to use 17 | - `residual`: (bool) Whether to use residual connection for each GCN layer 18 | - `batchnorm`: (bool) Whether to apply batch normalization to the output of each GCN layer 19 | - `dropout`: (float) Dropout probability 20 | 21 | ## GAT 22 | 23 | - `gnn_hidden_feats`: (int) Hidden size for each attention head in GNN layers 24 | - `num_heads`: (int) Number of attention heads in each GNN layer 25 | - `alpha`: (float) Slope for negative values in LeakyReLU 26 | - `predictor_hidden_feats`: (int) Hidden size for the MLP predictor 27 | - `num_gnn_layers`: (int) Number of GNN layers to use 28 | - `residual`: (bool) Whether to use residual connection for each GAT layer 29 | - `dropout`: (float) Dropout probability 30 | 31 | ## Weave 32 | 33 | - `num_gnn_layers`: (int) Number of GNN layers to use 34 | - `gnn_hidden_feats`: (int) Hidden size for GNN layers 35 | - `graph_feats`: (int) Hidden size for the MLP predictor 36 | - `gaussian_expand`: (bool) Whether to expand each dimension of node features by a 37 | Gaussian histogram in computing graph representations. 
38 | 39 | ## MPNN 40 | 41 | - `node_out_feats`: (int) Hidden size for node representations in GNN layers 42 | - `edge_hidden_feats`: (int) Hidden size for edge representations in GNN layers 43 | - `num_step_message_passing`: (int) Number of times for message passing, which is equivalent to the number of GNN layers 44 | - `num_step_set2set`: (int) Number of set2set steps 45 | - `num_layer_set2set`: (int) Number of set2set layers 46 | 47 | ## AttentiveFP 48 | 49 | - `num_layers`: (int) Number of GNN layers 50 | - `num_timesteps`: (int) Number of times for updating graph representations with GRU 51 | - `graph_feat_size`: (int) Hidden size for the graph representations 52 | - `dropout`: (float) Dropout probability 53 | 54 | ## gin_supervised_contextpred / gin_supervised_edgepred / gin_supervised_infomax / gin_supervised_masking 55 | 56 | - `jk`: (str) The way to aggregate the output of all GNN layers. One of `'concat'`, `'last'`, `'max'`, 57 | `'sum'`, respectively for taking the concatenation of all GNN layer output, taking the output of the last 58 | GNN layer, performing max pooling across all GNN layer output, and summing all GNN layer output. 59 | - `readout`: (str) The way to compute graph-level representations out of node-level representations, which 60 | can be one of `'sum'`, `'mean'`, `'max'`, and `'attention'`. 
61 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/Weave.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 3e-4, 3 | "weight_decay": 0, 4 | "patience": 30, 5 | "batch_size": 128, 6 | "num_gnn_layers": 5, 7 | "gnn_hidden_feats": 50, 8 | "graph_feats": 128, 9 | "gaussian_expand": true 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 2e-2, 3 | "weight_decay": 0, 4 | "patience": 30, 5 | "batch_size": 128, 6 | "jk": "last", 7 | "readout": "sum" 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 2e-2, 3 | "weight_decay": 0, 4 | "patience": 30, 5 | "batch_size": 128, 6 | "jk": "last", 7 | "readout": "sum" 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 2e-2, 3 | "weight_decay": 0, 4 | "patience": 30, 5 | "batch_size": 128, 6 | "jk": "last", 7 | "readout": "sum" 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/csv_data_configuration/model_configures/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "lr": 2e-2, 3 | "weight_decay": 0, 4 | "patience": 30, 5 | "batch_size": 
128, 6 | "jk": "last", 7 | "readout": "sum" 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "dropout": 0.12249297382460408, 4 | "graph_feat_size": 32, 5 | "lr": 0.007805579546099391, 6 | "num_layers": 1, 7 | "num_timesteps": 4, 8 | "patience": 30, 9 | "weight_decay": 0.0006081980506190971 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "dropout": 0.39078446228187624, 4 | "graph_feat_size": 16, 5 | "lr": 0.02382515128525911, 6 | "num_layers": 2, 7 | "num_timesteps": 4, 8 | "patience": 30, 9 | "weight_decay": 0.0018952891636735724 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.6702823790658061, 3 | "batch_size": 256, 4 | "dropout": 0.09842987062340869, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.005534847757716649, 7 | "num_gnn_layers": 2, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 128, 11 | "residual": false, 12 | "weight_decay": 0.001032565838193678 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.2547844032722401, 3 | "batch_size": 256, 4 | "dropout": 0.012993892934328621, 5 | "gnn_hidden_feats": 64, 6 | "lr": 
0.07817741154542117, 7 | "num_gnn_layers": 1, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 128, 11 | "residual": false, 12 | "weight_decay": 0.0003096155718731571 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "batchnorm": false, 4 | "dropout": 0.009923177126280991, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.014062994454917627, 7 | "num_gnn_layers": 2, 8 | "patience": 30, 9 | "predictor_hidden_feats": 256, 10 | "residual": false, 11 | "weight_decay": 0.0019709239173825678 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "batchnorm": false, 4 | "dropout": 0.022033656211803594, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.019448185834245686, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "predictor_hidden_feats": 16, 10 | "residual": true, 11 | "weight_decay": 0.0014564011624124194 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.07456107899117562, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 1, 7 | "num_step_message_passing": 1, 8 | "num_step_set2set": 1, 9 | "patience": 30, 10 | "weight_decay": 0.0015279064705638519 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/MPNN_canonical.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.0006552561998568872, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 1, 7 | "num_step_message_passing": 1, 8 | "num_step_set2set": 3, 9 | "patience": 30, 10 | "weight_decay": 0.0009810201376149145 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/NF_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "batchnorm": true, 4 | "dropout": 0.14096514656248904, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.0030737961878790354, 7 | "num_gnn_layers": 1, 8 | "patience": 10, 9 | "predictor_hidden_feats": 1024, 10 | "weight_decay": 0.002017810235088199 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 32, 5 | "graph_feats": 32, 6 | "lr": 0.04545486339775049, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "weight_decay": 0.00023692807301030228 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 32, 5 | "graph_feats": 256, 6 | "lr": 0.003402592506956134, 7 | "num_gnn_layers": 2, 8 | "patience": 30, 9 | "weight_decay": 0.0020723653849218926 10 | } 11 | -------------------------------------------------------------------------------- 
/examples/property_prediction/moleculenet/configures/BACE/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "concat", 4 | "lr": 0.0007635178701763262, 5 | "patience": 30, 6 | "readout": "max", 7 | "weight_decay": 0.00024963281046459784 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "last", 4 | "lr": 0.001041156195100848, 5 | "patience": 30, 6 | "readout": "max", 7 | "weight_decay": 0.0007399351793946772 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "jk": "sum", 4 | "lr": 0.011223536124418093, 5 | "patience": 30, 6 | "readout": "attention", 7 | "weight_decay": 0.001743012359692264 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BACE/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "jk": "sum", 4 | "lr": 0.00021739706003870494, 5 | "patience": 30, 6 | "readout": "attention", 7 | "weight_decay": 0.0013102017740472963 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "dropout": 0.4216675614776068, 4 | "graph_feat_size": 128, 5 | "lr": 
0.013103855223164028, 6 | "num_layers": 3, 7 | "num_timesteps": 2, 8 | "patience": 30, 9 | "weight_decay": 3.4463043594661286e-05 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "dropout": 0.22184205119419326, 4 | "graph_feat_size": 16, 5 | "lr": 0.0007558909853712381, 6 | "num_layers": 1, 7 | "num_timesteps": 5, 8 | "patience": 30, 9 | "weight_decay": 0.0013859819352381362 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.8731920595699334, 3 | "batch_size": 32, 4 | "dropout": 0.07001765207708285, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.0004682562483003267, 7 | "num_gnn_layers": 4, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 256, 11 | "residual": false, 12 | "weight_decay": 0.002074983153298509 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.6544012585238377, 3 | "batch_size": 512, 4 | "dropout": 0.046515821442611856, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.01907388214512412, 7 | "num_gnn_layers": 3, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 256, 11 | "residual": false, 12 | "weight_decay": 0.00015386570821936986 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/GCN_attentivefp.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": true, 4 | "dropout": 0.2130511856011713, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.05035374044127924, 7 | "num_gnn_layers": 2, 8 | "patience": 30, 9 | "predictor_hidden_feats": 128, 10 | "residual": true, 11 | "weight_decay": 0.0003962760086087119 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "batchnorm": false, 4 | "dropout": 0.0272564399565973, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.02020086171843634, 7 | "num_gnn_layers": 4, 8 | "patience": 30, 9 | "predictor_hidden_feats": 32, 10 | "residual": true, 11 | "weight_decay": 0.001168051063650801 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.0007665155556178249, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 2, 7 | "num_step_message_passing": 4, 8 | "num_step_set2set": 3, 9 | "patience": 30, 10 | "weight_decay": 0.0015152431461163147 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.019972837535528663, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 1, 7 | "num_step_message_passing": 1, 8 | "num_step_set2set": 2, 9 | "patience": 30, 10 | "weight_decay": 0.0007471791755792163 11 | } 12 | 
-------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/NF_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "batchnorm": false, 4 | "dropout": 0.1425900250956499, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.014363376427570391, 7 | "num_gnn_layers": 2, 8 | "patience": 30, 9 | "predictor_hidden_feats": 32, 10 | "weight_decay": 0.0013136797911629864, 11 | "in_node_feats": 74 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 32, 5 | "graph_feats": 256, 6 | "lr": 0.00010311484277689478, 7 | "num_gnn_layers": 4, 8 | "patience": 30, 9 | "weight_decay": 0.0011899233885048492 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 256, 5 | "graph_feats": 256, 6 | "lr": 0.0006149653013292772, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "weight_decay": 0.002003308216459167 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "last", 4 | "lr": 0.029792979117511588, 5 | "patience": 30, 6 | "readout": "attention", 7 | "weight_decay": 0.0016007421985042994 8 | } 9 | 
-------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "jk": "sum", 4 | "lr": 0.0046485897678776485, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 0.0027947446214444767 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "jk": "last", 4 | "lr": 0.0015135083464233052, 5 | "patience": 30, 6 | "readout": "attention", 7 | "weight_decay": 0.002217595871900105 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/BBBP/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "concat", 4 | "lr": 0.012012690359411063, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 5.358117963097202e-07 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "dropout": 0.08746338896051695, 4 | "graph_feat_size": 16, 5 | "lr": 0.03572700975256572, 6 | "num_layers": 1, 7 | "num_timesteps": 2, 8 | "patience": 30, 9 | "weight_decay": 0.0008639377041415643 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/AttentiveFP_canonical.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "dropout": 0.3391802249114625, 4 | "graph_feat_size": 64, 5 | "lr": 0.009696667027060839, 6 | "num_layers": 2, 7 | "num_timesteps": 1, 8 | "patience": 30, 9 | "weight_decay": 0.00028432897751174286 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.3794180901463749, 3 | "batch_size": 64, 4 | "dropout": 0.023789159870020463, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.0006275804887230993, 7 | "num_gnn_layers": 1, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 32, 11 | "residual": true, 12 | "weight_decay": 0.0014626729829709858 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.4828530106865167, 3 | "batch_size": 512, 4 | "dropout": 0.1622787886635157, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.11183010574126619, 7 | "num_gnn_layers": 1, 8 | "num_heads": 4, 9 | "patience": 30, 10 | "predictor_hidden_feats": 128, 11 | "residual": false, 12 | "weight_decay": 0.001250916981848196 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "batchnorm": true, 4 | "dropout": 0.09369442571380307, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.035076484145608475, 7 | "num_gnn_layers": 5, 8 | "patience": 30, 9 | "predictor_hidden_feats": 512, 10 | "residual": true, 11 | 
"weight_decay": 0.00013504194418214894 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": false, 4 | "dropout": 0.27771104411983266, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.0016869055272843558, 7 | "num_gnn_layers": 4, 8 | "patience": 30, 9 | "predictor_hidden_feats": 32, 10 | "residual": true, 11 | "weight_decay": 0.0012001326197055202 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.07629194090575299, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 2, 7 | "num_step_message_passing": 2, 8 | "num_step_set2set": 2, 9 | "patience": 30, 10 | "weight_decay": 0.00023871411834429845 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.014432401312506324, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 2, 7 | "num_step_message_passing": 4, 8 | "num_step_set2set": 3, 9 | "patience": 30, 10 | "weight_decay": 0.0013402300751073676 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 64, 5 | 
"graph_feats": 128, 6 | "lr": 0.0008637455264179758, 7 | "num_gnn_layers": 5, 8 | "patience": 30, 9 | "weight_decay": 0.0008375075418031251 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ClinTox/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 32, 6 | "lr": 0.0066356275261501745, 7 | "num_gnn_layers": 5, 8 | "patience": 30, 9 | "weight_decay": 0.0011183470388493179 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "dropout": 0.19597186400407615, 4 | "graph_feat_size": 16, 5 | "lr": 0.024933339157992538, 6 | "num_layers": 5, 7 | "num_timesteps": 5, 8 | "patience": 30, 9 | "weight_decay": 0.00020112775502600266 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "dropout": 0.3144543143291027, 4 | "graph_feat_size": 16, 5 | "lr": 0.041761357613928296, 6 | "num_layers": 3, 7 | "num_timesteps": 5, 8 | "patience": 30, 9 | "weight_decay": 0.0012496585485300006 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.7197105722372982, 3 | "batch_size": 256, 4 | "dropout": 0.00033036046538620356, 5 | "gnn_hidden_feats": 32, 6 | 
"lr": 0.029739422059074316, 7 | "num_gnn_layers": 1, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 32, 11 | "residual": false, 12 | "weight_decay": 0.0007476070545723656 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.4994779445224584, 3 | "batch_size": 512, 4 | "dropout": 0.28070328302954156, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.043266928603070534, 7 | "num_gnn_layers": 1, 8 | "num_heads": 4, 9 | "patience": 30, 10 | "predictor_hidden_feats": 16, 11 | "residual": true, 12 | "weight_decay": 0.002864868877598975 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "batchnorm": false, 4 | "dropout": 0.03400405080274294, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.08161293526883558, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "predictor_hidden_feats": 256, 10 | "residual": false, 11 | "weight_decay": 6.226090297377926e-05 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "batchnorm": false, 4 | "dropout": 0.0004181672129021179, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.002268579772034901, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "predictor_hidden_feats": 1024, 10 | "residual": true, 11 | "weight_decay": 0.0017036351312322193 12 | } 13 | -------------------------------------------------------------------------------- 
/examples/property_prediction/moleculenet/configures/ESOL/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.0010072922765793442, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 2, 7 | "num_step_message_passing": 1, 8 | "num_step_set2set": 2, 9 | "patience": 30, 10 | "weight_decay": 0.001444988987734611 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.0012167943583608425, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 3, 7 | "num_step_message_passing": 3, 8 | "num_step_set2set": 2, 9 | "patience": 30, 10 | "weight_decay": 0.0016883196017373992 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 32, 5 | "graph_feats": 256, 6 | "lr": 0.017143946113641867, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "weight_decay": 0.0024550976805966566 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "gaussian_expand": true, 4 | "gnn_hidden_feats": 256, 5 | "graph_feats": 128, 6 | "lr": 0.0007450856286183947, 7 | "num_gnn_layers": 3, 8 | "patience": 30, 9 | "weight_decay": 0.0012182889964973162 10 | } 11 | 
-------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "jk": "last", 4 | "lr": 0.003684241891836297, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 0.0018192004446449632 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "max", 4 | "lr": 0.04837377263222005, 5 | "patience": 30, 6 | "readout": "mean", 7 | "weight_decay": 0.0002554084517776402 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "jk": "sum", 4 | "lr": 0.00201807888828201, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 0.0029245081791971898 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ESOL/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "jk": "last", 4 | "lr": 0.0017984818061637078, 5 | "patience": 30, 6 | "readout": "mean", 7 | "weight_decay": 5.276051476334892e-05 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "dropout": 
0.1457037675069287, 4 | "graph_feat_size": 128, 5 | "lr": 0.03876673668181575, 6 | "num_layers": 1, 7 | "num_timesteps": 1, 8 | "patience": 30, 9 | "weight_decay": 0.00045572639385189006 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "dropout": 0.07118127568309571, 4 | "graph_feat_size": 32, 5 | "lr": 0.08150547095978268, 6 | "num_layers": 4, 7 | "num_timesteps": 1, 8 | "patience": 30, 9 | "weight_decay": 0.0022641885703534917 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.6294479518124414, 3 | "batch_size": 512, 4 | "dropout": 0.06949846918000477, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.037801827370532046, 7 | "num_gnn_layers": 2, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 64, 11 | "residual": true, 12 | "weight_decay": 0.0014391819011883935 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.6211392042947481, 3 | "batch_size": 32, 4 | "dropout": 0.02327359604429937, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.07818695435305947, 7 | "num_gnn_layers": 1, 8 | "num_heads": 4, 9 | "patience": 30, 10 | "predictor_hidden_feats": 256, 11 | "residual": true, 12 | "weight_decay": 0.0016023739175454656 13 | } 14 | -------------------------------------------------------------------------------- 
/examples/property_prediction/moleculenet/configures/FreeSolv/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": false, 4 | "dropout": 0.09905316493862346, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.2755190168688042, 7 | "num_gnn_layers": 4, 8 | "patience": 30, 9 | "predictor_hidden_feats": 32, 10 | "residual": true, 11 | "weight_decay": 0.000336370735188623 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": false, 4 | "dropout": 0.05769700663189804, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.12112036225411393, 7 | "num_gnn_layers": 2, 8 | "patience": 30, 9 | "predictor_hidden_feats": 64, 10 | "residual": true, 11 | "weight_decay": 0.0013405372514807934 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.025368763025499428, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 1, 7 | "num_step_message_passing": 2, 8 | "num_step_set2set": 2, 9 | "patience": 30, 10 | "weight_decay": 0.0001366466162553145 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.26617679127292676, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 3, 7 | "num_step_message_passing": 4, 8 | "num_step_set2set": 2, 9 | 
"patience": 30, 10 | "weight_decay": 0.00010861344092661995 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 32, 5 | "graph_feats": 16, 6 | "lr": 0.01314971081368449, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "weight_decay": 0.0022168062553836743 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 64, 6 | "lr": 0.002161253584332697, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "weight_decay": 0.001503114696859922 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "concat", 4 | "lr": 0.02092230927633503, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 0.0020111034335989634 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "sum", 4 | "lr": 0.04363525697390356, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 0.00010458380016246623 8 | } 9 | -------------------------------------------------------------------------------- 
/examples/property_prediction/moleculenet/configures/FreeSolv/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "jk": "last", 4 | "lr": 0.016323807868240735, 5 | "patience": 30, 6 | "readout": "mean", 7 | "weight_decay": 0.00019320883541792976 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/FreeSolv/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "concat", 4 | "lr": 0.01626762972038756, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 0.0006641390612734168 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "dropout": 0.22938425755507835, 4 | "graph_feat_size": 64, 5 | "lr": 0.004781745450166949, 6 | "num_layers": 1, 7 | "num_timesteps": 2, 8 | "patience": 30, 9 | "weight_decay": 0.0020934048369245393 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "dropout": 0.24511656823509329, 4 | "graph_feat_size": 256, 5 | "lr": 0.0007707517744236503, 6 | "num_layers": 1, 7 | "num_timesteps": 1, 8 | "patience": 30, 9 | "weight_decay": 0.0004153560385673111 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "alpha": 0.3649234413811788, 3 | "batch_size": 64, 4 | "dropout": 0.06616256199038872, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.14350460499229986, 7 | "num_gnn_layers": 3, 8 | "num_heads": 6, 9 | "patience": 30, 10 | "predictor_hidden_feats": 64, 11 | "residual": false, 12 | "weight_decay": 0.002221509195682305 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.0821566804349384, 3 | "batch_size": 32, 4 | "dropout": 0.05181359502277236, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.0006055001004724864, 7 | "num_gnn_layers": 4, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 64, 11 | "residual": false, 12 | "weight_decay": 0.0002183578667382893 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "batch_size": 512, 4 | "batchnorm": true, 5 | "dropout": 0.010378057763216847, 6 | "gnn_hidden_feats": 32, 7 | "lr": 0.003187973578002712, 8 | "num_gnn_layers": 3, 9 | "patience": 30, 10 | "predictor_hidden_feats": 256, 11 | "residual": false, 12 | "weight_decay": 0.0014827191700590339 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "batchnorm": true, 4 | "dropout": 0.0013086019242321, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.000508635928951698, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "predictor_hidden_feats": 512, 10 | "residual": false, 11 | "weight_decay": 0.0013253058161908312 12 | 
} 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.0010997571037308761, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 3, 7 | "num_step_message_passing": 3, 8 | "num_step_set2set": 3, 9 | "patience": 30, 10 | "weight_decay": 0.00020913013196948994 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.0009658319364892974, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 2, 7 | "num_step_message_passing": 4, 8 | "num_step_set2set": 1, 9 | "patience": 30, 10 | "weight_decay": 0.000278611558904995 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/NF_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "batchnorm": true, 4 | "dropout": 0.29642808718861385, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.0009317188335487155, 7 | "num_gnn_layers": 4, 8 | "patience": 10, 9 | "predictor_hidden_feats": 256, 10 | "weight_decay": 0.0013328031858695633 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 16, 6 | "lr": 0.00015637132576302656, 7 | "num_gnn_layers": 4, 8 | 
"patience": 30, 9 | "weight_decay": 0.0023879684399599403 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 256, 5 | "graph_feats": 64, 6 | "lr": 0.0007134875771707405, 7 | "num_gnn_layers": 2, 8 | "patience": 30, 9 | "weight_decay": 0.001720970533747118 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "sum", 4 | "lr": 0.0007268763976438582, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 0.0005399503682192909 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "concat", 4 | "lr": 0.0019680599800649128, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 0.00010696402892008527 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/HIV/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "concat", 4 | "lr": 0.0004465718095416106, 5 | "patience": 30, 6 | "readout": "attention", 7 | "weight_decay": 2.0259768608221306e-05 8 | } 9 | -------------------------------------------------------------------------------- 
/examples/property_prediction/moleculenet/configures/HIV/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "sum", 4 | "lr": 0.001151731494139877, 5 | "patience": 30, 6 | "readout": "max", 7 | "weight_decay": 0.0003770822201850537 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "dropout": 0.1392528529851128, 4 | "graph_feat_size": 256, 5 | "lr": 0.002336127222633497, 6 | "num_layers": 1, 7 | "num_timesteps": 4, 8 | "patience": 3, 9 | "weight_decay": 0.0018789377785017737 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "dropout": 0.18905153162605368, 4 | "graph_feat_size": 16, 5 | "lr": 0.010851100520380222, 6 | "num_layers": 2, 7 | "num_timesteps": 4, 8 | "patience": 3, 9 | "weight_decay": 0.000666154605426195 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.7133648170252214, 3 | "batch_size": 512, 4 | "dropout": 0.023072031250834013, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.000942871304361291, 7 | "num_gnn_layers": 1, 8 | "num_heads": 8, 9 | "patience": 3, 10 | "predictor_hidden_feats": 64, 11 | "residual": false, 12 | "weight_decay": 0.0010426637069091997 13 | } 14 | 
-------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.41300745504829595, 3 | "batch_size": 256, 4 | "dropout": 0.00804563560011903, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.004104951919252541, 7 | "num_gnn_layers": 5, 8 | "num_heads": 8, 9 | "patience": 3, 10 | "predictor_hidden_feats": 64, 11 | "residual": false, 12 | "weight_decay": 0.00041627808450648787 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "batchnorm": false, 4 | "dropout": 0.0690767663743611, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.00334586725481109, 7 | "num_gnn_layers": 2, 8 | "patience": 3, 9 | "predictor_hidden_feats": 128, 10 | "residual": true, 11 | "weight_decay": 0.0004953531940259846 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": false, 4 | "dropout": 0.28857669330071006, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.0023146598514588723, 7 | "num_gnn_layers": 2, 8 | "patience": 3, 9 | "predictor_hidden_feats": 16, 10 | "residual": true, 11 | "weight_decay": 0.0027530573103769617 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | 
"edge_hidden_feats": 32, 4 | "lr": 0.0014719353344412388, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 2, 7 | "num_step_message_passing": 3, 8 | "num_step_set2set": 1, 9 | "patience": 3, 10 | "weight_decay": 0.0006102918276099665 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.001589363184976579, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 2, 7 | "num_step_message_passing": 2, 8 | "num_step_set2set": 3, 9 | "patience": 3, 10 | "weight_decay": 0.002935301407610767 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": true, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 128, 6 | "lr": 0.0002639480734811517, 7 | "num_gnn_layers": 2, 8 | "patience": 3, 9 | "weight_decay": 0.002131450567613569 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 256, 6 | "lr": 0.001025694617347174, 7 | "num_gnn_layers": 2, 8 | "patience": 3, 9 | "weight_decay": 0.0007295291026702683 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/gin_supervised_contextpred.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "jk": "concat", 4 | "lr": 0.00014286945232108716, 5 | "patience": 3, 6 | "readout": "sum", 7 | "weight_decay": 0.0019227196033965886 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "jk": "concat", 4 | "lr": 0.00014828969778268408, 5 | "patience": 3, 6 | "readout": "sum", 7 | "weight_decay": 0.0014720051108745896 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "max", 4 | "lr": 0.025125936190360937, 5 | "patience": 3, 6 | "readout": "mean", 7 | "weight_decay": 2.1986285047353006e-05 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Lipophilicity/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "jk": "last", 4 | "lr": 0.0023488539537758514, 5 | "patience": 3, 6 | "readout": "sum", 7 | "weight_decay": 3.961699422046444e-05 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "dropout": 0.3260017176688692, 4 | "graph_feat_size": 16, 5 | "lr": 0.0004161106954054198, 6 | "num_layers": 1, 7 | "num_timesteps": 2, 8 | "patience": 10, 9 | 
"weight_decay": 3.259570004861701e-06 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "dropout": 0.20184515449053175, 4 | "graph_feat_size": 16, 5 | "lr": 0.27373030579364105, 6 | "num_layers": 1, 7 | "num_timesteps": 3, 8 | "patience": 10, 9 | "weight_decay": 0.0002615869860590384 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.9101107032743763, 3 | "batch_size": 256, 4 | "dropout": 0.37739180577199594, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.1064728667731296, 7 | "num_gnn_layers": 1, 8 | "num_heads": 6, 9 | "patience": 10, 10 | "predictor_hidden_feats": 32, 11 | "residual": false, 12 | "weight_decay": 0.0014041564813188868 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.8145285541930105, 3 | "batch_size": 512, 4 | "dropout": 0.5477918396466305, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.2890724138267819, 7 | "num_gnn_layers": 4, 8 | "num_heads": 6, 9 | "patience": 10, 10 | "predictor_hidden_feats": 128, 11 | "residual": true, 12 | "weight_decay": 0.0006296853617271957 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "batchnorm": false, 4 | "dropout": 
0.24997398695768708, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.10097480366765507, 7 | "num_gnn_layers": 1, 8 | "patience": 10, 9 | "predictor_hidden_feats": 128, 10 | "residual": true, 11 | "weight_decay": 0.0029930026970601377 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": false, 4 | "dropout": 0.10811886971338101, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.0517799539037415, 7 | "num_gnn_layers": 1, 8 | "patience": 10, 9 | "predictor_hidden_feats": 128, 10 | "residual": false, 11 | "weight_decay": 0.002934359614800871 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.16583561459387547, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 1, 7 | "num_step_message_passing": 5, 8 | "num_step_set2set": 2, 9 | "patience": 10, 10 | "weight_decay": 0.0002346614512845315 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.1183659110235217, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 3, 7 | "num_step_message_passing": 5, 8 | "num_step_set2set": 2, 9 | "patience": 10, 10 | "weight_decay": 0.002584470408523727 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/Weave_attentivefp.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 32, 5 | "graph_feats": 128, 6 | "lr": 0.08390110047012145, 7 | "num_gnn_layers": 3, 8 | "patience": 10, 9 | "weight_decay": 0.0002976018989380823 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 64, 6 | "lr": 0.10592008005751387, 7 | "num_gnn_layers": 1, 8 | "patience": 10, 9 | "weight_decay": 0.00040473826255505677 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "jk": "concat", 4 | "lr": 0.02064541030428942, 5 | "patience": 10, 6 | "readout": "attention", 7 | "weight_decay": 0.0027544000743672924 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "jk": "max", 4 | "lr": 0.13432694269340834, 5 | "patience": 10, 6 | "readout": "attention", 7 | "weight_decay": 0.002487734773488951 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "concat", 4 | "lr": 0.0012083405545543907, 5 | 
"patience": 10, 6 | "readout": "attention", 7 | "weight_decay": 0.00019879816557403872 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/MUV/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "max", 4 | "lr": 0.042597014228161284, 5 | "patience": 10, 6 | "readout": "attention", 7 | "weight_decay": 0.00020066278994669388 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "dropout": 0.31957324617702254, 4 | "graph_feat_size": 16, 5 | "lr": 0.026645176883161523, 6 | "num_layers": 3, 7 | "num_timesteps": 2, 8 | "patience": 3, 9 | "weight_decay": 2.303595485627464e-05 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "dropout": 0.05370268638522968, 4 | "graph_feat_size": 32, 5 | "lr": 0.0013691849212736692, 6 | "num_layers": 2, 7 | "num_timesteps": 3, 8 | "patience": 3, 9 | "weight_decay": 0.0003202981688363421 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.25837424873685433, 3 | "batch_size": 256, 4 | "dropout": 0.2811043226878611, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.041696741871750195, 7 | "num_gnn_layers": 1, 8 | "num_heads": 8, 9 | "patience": 3, 10 | 
"predictor_hidden_feats": 16, 11 | "residual": true, 12 | "weight_decay": 0.0019497193311344152 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.0194367227727808, 3 | "batch_size": 256, 4 | "dropout": 0.008451521225305653, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.1436879597821312, 7 | "num_gnn_layers": 1, 8 | "num_heads": 8, 9 | "patience": 3, 10 | "predictor_hidden_feats": 64, 11 | "residual": false, 12 | "weight_decay": 2.544626281529127e-06 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": true, 4 | "dropout": 0.24826461148671453, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.00029348737871487396, 7 | "num_gnn_layers": 5, 8 | "patience": 3, 9 | "predictor_hidden_feats": 64, 10 | "residual": true, 11 | "weight_decay": 0.000340859037349049 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "batchnorm": true, 4 | "dropout": 0.053320999462421345, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.08516904593639796, 7 | "num_gnn_layers": 2, 8 | "patience": 3, 9 | "predictor_hidden_feats": 1024, 10 | "residual": true, 11 | "weight_decay": 1.3004434508416202e-05 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/MPNN_attentivefp.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.0024096089271755192, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 1, 7 | "num_step_message_passing": 2, 8 | "num_step_set2set": 1, 9 | "patience": 3, 10 | "weight_decay": 1.1571175346962144e-05 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.0002777334389300178, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 3, 7 | "num_step_message_passing": 1, 8 | "num_step_set2set": 3, 9 | "patience": 3, 10 | "weight_decay": 2.734809971559392e-06 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 64, 6 | "lr": 0.019304730824566674, 7 | "num_gnn_layers": 2, 8 | "patience": 3, 9 | "weight_decay": 0.00014074431597979565 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": true, 4 | "gnn_hidden_feats": 128, 5 | "graph_feats": 256, 6 | "lr": 0.0004065948420309967, 7 | "num_gnn_layers": 1, 8 | "patience": 3, 9 | "weight_decay": 0.0012452819006325308 10 | } 11 | -------------------------------------------------------------------------------- 
/examples/property_prediction/moleculenet/configures/PCBA/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "jk": "last", 4 | "lr": 0.002040642386942102, 5 | "patience": 3, 6 | "readout": "attention", 7 | "weight_decay": 1.669350338238329e-05 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "sum", 4 | "lr": 0.0710014576285246, 5 | "patience": 3, 6 | "readout": "attention", 7 | "weight_decay": 0.001553828327407514 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "concat", 4 | "lr": 0.000252666257799225, 5 | "patience": 3, 6 | "readout": "max", 7 | "weight_decay": 0.0008467516338932003 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/PCBA/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "sum", 4 | "lr": 0.0005792614370892417, 5 | "patience": 3, 6 | "readout": "attention", 7 | "weight_decay": 3.5138608334314253e-06 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "dropout": 0.4246486427806226, 4 | "graph_feat_size": 16, 5 | "lr": 0.2799816219870452, 6 
| "num_layers": 2, 7 | "num_timesteps": 1, 8 | "patience": 30, 9 | "weight_decay": 0.0009610489977483794 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "dropout": 0.4023383130974747, 4 | "graph_feat_size": 128, 5 | "lr": 0.029145897588248787, 6 | "num_layers": 5, 7 | "num_timesteps": 2, 8 | "patience": 30, 9 | "weight_decay": 0.001240398838965628 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.15881060281037407, 3 | "batch_size": 32, 4 | "dropout": 0.5941912608794613, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.14226783702436058, 7 | "num_gnn_layers": 4, 8 | "num_heads": 4, 9 | "patience": 30, 10 | "predictor_hidden_feats": 128, 11 | "residual": true, 12 | "weight_decay": 0.0018334890531216362 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.7874749485670144, 3 | "batch_size": 256, 4 | "dropout": 0.026789468731402546, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.09260461512319425, 7 | "num_gnn_layers": 4, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 64, 11 | "residual": true, 12 | "weight_decay": 0.000630559477369353 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/GCN_attentivefp.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "batchnorm": true, 4 | "dropout": 0.08333992387843633, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.0007825448344124272, 7 | "num_gnn_layers": 4, 8 | "patience": 30, 9 | "predictor_hidden_feats": 1024, 10 | "residual": false, 11 | "weight_decay": 0.0018736122189025961 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": true, 4 | "dropout": 0.034959769945995006, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.013478258874348836, 7 | "num_gnn_layers": 3, 8 | "patience": 30, 9 | "predictor_hidden_feats": 512, 10 | "residual": true, 11 | "weight_decay": 0.00038620036961077456 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.21133118919773075, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 1, 7 | "num_step_message_passing": 5, 8 | "num_step_set2set": 1, 9 | "patience": 30, 10 | "weight_decay": 0.0013233098855699446 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.2673797480814446, 5 | "node_out_feats": 64, 6 | "num_layer_set2set": 2, 7 | "num_step_message_passing": 5, 8 | "num_step_set2set": 2, 9 | "patience": 30, 10 | "weight_decay": 0.0015324965239380626 11 | } 12 | 
-------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/NF_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": true, 4 | "dropout": 0.3004340538386648, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.15295366744281266, 7 | "num_gnn_layers": 1, 8 | "patience": 10, 9 | "predictor_hidden_feats": 32, 10 | "weight_decay": 0.00015437346720966085 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": true, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 64, 6 | "lr": 0.0016560263410790033, 7 | "num_gnn_layers": 3, 8 | "patience": 30, 9 | "weight_decay": 0.0023729738302695866 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 16, 6 | "lr": 0.0009033904614314281, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "weight_decay": 0.0003628007907774161 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "concat", 4 | "lr": 0.004582483350073413, 5 | "patience": 30, 6 | "readout": "max", 7 | "weight_decay": 0.002854321225485725 8 | } 9 | 
-------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "concat", 4 | "lr": 0.0016594614349409037, 5 | "patience": 30, 6 | "readout": "attention", 7 | "weight_decay": 0.0003865744904692904 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "concat", 4 | "lr": 0.0024537777646741017, 5 | "patience": 30, 6 | "readout": "attention", 7 | "weight_decay": 0.0008110689592892218 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/SIDER/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "jk": "max", 4 | "lr": 0.037592972380361474, 5 | "patience": 30, 6 | "readout": "max", 7 | "weight_decay": 0.00044259195086712646 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "dropout": 0.08321482571554469, 4 | "graph_feat_size": 16, 5 | "lr": 0.01547590897542292, 6 | "num_layers": 2, 7 | "num_timesteps": 4, 8 | "patience": 3, 9 | "weight_decay": 5.03498791753481e-05 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/AttentiveFP_canonical.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "dropout": 0.02557007921295823, 4 | "graph_feat_size": 32, 5 | "lr": 0.0025274616878070413, 6 | "num_layers": 1, 7 | "num_timesteps": 5, 8 | "patience": 3, 9 | "weight_decay": 0.0017458430442674627 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.3471639890634216, 3 | "batch_size": 256, 4 | "dropout": 0.21647573234277548, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.0015106677798468588, 7 | "num_gnn_layers": 1, 8 | "num_heads": 4, 9 | "patience": 3, 10 | "predictor_hidden_feats": 128, 11 | "residual": false, 12 | "weight_decay": 0.0012545498805961982 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.574285650239047, 3 | "batch_size": 256, 4 | "dropout": 0.06205513003092991, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.018489331666601175, 7 | "num_gnn_layers": 1, 8 | "num_heads": 4, 9 | "patience": 3, 10 | "predictor_hidden_feats": 32, 11 | "residual": true, 12 | "weight_decay": 0.0005221614306821765 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": true, 4 | "dropout": 0.5432104441360837, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.0008308729010190474, 7 | "num_gnn_layers": 4, 8 | "patience": 3, 9 | "predictor_hidden_feats": 512, 10 | "residual": false, 11 | "weight_decay": 
0.00104683904967067 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "batchnorm": false, 4 | "dropout": 0.18118350615245202, 5 | "gnn_hidden_feats": 64, 6 | "lr": 0.0008963957671468941, 7 | "num_gnn_layers": 3, 8 | "patience": 3, 9 | "predictor_hidden_feats": 16, 10 | "residual": false, 11 | "weight_decay": 1.5353642545259472e-05 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.0007555540847341835, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 2, 7 | "num_step_message_passing": 3, 8 | "num_step_set2set": 2, 9 | "patience": 3, 10 | "weight_decay": 0.0008267188068088885 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.03531524773979991, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 3, 7 | "num_step_message_passing": 1, 8 | "num_step_set2set": 3, 9 | "patience": 3, 10 | "weight_decay": 3.520879880952354e-05 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/NF_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "batchnorm": true, 4 | "dropout": 0.23946951437213781, 5 | "gnn_hidden_feats": 256, 6 | "lr": 
0.0019727882244461445, 7 | "num_gnn_layers": 5, 8 | "patience": 30, 9 | "predictor_hidden_feats": 512, 10 | "weight_decay": 0.0006643047003408009 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/Weave_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": true, 4 | "gnn_hidden_feats": 64, 5 | "graph_feats": 256, 6 | "lr": 0.0012017709863221026, 7 | "num_gnn_layers": 1, 8 | "patience": 3, 9 | "weight_decay": 0.0017884236674114446 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "gaussian_expand": true, 4 | "gnn_hidden_feats": 256, 5 | "graph_feats": 64, 6 | "lr": 0.22280361636790463, 7 | "num_gnn_layers": 5, 8 | "patience": 3, 9 | "weight_decay": 0.00043492984484493763 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "jk": "concat", 4 | "lr": 0.001979947576885688, 5 | "patience": 3, 6 | "readout": "attention", 7 | "weight_decay": 0.0007541054393255765 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "sum", 4 | "lr": 0.0029323909407753847, 5 | "patience": 3, 6 | "readout": "attention", 7 | "weight_decay": 1.4272998228504754e-05 8 | } 9 | 
-------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "jk": "concat", 4 | "lr": 0.002117046769172436, 5 | "patience": 3, 6 | "readout": "max", 7 | "weight_decay": 0.0025862398606286974 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/Tox21/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 32, 3 | "jk": "concat", 4 | "lr": 0.0012686595443356097, 5 | "patience": 3, 6 | "readout": "mean", 7 | "weight_decay": 0.001233417060890296 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/AttentiveFP_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "dropout": 0.15914067005489962, 4 | "graph_feat_size": 32, 5 | "lr": 0.0346002100671352, 6 | "num_layers": 1, 7 | "num_timesteps": 3, 8 | "patience": 30, 9 | "weight_decay": 0.0002873201766557768 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/AttentiveFP_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "dropout": 0.00020996203951184622, 4 | "graph_feat_size": 64, 5 | "lr": 0.0044409518726535886, 6 | "num_layers": 1, 7 | "num_timesteps": 1, 8 | "patience": 30, 9 | "weight_decay": 0.00214363227501933 10 | } 11 | -------------------------------------------------------------------------------- 
/examples/property_prediction/moleculenet/configures/ToxCast/GAT_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.8044239663965763, 3 | "batch_size": 128, 4 | "dropout": 0.039304570924327153, 5 | "gnn_hidden_feats": 32, 6 | "lr": 0.002614591918497472, 7 | "num_gnn_layers": 3, 8 | "num_heads": 8, 9 | "patience": 30, 10 | "predictor_hidden_feats": 128, 11 | "residual": false, 12 | "weight_decay": 0.0018978279394269767 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/GAT_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "alpha": 0.5850073967467644, 3 | "batch_size": 64, 4 | "dropout": 0.30477898651808644, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.002044234155815553, 7 | "num_gnn_layers": 3, 8 | "num_heads": 6, 9 | "patience": 30, 10 | "predictor_hidden_feats": 256, 11 | "residual": true, 12 | "weight_decay": 3.4489185948996613e-06 13 | } 14 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/GCN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 128, 3 | "batchnorm": true, 4 | "dropout": 0.5790202973197223, 5 | "gnn_hidden_feats": 256, 6 | "lr": 0.002660249637448869, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "predictor_hidden_feats": 16, 10 | "residual": false, 11 | "weight_decay": 1.3112729442186144e-05 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/GCN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "batchnorm": false, 4 | "dropout": 0.2354934715188964, 5 | "gnn_hidden_feats": 256, 6 | 
"lr": 0.04093040454026587, 7 | "num_gnn_layers": 3, 8 | "patience": 30, 9 | "predictor_hidden_feats": 512, 10 | "residual": true, 11 | "weight_decay": 1.656799296565859e-05 12 | } 13 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/MPNN_attentivefp.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "edge_hidden_feats": 64, 4 | "lr": 0.002516497260309336, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 3, 7 | "num_step_message_passing": 5, 8 | "num_step_set2set": 3, 9 | "patience": 30, 10 | "weight_decay": 6.425777245387374e-05 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/MPNN_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "edge_hidden_feats": 32, 4 | "lr": 0.00025558990115406854, 5 | "node_out_feats": 32, 6 | "num_layer_set2set": 3, 7 | "num_step_message_passing": 2, 8 | "num_step_set2set": 2, 9 | "patience": 30, 10 | "weight_decay": 1.1829228770819933e-05 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/NF_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "batchnorm": true, 4 | "dropout": 0.23659695490588817, 5 | "gnn_hidden_feats": 128, 6 | "lr": 0.00014219057150340053, 7 | "num_gnn_layers": 4, 8 | "patience": 30, 9 | "predictor_hidden_feats": 512, 10 | "weight_decay": 0.0005317420854508431 11 | } 12 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/Weave_attentivefp.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 128, 5 | "graph_feats": 128, 6 | "lr": 0.00022259059163133414, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "weight_decay": 0.0010232640559564882 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/Weave_canonical.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 64, 3 | "gaussian_expand": false, 4 | "gnn_hidden_feats": 256, 5 | "graph_feats": 256, 6 | "lr": 0.00035949007148311464, 7 | "num_gnn_layers": 1, 8 | "patience": 30, 9 | "weight_decay": 0.0006201487905693401 10 | } 11 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/gin_supervised_contextpred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "last", 4 | "lr": 0.0026561989005571603, 5 | "patience": 30, 6 | "readout": "max", 7 | "weight_decay": 2.7341239008350446e-05 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/gin_supervised_edgepred.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 512, 3 | "jk": "sum", 4 | "lr": 0.004355479710898058, 5 | "patience": 30, 6 | "readout": "attention", 7 | "weight_decay": 0.0005511351762166026 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/gin_supervised_infomax.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "sum", 4 | "lr": 
0.04169874484424693, 5 | "patience": 30, 6 | "readout": "sum", 7 | "weight_decay": 0.00021885264635364418 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/moleculenet/configures/ToxCast/gin_supervised_masking.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "jk": "sum", 4 | "lr": 0.046088669310167196, 5 | "patience": 30, 6 | "readout": "max", 7 | "weight_decay": 5.142373222354395e-06 8 | } 9 | -------------------------------------------------------------------------------- /examples/property_prediction/ogbg_ppa/README.md: -------------------------------------------------------------------------------- 1 | # ogbg-ppa 2 | 3 | For a detailed description of the dataset, see [the OGB website](https://ogb.stanford.edu/docs/graphprop/). 4 | 5 | ## Models 6 | 7 | - **Graph Convolutional Networks (GCN)** [1] 8 | - **Graph Isomorphism Networks (GIN)** [2] 9 | 10 | ## Dependencies 11 | 12 | - **OGB v1.2.1**, which can be installed with ```pip install ogb``` 13 | 14 | ## Usage 15 | 16 | To run the script, 17 | 18 | ```bash 19 | python main.py --gnn X 20 | ``` 21 | 22 | where `X` can be `gcn`, `gin`, `gcn-virtual` and `gin-virtual`. The postfix `-virtual` means that 23 | we will use a virtual node connected to all nodes in the graph for synchronizing information across all nodes. 24 | 25 | By default, we use GPU whenever possible. 26 | 27 | The optional arguments are as follows: 28 | 29 | ``` 30 | --dropout, dropout to use, (default=0.5) 31 | --n_layers, number of GNN layers to use, (default=5) 32 | --hidden_feats, number of hidden units in GNNs, (default=300) 33 | --batch_size, batch size for training, (default=32) 34 | --epochs, number of epochs for training, (default=100) 35 | --num_workers, number of processes for data loading, (default=1) 36 | --filename, filename to output results. By default, it will be the same as the gnn used. 
37 | ``` 38 | 39 | ## Performance 40 | 41 | Using the default parameters, the performance of 10 random runs is as follows. 42 | 43 | | Method | Accuracy (%) | 44 | | ----------- | ------------- | 45 | | GCN | 67.80 +- 0.49 | 46 | | GIN | 69.31 +- 1.94 | 47 | | GCN-virtual | 69.02 +- 0.47 | 48 | | GIN-virtual | 70.62 +- 0.70 | 49 | 50 | ## References 51 | 52 | [1] Kipf T., Welling M. Semi-Supervised Classification with Graph Convolutional Networks. 2016. 53 | 54 | [2] Xu K., Hu W., Leskovec J., Jegelka S. How Powerful are Graph Neural Networks? 2019. 55 | -------------------------------------------------------------------------------- /examples/property_prediction/pretrain_gnns/chem/README.md: -------------------------------------------------------------------------------- 1 | # Strategies for Pre-training Graph Neural Networks 2 | 3 | ## Intro 4 | This is a DGL implementation of the following paper based on PyTorch. 5 | 6 | - [Strategies for Pre-training Graph Neural Networks.](https://arxiv.org/abs/1905.12265) W. Hu*, B. Liu*, J. Gomes, M. Zitnik., P. Liang, V. Pande, J. Leskovec. *International Conference on Learning Representations (ICLR)*, 2020. 7 | 8 | ## Datasets 9 | - For node-level self-supervised pre-training, 2 million unlabeled molecules sampled from the ZINC15 database are used. Custom datasets are supported. 10 | - For graph-level multi-task supervised pre-training, a preprocessed ChEMBL dataset is used, which contains 456K molecules with 1310 kinds of diverse and extensive biochemical assays. Custom datasets are supported. 11 | - For fine-tuning downstream tasks, BBBP, Tox21, ToxCast, SIDER, MUV, HIV and BACE dataset are supported. 12 | 13 | ## Usage 14 | **1. Self-supervised pre-training** 15 | 16 | This paper purposed an attribute masking pre-training method. It randomly masks input node/edge attributes by replacing them with special masked indicators, then the GNN will predict those attributes based on neighboring structure. 
17 | 18 | ``` bash 19 | python pretrain_masking.py --output_model_file OUTPUT_MODEL_FILE 20 | ``` 21 | The self-supervised pre-training model will be found in `OUTPUT_MODEL_FILE` after training (default filename: pretrain_masking.pth). 22 | 23 | If a custom dataset is specified, the path needs to be provided with `--dataset`. The custom dataset is supposed to be a text file, where every line is a molecule SMILES except that the first is 'smiles'. 24 | 25 | **2. Supervised pre-training** 26 | ``` bash 27 | python pretrain_supervised.py --input_model_file INPUT_MODEL_FILE --output_model_file OUTPUT_MODEL_FILE 28 | ``` 29 | The self-supervised pre-trained model can be loaded from `INPUT_MODEL_FILE`. 30 | 31 | The supervised pre-training model will be found in `OUTPUT_MODEL_FILE` after training (default filename: pretrain_supervised.pth). 32 | 33 | If a custom dataset is specified, the path needs to be provided with `--dataset`. The custom dataset is supposed to be a `.pkl` file, which is pickled from "a list of tuples". The first element in every `tuple` should be a molecule SMILES in class `str`, and the second element should be its corresponding label in class `torch.Tensor`. Possible values are {-1, 0, 1} in labels. "1" means positive, and "-1" means negative. "0" indicates the molecule is invalid. 34 | 35 | **3. Fine-tuning for downstream dataset** 36 | ``` bash 37 | python classification.py --input_model_file INPUT_MODEL_FILE --output_model_file OUTPUT_MODEL_FILE --dataset DOWNSTREAM_DATASET 38 | ``` 39 | 40 | The supervised pre-trained model can be loaded from `INPUT_MODEL_FILE`. 41 | 42 | The fine-tuned model will be found in `OUTPUT_MODEL_FILE` after training (default filename: pretrain_fine_tuning.pth). 43 | 44 | ## Experiment Results 45 | 46 | With the default parameters, following downstream task results are based on Attribute Masking (Node-level) and Supervised (Graph-level) pre-training strategy with GIN. 
47 | 48 | | Dataset | ROC-AUC (%) | ROC-AUC reported (%) | 49 | | :-----: | :-----: | :--------: | 50 | | BBBP | 71.75 | 66.5 ± 2.5 | 51 | | Tox21 | 72.67 | 77.9 ± 0.4 | 52 | | ToxCast | 62.22 | 65.1 ± 0.3 | 53 | | SIDER | 58.97 | 63.9 ± 0.9 | 54 | | MUV | 79.44 | 81.2 ± 1.9 | 55 | | HIV | 74.52 | 77.1 ± 1.2 | 56 | | BACE | 77.34 | 80.3 ± 0.9 | 57 | -------------------------------------------------------------------------------- /examples/property_prediction/pubchem_aromaticity/README.md: -------------------------------------------------------------------------------- 1 | # Pubchem Aromaticity 2 | 3 | ## Dataset Introduction 4 | 5 | [1] extracted a total of 3945 molecules with 0-40 aromatic atoms from the PubChem BioAssay dataset for predicting 6 | the number of aromatic atoms of molecules. 7 | 8 | ## Modeling 9 | 10 | ### Pre-processing 11 | 12 | We randomly split the dataset into training/validation/test subsets with a ratio of 0.8:0.1:0.1. 13 | For featurization, we exclude all bond features and all atom aromatic features as in [1]. 14 | 15 | ### Training and Evaluation 16 | 17 | We currently only support AttentiveFP [1] for this dataset. To train from scratch, run 18 | 19 | ```bash 20 | python main.py 21 | ``` 22 | 23 | You can also directly evaluate a pre-trained model with 24 | 25 | ```bash 26 | python main.py -p 27 | ``` 28 | 29 | which will yield a test RMSE of 0.7508. 30 | 31 | ## Visualization 32 | 33 | In computing molecular representations out of atom representations, the model learns 34 | to assign some weights to the atoms, which can be viewed as the importance of atoms. 35 | [1] visualizes the weights of the atoms for possible interpretations like the figure below. 36 | We provide a jupyter notebook for performing the visualization and you can download it with 37 | `wget https://data.dgl.ai/dgllife/attentive_fp/atom_weight_visualization.ipynb`. 
38 | 39 | ![](https://data.dgl.ai/dgllife/attentive_fp_vis_example.png) 40 | 41 | ## References 42 | 43 | [1] Xiong et al. (2019) Pushing the Boundaries of Molecular Representation for Drug Discovery with the Graph 44 | Attention Mechanism. *Journal of Medicinal Chemistry*. 45 | -------------------------------------------------------------------------------- /examples/property_prediction/pubchem_aromaticity/configure.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | # graph construction 7 | from dgllife.utils import smiles_to_bigraph 8 | # general featurization 9 | from dgllife.utils import ConcatFeaturizer 10 | # node featurization 11 | from dgllife.utils import BaseAtomFeaturizer, atom_type_one_hot, atom_degree_one_hot, \ 12 | atom_formal_charge, atom_num_radical_electrons, \ 13 | atom_hybridization_one_hot, atom_total_num_H_one_hot 14 | # edge featurization 15 | from dgllife.utils.featurizers import BaseBondFeaturizer 16 | from functools import partial 17 | 18 | from utils import chirality 19 | 20 | attentivefp = { 21 | 'random_seed': 8, 22 | 'graph_feat_size': 200, 23 | 'num_layers': 2, 24 | 'num_timesteps': 2, 25 | 'node_feat_size': 39, 26 | 'edge_feat_size': 10, 27 | 'n_tasks': 1, 28 | 'dropout': 0.2, 29 | 'weight_decay': 10 ** (-5.0), 30 | 'lr': 10 ** (-2.5), 31 | 'batch_size': 128, 32 | 'num_epochs': 800, 33 | 'frac_train': 0.8, 34 | 'frac_val': 0.1, 35 | 'frac_test': 0.1, 36 | 'patience': 80, 37 | 'metric_name': 'rmse', 38 | 'mode': 'lower', 39 | 'smiles_to_graph': smiles_to_bigraph, 40 | # Follow the atom featurization in the original work 41 | 'node_featurizer': BaseAtomFeaturizer( 42 | featurizer_funcs={'hv': ConcatFeaturizer([ 43 | partial(atom_type_one_hot, allowable_set=[ 44 | 'B', 'C', 'N', 'O', 'F', 'Si', 'P', 'S', 'Cl', 'As', 'Se', 'Br', 'Te', 'I', 'At'], 45 
def set_random_seed(seed=0):
    """Seed all relevant RNGs for reproducibility.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and,
    when available, every CUDA device).

    Parameters
    ----------
    seed : int
        Random seed to use. Default to 0.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # manual_seed_all seeds every visible GPU, not just the current
        # device, so multi-GPU runs are reproducible as well.
        torch.cuda.manual_seed_all(seed)
def collate_molgraphs(data):
    """Batch a list of datapoints for a DataLoader.

    Parameters
    ----------
    data : list of 4-tuples.
        Each tuple is for a single datapoint, consisting of
        a SMILES, a DGLGraph, all-task labels and a binary
        mask indicating the existence of labels.

    Returns
    -------
    smiles : list
        List of smiles
    bg : DGLGraph
        The batched DGLGraph.
    labels : Tensor of dtype float32 and shape (B, T)
        Batched datapoint labels. B is len(data) and
        T is the number of total tasks.
    masks : Tensor of dtype float32 and shape (B, T)
        Batched datapoint binary mask, indicating the
        existence of labels.
    """
    smiles_list, graph_list, label_list, mask_list = map(list, zip(*data))

    # Merge the individual molecular graphs into one batched graph and
    # make sure missing node/edge features default to zeros.
    batched_graph = dgl.batch(graph_list)
    batched_graph.set_n_initializer(dgl.init.zero_initializer)
    batched_graph.set_e_initializer(dgl.init.zero_initializer)

    return (smiles_list,
            batched_graph,
            torch.stack(label_list, dim=0),
            torch.stack(mask_list, dim=0))
# Configuration for reaction center identification
reaction_center_config = {
    'batch_size': 20,
    'hidden_size': 300,
    'max_norm': 5.0,
    # Input feature sizes for nodes, edges and node pairs
    'node_in_feats': 82,
    'edge_in_feats': 6,
    'node_pair_in_feats': 10,
    'node_out_feats': 300,
    'n_layers': 3,
    'n_tasks': 5,
    'lr': 0.001,
    'num_epochs': 18,
    'print_every': 50,
    'decay_every': 10000,  # Learning rate decay
    'lr_decay_factor': 0.9,
    # Top-k candidates to consider during validation/test
    'top_ks_val': [12, 16, 20, 40, 80],
    'top_ks_test': [6, 8, 10],
    'max_k': 80,
}

# Configuration for candidate ranking
candidate_ranking_config = {
    'batch_size': 4,
    'hidden_size': 500,
    'num_encode_gnn_layers': 3,
    'max_norm': 50.0,
    'node_in_feats': 89,
    'edge_in_feats': 5,
    'lr': 0.001,
    'num_epochs': 6,
    'print_every': 20,
    'decay_every': 100000,
    'lr_decay_factor': 0.9,
    'top_ks': [1, 2, 3, 5],
    'max_k': 10,
    # Caps on the number of bond-change combinations per reaction
    'max_num_change_combos_per_reaction_train': 150,
    'max_num_change_combos_per_reaction_eval': 1500,
    'num_candidate_bond_changes': 16,
}
# Scale the gradient-norm cap with the batch size.
candidate_ranking_config['max_norm'] *= candidate_ranking_config['batch_size']
/python/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/dgl-lifesci/be8bc71d29ecf34a9dab7c7bd47c08f3383d9be0/python/__init__.py -------------------------------------------------------------------------------- /python/dgllife/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | # DGL-based package for applications in life science. 7 | 8 | from .libinfo import __version__ 9 | from . import model 10 | 11 | try: 12 | import rdkit 13 | except ImportError: 14 | print('RDKit is not installed, which is required for utils related to cheminformatics') 15 | 16 | from . import data 17 | from . import utils 18 | -------------------------------------------------------------------------------- /python/dgllife/data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
class PubChemBioAssayAromaticity(MoleculeCSVDataset):
    """Subset of the PubChem BioAssay Dataset for aromaticity prediction.

    Introduced in `Pushing the Boundaries of Molecular Representation for Drug
    Discovery with the Graph Attention Mechanism
    `__, the task is predicting
    the number of aromatic atoms in a molecule.

    It consists of 3945 molecules with 0-40 aromatic atoms sampled from the
    PubChem BioAssay dataset.

    Parameters
    ----------
    smiles_to_graph: callable, str -> DGLGraph
        A function turning a SMILES string into a DGLGraph.
        Default to :func:`dgllife.utils.smiles_to_bigraph`.
    node_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict
        Featurization for nodes like atoms in a molecule, which can be used to update
        ndata for a DGLGraph. Default to None.
    edge_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict
        Featurization for edges like bonds in a molecule, which can be used to update
        edata for a DGLGraph. Default to None.
    load : bool
        Whether to load the previously pre-processed dataset or pre-process from scratch.
        ``load`` should be False when we want to try different graph construction and
        featurization methods and need to pre-process from scratch. Default to False.
    log_every : bool
        Print a message every time ``log_every`` molecules are processed. Default to 1000.
    n_jobs : int
        The maximum number of concurrently running jobs for graph construction and
        featurization, using joblib backend. Default to 1.
    """
    def __init__(self, smiles_to_graph=smiles_to_bigraph, node_featurizer=None,
                 edge_featurizer=None, load=False, log_every=1000, n_jobs=1):
        self._url = 'dataset/pubchem_bioassay_aromaticity.csv'
        # Fetch the raw CSV once; skip the download if it is already cached.
        local_path = get_download_dir() + '/pubchem_bioassay_aromaticity.csv'
        download(_get_dgl_url(self._url), path=local_path, overwrite=False)

        super(PubChemBioAssayAromaticity, self).__init__(
            pd.read_csv(local_path), smiles_to_graph, node_featurizer, edge_featurizer,
            "cano_smiles", './pubchem_aromaticity_dglgraph.bin',
            load=load, log_every=log_every, n_jobs=n_jobs)
7 | 8 | # current version 9 | __version__ = '0.3.1' 10 | -------------------------------------------------------------------------------- /python/dgllife/model/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | from .gnn import * 7 | from .readout import * 8 | from .model_zoo import * 9 | from .pretrain import * 10 | -------------------------------------------------------------------------------- /python/dgllife/model/gnn/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | # Graph neural networks for updating node representations 7 | 8 | from .attentivefp import * 9 | from .gat import * 10 | from .gatv2 import * 11 | from .gcn import * 12 | from .gin import * 13 | from .gnn_ogb import * 14 | from .nf import * 15 | from .graphsage import * 16 | from .mgcn import * 17 | from .mpnn import * 18 | from .schnet import * 19 | from .weave import * 20 | from .wln import * 21 | from .pagtn import * 22 | -------------------------------------------------------------------------------- /python/dgllife/model/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
class HadamardLinkPredictor(nn.Module):
    """Link prediction from the elementwise product of two node representations.

    The elementwise multiplication is also called Hadamard product. Since
    multiplication commutes, the model is symmetric in its two arguments.

    Parameters
    ----------
    in_feats : int
        Number of input node features
    hidden_feats : int
        Number of hidden features. Default to 256.
    num_layers : int
        Number of linear layers used in total, which should be
        at least 2, counting the input and output layers. Default to 3.
    n_tasks : int
        Number of output tasks. Default to 1.
    dropout : float
        Dropout before each linear layer except for the first one.
        Default to 0., i.e. no dropout is performed.
    activation : callable
        Activation function to apply after the output of each linear layer.
        Default to ReLU.
    """
    def __init__(self,
                 in_feats,
                 hidden_feats=256,
                 num_layers=3,
                 n_tasks=1,
                 dropout=0.,
                 activation=F.relu):
        super(HadamardLinkPredictor, self).__init__()

        assert num_layers >= 2, 'Expect num_layers to be at least 2, got {:d}'.format(num_layers)

        # Layer widths: input -> hidden x (num_layers - 1) -> output
        widths = [in_feats] + [hidden_feats] * (num_layers - 1) + [n_tasks]
        self.layers = nn.ModuleList(
            nn.Linear(d_in, d_out) for d_in, d_out in zip(widths[:-1], widths[1:]))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

    def reset_parameters(self):
        """Reinitialize the parameters of every linear layer."""
        for linear in self.layers:
            linear.reset_parameters()

    def forward(self, left_node_feats, right_node_feats):
        """Link Prediction

        Perform link prediction for P pairs of nodes. Note
        that this model is symmetric and we don't have
        separate parameters for the two arguments.

        Parameters
        ----------
        left_node_feats : float32 tensor of shape (P, D1)
            Representations for the first node in P pairs.
            D1 for the number of input node features.
        right_node_feats : float32 tensor of shape (P, D1)
            Representations for the second node in P pairs.
            D1 for the number of input node features.

        Returns
        -------
        float32 tensor of shape (P, D2)
            Pre-softmax/sigmoid logits, D2 equals n_tasks.
        """
        h = left_node_feats * right_node_feats
        # All layers but the last are followed by activation and dropout.
        for linear in list(self.layers)[:-1]:
            h = linear(h)
            if self.activation is not None:
                h = self.activation(h)
            h = self.dropout(h)

        return self.layers[-1](h)
class MPNNPredictor(nn.Module):
    """MPNN-based model for regression and classification on graphs.

    MPNN is introduced in `Neural Message Passing for Quantum Chemistry
    `__.

    The model runs message passing over node/edge features, aggregates the
    node representations into a graph representation with Set2Set, and maps
    it to the output with a two-layer MLP.

    Parameters
    ----------
    node_in_feats : int
        Size for the input node features.
    edge_in_feats : int
        Size for the input edge features.
    node_out_feats : int
        Size for the output node representations. Default to 64.
    edge_hidden_feats : int
        Size for the hidden edge representations. Default to 128.
    n_tasks : int
        Number of tasks, which is also the output size. Default to 1.
    num_step_message_passing : int
        Number of message passing steps. Default to 6.
    num_step_set2set : int
        Number of set2set steps. Default to 6.
    num_layer_set2set : int
        Number of set2set layers. Default to 3.
    """
    def __init__(self,
                 node_in_feats,
                 edge_in_feats,
                 node_out_feats=64,
                 edge_hidden_feats=128,
                 n_tasks=1,
                 num_step_message_passing=6,
                 num_step_set2set=6,
                 num_layer_set2set=3):
        super(MPNNPredictor, self).__init__()

        # Message passing over node and edge features.
        self.gnn = MPNNGNN(node_in_feats=node_in_feats,
                           node_out_feats=node_out_feats,
                           edge_in_feats=edge_in_feats,
                           edge_hidden_feats=edge_hidden_feats,
                           num_step_message_passing=num_step_message_passing)
        # Set2Set readout outputs 2 * node_out_feats features per graph,
        # hence the doubled input size of the first prediction layer.
        self.readout = Set2Set(input_dim=node_out_feats,
                               n_iters=num_step_set2set,
                               n_layers=num_layer_set2set)
        self.predict = nn.Sequential(
            nn.Linear(2 * node_out_feats, node_out_feats),
            nn.ReLU(),
            nn.Linear(node_out_feats, n_tasks),
        )

    def forward(self, g, node_feats, edge_feats):
        """Graph-level regression/soft classification.

        Parameters
        ----------
        g : DGLGraph
            DGLGraph for a batch of graphs.
        node_feats : float32 tensor of shape (V, node_in_feats)
            Input node features.
        edge_feats : float32 tensor of shape (E, edge_in_feats)
            Input edge features.

        Returns
        -------
        float32 tensor of shape (G, n_tasks)
            Prediction for the graphs in the batch. G for the number of graphs.
        """
        updated_node_feats = self.gnn(g, node_feats, edge_feats)
        graph_readout = self.readout(g, updated_node_feats)
        return self.predict(graph_readout)
class PAGTNPredictor(nn.Module):
    """PAGTN model for regression and classification on graphs.

    PAGTN is introduced in `Path-Augmented Graph Transformer Network
    <https://arxiv.org/abs/1905.12712>`__.

    Parameters
    ----------
    node_in_feats : int
        Size for the input node features.
    node_out_feats : int
        Size for the output node features in PAGTN layers.
    node_hid_feats : int
        Size for the hidden node features in PAGTN layers.
    edge_feats : int
        Size for the input edge features.
    depth : int
        Number of PAGTN layers to be applied. Default to 5.
    nheads : int
        Number of attention heads. Default to 1.
    dropout : float
        The probability for performing dropout. Default to 0.1.
    activation : callable
        Activation function to apply. Default to LeakyReLU(0.2). The default
        is a single shared module instance; LeakyReLU carries no learnable
        state so sharing it across predictors is harmless.
    n_tasks : int
        Number of tasks, which is also the output size. Default to 1.
    mode : 'max' or 'mean' or 'sum'
        Whether to compute elementwise maximum, mean or sum of the node
        representations. Default to 'sum'.
    """
    def __init__(self,
                 node_in_feats,
                 node_out_feats,
                 node_hid_feats,
                 edge_feats,
                 depth=5,
                 nheads=1,
                 dropout=0.1,
                 activation=nn.LeakyReLU(0.2),
                 n_tasks=1,
                 mode='sum'):
        super(PAGTNPredictor, self).__init__()
        self.model = PAGTNGNN(node_in_feats, node_out_feats,
                              node_hid_feats, edge_feats,
                              depth, nheads, dropout, activation)
        # forward() concatenates the raw input node features to the GNN output
        # (skip connection), so the readout consumes
        # node_out_feats + node_in_feats features per node.
        self.readout = MLPNodeReadout(node_out_feats + node_in_feats,
                                      node_out_feats,
                                      n_tasks,
                                      mode=mode)

    def forward(self, g, node_feats, edge_feats):
        """Graph-level regression/soft classification.

        Parameters
        ----------
        g : DGLGraph
            DGLGraph for a batch of graphs
        node_feats : float32 tensor of shape (V, node_in_feats)
            Input node features. V for the number of nodes in the batch of graphs.
        edge_feats : float32 tensor of shape (E, edge_feats)
            Input edge features. E for the number of edges in the batch of graphs.

        Returns
        -------
        float32 tensor of shape (G, n_tasks)
            Prediction for the graphs in the batch. G for the number of graphs.
        """
        atom_h = self.model(g, node_feats, edge_feats)
        # Skip connection: append the input features before the graph readout.
        atom_h = torch.cat([atom_h, node_feats], dim=1)
        return self.readout(g, atom_h)
    def __init__(self, node_feats=64, hidden_feats=None, classifier_hidden_feats=64, n_tasks=1,
                 num_node_types=100, cutoff=30., gap=0.1, predictor_hidden_feats=64):
        """Initialize the SchNet predictor.

        See the class docstring for the meaning of each argument.
        """
        super(SchNetPredictor, self).__init__()

        # Backward-compat shim: when the caller left predictor_hidden_feats at
        # its default (64) but customized the deprecated classifier_hidden_feats,
        # honor the deprecated argument and warn about the rename.
        if predictor_hidden_feats == 64 and classifier_hidden_feats != 64:
            print('classifier_hidden_feats is deprecated and will be removed in the future, '
                  'use predictor_hidden_feats instead')
            predictor_hidden_feats = classifier_hidden_feats

        # GNN computing node representations from node types and edge distances.
        self.gnn = SchNetGNN(node_feats, hidden_feats, num_node_types, cutoff, gap)
        # MLP readout aggregating node representations into per-graph predictions.
        self.readout = MLPNodeReadout(node_feats, predictor_hidden_feats, n_tasks,
                                      activation=ShiftedSoftplus())
75 | """ 76 | node_feats = self.gnn(g, node_types, edge_dists) 77 | return self.readout(g, node_feats) 78 | -------------------------------------------------------------------------------- /python/dgllife/model/pretrain/generative_models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | # pylint: disable= no-member, arguments-differ, invalid-name 7 | # 8 | # Pre-trained generative models. 9 | 10 | __all__ = ['generative_url', 11 | 'create_generative_model'] 12 | 13 | generative_url = { 14 | 'DGMG_ChEMBL_canonical': 'pre_trained/dgmg_ChEMBL_canonical.pth', 15 | 'DGMG_ChEMBL_random': 'pre_trained/dgmg_ChEMBL_random.pth', 16 | 'DGMG_ZINC_canonical': 'pre_trained/dgmg_ZINC_canonical.pth', 17 | 'DGMG_ZINC_random': 'pre_trained/dgmg_ZINC_random.pth', 18 | # JTVAE pre-trained on ZINC without KL regularization 19 | 'JTVAE_ZINC_no_kl': 'pre_trained/jtvae_ZINC_no_kl.pth' 20 | } 21 | 22 | try: 23 | # Things requiring RDKit 24 | from rdkit import Chem 25 | from ...model import DGMG, JTNNVAE 26 | from ...utils import JTVAEVocab 27 | except ImportError: 28 | pass 29 | 30 | def create_generative_model(model_name): 31 | """Create a model. 32 | 33 | Parameters 34 | ---------- 35 | model_name : str 36 | Name for the model. 
37 | 38 | Returns 39 | ------- 40 | Created model 41 | """ 42 | if model_name.startswith('DGMG'): 43 | if model_name.startswith('DGMG_ChEMBL'): 44 | atom_types = ['O', 'Cl', 'C', 'S', 'F', 'Br', 'N'] 45 | elif model_name.startswith('DGMG_ZINC'): 46 | atom_types = ['Br', 'S', 'C', 'P', 'N', 'O', 'F', 'Cl', 'I'] 47 | bond_types = [Chem.rdchem.BondType.SINGLE, 48 | Chem.rdchem.BondType.DOUBLE, 49 | Chem.rdchem.BondType.TRIPLE] 50 | 51 | return DGMG(atom_types=atom_types, 52 | bond_types=bond_types, 53 | node_hidden_size=128, 54 | num_prop_rounds=2, 55 | dropout=0.2) 56 | 57 | elif model_name.startswith('JTVAE'): 58 | vocab = JTVAEVocab() 59 | return JTNNVAE(vocab=vocab, 60 | hidden_size=450, 61 | latent_size=56, 62 | depth=3) 63 | 64 | else: 65 | return None 66 | -------------------------------------------------------------------------------- /python/dgllife/model/pretrain/moleculenet/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | # pylint: disable= no-member, arguments-differ, invalid-name 7 | # 8 | # Index for pre-trained models on MoleculeNet 9 | 10 | from .bace import * 11 | from .bbbp import * 12 | from .clintox import * 13 | from .esol import * 14 | from .freesolv import * 15 | from .hiv import * 16 | from .lipophilicity import * 17 | from .pcba import * 18 | from .muv import * 19 | from .sider import * 20 | from .tox21 import * 21 | from .toxcast import * 22 | 23 | __all__ = ['moleculenet_url', 24 | 'create_moleculenet_model'] 25 | 26 | moleculenet_url = {**bace_url, **bbbp_url, **clintox_url, **esol_url, **freesolv_url, **hiv_url, 27 | **lipophilicity_url, **muv_url, **pcba_url, **sider_url, **tox21_url, 28 | **toxcast_url} 29 | 30 | def create_moleculenet_model(model_name): 31 | """Create a model. 
def create_property_model(model_name):
    """Create a model.

    Parameters
    ----------
    model_name : str
        Name for the model.

    Returns
    -------
    Created model, or None if ``model_name`` is not recognized.
    """
    # Pre-trained GIN variants all share one architecture; only the
    # pre-training objective (and hence the checkpoint) differs.
    gin_variants = {'gin_supervised_contextpred', 'gin_supervised_infomax',
                    'gin_supervised_edgepred', 'gin_supervised_masking'}

    if model_name == 'AttentiveFP_Aromaticity':
        return AttentiveFPPredictor(node_feat_size=39,
                                    edge_feat_size=10,
                                    num_layers=2,
                                    num_timesteps=2,
                                    graph_feat_size=200,
                                    n_tasks=1,
                                    dropout=0.2)

    if model_name in gin_variants:
        return GIN(num_node_emb_list=[120, 3],
                   num_edge_emb_list=[6, 3],
                   num_layers=5,
                   emb_dim=300,
                   JK='last',
                   dropout=0.5)

    return None
class MLPNodeReadout(nn.Module):
    """MLP-based Readout.

    Node representations are first transformed by a two-layer MLP and then
    aggregated into graph representations with max, mean or sum pooling.

    Parameters
    ----------
    node_feats : int
        Size for the input node features.
    hidden_feats : int
        Size for the hidden representations.
    graph_feats : int
        Size for the output graph representations.
    activation : callable
        Activation function. Default to None.
    mode : 'max' or 'mean' or 'sum'
        Whether to compute elementwise maximum, mean or sum of the node representations.
    """
    def __init__(self, node_feats, hidden_feats, graph_feats, activation=None, mode='sum'):
        super(MLPNodeReadout, self).__init__()

        assert mode in ['max', 'mean', 'sum'], \
            "Expect mode to be 'max' or 'mean' or 'sum', got {}".format(mode)
        self.mode = mode
        self.in_project = nn.Linear(node_feats, hidden_feats)
        self.activation = activation
        self.out_project = nn.Linear(hidden_feats, graph_feats)

    def forward(self, g, node_feats):
        """Computes graph representations out of node features.

        Parameters
        ----------
        g : DGLGraph
            DGLGraph for a batch of graphs.
        node_feats : float32 tensor of shape (V, node_feats)
            Input node features, V for the number of nodes.

        Returns
        -------
        graph_feats : float32 tensor of shape (G, graph_feats)
            Graph representations computed. G for the number of graphs.
        """
        # Per-node MLP: Linear -> (optional activation) -> Linear.
        h = self.in_project(node_feats)
        if self.activation is not None:
            h = self.activation(h)
        h = self.out_project(h)

        # Pick the DGL graph-level pooling op for the configured mode;
        # self.mode was validated in __init__.
        pool = {'max': dgl.max_nodes,
                'mean': dgl.mean_nodes,
                'sum': dgl.sum_nodes}[self.mode]
        with g.local_scope():
            g.ndata['h'] = h
            graph_feats = pool(g, 'h')

        return graph_feats
class WeightedSumAndMax(nn.Module):
    r"""Apply weighted sum and max pooling to the node
    representations and concatenate the results.

    Parameters
    ----------
    in_feats : int
        Input node feature size
    """
    def __init__(self, in_feats):
        super(WeightedSumAndMax, self).__init__()

        # Learns per-node weights for the weighted-sum pooling.
        self.weight_and_sum = WeightAndSum(in_feats)

    def forward(self, bg, feats):
        """Readout

        Parameters
        ----------
        bg : DGLGraph
            DGLGraph for a batch of graphs.
        feats : FloatTensor of shape (N, M1)
            * N is the total number of nodes in the batch of graphs
            * M1 is the input node feature size, which must match
              in_feats in initialization

        Returns
        -------
        h_g : FloatTensor of shape (B, 2 * M1)
            * B is the number of graphs in the batch
            * M1 is the input node feature size, which must match
              in_feats in initialization
        """
        weighted_sum = self.weight_and_sum(bg, feats)
        # Max pooling over the nodes of each graph; local_scope keeps
        # the temporary 'h' field from leaking into bg.
        with bg.local_scope():
            bg.ndata['h'] = feats
            maximum = dgl.max_nodes(bg, 'h')
        return torch.cat([weighted_sum, maximum], dim=1)
def get_lib_path():
    """Read and return the package version from dgllife/libinfo.py.

    libinfo.py is executed with exec() rather than imported, because
    importing it would trigger dgllife/__init__.py and pull in heavy
    dependencies during installation.

    Returns
    -------
    str
        The value of ``__version__`` defined in libinfo.py.
    """
    libinfo_py = os.path.join(CURRENT_DIR, './dgllife/libinfo.py')
    libinfo = {'__file__': libinfo_py}
    # Use a context manager so the file handle is closed deterministically
    # (the original relied on garbage collection to close it).
    with open(libinfo_py, "rb") as f:
        source = f.read()
    exec(compile(source, libinfo_py, 'exec'), libinfo, libinfo)
    version = libinfo['__version__']

    return version
def remove_dir(dir):
    """Recursively delete the directory ``dir`` if it exists.

    OS-level removal errors are deliberately ignored so that test
    cleanup never fails the test itself.
    """
    if not os.path.isdir(dir):
        return
    try:
        shutil.rmtree(dir)
    except OSError:
        pass
def test_dgmg():
    """Smoke test for the DGMG generative model.

    Exercises teacher-forced decoding (a fixed action sequence must
    reproduce a known SMILES) and free sampling in eval mode, for both a
    tiny custom architecture and the default hyperparameters.
    """
    model = DGMG(atom_types=['O', 'Cl', 'C', 'S', 'F', 'Br', 'N'],
                 bond_types=[Chem.rdchem.BondType.SINGLE,
                             Chem.rdchem.BondType.DOUBLE,
                             Chem.rdchem.BondType.TRIPLE],
                 node_hidden_size=1,
                 num_prop_rounds=1,
                 dropout=0.2)
    # Decoding with this fixed action sequence must yield the SMILES 'CO'.
    assert model(
        actions=[(0, 2), (1, 3), (0, 0), (1, 0), (2, 0), (1, 3), (0, 7)], rdkit_mol=True) == 'CO'
    # With rdkit_mol=False the model returns nothing.
    assert model(rdkit_mol=False) is None
    model.eval()
    # Free sampling in eval mode should produce a non-None result.
    assert model(rdkit_mol=True) is not None

    # Repeat the same checks with the default architecture hyperparameters.
    model = DGMG(atom_types=['O', 'Cl', 'C', 'S', 'F', 'Br', 'N'],
                 bond_types=[Chem.rdchem.BondType.SINGLE,
                             Chem.rdchem.BondType.DOUBLE,
                             Chem.rdchem.BondType.TRIPLE])
    assert model(
        actions=[(0, 2), (1, 3), (0, 0), (1, 0), (2, 0), (1, 3), (0, 7)], rdkit_mol=True) == 'CO'
    assert model(rdkit_mol=False) is None
    model.eval()
    assert model(rdkit_mol=True) is not None
def test_hadamard_link_predictor():
    """Check the output shape of HadamardLinkPredictor on random node pairs."""
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    num_pairs = 4
    in_feats = 2
    model = HadamardLinkPredictor(in_feats=in_feats,
                                  hidden_feats=3,
                                  num_layers=3).to(device)
    feats_left = torch.randn(num_pairs, in_feats).to(device)
    feats_right = torch.randn(num_pairs, in_feats).to(device)
    out = model(feats_left, feats_right)
    # One score per node pair.
    assert out.shape == torch.Size([num_pairs, 1])
#!/bin/bash
# Run the unit test suites for a given backend and device.
#
# Usage: task_unit_test.sh backend device
#   backend : DGL backend name, e.g. pytorch
#   device  : cpu or gpu

. /opt/conda/etc/profile.d/conda.sh

# Print a failure message and abort with a non-zero exit code.
function fail {
    echo FAIL: $@
    exit -1
}

# Print the expected invocation.
function usage {
    echo "Usage: $0 backend device"
}

# Exactly two positional arguments are required.
if [ $# -ne 2 ]; then
    usage
    fail "Error: must specify backend and device"
fi

export DGLBACKEND=$1
export DGL_DOWNLOAD_DIR=${PWD}
export PYTHONPATH=${PWD}/python:$PYTHONPATH

conda activate ${DGLBACKEND}-ci

# Install the matching nightly DGL build and pin the visible CUDA devices.
if [ $2 == "gpu" ]
then
    export CUDA_VISIBLE_DEVICES=0
    pip uninstall -y dgl-cu101
    pip install --pre dgl-cu101
else
    export CUDA_VISIBLE_DEVICES=-1
    pip uninstall -y dgl
    pip install --pre dgl
fi

export LD_LIBRARY_PATH=$CONDA_PREFIX/lib
# Each suite writes its own JUnit XML report; abort on the first failing suite.
python -m pytest -v -o junit_family=xunit1 --junitxml=pytest_data.xml --disable-pytest-warnings tests/data || fail "data"
python -m pytest -v -o junit_family=xunit1 --junitxml=pytest_utils.xml --disable-pytest-warnings tests/utils || fail "utils"
python -m pytest -v -o junit_family=xunit1 --junitxml=pytest_model.xml --disable-pytest-warnings tests/model || fail "model"
4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import os 7 | from dgllife.utils import analyze_mols 8 | 9 | def remove_file(fname): 10 | if os.path.isfile(fname): 11 | try: 12 | os.remove(fname) 13 | except OSError: 14 | pass 15 | 16 | def test_analyze_mols(): 17 | smiles = ['CCO', 'CC1([C@@H](N2[C@H](S1)[C@@H](C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C', '1'] 18 | results = analyze_mols(smiles, path_to_export='.') 19 | assert results['num_atoms'] == [3, 23] 20 | assert results['num_bonds'] == [2, 25] 21 | assert results['num_rings'] == [0, 3] 22 | assert results['num_input_mols'] == 3 23 | assert results['num_valid_mols'] == 2 24 | assert results['valid_proportion'] == 0.6666666666666666 25 | assert results['cano_smi'] == [ 26 | 'CCO', 'CC1(C)S[C@@H]2[C@H](NC(=O)Cc3ccccc3)C(=O)N2[C@H]1C(=O)O'] 27 | assert results['atom_type_frequency'] == {'O': 2, 'C': 2, 'N': 1, 'S': 1} 28 | assert results['degree_frequency'] == {1: 2, 2: 2, 3: 1, 4: 1} 29 | assert results['total_degree_frequency'] == {2: 2, 4: 2, 1: 1, 3: 1} 30 | assert results['explicit_valence_frequency'] == {1: 2, 2: 2, 3: 1, 4: 1} 31 | assert results['implicit_valence_frequency'] == {1: 2, 2: 2, 3: 2, 0: 1} 32 | assert results['hybridization_frequency'] == {'SP3': 2, 'SP2': 1} 33 | assert results['total_num_h_frequency'] == {1: 2, 2: 2, 3: 2, 0: 1} 34 | assert results['formal_charge_frequency'] == {0: 2} 35 | assert results['num_radical_electrons_frequency'] == {0: 2} 36 | assert results['aromatic_atom_frequency'] == {False: 2, True: 1} 37 | assert results['chirality_tag_frequency'] == {'CHI_UNSPECIFIED': 2, 38 | 'CHI_TETRAHEDRAL_CCW': 1, 39 | 'CHI_TETRAHEDRAL_CW': 1} 40 | assert results['bond_type_frequency'] == {'SINGLE': 2, 'DOUBLE': 1, 'AROMATIC': 1} 41 | assert results['conjugated_bond_frequency'] == {False: 2, True: 1} 42 | assert results['bond_stereo_configuration_frequency'] == {'STEREONONE': 2} 43 | assert results['bond_direction_frequency'] == {'NONE': 2} 44 | 45 | 
remove_file('valid_canonical_smiles.txt') 46 | remove_file('summary.txt') 47 | 48 | if __name__ == '__main__': 49 | test_analyze_mols() 50 | -------------------------------------------------------------------------------- /tests/utils/test_io_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import numpy as np 7 | import os 8 | import shutil 9 | 10 | from dgl.data.utils import download, _get_dgl_url, extract_archive 11 | from dgllife.utils.io import get_mol_3d_coordinates, load_molecule, load_smiles_from_txt 12 | from rdkit import Chem 13 | from rdkit.Chem import AllChem 14 | 15 | def test_get_mol_3D_coordinates(): 16 | mol = Chem.MolFromSmiles('CCO') 17 | # Test the case when conformation does not exist 18 | assert get_mol_3d_coordinates(mol) is None 19 | 20 | # Test the case when conformation exists 21 | AllChem.EmbedMolecule(mol) 22 | AllChem.MMFFOptimizeMolecule(mol) 23 | coords = get_mol_3d_coordinates(mol) 24 | assert isinstance(coords, np.ndarray) 25 | assert coords.shape == (mol.GetNumAtoms(), 3) 26 | 27 | def remove_dir(dir): 28 | if os.path.isdir(dir): 29 | try: 30 | shutil.rmtree(dir) 31 | except OSError: 32 | pass 33 | 34 | def test_load_molecule(): 35 | remove_dir('tmp1') 36 | remove_dir('tmp2') 37 | 38 | url = _get_dgl_url('dgllife/example_mols.tar.gz') 39 | local_path = 'tmp1/example_mols.tar.gz' 40 | download(url, path=local_path) 41 | extract_archive(local_path, 'tmp2') 42 | 43 | load_molecule('tmp2/example_mols/example.sdf') 44 | load_molecule('tmp2/example_mols/example.mol2', use_conformation=False, sanitize=True) 45 | load_molecule('tmp2/example_mols/example.pdbqt', calc_charges=True) 46 | mol, _ = load_molecule('tmp2/example_mols/example.pdb', remove_hs=True) 47 | assert mol.GetNumAtoms() == mol.GetNumHeavyAtoms() 48 | 49 | remove_dir('tmp1') 50 | 
remove_dir('tmp2') 51 | 52 | def test_load_smiles_from_txt(): 53 | smiles_list1 = ['CCO', 'O=P(O)(OC1O[C@@H]([C@@H](O)[C@H](O)[C@H]1O)CO)O'] 54 | file = 'smiles.txt' 55 | with open(file, 'w') as f: 56 | for s in smiles_list1: 57 | f.write(s + '\n') 58 | smiles_list2 = load_smiles_from_txt(file) 59 | assert smiles_list1 == smiles_list2 60 | 61 | if __name__ == '__main__': 62 | test_get_mol_3D_coordinates() 63 | test_load_molecule() 64 | test_load_smiles_from_txt() 65 | -------------------------------------------------------------------------------- /tests/utils/test_jtvae.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | # pylint: disable= no-member, arguments-differ, invalid-name 7 | # 8 | # Test for utils in JTVAE 9 | 10 | import os 11 | 12 | def remove_file(fname): 13 | if os.path.isfile(fname): 14 | try: 15 | os.remove(fname) 16 | except OSError: 17 | pass 18 | 19 | def test_vocab(): 20 | from dgllife.utils.jtvae import JTVAEVocab 21 | 22 | vocab = JTVAEVocab() 23 | assert vocab.get_smiles(0) == 'C1=[NH+]C=[NH+]CC1' 24 | assert vocab.get_index('C1=[NH+]C=[NH+]CC1') == 0 25 | 26 | tmp_file = 'tmp.txt' 27 | with open(tmp_file, 'w') as f: 28 | f.write('CCO\n') 29 | f.write('C1=CC2=CC=CC=CC2=C1\n') 30 | f.write('O=C(O)/C=C/C(=O)O\n') 31 | f.write('N[C@@H](C)C(=O)O\n') 32 | vocab = JTVAEVocab(tmp_file) 33 | assert set(vocab.vocab) == \ 34 | set(['C=C', 'C1=CCC=C1', 'C', 'C=O', 'CN', 'C1=CC=CCC=C1', 'CO', 'CC']) 35 | remove_file(tmp_file) 36 | 37 | if __name__ == '__main__': 38 | test_vocab() 39 | -------------------------------------------------------------------------------- /tests/utils/test_splitters.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | 6 | import torch 7 | 8 | from dgllife.utils.splitters import * 9 | from rdkit import Chem 10 | 11 | class TestDataset(object): 12 | def __init__(self): 13 | self.smiles = [ 14 | 'CCO', 15 | 'C1CCCCC1', 16 | 'O1CCOCC1', 17 | 'C1CCCC2C1CCCC2', 18 | 'N#N' 19 | ] 20 | self.mols = [Chem.MolFromSmiles(s) for s in self.smiles] 21 | self.labels = torch.arange(2 * len(self.smiles)).reshape(len(self.smiles), -1) 22 | 23 | def __getitem__(self, item): 24 | return self.smiles[item], self.mols[item] 25 | 26 | def __len__(self): 27 | return len(self.smiles) 28 | 29 | def test_consecutive_splitter(): 30 | dataset = TestDataset() 31 | ConsecutiveSplitter.train_val_test_split(dataset) 32 | ConsecutiveSplitter.k_fold_split(dataset) 33 | 34 | def test_random_splitter(): 35 | dataset = TestDataset() 36 | RandomSplitter.train_val_test_split(dataset, random_state=0) 37 | RandomSplitter.k_fold_split(dataset) 38 | 39 | def test_molecular_weight_splitter(): 40 | dataset = TestDataset() 41 | MolecularWeightSplitter.train_val_test_split(dataset) 42 | MolecularWeightSplitter.k_fold_split(dataset, mols=dataset.mols) 43 | 44 | def test_scaffold_splitter(): 45 | dataset = TestDataset() 46 | ScaffoldSplitter.train_val_test_split(dataset) 47 | ScaffoldSplitter.k_fold_split(dataset, mols=dataset.mols) 48 | 49 | def test_single_task_stratified_splitter(): 50 | dataset = TestDataset() 51 | SingleTaskStratifiedSplitter.train_val_test_split(dataset, dataset.labels, 1) 52 | SingleTaskStratifiedSplitter.k_fold_split(dataset, dataset.labels, 1) 53 | 54 | if __name__ == '__main__': 55 | test_consecutive_splitter() 56 | test_random_splitter() 57 | test_molecular_weight_splitter() 58 | test_scaffold_splitter() 59 | test_single_task_stratified_splitter() 60 | --------------------------------------------------------------------------------