├── .github
    ├── CODEOWNERS
    └── workflows
    │   └── cla.yml
├── .gitmodules
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── cosmoflow
    ├── .gitignore
    ├── LEGAL
    ├── README.md
    ├── builds
    │   ├── Dockerfile
    │   └── Dockerfile.cpu_mpich
    ├── configs
    │   ├── cosmo.yaml
    │   ├── cosmo_dummy.yaml
    │   └── cosmo_v07.yaml
    ├── data
    │   ├── __init__.py
    │   ├── cosmo.py
    │   └── dummy.py
    ├── logs
    │   └── .gitignore
    ├── models
    │   ├── __init__.py
    │   ├── cosmoflow.py
    │   └── layers.py
    ├── prepare.py
    ├── scripts
    │   ├── setup_cgpu.sh
    │   ├── train_cgpu.sh
    │   └── train_cori_shifter.sh
    ├── train.py
    └── utils
    │   ├── __init__.py
    │   ├── argparse.py
    │   ├── callbacks.py
    │   ├── checkpoints.py
    │   ├── device.py
    │   ├── distributed.py
    │   ├── mlperf_logging.py
    │   ├── optimizers.py
    │   └── staging.py
├── deepcam
    ├── LICENSE
    ├── README.md
    ├── analysis
    │   ├── process_nsight_deepcam.ipynb
    │   ├── roofline_plot.ipynb
    │   ├── training_analysis.ipynb
    │   └── utils.py
    ├── compliance
    │   └── 1.0.0
    │   │   ├── closed_deepcam_adam.yml
    │   │   ├── closed_deepcam_lamb.yml
    │   │   └── rcps_deepcam.json
    ├── docker
    │   ├── Dockerfile
    │   └── build_docker.sh
    ├── requirements.txt
    └── src
    │   ├── deepCam
    │       ├── architecture
    │       │   ├── __init__.py
    │       │   └── deeplab_xception.py
    │       ├── data
    │       │   ├── __init__.py
    │       │   └── cam_hdf5_dataset.py
    │       ├── driver
    │       │   ├── __init__.py
    │       │   ├── trainer.py
    │       │   └── validation.py
    │       ├── run_scripts
    │       │   ├── run_training.sh
    │       │   └── run_training_nranks1024.sh
    │       ├── train.py
    │       └── utils
    │       │   ├── __init__.py
    │       │   ├── bnstats.py
    │       │   ├── comm.py
    │       │   ├── losses.py
    │       │   ├── metric.py
    │       │   ├── mlperf_log_utils.py
    │       │   ├── optimizer_helpers.py
    │       │   ├── parser.py
    │       │   ├── schedulers.py
    │       │   └── types.py
    │   └── utils
    │       ├── run_stage.sh
    │       ├── run_summarize_circe.sh
    │       ├── split_data.py
    │       └── summarize_data.py
├── open_catalyst
    ├── .circleci
    │   └── config.yml
    ├── .flake8
    ├── .gitignore
    ├── .isort.cfg
    ├── .pre-commit-config.yaml
    ├── DATASET.md
    ├── DATASET_PER_ADSORBATE.md
    ├── LICENSE.md
    ├── MODELS.md
    ├── README.md
    ├── TRAIN.md
    ├── configs
    │   ├── is2re
    │   │   ├── 100k
    │   │   │   ├── base.yml
    │   │   │   ├── cgcnn
    │   │   │   │   └── cgcnn.yml
    │   │   │   ├── dimenet_plus_plus
    │   │   │   │   └── dpp.yml
    │   │   │   └── schnet
    │   │   │   │   └── schnet.yml
    │   │   ├── 10k
    │   │   │   ├── base.yml
    │   │   │   ├── cgcnn
    │   │   │   │   └── cgcnn.yml
    │   │   │   ├── dimenet_plus_plus
    │   │   │   │   └── dpp.yml
    │   │   │   └── schnet
    │   │   │   │   └── schnet.yml
    │   │   └── all
    │   │   │   ├── base.yml
    │   │   │   ├── cgcnn
    │   │   │       └── cgcnn.yml
    │   │   │   ├── dimenet_plus_plus
    │   │   │       └── dpp.yml
    │   │   │   └── schnet
    │   │   │       └── schnet.yml
    │   ├── mlperf_hpc.yml
    │   ├── pm_b2048.yml
    │   └── s2ef
    │   │   ├── 200k
    │   │       ├── base.yml
    │   │       ├── cgcnn
    │   │       │   └── cgcnn.yml
    │   │       ├── dimenet_plus_plus
    │   │       │   └── dpp.yml
    │   │       ├── forcenet
    │   │       │   └── fn_forceonly.yml
    │   │       └── schnet
    │   │       │   └── schnet.yml
    │   │   ├── 20M
    │   │       ├── base.yml
    │   │       ├── cgcnn
    │   │       │   └── cgcnn.yml
    │   │       ├── dimenet_plus_plus
    │   │       │   └── dpp.yml
    │   │       └── schnet
    │   │       │   └── schnet.yml
    │   │   ├── 2M
    │   │       ├── base.yml
    │   │       ├── cgcnn
    │   │       │   └── cgcnn.yml
    │   │       ├── dimenet_plus_plus
    │   │       │   ├── dpp.yml
    │   │       │   └── dpp_relax.yml
    │   │       └── schnet
    │   │       │   └── schnet.yml
    │   │   └── all
    │   │       ├── base.yml
    │   │       ├── cgcnn
    │   │           └── cgcnn.yml
    │   │       ├── dimenet_plus_plus
    │   │           ├── dpp.yml
    │   │           ├── dpp10.7M_forceonly.yml
    │   │           ├── dpp_energyonly.yml
    │   │           └── dpp_forceonly.yml
    │   │       └── schnet
    │   │           └── schnet.yml
    ├── docker
    │   └── Dockerfile
    ├── docs
    │   ├── Makefile
    │   ├── make.bat
    │   ├── requirements.txt
    │   └── source
    │   │   ├── conf.py
    │   │   ├── index.rst
    │   │   ├── modules
    │   │       ├── dataset.rst
    │   │       ├── model.rst
    │   │       └── trainer.rst
    │   │   └── tutorials
    │   │       ├── data_preprocessing.ipynb
    │   │       ├── data_visualization.ipynb
    │   │       ├── getting_started.rst
    │   │       ├── lmdb_dataset_creation.ipynb
    │   │       ├── submission.rst
    │   │       ├── train_s2ef_example.ipynb
    │   │       └── training.rst
    ├── env.common.yml
    ├── env.cpu.yml
    ├── env.gpu.yml
    ├── env.yml
    ├── licenses
    │   ├── LICENSE.cgcnn
    │   └── LICENSE.mmf
    ├── logs
    │   └── .gitignore
    ├── main.py
    ├── ocpmodels
    │   ├── __init__.py
    │   ├── common
    │   │   ├── __init__.py
    │   │   ├── data_parallel.py
    │   │   ├── distutils.py
    │   │   ├── flags.py
    │   │   ├── hpo_utils.py
    │   │   ├── logger.py
    │   │   ├── registry.py
    │   │   ├── relaxation
    │   │   │   ├── __init__.py
    │   │   │   ├── ase_utils.py
    │   │   │   ├── ml_relaxation.py
    │   │   │   └── optimizers
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── lbfgs_torch.py
    │   │   ├── transforms.py
    │   │   └── utils.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── embeddings
    │   │   │   ├── __init__.py
    │   │   │   ├── atomic_radii.py
    │   │   │   ├── continuous_embeddings.py
    │   │   │   └── khot_embeddings.py
    │   │   ├── single_point_lmdb.py
    │   │   └── trajectory_lmdb.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── cgcnn.py
    │   │   ├── dimenet.py
    │   │   ├── dimenet_plus_plus.py
    │   │   ├── forcenet.py
    │   │   ├── schnet.py
    │   │   └── utils
    │   │   │   ├── __init__.py
    │   │   │   ├── activations.py
    │   │   │   └── basis.py
    │   ├── modules
    │   │   ├── __init__.py
    │   │   ├── evaluator.py
    │   │   ├── exponential_moving_average.py
    │   │   ├── loss.py
    │   │   ├── normalizer.py
    │   │   └── scheduler.py
    │   ├── preprocessing
    │   │   ├── __init__.py
    │   │   └── atoms_to_graphs.py
    │   ├── tasks
    │   │   ├── __init__.py
    │   │   └── task.py
    │   └── trainers
    │   │   ├── __init__.py
    │   │   ├── base_trainer.py
    │   │   ├── energy_trainer.py
    │   │   ├── forces_trainer.py
    │   │   └── mlperf_forces_trainer.py
    ├── pyproject.toml
    ├── scripts
    │   ├── __init__.py
    │   ├── download_data.py
    │   ├── gif_maker_parallelized.py
    │   ├── hpo
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── run_tune.py
    │   │   ├── run_tune_pbt.py
    │   │   └── slurm
    │   │   │   ├── start-head.sh
    │   │   │   ├── start-worker.sh
    │   │   │   └── submit-ray-cluster.sbatch
    │   ├── make_submission_file.py
    │   ├── preprocess_ef.py
    │   ├── preprocess_relaxed.py
    │   ├── run_training.sh
    │   ├── train_cgpu.sh
    │   ├── train_cgpu_shifter.sh
    │   ├── train_pm.sh
    │   ├── train_pm_shifter.sh
    │   └── uncompress.py
    ├── setup.py
    ├── submit.sh
    └── tests
    │   ├── __init__.py
    │   ├── evaluator
    │       └── test_evaluator.py
    │   ├── models
    │       ├── atoms.json
    │       ├── test_cgcnn.py
    │       ├── test_dimenet.py
    │       ├── test_dimenetpp.py
    │       ├── test_forcenet.py
    │       └── test_schnet.py
    │   └── preprocessing
    │       ├── __init__.py
    │       ├── atoms.json
    │       ├── test_atoms_to_graphs.py
    │       └── test_pbc.py
└── openfold
    ├── Dockerfile
    ├── LICENSE
    ├── NOTICE
    ├── README.md
    ├── openfold
        ├── __init__.py
        ├── checkpoint_utils.py
        ├── config.py
        ├── data
        │   ├── __init__.py
        │   ├── alignments.py
        │   ├── cameo_targets.py
        │   ├── features.py
        │   ├── mmcif.py
        │   ├── parsers.py
        │   ├── residue_constants.py
        │   ├── resources
        │   │   ├── README.md
        │   │   ├── __init__.py
        │   │   └── stereo_chemical_props.txt
        │   ├── templates.py
        │   ├── tools
        │   │   ├── __init__.py
        │   │   └── kalign.py
        │   └── transforms.py
        ├── dataloaders.py
        ├── datasets.py
        ├── distributed.py
        ├── helpers.py
        ├── log_utils.py
        ├── loss.py
        ├── lr_scheduler.py
        ├── model
        │   ├── __init__.py
        │   ├── alphafold.py
        │   ├── angle_resnet.py
        │   ├── attention.py
        │   ├── auxiliary_heads.py
        │   ├── backbone_update.py
        │   ├── dropout.py
        │   ├── evoformer_block.py
        │   ├── evoformer_block_core.py
        │   ├── evoformer_stack.py
        │   ├── extra_msa_block.py
        │   ├── extra_msa_embedder.py
        │   ├── extra_msa_stack.py
        │   ├── global_attention.py
        │   ├── input_embedder.py
        │   ├── invariant_point_attention.py
        │   ├── layer_norm.py
        │   ├── linear.py
        │   ├── msa_column_attention.py
        │   ├── msa_column_global_attention.py
        │   ├── msa_row_attention_with_pair_bias.py
        │   ├── msa_transition.py
        │   ├── outer_product_mean.py
        │   ├── pair_transition.py
        │   ├── recycling_embedder.py
        │   ├── single_transition.py
        │   ├── structure_module.py
        │   ├── template_angle_embedder.py
        │   ├── template_pair_block.py
        │   ├── template_pair_embedder.py
        │   ├── template_pair_stack.py
        │   ├── template_pointwise_attention.py
        │   ├── triangular_attention.py
        │   └── triangular_multiplicative_update.py
        ├── numpy_utils.py
        ├── rigid_utils.py
        ├── samplers.py
        ├── superimposition.py
        ├── swa.py
        ├── torch_utils.py
        └── validation_metrics.py
    ├── scripts
        ├── activate_local_openfold_venv.sh
        ├── build_local_openfold_venv.sh
        ├── deactivate_local_openfold_venv.sh
        ├── download_open_protein_set.sh
        ├── download_pdb_mmcif.sh
        ├── multi_node_training.sub
        ├── preprocess_open_protein_set.py
        └── preprocess_pdb_mmcif.py
    ├── setup.py
    └── train.py


/.github/CODEOWNERS:
--------------------------------------------------------------------------------
 1 | # These owners will be the default owners for everything in the repo.
 2 | # Unless a later match takes precedence,they will be requested for review when someone opens a pull request.
 3 | * @mlcommons/wg-hpc @mlcommons/wg-science
 4 | 
 5 | /.github/CODEOWNERS @mlcommons/systems
 6 | 
 7 | /.github/workflows/cla.yml @mlcommons/systems
 8 | 
 9 | /LICENSE.md @mlcommons/systems
10 | 


--------------------------------------------------------------------------------
/.github/workflows/cla.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | name: "cla-bot"
 3 | on:
 4 |   issue_comment:
 5 |     types: [created]
 6 |   pull_request_target:
 7 |     types: [opened,closed,synchronize]
 8 | 
 9 | jobs:
10 |   cla-check:
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: "MLCommons CLA bot check"
14 |         if: (github.event.comment.body == 'recheck') || github.event_name == 'pull_request_target'
15 |         # Alpha Release
16 |         uses: mlcommons/cla-bot@master
17 |         env:
18 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
19 |           # the below token should have repo scope and must be manually added by you in the repository's secret
20 |           PERSONAL_ACCESS_TOKEN : ${{ secrets.MLCOMMONS_BOT_CLA_TOKEN }}
21 |         with:
22 |           path-to-signatures: 'cla-bot/v1/cla.json'
23 |           # branch should not be protected
24 |           branch: 'main'
25 |           allowlist: user1,bot*
26 |           remote-organization-name: mlcommons
27 |           remote-repository-name: systems
28 |           
29 |          #below are the optional inputs - If the optional inputs are not given, then default values will be taken
30 |           #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository)
31 |           #remote-repository-name:  enter the  remote repository name where the signatures should be stored (Default is storing the signatures in the same repository)
32 |           #create-file-commit-message: 'For example: Creating file for storing CLA Signatures'
33 |           #signed-commit-message: 'For example: $contributorName has signed the CLA in #$pullRequestNo'
34 |           #custom-notsigned-prcomment: 'pull request comment with Introductory message to ask new contributors to sign'
35 |           #custom-pr-sign-comment: 'The signature to be committed in order to sign the CLA'
36 |           #custom-allsigned-prcomment: 'pull request comment when all contributors has signed, defaults to **CLA Assistant Lite bot** All Contributors have signed the CLA.'
37 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/hpc/2c627d457004eff77a014205b3151ed48a6fa149/.gitmodules


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | ## Contributing
 2 | 
 3 | The best way to contribute to the MLCommons is to get involved with one of our many project communities. You find more information about getting involved with MLCommons [here](https://mlcommons.org/en/get-involved/#getting-started). 
 4 | 
 5 | Generally we encourage people to become a MLCommons member if they wish to contribute to MLCommons projects, but outside pull requests are very welcome too.
 6 | 
 7 | To get started contributing code, you or your organization needs to sign the MLCommons CLA found at the [MLC policies page](https://mlcommons.org/en/policies/). Once you or your organization has signed the corporate CLA, please fill out this [CLA sign up form](https://forms.gle/Ew1KkBVpyeJDuRw67) form to get your specific GitHub handle authorized so that you can start contributing code under the proper license.
 8 | 
 9 | MLCommons project work is tracked with issue trackers and pull requests. Modify the project in your own fork and issue a pull request once you want other developers to take a look at what you have done and discuss the proposed changes. Ensure that cla-bot and other checks pass for your Pull requests.
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MLPerf™ HPC reference implementations
 2 | 
 3 | This is a repository of reference implementations for the MLPerf HPC benchmarks.
 4 | 
 5 | General format should follow https://github.com/mlperf/training.
 6 | 
 7 | ## Rules
 8 | 
 9 | The MLPerf HPC rules are based on the MLPerf Training rules with
10 | some adjustments.
11 | 
12 | The MLPerf Training rules are available at [training\_rules](https://github.com/mlcommons/training_policies/blob/master/training_rules.adoc).
13 | 
14 | The MLPerf HPC specific rules are at [hpc\_training\_rules](https://github.com/mlcommons/training_policies/blob/master/hpc_training_rules.adoc).
15 | 
16 | ## Compliance
17 | The MLPerf logging package implements logging and compliance-checking utilities. This is available in hpc-1.0-branch of the MLPerf logging repository (https://github.com/mlcommons/logging/tree/hpc-1.0-branch).
18 | These work for the HPC v2.0 submissions as well.
19 | 
20 | To install and test compliance of your runs/submissions:
21 | 
22 | ```
23 | # Install the package into your python environment.
24 | # A development install (-e) is recommended for now so you can pull new updates.
25 | git clone -b hpc-1.0-branch https://github.com/mlcommons/logging mlperf-logging
26 | pip install [--user] -e mlperf-logging
27 | 
28 | # Test a full submission folder
29 | python3 -m mlperf_logging.package_checker <YOUR SUBMISSION_FOLDER> hpc 1.0.0
30 | ```
31 | 
32 | There is also a script that performs compliance checks and summarizes the results. From the mlperf-logging directory (https://github.com/mlcommons/logging), use
33 | ```
34 | ./scripts/verify_for_v1.0_hpc.sh
35 | ```
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/cosmoflow/.gitignore:
--------------------------------------------------------------------------------
1 | *__pycache__
2 | *.ipynb_checkpoints
3 | 


--------------------------------------------------------------------------------
/cosmoflow/LEGAL:
--------------------------------------------------------------------------------
1 | *** Copyright Notice ***
2 | 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)' Copyright (c) 2018, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
3 | If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Intellectual Property Office at  IPO@lbl.gov.
4 | NOTICE.  This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit other to do so.
5 | ****************************
6 | 


--------------------------------------------------------------------------------
/cosmoflow/README.md:
--------------------------------------------------------------------------------
 1 | # CosmoFlow TensorFlow Keras benchmark implementation
 2 | 
 3 | This is a an implementation of the
 4 | [CosmoFlow](https://arxiv.org/abs/1808.04728) 3D convolutional neural network
 5 | for benchmarking. It is written in TensorFlow with the Keras API and uses
 6 | [Horovod](https://github.com/horovod/horovod) for distributed training.
 7 | 
 8 | You can find the previous TensorFlow implementation which accompanied the CosmoFlow paper at
 9 | https://github.com/NERSC/CosmoFlow
10 | 
11 | ## Datasets
12 | 
13 | The dataset we use for this benchmark comes from simulations run by the
14 | ExaLearn group and hosted at NERSC. The following web portal describes the
15 | technical content of the dataset and provides links to the raw data.
16 | 
17 | https://portal.nersc.gov/project/m3363/
18 | 
19 | For this benchmark we currently use a preprocessed version of the dataset which
20 | generates crops of size (128, 128, 128, 4) and stores in TFRecord format.
21 | This preprocessing is done using the [prepare.py](prepare.py) script included
22 | in this package. We describe here how to get access to this processed dataset,
23 | but please refer to the ExaLearn web portal for additional technical details.
24 | 
25 | Globus is the current recommended way to transfer the dataset locally.
26 | There is a globus endpoint at:
27 | 
28 | https://app.globus.org/file-manager?origin_id=31647fba-a006-4322-ad3e-9a4f124db422
29 | 
30 | The contents are also available via HTTPS at:
31 | 
32 | https://portal.nersc.gov/project/dasrepo/cosmoflow-benchmark/
33 | 
34 | ### MLPerf HPC v1.0 preliminary dataset
35 | 
36 | Preprocessed TFRecord files are available in a 1.7TB tarball named
37 | `cosmoUniverse_2019_05_4parE_tf_v2.tar`. It contains subfolders for
38 | train/val/test file splits.
39 | 
40 | In this preparation, there are 524288 samples for training and 65536 samples for
41 | validation. The TFRecord files are written with gzip compression to reduce total
42 | storage size.
43 | 
44 | ### MLPerf HPC v0.7 dataset
45 | 
46 | The pre-processed dataset in TFRecord format is in the
47 | `cosmoUniverse_2019_05_4parE_tf` folder, which contains training and validation
48 | subfolders. There are 262144 samples for training and 65536 samples
49 | for validation/testing. The combined size of the dataset is 5.1 TB.
50 | 
51 | For getting started, there is also a small tarball (179MB) with 32 training
52 | samples and 32 validation samples, called `cosmoUniverse_2019_05_4parE_tf_small.tgz`.
53 | 
54 | ## Running the benchmark
55 | 
56 | Submission scripts are in `scripts`. YAML configuration files go in `configs`.
57 | 
58 | ### Running at NERSC
59 | 
60 | `sbatch -N 64 scripts/train_cori.sh`
61 | 


--------------------------------------------------------------------------------
/cosmoflow/builds/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nvcr.io/nvidia/tensorflow:22.04-tf2-py3
2 | 
3 | RUN python -m pip install --no-cache-dir -U pip
4 | 
5 | RUN pip install --no-cache-dir pandas wandb
6 | 
7 | # Install MLPerf-logging
8 | RUN pip install --no-cache-dir "git+https://github.com/mlcommons/logging.git"
9 | 


--------------------------------------------------------------------------------
/cosmoflow/builds/Dockerfile.cpu_mpich:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:18.04
 2 | WORKDIR /tmp
 3 | 
 4 | RUN apt-get update && \
 5 |     apt-get install --yes  \
 6 |     build-essential cmake git curl \
 7 |     gfortran-8 gcc-8 g++-8 \
 8 |     python3-dev \
 9 |     python3-pip \
10 |     wget less vim && \
11 |     apt-get clean all
12 | 
13 |     #gfortran gcc g++ \
14 | 
15 | RUN ln -s /usr/bin/python3 /usr/bin/python
16 | 
17 | RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
18 |     python get-pip.py && \
19 |     rm get-pip.py
20 | 
21 | ARG mpich=3.3.2
22 | ARG mpich_prefix=mpich-$mpich
23 | 
24 | ENV FC=gfortran-8
25 | ENV F77=gfortran-8
26 | ENV CC=gcc-8
27 | ENV CXX=g++-8
28 | 
29 | RUN wget https://www.mpich.org/static/downloads/$mpich/$mpich_prefix.tar.gz && \
30 |     tar xvzf $mpich_prefix.tar.gz                                           && \
31 |     cd $mpich_prefix                                                        && \
32 |     ./configure                                                             && \
33 |     make -j 4                                                               && \
34 |     make install                                                            && \
35 |     make clean                                                              && \
36 |     cd ..                                                                   && \
37 |     rm -rf $mpich_prefix
38 | 
39 | # Now need the python+TF dependencies
40 | RUN pip install \
41 |     ipython \
42 |     pandas \
43 |     pyyaml \
44 |     intel-tensorflow==1.15.2
45 |     #tensorflow==1.15.2
46 | 
47 | # Now need Horovod
48 | RUN HOROVOD_WITH_TENSORFLOW=1 pip install --no-cache-dir -v horovod
49 | 
50 | # Install MLPerf-logging
51 | RUN pip install --no-cache-dir "git+https://github.com/mlperf-hpc/logging.git@hpc-0.5.0"
52 | 
53 | RUN /sbin/ldconfig
54 | 


--------------------------------------------------------------------------------
/cosmoflow/configs/cosmo.yaml:
--------------------------------------------------------------------------------
 1 | output_dir: results/cosmo-002
 2 | 
 3 | mlperf:
 4 |     org: LBNL
 5 |     division: closed
 6 |     status: onprem
 7 |     platform: SUBMISSION_PLATFORM_PLACEHOLDER
 8 | 
 9 | data:
10 |     name: cosmo
11 |     data_dir: /global/cscratch1/sd/sfarrell/cosmoflow-benchmark/data/cosmoUniverse_2019_05_4parE_tf_v2
12 |     compression: GZIP
13 |     n_train: 524288
14 |     n_valid: 65536
15 |     sample_shape: [128, 128, 128, 4]
16 |     batch_size: 4
17 |     n_epochs: 128
18 |     shard: True
19 |     apply_log: True
20 |     prefetch: 4
21 | 
22 | model:
23 |     name: cosmoflow
24 |     input_shape: [128, 128, 128, 4]
25 |     kernel_size: 3
26 |     target_size: 4
27 |     conv_size: 32
28 |     fc1_size: 128
29 |     fc2_size: 64
30 |     hidden_activation: LeakyReLU
31 |     pooling_type: MaxPool3D
32 |     dropout: 0.5
33 | 
34 | optimizer:
35 |     name: SGD
36 |     momentum: 0.9
37 | 
38 | lr_schedule:
39 |     # Standard linear LR scaling configuration
40 |     base_lr: 0.001
41 |     scaling: linear
42 |     base_batch_size: 64
43 |     n_warmup_epochs: 4
44 | 
45 |     # Learning rate decay epochs and decay factors
46 |     decay_schedule:
47 |         32: 0.25
48 |         64: 0.125
49 | 
50 | train:
51 |     loss: mse
52 |     metrics: ['mean_absolute_error']
53 |     #early_stopping_patience: 8
54 | 
55 |     # Uncomment to stop at target quality
56 |     #target_mae: 0.124
57 | 


--------------------------------------------------------------------------------
/cosmoflow/configs/cosmo_dummy.yaml:
--------------------------------------------------------------------------------
 1 | output_dir: results/cosmo-dummy-000
 2 | 
 3 | data:
 4 |     name: dummy
 5 |     n_train: 256
 6 |     n_valid: 256
 7 |     sample_shape: [128, 128, 128, 4]
 8 |     target_shape: [4]
 9 |     batch_size: 4
10 |     n_epochs: 4
11 |     shard: True
12 | 
13 | model:
14 |     name: cosmoflow
15 |     input_shape: [128, 128, 128, 4]
16 |     target_size: 4
17 |     conv_size: 32
18 |     fc1_size: 128
19 |     fc2_size: 64
20 |     hidden_activation: LeakyReLU
21 |     pooling_type: MaxPool3D
22 |     dropout: 0.5
23 | 
24 | optimizer:
25 |     name: SGD
26 |     momentum: 0.9
27 | 
28 | lr_schedule:
29 |     # Standard linear LR scaling configuration, tested up to batch size 1024
30 |     base_lr: 0.001
31 |     scaling: linear
32 |     base_batch_size: 64
33 | 
34 |     # Alternate sqrt LR scaling which has worked well for batch size 512-1024.
35 |     #base_lr: 0.0025
36 |     #scaling: sqrt
37 |     #base_batch_size: 32
38 | 
39 |     n_warmup_epochs: 4
40 | 
41 |     # You may want to adjust these decay epochs depending on your batch size.
42 |     # E.g. if training batch size 64 you may want to decay at 16 and 32 epochs.
43 |     decay_schedule:
44 |         32: 0.25
45 |         64: 0.125
46 | 
47 | train:
48 |     loss: mse
49 |     metrics: ['mean_absolute_error']
50 |     early_stopping_patience: 8
51 | 


--------------------------------------------------------------------------------
/cosmoflow/configs/cosmo_v07.yaml:
--------------------------------------------------------------------------------
 1 | # This YAML file describes the configuration for the MLPerf HPC v0.7 reference.
 2 | 
 3 | output_dir: results/cosmo-000
 4 | 
 5 | mlperf:
 6 |     org: LBNL
 7 |     division: closed
 8 |     status: onprem
 9 |     platform: SUBMISSION_PLATFORM_PLACEHOLDER
10 | 
11 | data:
12 |     name: cosmo
13 |     data_dir: /global/cscratch1/sd/sfarrell/cosmoflow-benchmark/data/cosmoUniverse_2019_05_4parE_tf
14 |     n_train: 262144
15 |     n_valid: 65536
16 |     sample_shape: [128, 128, 128, 4]
17 |     batch_size: 4
18 |     n_epochs: 128
19 |     shard: True
20 |     apply_log: True
21 |     prefetch: 4
22 | 
23 | model:
24 |     name: cosmoflow
25 |     input_shape: [128, 128, 128, 4]
26 |     target_size: 4
27 |     conv_size: 32
28 |     fc1_size: 128
29 |     fc2_size: 64
30 |     hidden_activation: LeakyReLU
31 |     pooling_type: MaxPool3D
32 |     dropout: 0.5
33 | 
34 | optimizer:
35 |     name: SGD
36 |     momentum: 0.9
37 | 
38 | lr_schedule:
39 |     # Standard linear LR scaling configuration, tested up to batch size 1024
40 |     base_lr: 0.001
41 |     scaling: linear
42 |     base_batch_size: 64
43 | 
44 |     # Alternate sqrt LR scaling which has worked well for batch size 512-1024.
45 |     #base_lr: 0.0025
46 |     #scaling: sqrt
47 |     #base_batch_size: 32
48 | 
49 |     n_warmup_epochs: 4
50 | 
51 |     # You may want to adjust these decay epochs depending on your batch size.
52 |     # E.g. if training batch size 64 you may want to decay at 16 and 32 epochs.
53 |     decay_schedule:
54 |         32: 0.25
55 |         64: 0.125
56 | 
57 | train:
58 |     loss: mse
59 |     metrics: ['mean_absolute_error']
60 | 
61 |     # Uncomment to stop at target quality
62 |     #target_mae: 0.124
63 | 


--------------------------------------------------------------------------------
/cosmoflow/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)'
 2 | # Copyright (c) 2018, The Regents of the University of California,
 3 | # through Lawrence Berkeley National Laboratory (subject to receipt of any
 4 | # required approvals from the U.S. Dept. of Energy).  All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # If you have questions about your rights to use or distribute this software,
19 | # please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.
20 | #
21 | # NOTICE.  This Software was developed under funding from the U.S. Department of
22 | # Energy and the U.S. Government consequently retains certain rights. As such,
23 | # the U.S. Government has been granted for itself and others acting on its
24 | # behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software
25 | # to reproduce, distribute copies to the public, prepare derivative works, and
26 | # perform publicly and display publicly, and to permit other to do so.
27 | 
28 | """
29 | Keras dataset specifications.
30 | """
31 | 
32 | 
33 | def get_datasets(name, **data_args):
34 |     if name == 'dummy':
35 |         from .dummy import get_datasets
36 |         return get_datasets(**data_args)
37 |     elif name == 'cosmo':
38 |         from .cosmo import get_datasets
39 |         return get_datasets(**data_args)
40 |     else:
41 |         raise ValueError('Dataset %s unknown' % name)
42 | 


--------------------------------------------------------------------------------
/cosmoflow/data/dummy.py:
--------------------------------------------------------------------------------
 1 | # 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)'
 2 | # Copyright (c) 2018, The Regents of the University of California,
 3 | # through Lawrence Berkeley National Laboratory (subject to receipt of any
 4 | # required approvals from the U.S. Dept. of Energy).  All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # If you have questions about your rights to use or distribute this software,
19 | # please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.
20 | #
21 | # NOTICE.  This Software was developed under funding from the U.S. Department of
22 | # Energy and the U.S. Government consequently retains certain rights. As such,
23 | # the U.S. Government has been granted for itself and others acting on its
24 | # behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software
25 | # to reproduce, distribute copies to the public, prepare derivative works, and
26 | # perform publicly and display publicly, and to permit other to do so.
27 | 
28 | """
29 | Random dummy dataset specification.
30 | """
31 | 
32 | # Externals
33 | import tensorflow as tf
34 | 
35 | 
36 | def construct_dataset(sample_shape, target_shape,
37 |                        batch_size=1, n_samples=32):
38 |     x = tf.random.uniform([n_samples]+sample_shape)
39 |     y = tf.random.uniform([n_samples]+target_shape)
40 |     data = tf.data.Dataset.from_tensor_slices((x, y))
41 |     return data.repeat().batch(batch_size).prefetch(4)
42 | 
43 | 
44 | def get_datasets(sample_shape, target_shape, batch_size,
45 |                  n_train, n_valid, dist, n_epochs=None, shard=False):
46 |     train_dataset = construct_dataset(sample_shape, target_shape, batch_size=batch_size)
47 |     valid_dataset = None
48 |     if n_valid > 0:
49 |         valid_dataset = construct_dataset(sample_shape, target_shape, batch_size=batch_size)
50 |     n_train_steps = n_train  // batch_size
51 |     n_valid_steps = n_valid  // batch_size
52 |     if shard:
53 |         n_train_steps = n_train_steps // dist.size
54 |         n_valid_steps = n_valid_steps // dist.size
55 | 
56 |     return dict(train_dataset=train_dataset, valid_dataset=valid_dataset,
57 |                 n_train=n_train, n_valid=n_valid, n_train_steps=n_train_steps,
58 |                 n_valid_steps=n_valid_steps)
59 | 


--------------------------------------------------------------------------------
/cosmoflow/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *.out
2 | 


--------------------------------------------------------------------------------
/cosmoflow/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)'
 2 | # Copyright (c) 2018, The Regents of the University of California,
 3 | # through Lawrence Berkeley National Laboratory (subject to receipt of any
 4 | # required approvals from the U.S. Dept. of Energy).  All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # If you have questions about your rights to use or distribute this software,
19 | # please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.
20 | #
21 | # NOTICE.  This Software was developed under funding from the U.S. Department of
22 | # Energy and the U.S. Government consequently retains certain rights. As such,
23 | # the U.S. Government has been granted for itself and others acting on its
24 | # behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software
25 | # to reproduce, distribute copies to the public, prepare derivative works, and
26 | # perform publicly and display publicly, and to permit other to do so.
27 | 
28 | """
29 | Keras example model factory functions.
30 | """
31 | 
32 | import importlib
33 | 
34 | def get_model(name, **model_args):
35 |     """Factory function for constructing a model by name with args"""
36 |     module = importlib.import_module('.' + name, 'models')
37 |     return module.build_model(**model_args)
38 | 


--------------------------------------------------------------------------------
/cosmoflow/models/layers.py:
--------------------------------------------------------------------------------
 1 | # 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)'
 2 | # Copyright (c) 2018, The Regents of the University of California,
 3 | # through Lawrence Berkeley National Laboratory (subject to receipt of any
 4 | # required approvals from the U.S. Dept. of Energy).  All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # If you have questions about your rights to use or distribute this software,
19 | # please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.
20 | #
21 | # NOTICE.  This Software was developed under funding from the U.S. Department of
22 | # Energy and the U.S. Government consequently retains certain rights. As such,
23 | # the U.S. Government has been granted for itself and others acting on its
24 | # behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software
25 | # to reproduce, distribute copies to the public, prepare derivative works, and
26 | # perform publicly and display publicly, and to permit other to do so.
27 | 
28 | """Custom layer functionality"""
29 | 
30 | def scale_1p2(x):
31 |     """Simple scaling function for Lambda layers.
32 | 
33 |     Just multiplies the input by 1.2. Useful for extending the coverage of a
34 |     tanh activation for targets in the range [-1,1].
35 |     """
36 |     return x*1.2
37 | 


--------------------------------------------------------------------------------
/cosmoflow/scripts/setup_cgpu.sh:
--------------------------------------------------------------------------------
1 | # Source this script to setup the runtime environment on cori
2 | module load cgpu tensorflow/2.5.0-gpu
3 | module list
4 | 


--------------------------------------------------------------------------------
/cosmoflow/scripts/train_cgpu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -C gpu -c 10
 3 | #SBATCH --ntasks-per-node 8
 4 | #SBATCH --gpus-per-task 1
 5 | #SBATCH --exclusive
 6 | #SBATCH -t 4:00:00
 7 | #SBATCH -J train-cgpu
 8 | #SBATCH -o logs/%x-%j.out
 9 | 
10 | . scripts/setup_cgpu.sh
11 | #export HOROVOD_TIMELINE=./timeline.json
12 | 
13 | # Slurm job variables
14 | env | grep SLURM_JOB
15 | 
16 | set -x
17 | srun -l -u python train.py -d --rank-gpu $@
18 | 


--------------------------------------------------------------------------------
/cosmoflow/scripts/train_cori_shifter.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -C knl
 3 | #SBATCH -q debug
 4 | #SBATCH -t 30
 5 | #SBATCH -J train-cori
 6 | #SBATCH --image docker:sfarrell/cosmoflow-cpu-mpich:latest
 7 | #SBATCH -o logs/%x-%j.out
 8 | 
 9 | export OMP_NUM_THREADS=32
10 | export KMP_BLOCKTIME=1
11 | export KMP_AFFINITY="granularity=fine,compact,1,0"
12 | export HDF5_USE_FILE_LOCKING=FALSE
13 | 
14 | set -x
15 | srun -l -u shifter python train.py -d $@
16 | 


--------------------------------------------------------------------------------
/cosmoflow/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | """
3 | 


--------------------------------------------------------------------------------
/cosmoflow/utils/argparse.py:
--------------------------------------------------------------------------------
 1 | # 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)'
 2 | # Copyright (c) 2018, The Regents of the University of California,
 3 | # through Lawrence Berkeley National Laboratory (subject to receipt of any
 4 | # required approvals from the U.S. Dept. of Energy).  All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # If you have questions about your rights to use or distribute this software,
19 | # please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.
20 | #
21 | # NOTICE.  This Software was developed under funding from the U.S. Department of
22 | # Energy and the U.S. Government consequently retains certain rights. As such,
23 | # the U.S. Government has been granted for itself and others acting on its
24 | # behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software
25 | # to reproduce, distribute copies to the public, prepare derivative works, and
26 | # perform publicly and display publicly, and to permit other to do so.
27 | 
28 | """Utility code for argparse"""
29 | 
30 | import argparse
31 | import yaml
32 | 
33 | 
34 | class ReadYaml(argparse.Action):
35 |     def __call__(self, parser, namespace, values, option_string=None):
36 |         my_dict = yaml.load(values, Loader=yaml.FullLoader)
37 |         setattr(namespace, self.dest, my_dict)
38 | 


--------------------------------------------------------------------------------
/cosmoflow/utils/checkpoints.py:
--------------------------------------------------------------------------------
 1 | # 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)'
 2 | # Copyright (c) 2018, The Regents of the University of California,
 3 | # through Lawrence Berkeley National Laboratory (subject to receipt of any
 4 | # required approvals from the U.S. Dept. of Energy).  All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # If you have questions about your rights to use or distribute this software,
19 | # please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.
20 | #
21 | # NOTICE.  This Software was developed under funding from the U.S. Department of
22 | # Energy and the U.S. Government consequently retains certain rights. As such,
23 | # the U.S. Government has been granted for itself and others acting on its
24 | # behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software
25 | # to reproduce, distribute copies to the public, prepare derivative works, and
26 | # perform publicly and display publicly, and to permit other to do so.
27 | 
28 | """Utility code for handling checkpoint loading"""
29 | 
30 | # System imports
31 | import os
32 | import logging
33 | 
34 | # External imports
35 | import tensorflow as tf
36 | import horovod.tensorflow.keras as hvd
37 | 
38 | def reload_last_checkpoint(checkpoint_format, n_epochs, distributed):
39 |     """Finds and loads the last checkpoint matching the provided pattern"""
40 |     # Count down from n_epochs to 0 to find the last epoch.
41 |     # Note that keras names checkpoint files with epoch number starting from 1.
42 |     # So the matched number corresponds to the new initial epoch.
43 |     for epoch in range(n_epochs, 0, -1):
44 |         checkpoint = checkpoint_format.format(epoch=epoch)
45 |         if os.path.exists(checkpoint):
46 |             logging.info('Found last checkpoint at %s', checkpoint)
47 |             # Use special reload to prepare the DistributedOptimizer
48 |             if distributed:
49 |                 model = hvd.load_model(checkpoint)
50 |             else:
51 |                 model = tf.keras.models.load_model(checkpoint)
52 |             return epoch, model
53 |     raise Exception('Unable to find a checkpoint file at %s' % checkpoint_format)
54 | 


--------------------------------------------------------------------------------
/cosmoflow/utils/distributed.py:
--------------------------------------------------------------------------------
 1 | # 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)'
 2 | # Copyright (c) 2018, The Regents of the University of California,
 3 | # through Lawrence Berkeley National Laboratory (subject to receipt of any
 4 | # required approvals from the U.S. Dept. of Energy).  All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # If you have questions about your rights to use or distribute this software,
19 | # please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.
20 | #
21 | # NOTICE.  This Software was developed under funding from the U.S. Department of
22 | # Energy and the U.S. Government consequently retains certain rights. As such,
23 | # the U.S. Government has been granted for itself and others acting on its
24 | # behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software
25 | # to reproduce, distribute copies to the public, prepare derivative works, and
26 | # perform publicly and display publicly, and to permit other to do so.
27 | 
28 | """Utilties for distributed processing"""
29 | 
30 | import horovod.tensorflow.keras as hvd
31 | 
32 | 
33 | def rank():
34 |     try:
35 |         return hvd.rank()
36 |     except ValueError:
37 |         return 0
38 | 
39 | 
40 | def barrier():
41 |     try:
42 |         hvd.allreduce([], name='Barrier')
43 |     except ValueError:
44 |         pass
45 | 


--------------------------------------------------------------------------------
/cosmoflow/utils/mlperf_logging.py:
--------------------------------------------------------------------------------
 1 | # 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)'
 2 | # Copyright (c) 2018, The Regents of the University of California,
 3 | # through Lawrence Berkeley National Laboratory (subject to receipt of any
 4 | # required approvals from the U.S. Dept. of Energy).  All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # If you have questions about your rights to use or distribute this software,
19 | # please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.
20 | #
21 | # NOTICE.  This Software was developed under funding from the U.S. Department of
22 | # Energy and the U.S. Government consequently retains certain rights. As such,
23 | # the U.S. Government has been granted for itself and others acting on its
24 | # behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software
25 | # to reproduce, distribute copies to the public, prepare derivative works, and
26 | # perform publicly and display publicly, and to permit other to do so.
27 | 
28 | """
29 | Utilities for MLPerf logging.
30 | Depends on the mlperf_logging package at
31 | https://github.com/mlperf/logging
32 | """
33 | 
34 | # System
35 | import os
36 | 
37 | # Externals
38 | try:
39 |     from mlperf_logging import mllog
40 |     have_mlperf_logging = True
41 | except ImportError:
42 |     have_mlperf_logging = False
43 | 
44 | 
45 | def configure_mllogger(log_dir):
46 |     """Setup the MLPerf logger"""
47 |     if not have_mlperf_logging:
48 |         raise RuntimeError('mlperf_logging package unavailable')
49 |     mllog.config(filename=os.path.join(log_dir, 'mlperf.log'))
50 |     return mllog.get_mllogger()
51 | 
52 | 
53 | def log_submission_info(benchmark='cosmoflow',
54 |                         org='UNDEFINED',
55 |                         division='UNDEFINED',
56 |                         status='UNDEFINED',
57 |                         platform='UNDEFINED'):
58 |     """Log general MLPerf submission details from config"""
59 |     mllogger = mllog.get_mllogger()
60 |     mllogger.event(key=mllog.constants.SUBMISSION_BENCHMARK, value=benchmark)
61 |     mllogger.event(key=mllog.constants.SUBMISSION_ORG, value=org)
62 |     mllogger.event(key=mllog.constants.SUBMISSION_DIVISION, value=division)
63 |     mllogger.event(key=mllog.constants.SUBMISSION_STATUS, value=status)
64 |     mllogger.event(key=mllog.constants.SUBMISSION_PLATFORM, value=platform)
65 | 


--------------------------------------------------------------------------------
/cosmoflow/utils/staging.py:
--------------------------------------------------------------------------------
 1 | # 'Regression of 3D Sky Map to Cosmological Parameters (CosmoFlow)'
 2 | # Copyright (c) 2018, The Regents of the University of California,
 3 | # through Lawrence Berkeley National Laboratory (subject to receipt of any
 4 | # required approvals from the U.S. Dept. of Energy).  All rights reserved.
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # If you have questions about your rights to use or distribute this software,
19 | # please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov.
20 | #
21 | # NOTICE.  This Software was developed under funding from the U.S. Department of
22 | # Energy and the U.S. Government consequently retains certain rights. As such,
23 | # the U.S. Government has been granted for itself and others acting on its
24 | # behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software
25 | # to reproduce, distribute copies to the public, prepare derivative works, and
26 | # perform publicly and display publicly, and to permit other to do so.
27 | 
28 | """Utility code for staging data files into local storage"""
29 | 
30 | # System imports
31 | import os
32 | import shutil
33 | import logging
34 | 
35 | 
36 | def stage_files(input_dir, output_dir, n_files, rank=0, size=1):
37 |     """Stage specified number of files to directory.
38 | 
39 |     This function works in a distributed fashion. Each rank will only stage
40 |     its chunk of the file list.
41 |     """
42 |     if rank == 0:
43 |         logging.info(f'Staging {n_files} files to {output_dir}')
44 | 
45 |     # Find all the files in the input directory
46 |     files = sorted(os.listdir(input_dir))
47 | 
48 |     # Make sure there are at least enough files available
49 |     if len(files) < n_files:
50 |         raise ValueError(f'Cannot stage {n_files} files; only {len(files)} available')
51 | 
52 |     # Take the specified number of files
53 |     files = files[:n_files]
54 | 
55 |     # Copy my chunk into the output directory
56 |     os.makedirs(output_dir, exist_ok=True)
57 |     for f in files[rank::size]:
58 |         logging.debug(f'Staging file {f}')
59 |         shutil.copyfile(os.path.join(input_dir, f),
60 |                         os.path.join(output_dir, f))
61 |     logging.debug('Data staging completed')
62 | 


--------------------------------------------------------------------------------
/deepcam/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/deepcam/compliance/1.0.0/closed_deepcam_adam.yml:
--------------------------------------------------------------------------------
 1 | # General Settings
 2 | - KEY:
 3 |     NAME:  seed
 4 |     REQ:   EXACTLY_ONE
 5 |     CHECK: " v['value'] > 0"
 6 | 
 7 | - KEY:
 8 |     NAME:  global_batch_size
 9 |     REQ:   EXACTLY_ONE
10 |     CHECK: " v['value'] > 0"
11 |     
12 | - KEY:
13 |     NAME:  num_workers
14 |     REQ:   EXACTLY_ONE
15 |     CHECK: " v['value'] > 0"
16 | 
17 | - KEY:
18 |     NAME:  batchnorm_group_size
19 |     REQ:   EXACTLY_ONE
20 |     CHECK: " v['value'] > 0"
21 | 
22 | - KEY:
23 |     NAME:  gradient_accumulation_frequency
24 |     REQ:   EXACTLY_ONE
25 |     CHECK: " v['value'] > 0"
26 | 
27 | 
28 | # Optimizer Parameters
29 | - KEY:
30 |     NAME:  opt_name
31 |     REQ:   EXACTLY_ONE
32 |     CHECK: " v['value'] in ['Adam', 'AdamW']"
33 |     POST:  " s['opt_name'] = v['value'] "
34 | 
35 | - KEY:
36 |     NAME:  opt_lr
37 |     REQ:   EXACTLY_ONE
38 |     CHECK: " v['value'] >0."
39 | 
40 | - KEY:
41 |     NAME:  opt_betas
42 |     REQ:   EXACTLY_ONE
43 |     CHECK: " len(v['value']) == 2"
44 | 
45 | - KEY:
46 |     NAME:  opt_eps
47 |     REQ:   EXACTLY_ONE
48 |     CHECK: " math.isclose(v['value'], 1e-6)"
49 | 
50 | 
51 | # Scheduler Parameters
52 | - KEY:
53 |     NAME:  scheduler_type
54 |     REQ:   EXACTLY_ONE
55 |     CHECK: " v['value'] in ['multistep', 'cosine_annealing']"
56 |     POST:  " s['scheduler_type'] = v['value'] "
57 | 
58 | - KEY:
59 |     NAME:  scheduler_milestones
60 |     REQ:   AT_LEAST_ONE_OR(scheduler_t_max)
61 |     CHECK: " len(v['value']) >= 0 if s['scheduler_type'] == 'multistep' else True "
62 | 
63 | - KEY:
64 |     NAME:  scheduler_decay_rate
65 |     REQ:   AT_LEAST_ONE_OR(scheduler_t_max)
66 |     CHECK: " v['value'] <= 1. if s['scheduler_type'] == 'multistep' else True "
67 | 
68 | - KEY:
69 |     NAME:  scheduler_t_max
70 |     REQ:   AT_LEAST_ONE_OR(scheduler_milestones)
71 |     CHECK: " v['value'] >= 1. if s['scheduler_type'] == 'cosine_annealing' else True "
72 | 
73 | - kEY:
74 |     NAME:  scheduler_eta_min
75 |     REQ:   AT_LEAST_ONE_OR(scheduler_milestones)
76 |     CHECK: " v['value'] >= 0. if s['scheduler_type'] == 'cosine_annealing' else True "
77 | 
78 | - KEY:
79 |     NAME:  scheduler_lr_warmup_steps
80 |     REQ:   EXACTLY_ONE
81 |     CHECK: " v['value'] >= 0 "
82 | 
83 | - KEY:
84 |     NAME:  scheduler_lr_warmup_factor
85 |     REQ:   EXACTLY_ONE
86 |     CHECK: " v['value'] >= 1. "
87 | 
88 | # Dataset Properties
89 | - KEY:
90 |     NAME:  train_samples
91 |     REQ:   EXACTLY_ONE
92 |     CHECK: " v['value'] == 121266"
93 | 
94 | - KEY:
95 |     NAME:  eval_samples
96 |     REQ:   EXACTLY_ONE
97 |     CHECK: " v['value'] == 15158"
98 | 
99 | 


--------------------------------------------------------------------------------
/deepcam/compliance/1.0.0/closed_deepcam_lamb.yml:
--------------------------------------------------------------------------------
  1 | # General Settings
  2 | - KEY:
  3 |     NAME:  seed
  4 |     REQ:   EXACTLY_ONE
  5 |     CHECK: " v['value'] > 0"
  6 | 
  7 | - KEY:
  8 |     NAME:  global_batch_size
  9 |     REQ:   EXACTLY_ONE
 10 |     CHECK: " v['value'] > 0"
 11 |     
 12 | - KEY:
 13 |     NAME:  num_workers
 14 |     REQ:   EXACTLY_ONE
 15 |     CHECK: " v['value'] > 0"
 16 | 
 17 | - KEY:
 18 |     NAME:  batchnorm_group_size
 19 |     REQ:   EXACTLY_ONE
 20 |     CHECK: " v['value'] > 0"
 21 | 
 22 | - KEY:
 23 |     NAME:  gradient_accumulation_frequency
 24 |     REQ:   EXACTLY_ONE
 25 |     CHECK: " v['value'] > 0"
 26 | 
 27 | # Optimizer Parameters
 28 | - KEY:
 29 |     NAME:  opt_name
 30 |     REQ:   EXACTLY_ONE
 31 |     CHECK: " v['value'] == 'LAMB'"
 32 | 
 33 | - KEY:
 34 |     NAME:  opt_lr
 35 |     REQ:   EXACTLY_ONE
 36 |     CHECK: " v['value'] >0."
 37 | 
 38 | - KEY:
 39 |     NAME:  opt_betas
 40 |     REQ:   EXACTLY_ONE
 41 |     CHECK: " len(v['value']) == 2"
 42 | 
 43 | - KEY:
 44 |     NAME:  opt_eps
 45 |     REQ:   EXACTLY_ONE
 46 |     CHECK: " math.isclose(v['value'], 1e-6)"
 47 | 
 48 | - KEY:
 49 |     NAME:  opt_bias_correction
 50 |     REQ:   EXACTLY_ONE
 51 |     CHECK: " v['value'] "
 52 | 
 53 | - KEY:
 54 |     NAME:  opt_grad_averaging
 55 |     REQ:   EXACTLY_ONE
 56 |     CHECK: " v['value'] "
 57 | 
 58 | - KEY:
 59 |     NAME:  opt_max_grad_norm
 60 |     REQ:   EXACTLY_ONE
 61 |     CHECK: " v['value'] == 1."
 62 | 
 63 | 
 64 | # Scheduler Parameters
 65 | - KEY:
 66 |     NAME:  scheduler_type
 67 |     REQ:   EXACTLY_ONE
 68 |     CHECK: " v['value'] in ['multistep', 'cosine_annealing']"
 69 |     POST:  " s['scheduler_type'] = v['value'] "
 70 | 
 71 | - KEY:
 72 |     NAME:  scheduler_milestones
 73 |     REQ:   AT_LEAST_ONE_OR(scheduler_t_max)
 74 |     CHECK: " len(v['value']) >= 0 if s['scheduler_type'] == 'multistep' else True "
 75 | 
 76 | - KEY:
 77 |     NAME:  scheduler_decay_rate
 78 |     REQ:   AT_LEAST_ONE_OR(scheduler_t_max)
 79 |     CHECK: " v['value'] <= 1. if s['scheduler_type'] == 'multistep' else True "
 80 | 
 81 | - KEY:
 82 |     NAME:  scheduler_t_max
 83 |     REQ:   AT_LEAST_ONE_OR(scheduler_milestones)
 84 |     CHECK: " v['value'] >= 1. if s['scheduler_type'] == 'cosine_annealing' else True "
 85 | 
 86 | - kEY:
 87 |     NAME:  scheduler_eta_min
 88 |     REQ:   AT_LEAST_ONE_OR(scheduler_milestones)
 89 |     CHECK: " v['value'] >= 0. if s['scheduler_type'] == 'cosine_annealing' else True "
 90 | 
 91 | - KEY:
 92 |     NAME:  scheduler_lr_warmup_steps
 93 |     REQ:   EXACTLY_ONE
 94 |     CHECK: " v['value'] >= 0 "
 95 | 
 96 | - KEY:
 97 |     NAME:  scheduler_lr_warmup_factor
 98 |     REQ:   EXACTLY_ONE
 99 |     CHECK: " v['value'] >= 1. "
100 | 
101 | # Dataset Properties
102 | - KEY:
103 |     NAME:  train_samples
104 |     REQ:   EXACTLY_ONE
105 |     CHECK: " v['value'] == 121266"
106 | 
107 | - KEY:
108 |     NAME:  eval_samples
109 |     REQ:   EXACTLY_ONE
110 |     CHECK: " v['value'] == 15158"
111 | 
112 | 


--------------------------------------------------------------------------------
/deepcam/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | # The MIT License (MIT)
 2 | #
 3 | # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | # this software and associated documentation files (the "Software"), to deal in
 7 | # the Software without restriction, including without limitation the rights to
 8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 
22 | FROM nvcr.io/nvidia/pytorch:21.12-py3
23 | 
24 | # some requirements
25 | RUN pip install h5py==3.2.1
26 | 
27 | #install mpi4py
28 | RUN pip install mpi4py==3.0.3
29 | 
30 | #install mlperf logging
31 | RUN pip install "git+https://github.com/mlperf/logging.git@501bbde47f005d67c6357da6e5c1931eab339f8e"
32 | 
33 | #copy additional stuff
34 | COPY src/deepCam /opt/deepCam
35 | COPY src/utils /opt/utils
36 | 
37 | #create additional folders for mapping data in
38 | RUN mkdir -p /data
39 | 


--------------------------------------------------------------------------------
/deepcam/docker/build_docker.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # The MIT License (MIT)
 4 | #
 5 | # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
 6 | #
 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
 8 | # this software and associated documentation files (the "Software"), to deal in
 9 | # the Software without restriction, including without limitation the rights to
10 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
11 | # the Software, and to permit persons to whom the Software is furnished to do so,
12 | # subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
19 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
20 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | #we need to step out to expand the build context
25 | cd ..
26 | 
27 | #training container
28 | docker build -t mlperf-deepcam:latest -f docker/Dockerfile .
29 | 


--------------------------------------------------------------------------------
/deepcam/requirements.txt:
--------------------------------------------------------------------------------
1 | apex==0.1
2 | torch=1.8.1
3 | h5py==3.2.1
4 | mpi4py==3.0.3
5 | warmup-scheduler @ git+https://github.com/ildoonet/pytorch-gradual-warmup-lr.git@6b5e8953a80aef5b324104dc0c2e9b8c34d622bd
6 | mlperf-logging @ git+https://github.com/mlperf/logging.git@501bbde47f005d67c6357da6e5c1931eab339f8e
7 | 


--------------------------------------------------------------------------------
/deepcam/src/deepCam/architecture/__init__.py:
--------------------------------------------------------------------------------
 1 | # The MIT License (MIT)
 2 | #
 3 | # Copyright (c) 2018 Pyjcsx
 4 | # Modifications Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
 7 | # this software and associated documentation files (the "Software"), to deal in
 8 | # the Software without restriction, including without limitation the rights to
 9 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
10 | # the Software, and to permit persons to whom the Software is furnished to do so,
11 | # subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
18 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
19 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
20 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/deepcam/src/deepCam/driver/__init__.py:
--------------------------------------------------------------------------------
 1 | # The MIT License (MIT)
 2 | #
 3 | # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | # this software and associated documentation files (the "Software"), to deal in
 7 | # the Software without restriction, including without limitation the rights to
 8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.   
21 | 
22 | from .trainer import train_epoch
23 | from .validation import validate
24 | 


--------------------------------------------------------------------------------
/deepcam/src/deepCam/run_scripts/run_training.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # The MIT License (MIT)
 4 | #
 5 | # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
 6 | #
 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
 8 | # this software and associated documentation files (the "Software"), to deal in
 9 | # the Software without restriction, including without limitation the rights to
10 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
11 | # the Software, and to permit persons to whom the Software is furnished to do so,
12 | # subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
19 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
20 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | # parameters
25 | data_dir=""
26 | output_dir=""
27 | run_tag="test_run"
28 | local_batch_size=2
29 | 
30 | python ./train.py \
31 |        --wireup_method "dummy" \
32 |        --run_tag ${run_tag} \
33 |        --data_dir_prefix ${data_dir} \
34 |        --output_dir ${output_dir} \
35 |        --model_prefix "segmentation" \
36 |        --optimizer "LAMB" \
37 |        --adam_eps 1e-6 \
38 |        --start_lr 0.0055 \
39 |        --lr_schedule type="multistep",milestones="800",decay_rate="0.1" \
40 |        --lr_warmup_steps 400 \
41 |        --lr_warmup_factor 1. \
42 |        --weight_decay 1e-2 \
43 |        --logging_frequency 10 \
44 |        --save_frequency 0 \
45 |        --max_epochs 200 \
46 |        --max_inter_threads 4 \
47 |        --seed $(date +%s) \
48 |        --batchnorm_group_size 1 \
49 |        --local_batch_size ${local_batch_size}
50 | 


--------------------------------------------------------------------------------
/deepcam/src/deepCam/run_scripts/run_training_nranks1024.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # The MIT License (MIT)
 4 | #
 5 | # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
 6 | #
 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
 8 | # this software and associated documentation files (the "Software"), to deal in
 9 | # the Software without restriction, including without limitation the rights to
10 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
11 | # the Software, and to permit persons to whom the Software is furnished to do so,
12 | # subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
19 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
20 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
21 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 
24 | #ranks per node
25 | totalranks=1024
26 | local_batch_size=2
27 | data_dir=""
28 | output_dir=""
29 | run_tag="test_run_nranks-1024"
30 | 
31 | mpirun -np ${totalranks} \
32 |        python ./train.py \
33 |        --wireup_method "nccl-openmpi" \
34 |        --run_tag ${run_tag} \
35 |        --data_dir_prefix ${data_dir} \
36 |        --output_dir ${output_dir} \
37 |        --model_prefix "segmentation" \
38 |        --optimizer "LAMB" \
39 |        --adam_eps 1e-6 \
40 |        --start_lr 0.0055 \
41 |        --lr_schedule type="multistep",milestones="800",decay_rate="0.1" \
42 |        --lr_warmup_steps 400 \
43 |        --lr_warmup_factor 1. \
44 |        --weight_decay 1e-2 \
45 |        --logging_frequency 10 \
46 |        --save_frequency 0 \
47 |        --max_epochs 200 \
48 |        --max_inter_threads 4 \
49 |        --seed $(date +%s) \
50 |        --batchnorm_group_size 1 \
51 |        --local_batch_size ${local_batch_size}
52 | 


--------------------------------------------------------------------------------
/deepcam/src/deepCam/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/hpc/2c627d457004eff77a014205b3151ed48a6fa149/deepcam/src/deepCam/utils/__init__.py


--------------------------------------------------------------------------------
/deepcam/src/deepCam/utils/metric.py:
--------------------------------------------------------------------------------
 1 | # The MIT License (MIT)
 2 | #
 3 | # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | # this software and associated documentation files (the "Software"), to deal in
 7 | # the Software without restriction, including without limitation the rights to
 8 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | # the Software, and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.   
21 | 
22 | import torch
23 | from torch import Tensor
24 | 
25 | 
26 | def compute_score(prediction: Tensor, gt: Tensor, num_classes: int) -> Tensor:
27 |     # flatten input
28 |     batch_size = gt.shape[0]
29 |     tpt = torch.zeros((batch_size, num_classes), dtype=torch.long, device=prediction.device)
30 |     fpt = torch.zeros((batch_size, num_classes), dtype=torch.long, device=prediction.device)
31 |     fnt = torch.zeros((batch_size, num_classes), dtype=torch.long, device=prediction.device)
32 |     
33 |     # create views:
34 |     pv = prediction.view(batch_size, -1)
35 |     gtv = gt.view(batch_size, -1)
36 |     
37 |     # compute per class accuracy
38 |     for j in range(0, num_classes):
39 |         # compute helper tensors
40 |         pv_eq_j = (pv == j)
41 |         pv_ne_j = (pv != j)
42 |         gtv_eq_j = (gtv == j)
43 |         gtv_ne_j = (gtv != j)
44 |         
45 |         #true positve: prediction and gt agree and gt is of class j: (p == j) & (g == j)
46 |         tpt[:, j] = torch.sum(torch.logical_and(pv_eq_j, gtv_eq_j), dim=1)
47 |         
48 |         #false positive: prediction is of class j and gt not of class j: (p == j) & (g != j)
49 |         fpt[:, j] = torch.sum(torch.logical_and(pv_eq_j, gtv_ne_j), dim=1)
50 | 
51 |         #false negative: prediction is not of class j and gt is of class j: (p != j) & (g == j)
52 |         fnt[:, j] = torch.sum(torch.logical_and(pv_ne_j, gtv_eq_j), dim=1)
53 |         
54 |     # compute IoU per batch
55 |     uniont = (tpt + fpt + fnt) * num_classes
56 |     iout = torch.sum(torch.nan_to_num(tpt.float() / uniont.float(), nan=1./float(num_classes)), dim=1)
57 |         
58 |     # average over batch dim
59 |     iout = torch.mean(iout)
60 |     
61 |     return iout
62 | 


--------------------------------------------------------------------------------
/deepcam/src/deepCam/utils/types.py:
--------------------------------------------------------------------------------
 1 | # Ported from torch_optimizer 0.0.1a15: https://pypi.org/project/torch-optimizer/
 2 | 
 3 | from typing import Any, Callable, Dict, Iterable, Optional, Tuple, Union
 4 | 
 5 | from torch import Tensor
 6 | 
 7 | Params = Union[Iterable[Tensor], Iterable[Dict[str, Any]]]
 8 | 
 9 | LossClosure = Callable[[], float]
10 | OptLossClosure = Optional[LossClosure]
11 | Betas2 = Tuple[float, float]
12 | State = Dict[str, Any]
13 | OptFloat = Optional[float]
14 | Nus2 = Tuple[float, float]
15 | 


--------------------------------------------------------------------------------
/deepcam/src/utils/run_stage.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # data dir
 4 | mode=$1
 5 | src_dir=$2
 6 | dest_dir=$3
 7 | 
 8 | # step into source dir
 9 | cd $(dirname ${src_dir})
10 | 
11 | # tar
12 | if [ "${mode}" == "tar" ]; then
13 |     /opt/mpifileutils/bin/dtar -cf $(basename ${src_dir}).tar $(basename ${src_dir})
14 | fi
15 | 
16 | # zip
17 | if [ "${mode}" == "compress" ]; then
18 |     /opt/mpifileutils/bin/dbz2 -z $(basename ${src_dir}).tar
19 | fi
20 | 
21 | # bcast
22 | if [ "${mode}" == "broadcast" ]; then
23 |     if [ -f $(basename ${src_dir}).dbz2 ]; then
24 | 	srcfile=$(basename ${src_dir}).tar.dbz2
25 |     else
26 | 	srcfile=$(basename ${src_dir}).tar
27 |     fi
28 |     /opt/mpifileutils/bin/dbcast ${srcfile} ${dest_dir}/$(basename ${srcfile})
29 | fi
30 | 
31 | # untar
32 | if [ "${mode}" == "untar" ]; then
33 |     local_rank=$(( ${PMIX_RANK} % 8 ))
34 |     if [ "${local_rank}" == "0" ]; then
35 | 	time tar -xf ${dest_dir}/$(basename ${src_dir}).tar
36 |     fi
37 | fi
38 | 


--------------------------------------------------------------------------------
/deepcam/src/utils/run_summarize_circe.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -A hpc
 3 | #SBATCH -J summarize_cam5
 4 | #SBATCH -t 01:00:00
 5 | 
 6 | # The MIT License (MIT)
 7 | #
 8 | # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
 9 | #
10 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
11 | # this software and associated documentation files (the "Software"), to deal in
12 | # the Software without restriction, including without limitation the rights to
13 | # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
14 | # the Software, and to permit persons to whom the Software is furnished to do so,
15 | # subject to the following conditions:
16 | #
17 | # The above copyright notice and this permission notice shall be included in all
18 | # copies or substantial portions of the Software.
19 | #
20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
22 | # FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
23 | # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
24 | # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 | 
27 | rankspernode=48
28 | totalranks=$(( ${SLURM_NNODES} * ${rankspernode} ))
29 | 
30 | srun --wait=60 --mpi=pmix -N ${SLURM_NNODES} -n ${totalranks} -c $(( 96 / ${rankspernode} )) \
31 |      --container-workdir=/opt/utils \
32 |      --container-mounts=/gpfs/fs1/tkurth/cam5_dataset/All-Hist:/data \
33 |      --container-image=gitlab-master.nvidia.com/tkurth/mlperf-deepcam:debug \
34 |      python summarize_data.py
35 | 


--------------------------------------------------------------------------------
/open_catalyst/.circleci/config.yml:
--------------------------------------------------------------------------------
 1 | version: 2.1
 2 | 
 3 | jobs:
 4 |   build:
 5 |     docker:
 6 |       - image: circleci/python:3.7
 7 | 
 8 |     steps:
 9 |       - checkout
10 | 
11 |       - restore_cache:
12 |           keys:
13 |           - v0.3-dependencies-{{ checksum "env.common.yml" }}-{{ checksum "env.cpu.yml" }}-{{ checksum "env.gpu.yml" }}
14 | 
15 |       - run:
16 |           name: Install conda
17 |           command: |
18 |             if [ ! -d "/home/circleci/miniconda" ]; then
19 |               wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
20 |               bash miniconda.sh -b -p "$HOME"/miniconda
21 |               source /home/circleci/miniconda/etc/profile.d/conda.sh
22 |               conda activate base
23 |               # Conda configuration
24 |               conda config --set always_yes yes --set auto_update_conda false
25 |               # Update conda
26 |               conda update conda
27 |             fi
28 |       - run:
29 |           name: Create environment
30 |           command: |
31 |             if [ ! -d "/home/circleci/miniconda/envs/ocp-models" ]; then
32 |               source /home/circleci/miniconda/etc/profile.d/conda.sh
33 |               conda activate base
34 |               conda install -c conda-forge conda-merge
35 |               conda-merge env.common.yml env.cpu.yml > env.yml
36 |               conda env create -f env.yml
37 |             fi
38 |       - save_cache:
39 |           paths:
40 |             - /home/circleci/miniconda
41 |           key: v0.3-dependencies-{{ checksum "env.common.yml" }}-{{ checksum "env.cpu.yml" }}-{{ checksum "env.gpu.yml" }}
42 | 
43 |       - run:
44 |           name: Run tests
45 |           command: |
46 |             source /home/circleci/miniconda/etc/profile.d/conda.sh
47 |             conda activate ocp-models
48 |             pip install -e .
49 |             pre-commit install
50 |             pytest /home/circleci/project/tests
51 | 
52 |       - run:
53 |           name: Run black
54 |           command: |
55 |             source /home/circleci/miniconda/etc/profile.d/conda.sh
56 |             conda activate ocp-models
57 |             pip install black==20.8b1
58 |             black . --check
59 | 


--------------------------------------------------------------------------------
/open_catalyst/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E203, E266, E501, E731, W503, F403, F401
3 | max-line-length = 79
4 | max-complexity = 18
5 | select = B,C,E,F,W,T4,B9
6 | 


--------------------------------------------------------------------------------
/open_catalyst/.gitignore:
--------------------------------------------------------------------------------
  1 | wandb
  2 | data
  3 | checkpoints
  4 | results
  5 | *.traj
  6 | experimental
  7 | 
  8 | # Byte-compiled / optimized / DLL files
  9 | __pycache__/
 10 | *.py[cod]
 11 | *$py.class
 12 | 
 13 | # C extensions
 14 | *.so
 15 | 
 16 | # Distribution / packaging
 17 | .Python
 18 | env/
 19 | build/
 20 | develop-eggs/
 21 | dist/
 22 | downloads/
 23 | eggs/
 24 | .eggs/
 25 | lib/
 26 | lib64/
 27 | parts/
 28 | sdist/
 29 | var/
 30 | *.egg-info/
 31 | .installed.cfg
 32 | *.egg
 33 | 
 34 | # PyInstaller
 35 | #  Usually these files are written by a python script from a template
 36 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 37 | *.manifest
 38 | *.spec
 39 | 
 40 | # Installer logs
 41 | pip-log.txt
 42 | pip-delete-this-directory.txt
 43 | 
 44 | # Unit test / coverage reports
 45 | htmlcov/
 46 | .tox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *,cover
 53 | .hypothesis/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | docs/source/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # IPython Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # pyenv
 81 | .python-version
 82 | 
 83 | # celery beat schedule file
 84 | celerybeat-schedule
 85 | 
 86 | # dotenv
 87 | .env
 88 | 
 89 | # virtualenv
 90 | venv/
 91 | ENV/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | 
 96 | # Rope project settings
 97 | .ropeproject
 98 | 
 99 | # User directories
100 | Local
101 | 
102 | # .DS_Store
103 | .DS_Store
104 | 
105 | # VIM swap files
106 | *.swp
107 | 
108 | # PyCharm
109 | .idea/
110 | 
111 | # VS Code
112 | .vscode/
113 | 


--------------------------------------------------------------------------------
/open_catalyst/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | multi_line_output=3
3 | include_trailing_comma=True
4 | force_grid_wrap=0
5 | use_parentheses=True
6 | line_length=79
7 | 


--------------------------------------------------------------------------------
/open_catalyst/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/ambv/black
 3 |     rev: 20.8b1
 4 |     hooks:
 5 |     - id: black
 6 |       language_version: python3.8
 7 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 8 |     rev: v2.3.0
 9 |     hooks:
10 |     - id: flake8
11 |     - id: trailing-whitespace
12 |     - id: check-added-large-files
13 |     - id: end-of-file-fixer
14 | -   repo: https://github.com/pre-commit/mirrors-isort
15 |     rev: v5.9.1
16 |     hooks:
17 |     -   id: isort
18 |         args: ["--profile", "black", "--filter-files"]
19 | 


--------------------------------------------------------------------------------
/open_catalyst/LICENSE.md:
--------------------------------------------------------------------------------
 1 | 
 2 | MIT License
 3 | 
 4 | Copyright (c) Facebook, Inc. and its affiliates.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/100k/base.yml:
--------------------------------------------------------------------------------
 1 | trainer: energy
 2 | 
 3 | dataset:
 4 |   - src: data/is2re/100k/train/data.lmdb
 5 |     normalize_labels: True
 6 |     target_mean: -1.525913953781128
 7 |     target_std: 2.279365062713623
 8 |   - src: data/is2re/all/val_id/data.lmdb
 9 | 
10 | logger: tensorboard
11 | 
12 | task:
13 |   dataset: single_point_lmdb
14 |   description: "Relaxed state energy prediction from initial structure."
15 |   type: regression
16 |   metric: mae
17 |   labels:
18 |     - relaxed energy
19 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/100k/cgcnn/cgcnn.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/is2re/100k/base.yml
 3 | 
 4 | model:
 5 |   name: cgcnn
 6 |   atom_embedding_size: 384
 7 |   fc_feat_size: 128
 8 |   num_fc_layers: 4
 9 |   num_graph_conv_layers: 5
10 |   num_gaussians: 100
11 |   cutoff: 6.0
12 |   regress_forces: False
13 |   use_pbc: True
14 | 
15 | # *** Important note ***
16 | #   The total number of gpus used for this run was 1.
17 | #   If the global batch size (num_gpus * batch_size) is modified
18 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
19 | 
20 | optim:
21 |   batch_size: 16
22 |   eval_batch_size: 16
23 |   num_workers: 16
24 |   lr_initial: 0.01
25 |   lr_gamma: 0.1
26 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
27 |     - 31250
28 |     - 56250
29 |     - 75000
30 |   warmup_steps: 18750
31 |   warmup_factor: 0.2
32 |   max_epochs: 30
33 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/100k/dimenet_plus_plus/dpp.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/is2re/100k/base.yml
 3 | 
 4 | model:
 5 |   name: dimenetplusplus
 6 |   hidden_channels: 256
 7 |   out_emb_channels: 192
 8 |   num_blocks: 3
 9 |   cutoff: 6.0
10 |   num_radial: 6
11 |   num_spherical: 7
12 |   num_before_skip: 1
13 |   num_after_skip: 2
14 |   num_output_layers: 3
15 |   regress_forces: False
16 |   use_pbc: True
17 | 
18 | # *** Important note ***
19 | #   The total number of gpus used for this run was 1.
20 | #   If the global batch size (num_gpus * batch_size) is modified
21 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
22 | 
23 | optim:
24 |   batch_size: 2
25 |   eval_batch_size: 2
26 |   num_workers: 2
27 |   lr_initial: 0.0001
28 |   lr_gamma: 0.1
29 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
30 |     - 200000
31 |     - 400000
32 |     - 600000
33 |   warmup_steps: 100000
34 |   warmup_factor: 0.2
35 |   max_epochs: 20
36 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/100k/schnet/schnet.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/is2re/100k/base.yml
 3 | 
 4 | model:
 5 |   name: schnet
 6 |   hidden_channels: 384
 7 |   num_filters: 128
 8 |   num_interactions: 4
 9 |   num_gaussians: 100
10 |   cutoff: 6.0
11 |   use_pbc: True
12 |   regress_forces: False
13 | 
14 | # *** Important note ***
15 | #   The total number of gpus used for this run was 1.
16 | #   If the global batch size (num_gpus * batch_size) is modified
17 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
18 | 
19 | optim:
20 |   batch_size: 32
21 |   eval_batch_size: 32
22 |   num_workers: 16
23 |   lr_initial: 0.0005
24 |   lr_gamma: 0.1
25 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
26 |     - 15625
27 |     - 31250
28 |     - 46875
29 |   warmup_steps: 9375
30 |   warmup_factor: 0.2
31 |   max_epochs: 30
32 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/10k/base.yml:
--------------------------------------------------------------------------------
 1 | trainer: energy
 2 | 
 3 | dataset:
 4 |   - src: data/is2re/10k/train/data.lmdb
 5 |     normalize_labels: True
 6 |     target_mean: -1.525913953781128
 7 |     target_std: 2.279365062713623
 8 |   - src: data/is2re/all/val_id/data.lmdb
 9 | 
10 | logger: tensorboard
11 | 
12 | task:
13 |   dataset: single_point_lmdb
14 |   description: "Relaxed state energy prediction from initial structure."
15 |   type: regression
16 |   metric: mae
17 |   labels:
18 |     - relaxed energy
19 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/10k/cgcnn/cgcnn.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/is2re/10k/base.yml
 3 | 
 4 | model:
 5 |   name: cgcnn
 6 |   atom_embedding_size: 128
 7 |   fc_feat_size: 256
 8 |   num_fc_layers: 4
 9 |   num_graph_conv_layers: 5
10 |   num_gaussians: 100
11 |   cutoff: 6.0
12 |   regress_forces: False
13 |   use_pbc: True
14 | 
15 | # *** Important note ***
16 | #   The total number of gpus used for this run was 1.
17 | #   If the global batch size (num_gpus * batch_size) is modified
18 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
19 | 
20 | optim:
21 |   batch_size: 64
22 |   eval_batch_size: 64
23 |   num_workers: 16
24 |   lr_initial: 0.01
25 |   lr_gamma: 0.1
26 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
27 |     - 781
28 |     - 1406
29 |     - 2031
30 |   warmup_steps: 468
31 |   warmup_factor: 0.2
32 |   max_epochs: 20
33 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/10k/dimenet_plus_plus/dpp.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/is2re/10k/base.yml
 3 | 
 4 | model:
 5 |   name: dimenetplusplus
 6 |   hidden_channels: 256
 7 |   out_emb_channels: 192
 8 |   num_blocks: 3
 9 |   cutoff: 6.0
10 |   num_radial: 6
11 |   num_spherical: 7
12 |   num_before_skip: 1
13 |   num_after_skip: 2
14 |   num_output_layers: 3
15 |   regress_forces: False
16 |   use_pbc: True
17 | 
18 | # *** Important note ***
19 | #   The total number of gpus used for this run was 1.
20 | #   If the global batch size (num_gpus * batch_size) is modified
21 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
22 | 
23 | optim:
24 |   batch_size: 2
25 |   eval_batch_size: 2
26 |   num_workers: 2
27 |   lr_initial: 0.0001
28 |   lr_gamma: 0.1
29 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
30 |     - 20000
31 |     - 40000
32 |     - 60000
33 |   warmup_steps: 10000
34 |   warmup_factor: 0.2
35 |   max_epochs: 20
36 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/10k/schnet/schnet.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/is2re/10k/base.yml
 3 | 
 4 | model:
 5 |   name: schnet
 6 |   hidden_channels: 256
 7 |   num_filters: 128
 8 |   num_interactions: 3
 9 |   num_gaussians: 100
10 |   cutoff: 6.0
11 |   use_pbc: True
12 |   regress_forces: False
13 | 
14 | # *** Important note ***
15 | #   The total number of gpus used for this run was 1.
16 | #   If the global batch size (num_gpus * batch_size) is modified
17 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
18 | 
19 | optim:
20 |   batch_size: 64
21 |   eval_batch_size: 64
22 |   num_workers: 16
23 |   lr_initial: 0.005
24 |   lr_gamma: 0.1
25 |   lr_milestones: # epochs at which lr_initial <- lr_initial * lr_gamma
26 |     - 1562
27 |     - 2343
28 |     - 3125
29 |   warmup_steps: 468
30 |   warmup_factor: 0.2
31 |   max_epochs: 30
32 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/all/base.yml:
--------------------------------------------------------------------------------
 1 | trainer: energy
 2 | 
 3 | dataset:
 4 |   - src: data/is2re/all/train/data.lmdb
 5 |     normalize_labels: True
 6 |     target_mean: -1.525913953781128
 7 |     target_std: 2.279365062713623
 8 |   - src: data/is2re/all/val_id/data.lmdb
 9 | 
10 | logger: tensorboard
11 | 
12 | task:
13 |   dataset: single_point_lmdb
14 |   description: "Relaxed state energy prediction from initial structure."
15 |   type: regression
16 |   metric: mae
17 |   labels:
18 |     - relaxed energy
19 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/all/cgcnn/cgcnn.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/is2re/all/base.yml
 3 | 
 4 | model:
 5 |   name: cgcnn
 6 |   atom_embedding_size: 384
 7 |   fc_feat_size: 512
 8 |   num_fc_layers: 4
 9 |   num_graph_conv_layers: 6
10 |   num_gaussians: 100
11 |   cutoff: 6.0
12 |   regress_forces: False
13 |   use_pbc: True
14 | 
15 | # *** Important note ***
16 | #   The total number of gpus used for this run was 4.
17 | #   If the global batch size (num_gpus * batch_size) is modified
18 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
19 | 
20 | optim:
21 |   batch_size: 32
22 |   eval_batch_size: 32
23 |   num_workers: 16
24 |   lr_initial: 0.01
25 |   lr_gamma: 0.1
26 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
27 |     - 17981
28 |     - 32366
29 |     - 46752
30 |   warmup_steps: 10788
31 |   warmup_factor: 0.2
32 |   max_epochs: 20
33 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/all/dimenet_plus_plus/dpp.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/is2re/all/base.yml
 3 | 
 4 | model:
 5 |   name: dimenetplusplus
 6 |   hidden_channels: 256
 7 |   out_emb_channels: 192
 8 |   num_blocks: 3
 9 |   cutoff: 6.0
10 |   num_radial: 6
11 |   num_spherical: 7
12 |   num_before_skip: 1
13 |   num_after_skip: 2
14 |   num_output_layers: 3
15 |   regress_forces: False
16 |   use_pbc: True
17 | 
18 | # *** Important note ***
19 | #   The total number of gpus used for this run was 4.
20 | #   If the global batch size (num_gpus * batch_size) is modified
21 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
22 | 
23 | optim:
24 |   batch_size: 4
25 |   eval_batch_size: 4
26 |   num_workers: 4
27 |   lr_initial: 0.0001
28 |   lr_gamma: 0.1
29 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
30 |     - 115082
31 |     - 230164
32 |     - 345246
33 |   warmup_steps: 57541
34 |   warmup_factor: 0.2
35 |   max_epochs: 20
36 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/is2re/all/schnet/schnet.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/is2re/all/base.yml
 3 | 
 4 | model:
 5 |   name: schnet
 6 |   hidden_channels: 384
 7 |   num_filters: 128
 8 |   num_interactions: 4
 9 |   num_gaussians: 100
10 |   cutoff: 6.0
11 |   use_pbc: True
12 |   regress_forces: False
13 | 
14 | # *** Important note ***
15 | #   The total number of gpus used for this run was 4.
16 | #   If the global batch size (num_gpus * batch_size) is modified
17 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
18 | 
19 | optim:
20 |   batch_size: 64
21 |   eval_batch_size: 64
22 |   num_workers: 16
23 |   lr_initial: 0.001
24 |   lr_gamma: 0.1
25 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
26 |     - 17981
27 |     - 26972
28 |     - 35963
29 |   warmup_steps: 5394
30 |   warmup_factor: 0.2
31 |   max_epochs: 30
32 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/mlperf_hpc.yml:
--------------------------------------------------------------------------------
 1 | trainer: mlperf_forces
 2 | 
 3 | dataset:
 4 |     - src: /global/cfs/cdirs/m1759/catalysis_dl/oc20_data/s2ef/2M/train
 5 |       normalize_labels: True
 6 |       target_mean: -0.7554450631141663
 7 |       target_std: 2.887317180633545
 8 |       grad_target_mean: 0.0
 9 |       grad_target_std: 2.887317180633545
10 |     - src: /global/cfs/cdirs/m1759/catalysis_dl/oc20_data/s2ef/all/val_id
11 | 
12 | logger: wandb
13 | 
14 | task:
15 |     mlperf_benchmark: oc20
16 |     mlperf_org: LBNL
17 |     mlperf_division: closed
18 |     mlperf_status: onprem
19 |     mlperf_platform: SUBMISSION_PLATFORM_PLACEHOLDER
20 |     mlperf_accelerators_per_node: 8
21 |     mlperf_accelerators_per_rank: 1
22 | 
23 |     dataset: trajectory_lmdb
24 |     description: "Regressing to energies and forces for DFT trajectories from OCP"
25 |     type: regression
26 |     metric: mae
27 |     primary_metric: forces_mae
28 |     target_forces_mae: 0.036
29 |     labels:
30 |         - potential energy
31 |     grad_input: atomic forces
32 |     train_on_free_atoms: True
33 |     eval_on_free_atoms: True
34 | 
35 | model:
36 |     name: dimenetplusplus
37 |     hidden_channels: 192
38 |     out_emb_channels: 192
39 |     num_blocks: 3
40 |     cutoff: 6.0
41 |     num_radial: 6
42 |     num_spherical: 7
43 |     num_before_skip: 1
44 |     num_after_skip: 2
45 |     num_output_layers: 3
46 |     regress_forces: True
47 |     use_pbc: True
48 |     #otf_graph: True
49 | 
50 | # These settings optimized for global batch size (batch_size * gpus) = 256
51 | optim:
52 |     batch_size: 8
53 |     eval_batch_size: 8
54 |     num_workers: 8
55 | 
56 |     optimizer: AdamW
57 |     lr_initial: 0.0004
58 |     warmup_steps: 31252 # 4 epochs
59 |     warmup_factor: 0.2
60 |     lr_milestones:
61 |         - 125008 # 16 epochs
62 |         - 187512 # 24 epochs
63 |         - 250016 # 32 epochs
64 |     lr_gamma: 0.1
65 | 
66 |     max_epochs: 30
67 |     energy_coefficient: 0
68 |     force_coefficient: 50
69 |     disable_tqdm: True
70 | 
71 | slurm:
72 |     partition: null
73 |     constraint: gpu
74 |     account: m1759
75 |     qos: special
76 |     time_min: "4:00:00"
77 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/pm_b2048.yml:
--------------------------------------------------------------------------------
 1 | trainer: mlperf_forces
 2 | 
 3 | dataset:
 4 |     - src: /pscratch/sd/s/sfarrell/ocp/data/s2ef/2M/train
 5 |       normalize_labels: True
 6 |       target_mean: -0.7554450631141663
 7 |       target_std: 2.887317180633545
 8 |       grad_target_mean: 0.0
 9 |       grad_target_std: 2.887317180633545
10 |     - src: /pscratch/sd/s/sfarrell/ocp/data/s2ef/all/val_id
11 | 
12 | logger: wandb
13 | 
14 | task:
15 |     mlperf_benchmark: oc20
16 |     mlperf_org: LBNL
17 |     mlperf_division: closed
18 |     mlperf_status: onprem
19 |     mlperf_platform: SUBMISSION_PLATFORM_PLACEHOLDER
20 |     mlperf_accelerators_per_node: 4
21 |     mlperf_accelerators_per_rank: 1
22 | 
23 |     dataset: trajectory_lmdb
24 |     description: "Regressing to energies and forces for DFT trajectories from OCP"
25 |     type: regression
26 |     metric: mae
27 |     primary_metric: forces_mae
28 |     target_forces_mae: 0.036
29 |     labels:
30 |         - potential energy
31 |     grad_input: atomic forces
32 |     train_on_free_atoms: True
33 |     eval_on_free_atoms: True
34 | 
35 | model:
36 |     name: dimenetplusplus
37 |     hidden_channels: 192
38 |     out_emb_channels: 192
39 |     num_blocks: 3
40 |     cutoff: 6.0
41 |     num_radial: 6
42 |     num_spherical: 7
43 |     num_before_skip: 1
44 |     num_after_skip: 2
45 |     num_output_layers: 3
46 |     regress_forces: True
47 |     use_pbc: True
48 |     #otf_graph: True
49 | 
50 | # These settings optimized for global batch size (batch_size * gpus) = 2048
51 | optim:
52 |     batch_size: 4
53 |     eval_batch_size: 8
54 |     num_workers: 8
55 | 
56 |     optimizer: AdamW
57 |     lr_initial: 0.0016
58 |     warmup_steps: 3908 # 4 epochs
59 |     warmup_factor: 0.2
60 |     lr_milestones:
61 |         - 23448 # 24 epochs
62 |         - 31264 # 32 epochs
63 |     lr_gamma: 0.1
64 | 
65 |     max_epochs: 48
66 |     energy_coefficient: 0
67 |     force_coefficient: 50
68 | 
69 | hide_eval_progressbar: True
70 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/200k/base.yml:
--------------------------------------------------------------------------------
 1 | trainer: forces
 2 | 
 3 | dataset:
 4 |   - src: data/s2ef/200k/train/
 5 |     normalize_labels: True
 6 |     target_mean: -0.7554450631141663
 7 |     target_std: 2.887317180633545
 8 |     grad_target_mean: 0.0
 9 |     grad_target_std: 2.887317180633545
10 |   - src: data/s2ef/all/val_id/
11 | 
12 | logger: tensorboard
13 | 
14 | task:
15 |   dataset: trajectory_lmdb
16 |   description: "Regressing to energies and forces for DFT trajectories from OCP"
17 |   type: regression
18 |   metric: mae
19 |   labels:
20 |     - potential energy
21 |   grad_input: atomic forces
22 |   train_on_free_atoms: True
23 |   eval_on_free_atoms: True
24 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/200k/cgcnn/cgcnn.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/200k/base.yml
 3 | 
 4 | model:
 5 |   name: cgcnn
 6 |   atom_embedding_size: 128
 7 |   fc_feat_size: 128
 8 |   num_fc_layers: 3
 9 |   num_graph_conv_layers: 2
10 |   cutoff: 6.0
11 |   num_gaussians: 100
12 |   use_pbc: True
13 | 
14 | # *** Important note ***
15 | #   The total number of gpus used for this run was 4.
16 | #   If the global batch size (num_gpus * batch_size) is modified
17 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
18 | 
19 | optim:
20 |   batch_size: 32
21 |   eval_batch_size: 32
22 |   num_workers: 16
23 |   lr_initial: 0.0005
24 |   lr_gamma: 0.1
25 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
26 |     - 23437
27 |     - 31250
28 |   warmup_steps: 3125
29 |   warmup_factor: 0.2
30 |   max_epochs: 50
31 |   force_coefficient: 10
32 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/200k/dimenet_plus_plus/dpp.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/200k/base.yml
 3 | 
 4 | model:
 5 |   name: dimenetplusplus
 6 |   hidden_channels: 192
 7 |   out_emb_channels: 192
 8 |   num_blocks: 3
 9 |   cutoff: 6.0
10 |   num_radial: 6
11 |   num_spherical: 7
12 |   num_before_skip: 1
13 |   num_after_skip: 2
14 |   num_output_layers: 3
15 |   regress_forces: True
16 |   use_pbc: True
17 | 
18 | # *** Important note ***
19 | #   The total number of gpus used for this run was 16.
20 | #   If the global batch size (num_gpus * batch_size) is modified
21 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
22 | 
23 | optim:
24 |   batch_size: 12
25 |   eval_batch_size: 12
26 |   num_workers: 8
27 |   lr_initial: 0.00001
28 |   lr_gamma: 0.1
29 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
30 |     - 5208
31 |     - 8333
32 |     - 10416
33 |   warmup_steps: 3125
34 |   warmup_factor: 0.2
35 |   max_epochs: 30
36 |   force_coefficient: 50
37 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/200k/forcenet/fn_forceonly.yml:
--------------------------------------------------------------------------------
 1 | trainer: forces
 2 | 
 3 | dataset:
 4 |   - src: data/s2ef/200k/train/
 5 |   - src: data/s2ef/all/val_id/
 6 | 
 7 | model:
 8 |   name: forcenet
 9 |   num_interactions: 5
10 |   cutoff: 6
11 |   basis: "sphallmul"
12 |   ablation: "none"
13 |   depth_mlp_edge: 2
14 |   depth_mlp_node: 1
15 |   activation_str: "swish"
16 |   decoder_activation_str: "swish"
17 |   feat: "full"
18 |   hidden_channels: 512
19 |   decoder_hidden_channels: 512
20 |   max_n: 3
21 | 
22 | # *** Important note ***
23 | #   The total number of gpus used for this run was 8.
24 | #   If the global batch size (num_gpus * batch_size) is modified
25 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
26 | 
27 | optim:
28 |   batch_size: 8
29 |   eval_batch_size: 8
30 |   eval_every: 10000
31 |   num_workers: 8
32 |   lr_initial: 0.0005
33 |   max_epochs: 20
34 |   energy_coefficient: 0
35 |   lr_gamma: 0.1
36 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
37 |     - 15625
38 |     - 25000
39 |     - 31250
40 |   warmup_steps: 9375
41 |   warmup_factor: 0.2
42 | 
43 | task:
44 |   dataset: trajectory_lmdb
45 |   description: "Regressing to energies and forces for DFT trajectories from OCP"
46 |   type: regression
47 |   metric: mae
48 |   primary_metric: forces_mae
49 |   labels:
50 |     - potential energy
51 |   grad_input: atomic forces
52 |   tag_specific_weights:
53 |     - 0.05
54 |     - 1.0
55 |     - 1.0
56 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/200k/schnet/schnet.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/200k/base.yml
 3 | 
 4 | model:
 5 |   name: schnet
 6 |   hidden_channels: 1024
 7 |   num_filters: 256
 8 |   num_interactions: 3
 9 |   num_gaussians: 200
10 |   cutoff: 6.0
11 |   use_pbc: True
12 | 
13 | # *** Important note ***
14 | #   The total number of gpus used for this run was 4.
15 | #   If the global batch size (num_gpus * batch_size) is modified
16 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
17 | 
18 | optim:
19 |   batch_size: 32
20 |   eval_batch_size: 32
21 |   num_workers: 16
22 |   lr_initial: 0.0005
23 |   lr_gamma: 0.1
24 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
25 |     - 7812
26 |     - 12500
27 |     - 15625
28 |   warmup_steps: 4687
29 |   warmup_factor: 0.2
30 |   max_epochs: 30
31 |   force_coefficient: 100
32 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/20M/base.yml:
--------------------------------------------------------------------------------
 1 | trainer: forces
 2 | 
 3 | dataset:
 4 |   - src: data/s2ef/20M/train/
 5 |     normalize_labels: True
 6 |     target_mean: -0.7554450631141663
 7 |     target_std: 2.887317180633545
 8 |     grad_target_mean: 0.0
 9 |     grad_target_std: 2.887317180633545
10 |   - src: data/s2ef/all/val_id/
11 | 
12 | logger: tensorboard
13 | 
14 | task:
15 |   dataset: trajectory_lmdb
16 |   description: "Regressing to energies and forces for DFT trajectories from OCP"
17 |   type: regression
18 |   metric: mae
19 |   labels:
20 |     - potential energy
21 |   grad_input: atomic forces
22 |   train_on_free_atoms: True
23 |   eval_on_free_atoms: True
24 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/20M/cgcnn/cgcnn.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/20M/base.yml
 3 | 
 4 | model:
 5 |   name: cgcnn
 6 |   atom_embedding_size: 512
 7 |   fc_feat_size: 128
 8 |   num_fc_layers: 3
 9 |   num_graph_conv_layers: 3
10 |   cutoff: 6.0
11 |   num_gaussians: 100
12 |   use_pbc: True
13 | 
14 | # *** Important note ***
15 | #   The total number of gpus used for this run was 48.
16 | #   If the global batch size (num_gpus * batch_size) is modified
17 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
18 | 
19 | optim:
20 |   batch_size: 24
21 |   eval_batch_size: 24
22 |   num_workers: 16
23 |   lr_initial: 0.0005
24 |   lr_gamma: 0.1
25 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
26 |     - 52083
27 |     - 86805
28 |     - 121527
29 |   warmup_steps: 34722
30 |   warmup_factor: 0.2
31 |   max_epochs: 20
32 |   force_coefficient: 100
33 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/20M/dimenet_plus_plus/dpp.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/20M/base.yml
 3 | 
 4 | model:
 5 |   name: dimenetplusplus
 6 |   hidden_channels: 192
 7 |   out_emb_channels: 192
 8 |   num_blocks: 3
 9 |   cutoff: 6.0
10 |   num_radial: 6
11 |   num_spherical: 7
12 |   num_before_skip: 1
13 |   num_after_skip: 2
14 |   num_output_layers: 3
15 |   regress_forces: True
16 |   use_pbc: True
17 | 
18 | # *** Important note ***
19 | #   The total number of gpus used for this run was 64.
20 | #   If the global batch size (num_gpus * batch_size) is modified
21 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
22 | 
23 | optim:
24 |   batch_size: 12
25 |   eval_batch_size: 12
26 |   eval_every: 10000
27 |   num_workers: 8
28 |   lr_initial: 0.0001
29 |   lr_gamma: 0.1
30 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
31 |     - 78125
32 |     - 130208
33 |     - 208333
34 |   warmup_steps: 52083
35 |   warmup_factor: 0.2
36 |   max_epochs: 15
37 |   force_coefficient: 50
38 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/20M/schnet/schnet.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/20M/base.yml
 3 | 
 4 | model:
 5 |   name: schnet
 6 |   hidden_channels: 1024
 7 |   num_filters: 256
 8 |   num_interactions: 5
 9 |   num_gaussians: 200
10 |   cutoff: 6.0
11 |   use_pbc: True
12 | 
13 | # *** Important note ***
14 | #   The total number of gpus used for this run was 48.
15 | #   If the global batch size (num_gpus * batch_size) is modified
16 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
17 | 
18 | optim:
19 |   batch_size: 24
20 |   eval_batch_size: 24
21 |   eval_every: 10000
22 |   num_workers: 16
23 |   lr_initial: 0.0001
24 |   lr_gamma: 0.1
25 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
26 |     - 86805
27 |     - 138888
28 |     - 173611
29 |   warmup_steps: 52083
30 |   warmup_factor: 0.2
31 |   max_epochs: 30
32 |   force_coefficient: 50
33 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/2M/base.yml:
--------------------------------------------------------------------------------
 1 | trainer: forces
 2 | 
 3 | dataset:
 4 |   - src: data/s2ef/2M/train/
 5 |     normalize_labels: True
 6 |     target_mean: -0.7554450631141663
 7 |     target_std: 2.887317180633545
 8 |     grad_target_mean: 0.0
 9 |     grad_target_std: 2.887317180633545
10 |   - src: data/s2ef/all/val_id/
11 | 
12 | logger: tensorboard
13 | 
14 | task:
15 |   dataset: trajectory_lmdb
16 |   description: "Regressing to energies and forces for DFT trajectories from OCP"
17 |   type: regression
18 |   metric: mae
19 |   labels:
20 |     - potential energy
21 |   grad_input: atomic forces
22 |   train_on_free_atoms: True
23 |   eval_on_free_atoms: True
24 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/2M/cgcnn/cgcnn.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/2M/base.yml
 3 | 
 4 | model:
 5 |   name: cgcnn
 6 |   atom_embedding_size: 384
 7 |   fc_feat_size: 128
 8 |   num_fc_layers: 3
 9 |   num_graph_conv_layers: 3
10 |   cutoff: 6.0
11 |   num_gaussians: 100
12 |   use_pbc: True
13 | 
14 | # *** Important note ***
15 | #   The total number of gpus used for this run was 8.
16 | #   If the global batch size (num_gpus * batch_size) is modified
17 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
18 | 
19 | optim:
20 |   batch_size: 8
21 |   eval_batch_size: 8
22 |   num_workers: 8
23 |   lr_initial: 0.001
24 |   lr_gamma: 0.1
25 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
26 |     - 156250
27 |     - 281250
28 |     - 437500
29 |   warmup_steps: 62500
30 |   warmup_factor: 0.2
31 |   max_epochs: 20
32 |   force_coefficient: 10
33 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/2M/dimenet_plus_plus/dpp.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/2M/base.yml
 3 | 
 4 | model:
 5 |   name: dimenetplusplus
 6 |   hidden_channels: 192
 7 |   out_emb_channels: 192
 8 |   num_blocks: 3
 9 |   cutoff: 6.0
10 |   num_radial: 6
11 |   num_spherical: 7
12 |   num_before_skip: 1
13 |   num_after_skip: 2
14 |   num_output_layers: 3
15 |   regress_forces: True
16 |   use_pbc: True
17 | 
18 | # *** Important note ***
19 | #   The total number of gpus used for this run was 32.
20 | #   If the global batch size (num_gpus * batch_size) is modified
21 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
22 | 
23 | optim:
24 |   batch_size: 12
25 |   eval_batch_size: 12
26 |   eval_every: 10000
27 |   num_workers: 8
28 |   lr_initial: 0.0001
29 |   lr_gamma: 0.1
30 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
31 |     - 20833
32 |     - 31250
33 |     - 41666
34 |   warmup_steps: 10416
35 |   warmup_factor: 0.2
36 |   max_epochs: 15
37 |   force_coefficient: 50
38 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/2M/dimenet_plus_plus/dpp_relax.yml:
--------------------------------------------------------------------------------
 1 | trainer: forces
 2 | 
 3 | dataset:
 4 |   - src: data/s2ef/2M/train/
 5 |     normalize_labels: True
 6 |     target_mean: -0.7554450631141663
 7 |     target_std: 2.887317180633545
 8 |     grad_target_mean: 0.0
 9 |     grad_target_std: 2.887317180633545
10 |   - src: data/s2ef/all/val_id/
11 | 
12 | logger: tensorboard
13 | 
14 | task:
15 |   dataset: trajectory_lmdb
16 |   description: "Regressing to energies and forces for DFT trajectories from OCP"
17 |   type: regression
18 |   metric: mae
19 |   labels:
20 |     - potential energy
21 |   grad_input: atomic forces
22 |   train_on_free_atoms: True
23 |   eval_on_free_atoms: True
24 |   relax_dataset:
25 |     src: data/is2re/all/test_id/data.lmdb
26 |   write_pos: True
27 |   relaxation_steps: 200
28 |   relax_opt:
29 |     maxstep: 0.04
30 |     memory: 50
31 |     damping: 1.0
32 |     alpha: 70.0
33 |     traj_dir: "ml-relaxations/dpp-2M-test-id"
34 | 
35 | model:
36 |   name: dimenetplusplus
37 |   hidden_channels: 192
38 |   out_emb_channels: 192
39 |   num_blocks: 3
40 |   cutoff: 6.0
41 |   num_radial: 6
42 |   num_spherical: 7
43 |   num_before_skip: 1
44 |   num_after_skip: 2
45 |   num_output_layers: 3
46 |   regress_forces: True
47 |   use_pbc: True
48 | 
49 | # *** Important note ***
50 | #   The total number of gpus used for this run was 32.
51 | #   If the global batch size (num_gpus * batch_size) is modified
52 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
53 | 
54 | optim:
55 |   batch_size: 12
56 |   eval_batch_size: 12
57 |   eval_every: 10000
58 |   num_workers: 8
59 |   lr_initial: 0.0001
60 |   lr_gamma: 0.1
61 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
62 |     - 20833
63 |     - 31250
64 |     - 41666
65 |   warmup_steps: 10416
66 |   warmup_factor: 0.2
67 |   max_epochs: 15
68 |   force_coefficient: 50
69 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/2M/schnet/schnet.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/2M/base.yml
 3 | 
 4 | model:
 5 |   name: schnet
 6 |   hidden_channels: 1024
 7 |   num_filters: 256
 8 |   num_interactions: 5
 9 |   num_gaussians: 200
10 |   cutoff: 6.0
11 |   use_pbc: True
12 | 
13 | # *** Important note ***
14 | #   The total number of gpus used for this run was 8.
15 | #   If the global batch size (num_gpus * batch_size) is modified
16 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
17 | 
18 | optim:
19 |   batch_size: 24
20 |   eval_batch_size: 24
21 |   num_workers: 16
22 |   lr_initial: 0.0001
23 |   lr_gamma: 0.1
24 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
25 |     - 52083
26 |     - 83333
27 |     - 104166
28 |   warmup_steps: 31250
29 |   warmup_factor: 0.2
30 |   max_epochs: 30
31 |   force_coefficient: 100
32 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/all/base.yml:
--------------------------------------------------------------------------------
 1 | trainer: forces
 2 | 
 3 | dataset:
 4 |   - src: data/s2ef/all/train/
 5 |     normalize_labels: True
 6 |     target_mean: -0.7554450631141663
 7 |     target_std: 2.887317180633545
 8 |     grad_target_mean: 0.0
 9 |     grad_target_std: 2.887317180633545
10 |   - src: data/s2ef/all/val_id/
11 | 
12 | logger: tensorboard
13 | 
14 | task:
15 |   dataset: trajectory_lmdb
16 |   description: "Regressing to energies and forces for DFT trajectories from OCP"
17 |   type: regression
18 |   metric: mae
19 |   labels:
20 |     - potential energy
21 |   grad_input: atomic forces
22 |   train_on_free_atoms: True
23 |   eval_on_free_atoms: True
24 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/all/cgcnn/cgcnn.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/all/base.yml
 3 | 
 4 | model:
 5 |   name: cgcnn
 6 |   atom_embedding_size: 512
 7 |   fc_feat_size: 128
 8 |   num_fc_layers: 3
 9 |   num_graph_conv_layers: 3
10 |   cutoff: 6.0
11 |   num_gaussians: 100
12 |   use_pbc: True
13 | 
14 | # *** Important note ***
15 | #   The total number of gpus used for this run was 32.
16 | #   If the global batch size (num_gpus * batch_size) is modified
17 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
18 | 
19 | optim:
20 |   batch_size: 24
21 |   eval_batch_size: 24
22 |   num_workers: 16
23 |   lr_initial: 0.0005
24 |   lr_gamma: 0.1
25 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
26 |     - 523179
27 |     - 871966
28 |     - 1220752
29 |   warmup_steps: 348786
30 |   warmup_factor: 0.2
31 |   max_epochs: 20
32 |   force_coefficient: 10
33 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/all/dimenet_plus_plus/dpp.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/all/base.yml
 3 | 
 4 | model:
 5 |   name: dimenetplusplus
 6 |   hidden_channels: 192
 7 |   out_emb_channels: 192
 8 |   num_blocks: 3
 9 |   cutoff: 6.0
10 |   num_radial: 6
11 |   num_spherical: 7
12 |   num_before_skip: 1
13 |   num_after_skip: 2
14 |   num_output_layers: 3
15 |   regress_forces: True
16 |   use_pbc: True
17 | 
18 | # *** Important note ***
19 | #   The total number of gpus used for this run was 256.
20 | #   If the global batch size (num_gpus * batch_size) is modified
21 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
22 | 
23 | optim:
24 |   batch_size: 8
25 |   eval_batch_size: 8
26 |   eval_every: 10000
27 |   num_workers: 8
28 |   lr_initial: 0.0001
29 |   lr_gamma: 0.1
30 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
31 |     - 130794
32 |     - 196192
33 |     - 261589
34 |   warmup_steps: 130794
35 |   warmup_factor: 0.2
36 |   max_epochs: 7
37 |   force_coefficient: 50
38 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/all/dimenet_plus_plus/dpp10.7M_forceonly.yml:
--------------------------------------------------------------------------------
 1 | trainer: forces
 2 | 
 3 | dataset:
 4 |   - src: data/s2ef/all/train/
 5 |     normalize_labels: True
 6 |     target_mean: -0.7554450631141663
 7 |     target_std: 2.887317180633545
 8 |     grad_target_mean: 0.0
 9 |     grad_target_std: 2.887317180633545
10 |   - src: data/s2ef/all/val_id/
11 | 
12 | logger: tensorboard
13 | 
14 | task:
15 |   dataset: trajectory_lmdb
16 |   description: "Regressing to energies and forces for DFT trajectories from OCP"
17 |   type: regression
18 |   metric: mae
19 |   primary_metric: forces_mae
20 |   labels:
21 |     - potential energy
22 |   grad_input: atomic forces
23 |   train_on_free_atoms: True
24 |   eval_on_free_atoms: True
25 | 
26 | model:
27 |   name: dimenetplusplus
28 |   hidden_channels: 512
29 |   out_emb_channels: 384
30 |   num_blocks: 3
31 |   cutoff: 6.0
32 |   num_radial: 6
33 |   num_spherical: 7
34 |   num_before_skip: 1
35 |   num_after_skip: 2
36 |   num_output_layers: 3
37 |   regress_forces: True
38 |   use_pbc: True
39 | 
40 | # *** Important note ***
41 | #   The total number of gpus used for this run was 256.
42 | #   If the global batch size (num_gpus * batch_size) is modified
43 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
44 | 
45 | optim:
46 |   batch_size: 3
47 |   eval_batch_size: 3
48 |   eval_every: 10000
49 |   num_workers: 3
50 |   lr_initial: 0.0001
51 |   lr_gamma: 0.1
52 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
53 |     - 174393
54 |     - 348786
55 |     - 523179
56 |   warmup_steps: 174393
57 |   warmup_factor: 0.2
58 |   max_epochs: 5
59 |   energy_coefficient: 0
60 |   force_coefficient: 100
61 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/all/dimenet_plus_plus/dpp_energyonly.yml:
--------------------------------------------------------------------------------
 1 | trainer: forces
 2 | 
 3 | dataset:
 4 |   - src: data/s2ef/all/train/
 5 |     normalize_labels: True
 6 |     target_mean: -0.7554450631141663
 7 |     target_std: 2.887317180633545
 8 |     grad_target_mean: 0.0
 9 |     grad_target_std: 2.887317180633545
10 |   - src: data/s2ef/all/val_id/
11 | 
12 | logger: tensorboard
13 | 
14 | task:
15 |   dataset: trajectory_lmdb
16 |   description: "Regressing to energies and forces for DFT trajectories from OCP"
17 |   type: regression
18 |   metric: mae
19 |   primary_metric: energy_mae
20 |   labels:
21 |     - potential energy
22 |   grad_input: atomic forces
23 |   train_on_free_atoms: True
24 |   eval_on_free_atoms: True
25 | 
26 | model:
27 |   name: dimenetplusplus
28 |   hidden_channels: 192
29 |   out_emb_channels: 192
30 |   num_blocks: 3
31 |   cutoff: 6.0
32 |   num_radial: 6
33 |   num_spherical: 7
34 |   num_before_skip: 1
35 |   num_after_skip: 2
36 |   num_output_layers: 3
37 |   regress_forces: True
38 |   use_pbc: True
39 | 
40 | # *** Important note ***
41 | #   The total number of gpus used for this run was 256.
42 | #   If the global batch size (num_gpus * batch_size) is modified
43 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
44 | 
45 | optim:
46 |   batch_size: 8
47 |   eval_batch_size: 8
48 |   eval_every: 10000
49 |   num_workers: 8
50 |   lr_initial: 0.0001
51 |   lr_gamma: 0.1
52 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
53 |     - 130794
54 |     - 196192
55 |     - 261589
56 |   warmup_steps: 130794
57 |   warmup_factor: 0.2
58 |   max_epochs: 7
59 |   energy_coefficient: 100
60 |   force_coefficient: 0
61 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/all/dimenet_plus_plus/dpp_forceonly.yml:
--------------------------------------------------------------------------------
 1 | trainer: forces
 2 | 
 3 | dataset:
 4 |   - src: data/s2ef/all/train/
 5 |     normalize_labels: True
 6 |     target_mean: -0.7554450631141663
 7 |     target_std: 2.887317180633545
 8 |     grad_target_mean: 0.0
 9 |     grad_target_std: 2.887317180633545
10 |   - src: data/s2ef/all/val_id/
11 | 
12 | logger: tensorboard
13 | 
14 | task:
15 |   dataset: trajectory_lmdb
16 |   description: "Regressing to energies and forces for DFT trajectories from OCP"
17 |   type: regression
18 |   metric: mae
19 |   primary_metric: forces_mae
20 |   labels:
21 |     - potential energy
22 |   grad_input: atomic forces
23 |   train_on_free_atoms: True
24 |   eval_on_free_atoms: True
25 | 
26 | model:
27 |   name: dimenetplusplus
28 |   hidden_channels: 192
29 |   out_emb_channels: 192
30 |   num_blocks: 3
31 |   cutoff: 6.0
32 |   num_radial: 6
33 |   num_spherical: 7
34 |   num_before_skip: 1
35 |   num_after_skip: 2
36 |   num_output_layers: 3
37 |   regress_forces: True
38 |   use_pbc: True
39 | 
40 | # *** Important note ***
41 | #   The total number of gpus used for this run was 64.
42 | #   If the global batch size (num_gpus * batch_size) is modified
43 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
44 | 
45 | optim:
46 |   batch_size: 8
47 |   eval_batch_size: 8
48 |   eval_every: 10000
49 |   num_workers: 8
50 |   lr_initial: 0.0001
51 |   lr_gamma: 0.1
52 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
53 |     - 523179
54 |     - 784769
55 |     - 1046359
56 |   warmup_steps: 523179
57 |   warmup_factor: 0.2
58 |   max_epochs: 7
59 |   energy_coefficient: 0
60 |   force_coefficient: 100
61 | 


--------------------------------------------------------------------------------
/open_catalyst/configs/s2ef/all/schnet/schnet.yml:
--------------------------------------------------------------------------------
 1 | includes:
 2 | - configs/s2ef/all/base.yml
 3 | 
 4 | model:
 5 |   name: schnet
 6 |   hidden_channels: 1024
 7 |   num_filters: 256
 8 |   num_interactions: 5
 9 |   num_gaussians: 200
10 |   cutoff: 6.0
11 |   use_pbc: True
12 | 
13 | # *** Important note ***
14 | #   The total number of gpus used for this run was 64.
15 | #   If the global batch size (num_gpus * batch_size) is modified
16 | #   the lr_milestones and warmup_steps need to be adjusted accordingly.
17 | 
18 | optim:
19 |   batch_size: 20
20 |   eval_batch_size: 20
21 |   eval_every: 10000
22 |   num_workers: 16
23 |   lr_initial: 0.0001
24 |   lr_gamma: 0.1
25 |   lr_milestones: # steps at which lr_initial <- lr_initial * lr_gamma
26 |     - 313907
27 |     - 523179
28 |     - 732451
29 |   warmup_steps: 209271
30 |   warmup_factor: 0.2
31 |   max_epochs: 15
32 |   force_coefficient: 30
33 | 


--------------------------------------------------------------------------------
/open_catalyst/docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nvcr.io/nvidia/pytorch:21.08-py3
 2 | 
 3 | # PyG
 4 | RUN FORCE_CUDA=1 TORCH_CUDA_ARCH_LIST="7.0 8.0" pip install --no-cache-dir \
 5 |     torch-scatter torch-sparse torch-geometric==1.7.2
 6 | 
 7 | # MLPerf logging
 8 | RUN pip install --no-cache-dir git+https://github.com/mlcommons/logging.git
 9 | 
10 | # Other packages
11 | RUN pip install --no-cache-dir ray submitit demjson wandb ase pymatgen
12 | 


--------------------------------------------------------------------------------
/open_catalyst/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/open_catalyst/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/open_catalyst/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | nbsphinx
2 | 


--------------------------------------------------------------------------------
/open_catalyst/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | # Configuration file for the Sphinx documentation builder.
 9 | #
10 | # This file only contains a selection of the most common options. For a full
11 | # list see the documentation:
12 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
13 | 
14 | # -- Path setup --------------------------------------------------------------
15 | 
16 | # If extensions (or modules to document with autodoc) are in another directory,
17 | # add these directories to sys.path here. If the directory is relative to the
18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
19 | #
20 | import os
21 | import sys
22 | 
23 | sys.path.insert(0, os.path.abspath("../../"))
24 | 
25 | 
26 | # -- Project information -----------------------------------------------------
27 | 
28 | project = "Open Catalyst Project"
29 | copyright = "2020, Facebook, Inc."
30 | author = "Anuroop Sriram"
31 | 
32 | 
33 | # -- General configuration ---------------------------------------------------
34 | 
35 | # Add any Sphinx extension module names here, as strings. They can be
36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
37 | # ones.
38 | extensions = [
39 |     "sphinx.ext.autodoc",
40 |     "sphinx.ext.coverage",
41 |     "sphinx.ext.napoleon",
42 |     "sphinx_rtd_theme",
43 |     "nbsphinx",
44 | ]
45 | 
46 | # Add any paths that contain templates here, relative to this directory.
47 | templates_path = ["_templates"]
48 | 
49 | # List of patterns, relative to source directory, that match files and
50 | # directories to ignore when looking for source files.
51 | # This pattern also affects html_static_path and html_extra_path.
52 | exclude_patterns = []
53 | 
54 | 
55 | # -- Options for HTML output -------------------------------------------------
56 | 
57 | # The theme to use for HTML and HTML Help pages.  See the documentation for
58 | # a list of builtin themes.
59 | #
60 | html_theme = "sphinx_rtd_theme"
61 | 
62 | # Add any paths that contain custom static files (such as style sheets) here,
63 | # relative to this directory. They are copied after the builtin static files,
64 | # so a file named "default.css" will overwrite the builtin "default.css".
65 | html_static_path = ["_static"]
66 | 
67 | master_doc = "index"
68 | 


--------------------------------------------------------------------------------
/open_catalyst/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | Open Catalyst Project
 2 | =====================
 3 | 
 4 | The Open Catalyst Project is a collaborative research effort between Facebook AI
 5 | Research (FAIR) and Carnegie Mellon University’s (CMU) Department of Chemical Engineering.
 6 | The aim is to use AI to model and discover new catalysts for use in renewable energy
 7 | storage to help in addressing climate change.
 8 | 
 9 | Scalable and cost-effective solutions to renewable energy storage are essential to
10 | addressing the world’s rising energy needs while reducing climate change.  As we
11 | increase our reliance on renewable energy sources such as wind and solar, which produce
12 | intermittent power, storage is needed to transfer power from times of peak generation to
13 | peak demand. This may require the storage of power for hours, days, or months. One solution
14 | that offers the potential of scaling to nation-sized grids is the conversion of
15 | renewable energy to other fuels, such as hydrogen. To be widely adopted, this
16 | process requires cost-effective solutions to running chemical reactions.
17 | 
18 | An open challenge is finding low-cost catalysts to drive these reactions at high rates.
19 | Through the use of quantum mechanical simulations (density functional theory), new
20 | catalyst structures can be tested and evaluated. Unfortunately, the high computational
21 | cost of these simulations limits the number of structures that may be tested. The use of
22 | AI or machine learning may provide a method to efficiently approximate these calculations,
23 | leading to new approaches in finding effective catalysts.
24 | 
25 | To enable the broader research community to participate in this important project,
26 | we provide baseline models and code at
27 | `Github page <https://github.com/Open-Catalyst-Project/baselines>`_.
28 | 
29 | 
30 | .. toctree::
31 |    :maxdepth: 1
32 |    :caption: Tutorials
33 | 
34 |    tutorials/getting_started
35 |    tutorials/data_playground.ipynb
36 |    tutorials/train_s2ef_example.ipynb
37 |    tutorials/training
38 |    tutorials/submission
39 | 
40 | ..
41 |     .. toctree::
42 |        :maxdepth: 1
43 |        :caption: Modules
44 | 
45 |        modules/model
46 |        modules/dataset
47 |        modules/trainer
48 | 
49 | Indices and tables
50 | ==================
51 | 
52 | * :ref:`genindex`
53 | * :ref:`modindex`
54 | * :ref:`search`
55 | 


--------------------------------------------------------------------------------
/open_catalyst/docs/source/modules/dataset.rst:
--------------------------------------------------------------------------------
 1 | ocpmodels.datasets
 2 | ==================
 3 | 
 4 | .. .. currentmodule:: ocpmodels.datasets
 5 | 
 6 | .. .. autosummary::
 7 | ..     :toctree: generated
 8 | ..     :nosignatures:
 9 | 
10 | .. automodule:: ocpmodels.datasets
11 |     :members:
12 |     :exclude-members: data_list_collater
13 | 


--------------------------------------------------------------------------------
/open_catalyst/docs/source/modules/model.rst:
--------------------------------------------------------------------------------
 1 | ocpmodels.models
 2 | ================
 3 | 
 4 | .. .. currentmodule:: ocpmodels.models
 5 | 
 6 | .. .. autosummary::
 7 | ..     :toctree: generated
 8 | ..     :nosignatures:
 9 | 
10 | .. automodule:: ocpmodels.models
11 |     :members:
12 |     :exclude-members:
13 | 


--------------------------------------------------------------------------------
/open_catalyst/docs/source/modules/trainer.rst:
--------------------------------------------------------------------------------
 1 | ocpmodels.trainers
 2 | ==================
 3 | 
 4 | .. .. currentmodule:: ocpmodels.trainers
 5 | 
 6 | .. .. autosummary::
 7 | ..     :toctree: generated
 8 | ..     :nosignatures:
 9 | 
10 | .. automodule:: ocpmodels.trainers
11 |     :members:
12 |     :exclude-members:
13 | 


--------------------------------------------------------------------------------
/open_catalyst/docs/source/tutorials/submission.rst:
--------------------------------------------------------------------------------
 1 | Create EvalAI submission files
 2 | ==============================
 3 | 
 4 | EvalAI expects results to be structured in a specific format for a submission to be successful. A submission must contain results from the 4 different splits - in distribution (id), out of distribution adsorbate (ood ads), out of distribution catalyst (ood cat), and out of distribution adsorbate and catalyst (ood both). Constructing the submission file for each of the above tasks is as follows:
 5 | 
 6 | S2EF / IS2RE
 7 | ************
 8 | 
 9 | 1. Run predictions :obj:`--mode predict` on all 4 splits, generating :obj:`predictions.json` files for each split.
10 | 2. Modify :obj:`scripts/make_evalai_json.py` with the corresponding paths of the :obj:`predictions.json` files and run to generate your final submission file :obj:`taskname_split_submission.json` (filename may be modified).
11 | 3. Upload :obj:`taskname_split_submission.json` to EvalAI.
12 | 
13 | 
14 | IS2RS
15 | *****
16 | 
17 | 1. Ensure :obj:`write_pos: True` is included in your configuration file. Run relaxations :obj:`--mode run-relaxations` on all 4 splits, generating :obj:`relaxed_pos_[DEVICE #].json` files for each split.
18 | 2. For each split, if relaxations were run with multiple GPUs, combine :obj:`relaxed_pos_[DEVICE #].json` into one :obj:`relaxed_pos.json` file using :obj:`scripts/make_evalai_json.py`, otherwise skip to 3.
19 | 3. Modify :obj:`scripts/make_evalai_json.py` with the corresponding paths of the :obj:`relaxed_pos.json` files and run to generate your final submission file :obj:`taskname_split_submission.json` (filename may be modified).
20 | 4. Upload :obj:`taskname_split_submission.json` to EvalAI.
21 | 


--------------------------------------------------------------------------------
/open_catalyst/env.common.yml:
--------------------------------------------------------------------------------
 1 | name: ocp-models
 2 | channels:
 3 |   - pytorch
 4 |   - conda-forge
 5 |   - defaults
 6 | dependencies:
 7 |   - ase=3.21.*
 8 |   - matplotlib=3.3.*
 9 |   - pip
10 |   - pre-commit=2.10.*
11 |   - pymatgen=2020.12.31
12 |   - python=3.8.*
13 |   - pytorch=1.8.1
14 |   - pyyaml=5.4.*
15 |   - tensorboard=2.4.*
16 |   - tqdm=4.58.*
17 |   - sphinx
18 |   - nbsphinx
19 |   - pandoc
20 |   - black
21 |   - pip:
22 |     - demjson
23 |     - Pillow
24 |     - git+https://github.com/rusty1s/pytorch_geometric.git@4ea63d3
25 |     - wandb
26 |     - lmdb==1.1.1
27 |     - pytest==6.2.2
28 |     - submitit
29 |     - sphinx-rtd-theme
30 | 


--------------------------------------------------------------------------------
/open_catalyst/env.cpu.yml:
--------------------------------------------------------------------------------
1 | dependencies:
2 |   - cpuonly
3 |   - pip:
4 |     - -f https://pytorch-geometric.com/whl/torch-1.8.0+cpu.html
5 |     - torch-cluster
6 |     - torch-scatter
7 |     - torch-sparse
8 |     - torch-spline-conv
9 | 


--------------------------------------------------------------------------------
/open_catalyst/env.gpu.yml:
--------------------------------------------------------------------------------
 1 | channels:
 2 |   - pytorch
 3 |   - conda-forge
 4 |   - defaults
 5 | dependencies:
 6 |   - cudatoolkit=10.2
 7 |   - pip:
 8 |     - -f https://pytorch-geometric.com/whl/torch-1.8.0+cu102.html
 9 |     - torch-cluster
10 |     - torch-scatter
11 |     - torch-sparse
12 |     - torch-spline-conv
13 | 


--------------------------------------------------------------------------------
/open_catalyst/env.yml:
--------------------------------------------------------------------------------
 1 | channels:
 2 | - pytorch
 3 | - nvidia
 4 | - defaults
 5 | - conda-forge
 6 | dependencies:
 7 | - ase=3.21.*
 8 | - black
 9 | - cudatoolkit=11.1
10 | - matplotlib=3.3.*
11 | - nbsphinx
12 | - pandoc
13 | - pip
14 | - pre-commit=2.10.*
15 | - pymatgen=2020.12.31
16 | - python=3.8.*
17 | - pytorch=1.8.0
18 | - pyyaml=5.4.*
19 | - sphinx
20 | - tensorboard=2.4.*
21 | - tqdm=4.58.*
22 | - pip:
23 |   - -f https://pytorch-geometric.com/whl/torch-1.8.0+cu111.html
24 |   - Pillow
25 |   - demjson
26 |   - lmdb==1.1.1
27 |   - pytest==6.2.2
28 |   - ray
29 |   - sphinx-rtd-theme
30 |   - submitit
31 |   - torch-cluster
32 |   - torch-scatter
33 |   - torch-sparse
34 |   - torch-spline-conv
35 |   - git+https://github.com/rusty1s/pytorch_geometric.git
36 |   - wandb
37 |   - git+https://github.com/mlperf-hpc/logging.git@hpc-0.5.0
38 | 


--------------------------------------------------------------------------------
/open_catalyst/licenses/LICENSE.cgcnn:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Tian Xie
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/open_catalyst/licenses/LICENSE.mmf:
--------------------------------------------------------------------------------
 1 | BSD License
 2 | 
 3 | For MMF software
 4 | 
 5 | Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
 6 | 
 7 | Redistribution and use in source and binary forms, with or without modification,
 8 | are permitted provided that the following conditions are met:
 9 | 
10 |  * Redistributions of source code must retain the above copyright notice, this
11 |    list of conditions and the following disclaimer.
12 | 
13 |  * Redistributions in binary form must reproduce the above copyright notice,
14 |    this list of conditions and the following disclaimer in the documentation
15 |    and/or other materials provided with the distribution.
16 | 
17 |  * Neither the name Facebook nor the names of its contributors may be used to
18 |    endorse or promote products derived from this software without specific
19 |    prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/open_catalyst/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 | 
4 | This source code is licensed under the MIT license found in the
5 | LICENSE file in the root directory of this source tree.
6 | """
7 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/common/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 | 
4 | This source code is licensed under the MIT license found in the
5 | LICENSE file in the root directory of this source tree.
6 | """
7 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/common/hpo_utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import math
 9 | 
10 | from ray import tune
11 | 
12 | 
13 | def tune_reporter(
14 |     iters,
15 |     train_metrics,
16 |     val_metrics,
17 |     test_metrics=None,
18 |     metric_to_opt="val_loss",
19 |     min_max="min",
20 | ):
21 |     """
22 |     Wrapper function for tune.report()
23 | 
24 |     Args:
25 |         iters(dict): dict with training iteration info (e.g. steps, epochs)
26 |         train_metrics(dict): train metrics dict
27 |         val_metrics(dict): val metrics dict
28 |         test_metrics(dict, optional): test metrics dict, default is None
29 |         metric_to_opt(str, optional): str for val metric to optimize, default is val_loss
30 |         min_max(str, optional): either "min" or "max", determines whether metric_to_opt is to be minimized or maximized, default is min
31 | 
32 |     """
33 |     # labels metric dicts
34 |     train = label_metric_dict(train_metrics, "train")
35 |     val = label_metric_dict(val_metrics, "val")
36 |     # this enables tolerance for NaNs assumes val set is used for optimization
37 |     if math.isnan(val[metric_to_opt]):
38 |         if min_max == "min":
39 |             val[metric_to_opt] = 100000.0
40 |         if min_max == "max":
41 |             val[metric_to_opt] = 0.0
42 |     if test_metrics:
43 |         test = label_metric_dict(test_metrics, "test")
44 |     else:
45 |         test = {}
46 |     # report results to Ray Tune
47 |     tune.report(**iters, **train, **val, **test)
48 | 
49 | 
50 | def label_metric_dict(metric_dict, split):
51 |     new_dict = {}
52 |     for key in metric_dict:
53 |         new_dict["{}_{}".format(split, key)] = metric_dict[key]
54 |     metric_dict = new_dict
55 |     return metric_dict
56 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/common/relaxation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/hpc/2c627d457004eff77a014205b3151ed48a6fa149/open_catalyst/ocpmodels/common/relaxation/__init__.py


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/common/relaxation/ml_relaxation.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | from pathlib import Path
 9 | 
10 | import torch
11 | 
12 | from ocpmodels.common.registry import registry
13 | 
14 | from .optimizers.lbfgs_torch import LBFGS, TorchCalc
15 | 
16 | 
17 | def ml_relax(
18 |     batch,
19 |     model,
20 |     steps,
21 |     fmax,
22 |     relax_opt,
23 |     device="cuda:0",
24 |     transform=None,
25 |     early_stop_batch=False,
26 | ):
27 |     """
28 |     Runs ML-based relaxations.
29 |     Args:
30 |         batch: object
31 |         model: object
32 |         steps: int
33 |             Max number of steps in the structure relaxation.
34 |         fmax: float
35 |             Structure relaxation terminates when the max force
36 |             of the system is no bigger than fmax.
37 |         relax_opt: str
38 |             Optimizer and corresponding parameters to be used for structure relaxations.
39 |     """
40 |     batch = batch[0]
41 |     ids = batch.sid
42 |     calc = TorchCalc(model, transform)
43 | 
44 |     # Run ML-based relaxation
45 |     traj_dir = relax_opt.get("traj_dir", None)
46 |     optimizer = LBFGS(
47 |         batch,
48 |         calc,
49 |         maxstep=relax_opt.get("maxstep", 0.04),
50 |         memory=relax_opt["memory"],
51 |         damping=relax_opt.get("damping", 1.0),
52 |         alpha=relax_opt.get("alpha", 70.0),
53 |         device=device,
54 |         traj_dir=Path(traj_dir) if traj_dir is not None else None,
55 |         traj_names=ids,
56 |         early_stop_batch=early_stop_batch,
57 |     )
58 |     relaxed_batch = optimizer.run(fmax=fmax, steps=steps)
59 | 
60 |     return relaxed_batch
61 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/common/relaxation/optimizers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/hpc/2c627d457004eff77a014205b3151ed48a6fa149/open_catalyst/ocpmodels/common/relaxation/optimizers/__init__.py


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | __all__ = [
 7 |     "SinglePointLmdbDataset",
 8 |     "TrajectoryLmdbDataset",
 9 |     "data_list_collater",
10 | ]
11 | 
12 | from .single_point_lmdb import SinglePointLmdbDataset
13 | from .trajectory_lmdb import TrajectoryLmdbDataset, data_list_collater
14 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/datasets/embeddings/__init__.py:
--------------------------------------------------------------------------------
 1 | __all__ = [
 2 |     "ATOMIC_RADII",
 3 |     "KHOT_EMBEDDINGS",
 4 |     "CONTINUOUS_EMBEDDINGS",
 5 | ]
 6 | 
 7 | from .atomic_radii import ATOMIC_RADII
 8 | from .continuous_embeddings import CONTINUOUS_EMBEDDINGS
 9 | from .khot_embeddings import KHOT_EMBEDDINGS
10 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/datasets/embeddings/atomic_radii.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Atomic radii in picometers
  3 | 
  4 | NaN stored for unavailable parameters.
  5 | """
  6 | ATOMIC_RADII = {
  7 |     0: float("NaN"),
  8 |     1: 25.0,
  9 |     2: 120.0,
 10 |     3: 145.0,
 11 |     4: 105.0,
 12 |     5: 85.0,
 13 |     6: 70.0,
 14 |     7: 65.0,
 15 |     8: 60.0,
 16 |     9: 50.0,
 17 |     10: 160.0,
 18 |     11: 180.0,
 19 |     12: 150.0,
 20 |     13: 125.0,
 21 |     14: 110.0,
 22 |     15: 100.0,
 23 |     16: 100.0,
 24 |     17: 100.0,
 25 |     18: 71.0,
 26 |     19: 220.0,
 27 |     20: 180.0,
 28 |     21: 160.0,
 29 |     22: 140.0,
 30 |     23: 135.0,
 31 |     24: 140.0,
 32 |     25: 140.0,
 33 |     26: 140.0,
 34 |     27: 135.0,
 35 |     28: 135.0,
 36 |     29: 135.0,
 37 |     30: 135.0,
 38 |     31: 130.0,
 39 |     32: 125.0,
 40 |     33: 115.0,
 41 |     34: 115.0,
 42 |     35: 115.0,
 43 |     36: float("NaN"),
 44 |     37: 235.0,
 45 |     38: 200.0,
 46 |     39: 180.0,
 47 |     40: 155.0,
 48 |     41: 145.0,
 49 |     42: 145.0,
 50 |     43: 135.0,
 51 |     44: 130.0,
 52 |     45: 135.0,
 53 |     46: 140.0,
 54 |     47: 160.0,
 55 |     48: 155.0,
 56 |     49: 155.0,
 57 |     50: 145.0,
 58 |     51: 145.0,
 59 |     52: 140.0,
 60 |     53: 140.0,
 61 |     54: float("NaN"),
 62 |     55: 260.0,
 63 |     56: 215.0,
 64 |     57: 195.0,
 65 |     58: 185.0,
 66 |     59: 185.0,
 67 |     60: 185.0,
 68 |     61: 185.0,
 69 |     62: 185.0,
 70 |     63: 185.0,
 71 |     64: 180.0,
 72 |     65: 175.0,
 73 |     66: 175.0,
 74 |     67: 175.0,
 75 |     68: 175.0,
 76 |     69: 175.0,
 77 |     70: 175.0,
 78 |     71: 175.0,
 79 |     72: 155.0,
 80 |     73: 145.0,
 81 |     74: 135.0,
 82 |     75: 135.0,
 83 |     76: 130.0,
 84 |     77: 135.0,
 85 |     78: 135.0,
 86 |     79: 135.0,
 87 |     80: 150.0,
 88 |     81: 190.0,
 89 |     82: 180.0,
 90 |     83: 160.0,
 91 |     84: 190.0,
 92 |     85: float("NaN"),
 93 |     86: float("NaN"),
 94 |     87: float("NaN"),
 95 |     88: 215.0,
 96 |     89: 195.0,
 97 |     90: 180.0,
 98 |     91: 180.0,
 99 |     92: 175.0,
100 |     93: 175.0,
101 |     94: 175.0,
102 |     95: 175.0,
103 |     96: float("NaN"),
104 |     97: float("NaN"),
105 |     98: float("NaN"),
106 |     99: float("NaN"),
107 |     100: float("NaN"),
108 | }
109 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/datasets/single_point_lmdb.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import os
 9 | import pickle
10 | 
11 | import lmdb
12 | from torch.utils.data import Dataset
13 | 
14 | from ocpmodels.common.registry import registry
15 | 
16 | 
17 | @registry.register_dataset("single_point_lmdb")
18 | class SinglePointLmdbDataset(Dataset):
19 |     r"""Dataset class to load from LMDB files containing single point computations.
20 |     Useful for Initial Structure to Relaxed Energy (IS2RE) task.
21 | 
22 |     Args:
23 |         config (dict): Dataset configuration
24 |         transform (callable, optional): Data transform function.
25 |             (default: :obj:`None`)
26 |     """
27 | 
28 |     def __init__(self, config, transform=None):
29 |         super(SinglePointLmdbDataset, self).__init__()
30 | 
31 |         self.config = config
32 | 
33 |         self.db_path = self.config["src"]
34 |         assert os.path.isfile(self.db_path), "{} not found".format(
35 |             self.db_path
36 |         )
37 | 
38 |         self.env = self.connect_db(self.db_path)
39 | 
40 |         self._keys = [
41 |             f"{j}".encode("ascii") for j in range(self.env.stat()["entries"])
42 |         ]
43 |         self.transform = transform
44 | 
45 |     def __len__(self):
46 |         return len(self._keys)
47 | 
48 |     def __getitem__(self, idx):
49 |         # Return features.
50 |         datapoint_pickled = self.env.begin().get(self._keys[idx])
51 |         data_object = pickle.loads(datapoint_pickled)
52 |         data_object = (
53 |             data_object
54 |             if self.transform is None
55 |             else self.transform(data_object)
56 |         )
57 | 
58 |         return data_object
59 | 
60 |     def connect_db(self, lmdb_path=None):
61 |         env = lmdb.open(
62 |             lmdb_path,
63 |             subdir=False,
64 |             readonly=True,
65 |             lock=False,
66 |             readahead=False,
67 |             meminit=False,
68 |             max_readers=1,
69 |         )
70 |         return env
71 | 
72 |     def close_db(self):
73 |         self.env.close()
74 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/models/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | __all__ = [
 7 |     "BaseModel",
 8 |     "CGCNN",
 9 |     "DimeNet",
10 |     "DimeNetPlusPlus",
11 |     "SchNet",
12 |     "ForceNet",
13 | ]
14 | 
15 | from .base import BaseModel
16 | from .cgcnn import CGCNN
17 | from .dimenet import DimeNetWrap as DimeNet
18 | from .dimenet_plus_plus import DimeNetPlusPlusWrap as DimeNetPlusPlus
19 | from .forcenet import ForceNet
20 | from .schnet import SchNetWrap as SchNet
21 | 
22 | DimeNet.__module__ = __name__
23 | DimeNet.__name__ = "DimeNet"
24 | 
25 | DimeNetPlusPlus.__module__ = __name__
26 | DimeNetPlusPlus.__name__ = "DimeNetPlusPlus"
27 | 
28 | SchNet.__module__ = __name__
29 | SchNet.__name__ = "SchNet"
30 | 
31 | ForceNet.__module__ = __name__
32 | ForceNet.__name__ = "ForceNet"
33 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/models/base.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import torch.nn as nn
 9 | 
10 | 
11 | class BaseModel(nn.Module):
12 |     def __init__(self, num_atoms=None, bond_feat_dim=None, num_targets=None):
13 |         super(BaseModel, self).__init__()
14 |         self.num_atoms = num_atoms
15 |         self.bond_feat_dim = bond_feat_dim
16 |         self.num_targets = num_targets
17 | 
18 |     def forward(self, data):
19 |         raise NotImplementedError
20 | 
21 |     @property
22 |     def num_params(self):
23 |         return sum(p.numel() for p in self.parameters())
24 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/models/utils/activations.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import torch
 9 | import torch.nn.functional as F
10 | 
11 | 
12 | class Act(torch.nn.Module):
13 |     def __init__(self, act, slope=0.05):
14 |         super(Act, self).__init__()
15 |         self.act = act
16 |         self.slope = slope
17 |         self.shift = torch.log(torch.tensor(2.0)).item()
18 | 
19 |     def forward(self, input):
20 |         if self.act == "relu":
21 |             return F.relu(input)
22 |         elif self.act == "leaky_relu":
23 |             return F.leaky_relu(input)
24 |         elif self.act == "sp":
25 |             return F.softplus(input, beta=1)
26 |         elif self.act == "leaky_sp":
27 |             return F.softplus(input, beta=1) - self.slope * F.relu(-input)
28 |         elif self.act == "elu":
29 |             return F.elu(input, alpha=1)
30 |         elif self.act == "leaky_elu":
31 |             return F.elu(input, alpha=1) - self.slope * F.relu(-input)
32 |         elif self.act == "ssp":
33 |             return F.softplus(input, beta=1) - self.shift
34 |         elif self.act == "leaky_ssp":
35 |             return (
36 |                 F.softplus(input, beta=1)
37 |                 - self.slope * F.relu(-input)
38 |                 - self.shift
39 |             )
40 |         elif self.act == "tanh":
41 |             return torch.tanh(input)
42 |         elif self.act == "leaky_tanh":
43 |             return torch.tanh(input) + self.slope * input
44 |         elif self.act == "swish":
45 |             return torch.sigmoid(input) * input
46 |         else:
47 |             raise RuntimeError(f"Undefined activation called {self.act}")
48 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/modules/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 | 
4 | This source code is licensed under the MIT license found in the
5 | LICENSE file in the root directory of this source tree.
6 | """
7 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/modules/loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | 
 4 | from ocpmodels.common import distutils
 5 | 
 6 | 
 7 | class L2MAELoss(nn.Module):
 8 |     def __init__(self, reduction="mean"):
 9 |         super().__init__()
10 |         self.reduction = reduction
11 |         assert reduction in ["mean", "sum"]
12 | 
13 |     def forward(self, input: torch.Tensor, target: torch.Tensor):
14 |         dists = torch.norm(input - target, p=2, dim=-1)
15 |         if self.reduction == "mean":
16 |             return torch.mean(dists)
17 |         elif self.reduction == "sum":
18 |             return torch.sum(dists)
19 | 
20 | 
21 | class DDPLoss(nn.Module):
22 |     def __init__(self, loss_fn, reduction="mean"):
23 |         super().__init__()
24 |         self.loss_fn = loss_fn
25 |         self.loss_fn.reduction = "sum"
26 |         self.reduction = reduction
27 |         assert reduction in ["mean", "sum"]
28 | 
29 |     def forward(self, input: torch.Tensor, target: torch.Tensor):
30 |         loss = self.loss_fn(input, target)
31 |         if self.reduction == "mean":
32 |             num_samples = input.shape[0]
33 |             num_samples = distutils.all_reduce(
34 |                 num_samples, device=input.device
35 |             )
36 |             # Multiply by world size since gradients are averaged
37 |             # across DDP replicas
38 |             return loss * distutils.get_world_size() / num_samples
39 |         else:
40 |             return loss
41 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/modules/normalizer.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import torch
 9 | 
10 | 
11 | class Normalizer(object):
12 |     """Normalize a Tensor and restore it later."""
13 | 
14 |     def __init__(self, tensor=None, mean=None, std=None, device=None):
15 |         """tensor is taken as a sample to calculate the mean and std"""
16 |         if tensor is None and mean is None:
17 |             return
18 | 
19 |         if device is None:
20 |             device = "cpu"
21 | 
22 |         if tensor is not None:
23 |             self.mean = torch.mean(tensor, dim=0).to(device)
24 |             self.std = torch.std(tensor, dim=0).to(device)
25 |             return
26 | 
27 |         if mean is not None and std is not None:
28 |             self.mean = torch.tensor(mean).to(device)
29 |             self.std = torch.tensor(std).to(device)
30 | 
31 |     def to(self, device):
32 |         self.mean = self.mean.to(device)
33 |         self.std = self.std.to(device)
34 | 
35 |     def norm(self, tensor):
36 |         return (tensor - self.mean) / self.std
37 | 
38 |     def denorm(self, normed_tensor):
39 |         return normed_tensor * self.std + self.mean
40 | 
41 |     def state_dict(self):
42 |         return {"mean": self.mean, "std": self.std}
43 | 
44 |     def load_state_dict(self, state_dict):
45 |         self.mean = state_dict["mean"].to(self.mean.device)
46 |         self.std = state_dict["std"].to(self.mean.device)
47 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/modules/scheduler.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | 
 3 | import torch.optim.lr_scheduler as lr_scheduler
 4 | 
 5 | from ocpmodels.common.utils import warmup_lr_lambda
 6 | 
 7 | 
 8 | class LRScheduler:
 9 |     """
10 |     Learning rate scheduler class for torch.optim learning rate schedulers
11 | 
12 |     Notes:
13 |         If no learning rate scheduler is specified in the config the default
14 |         scheduler is warmup_lr_lambda (ocpmodels.common.utils) not no scheduler,
15 |         this is for backward-compatibility reasons. To run without a lr scheduler
16 |         specify scheduler: "Null" in the optim section of the config.
17 | 
18 |     Args:
19 |         config (dict): Optim dict from the input config
20 |         optimizer (obj): torch optim object
21 |     """
22 | 
23 |     def __init__(self, optimizer, config):
24 |         self.optimizer = optimizer
25 |         self.config = config.copy()
26 |         if "scheduler" in self.config:
27 |             self.scheduler_type = self.config["scheduler"]
28 |         else:
29 |             self.scheduler_type = "LambdaLR"
30 |             scheduler_lambda_fn = lambda x: warmup_lr_lambda(x, self.config)
31 |             self.config["lr_lambda"] = scheduler_lambda_fn
32 | 
33 |         if self.scheduler_type != "Null":
34 |             self.scheduler = getattr(lr_scheduler, self.scheduler_type)
35 |             scheduler_args = self.filter_kwargs(config)
36 |             self.scheduler = self.scheduler(optimizer, **scheduler_args)
37 | 
38 |     def step(self, metrics=None, epoch=None):
39 |         if self.scheduler_type == "Null":
40 |             return
41 |         if self.scheduler_type == "ReduceLROnPlateau":
42 |             if metrics is None:
43 |                 raise Exception(
44 |                     "Validation set required for ReduceLROnPlateau."
45 |                 )
46 |             self.scheduler.step(metrics)
47 |         else:
48 |             self.scheduler.step()
49 | 
50 |     def filter_kwargs(self, config):
51 |         # adapted from https://stackoverflow.com/questions/26515595/
52 |         sig = inspect.signature(self.scheduler)
53 |         filter_keys = [
54 |             param.name
55 |             for param in sig.parameters.values()
56 |             if param.kind == param.POSITIONAL_OR_KEYWORD
57 |         ]
58 |         filter_keys.remove("optimizer")
59 |         scheduler_args = {
60 |             arg: self.config[arg] for arg in self.config if arg in filter_keys
61 |         }
62 |         return scheduler_args
63 | 
64 |     def get_lr(self):
65 |         for group in self.optimizer.param_groups:
66 |             return group["lr"]
67 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 | 
4 | This source code is licensed under the MIT license found in the
5 | LICENSE file in the root directory of this source tree.
6 | """
7 | 
8 | from .atoms_to_graphs import AtomsToGraphs
9 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates.
2 | #
3 | # This source code is licensed under the MIT license found in the
4 | # LICENSE file in the root directory of this source tree.
5 | 
6 | __all__ = ["TrainTask", "PredictTask", "ValidateTask", "RelxationTask"]
7 | 
8 | from .task import PredictTask, RelxationTask, TrainTask, ValidateTask
9 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/tasks/task.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import os
 9 | 
10 | from ocpmodels.common.registry import registry
11 | from ocpmodels.trainers.forces_trainer import ForcesTrainer
12 | 
13 | 
14 | class BaseTask:
15 |     def __init__(self, config):
16 |         self.config = config
17 | 
18 |     def setup(self, trainer):
19 |         self.trainer = trainer
20 |         if self.config["checkpoint"] is not None:
21 |             self.trainer.load_checkpoint(self.config["checkpoint"])
22 | 
23 |         # save checkpoint path to runner state for slurm resubmissions
24 |         self.chkpt_path = os.path.join(
25 |             self.trainer.config["cmd"]["checkpoint_dir"], "checkpoint.pt"
26 |         )
27 | 
28 |     def run(self):
29 |         raise NotImplementedError
30 | 
31 | 
32 | @registry.register_task("train")
33 | class TrainTask(BaseTask):
34 |     def run(self):
35 |         self.trainer.train(
36 |             disable_eval_tqdm=self.config.get("hide_eval_progressbar", False)
37 |         )
38 | 
39 | 
40 | @registry.register_task("predict")
41 | class PredictTask(BaseTask):
42 |     def run(self):
43 |         assert (
44 |             self.trainer.test_loader is not None
45 |         ), "Test dataset is required for making predictions"
46 |         assert self.config["checkpoint"]
47 |         results_file = "predictions"
48 |         self.trainer.predict(
49 |             self.trainer.test_loader,
50 |             results_file=results_file,
51 |             disable_tqdm=self.config.get("hide_eval_progressbar", False),
52 |         )
53 | 
54 | 
55 | @registry.register_task("validate")
56 | class ValidateTask(BaseTask):
57 |     def run(self):
58 |         # Note that the results won't be precise on multi GPUs due to padding of extra images (although the difference should be minor)
59 |         assert (
60 |             self.trainer.val_loader is not None
61 |         ), "Val dataset is required for making predictions"
62 |         assert self.config["checkpoint"]
63 |         self.trainer.validate(
64 |             split="val",
65 |             disable_tqdm=self.config.get("hide_eval_progressbar", False),
66 |         )
67 | 
68 | 
69 | @registry.register_task("run-relaxations")
70 | class RelxationTask(BaseTask):
71 |     def run(self):
72 |         assert isinstance(
73 |             self.trainer, ForcesTrainer
74 |         ), "Relaxations are only possible for ForcesTrainer"
75 |         assert (
76 |             self.trainer.relax_dataset is not None
77 |         ), "Relax dataset is required for making predictions"
78 |         assert self.config["checkpoint"]
79 |         self.trainer.run_relaxations()
80 | 


--------------------------------------------------------------------------------
/open_catalyst/ocpmodels/trainers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates.
 2 | #
 3 | # This source code is licensed under the MIT license found in the
 4 | # LICENSE file in the root directory of this source tree.
 5 | 
 6 | __all__ = [
 7 |     "BaseTrainer",
 8 |     "ForcesTrainer",
 9 |     "EnergyTrainer",
10 | ]
11 | 
12 | from .base_trainer import BaseTrainer
13 | from .energy_trainer import EnergyTrainer
14 | from .forces_trainer import ForcesTrainer
15 | 


--------------------------------------------------------------------------------
/open_catalyst/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.black]
 2 | line-length = 79
 3 | include = '\.pyi?$'
 4 | exclude = '''
 5 | /(
 6 |     \.git
 7 |   | \.hg
 8 |   | \.mypy_cache
 9 |   | \.tox
10 |   | \.venv
11 |   | _build
12 |   | buck-out
13 |   | build
14 |   | dist
15 | )/
16 | '''
17 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 | 
4 | This source code is licensed under the MIT license found in the
5 | LICENSE file in the root directory of this source tree.
6 | """
7 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/hpo/README.md:
--------------------------------------------------------------------------------
 1 | # Running Hyperparameter Optimization with Ray Tune
 2 | 
 3 | # Installation
 4 | `pip install ray ray[tune]`
 5 | 
 6 | ## Model config considerations
 7 | 
 8 | The current Ray Tune implementation uses the standard OCP config. However, there are a number of config settings that require additional consideration.
 9 | 
10 | ```
11 | logger: None
12 | is_hpo: True
13 | 
14 | optim:
15 |   …
16 |   eval_every: (int) number of steps
17 |   checkpoint_every: (int: optional) number of steps
18 | ```
19 | The first two are easily set. The logger is set to None because Ray Tune internally handles the logging.
20 | 
21 | The `eval_every` setting is case specific and will likely require some experimentation. The `eval_every` flag sets how often the validation set is run in number of steps. Depending on the OCP model and dataset of interest, training for a single epoch can take a substantial amount of time. However, to take full advantage of HPO methods that minimize compute by terminating trials that are not promising, such as successive halving, communication of train and val metrics need to happen on shorter timescales. Paraphrasing the Ray Tune docs, `eval_every` should be set large enough to avoid overheads but short enough to report progress periodically — minutes timescale recommended.
22 | 
23 | The `eval_every` setting is only available for the force trainer so when using the energy trainer validation will be run and reporting to Ray Tune will occur on a per epoch basis.
24 | 
25 | The `checkpoint_every` setting determines how frequently, in steps, Ray Tune will write a checkpoint. Checkpointing can create a lot of overhead for certain HPO methods so do not do it too frequently. The default behavior is no checkpointing.
26 | 
27 | ## Usage with Slurm
28 | 
29 | 1. Make necessary changes to `run_tune.py` and `slurm/submit-ray-cluster.sbatch`
30 | 
31 |     Example `run_tune.py` updates
32 |     - choose search and scheduler algorithms and set associated parameters (see [Ray Tune docs](https://docs.ray.io/en/master/tune/index.html) for details)
33 |     - set the resources to use per individual trial
34 | 
35 |     Example `slurm/submit-ray-cluster.sbatch` updates
36 |     - load modules or set conda env
37 |     - change the total run time and resources to use
38 | 
39 | 2. submit using `sbatch slurm/submit-ray-cluster.sbatch`
40 | 
41 | Slurm scripts taken from https://github.com/NERSC/slurm-ray-cluster
42 | 
43 | For usage with other cluster managers or cloud resources please refer to the
44 | [Distributed Ray Docs](https://docs.ray.io/en/master/cluster/index.html#)
45 | 
46 | ## Examples
47 | 
48 | 1. Asynchronous Successive Halving — `ocp/scripts/hpo/run_tune.py`
49 | 2. Population Based Training — `ocp/scripts/hpo/run_tune_pbt.py`
50 | 
51 | ## Testing/Debugging Ray Tune
52 | 
53 | - In `run_tune.py` set `ray.init(local_mode=True)`
54 | - run `python path_to/run_tune.py --mode train --config-yml path_to/config --run_dir path_to_run_dir`
55 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/hpo/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 | 
4 | This source code is licensed under the MIT license found in the
5 | LICENSE file in the root directory of this source tree.
6 | """
7 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/hpo/slurm/start-head.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | export LC_ALL=C.UTF-8
 4 | export LANG=C.UTF-8
 5 | 
 6 | echo "starting ray head node"
 7 | # Launch the head node
 8 | ray start --head --node-ip-address=$1 --port=6379 --redis-password=$2
 9 | sleep infinity
10 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/hpo/slurm/start-worker.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | export LC_ALL=C.UTF-8
4 | export LANG=C.UTF-8
5 | 
6 | echo "starting ray worker node"
7 | ray start --address $1 --redis-password=$2
8 | sleep infinity
9 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/hpo/slurm/submit-ray-cluster.sbatch:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #SBATCH -C gpu
 4 | #SBATCH --time=00:10:00
 5 | 
 6 | ### This script works for any number of nodes, Ray will find and manage all resources
 7 | #SBATCH --nodes=1
 8 | 
 9 | ### Give all resources to a single Ray task, ray can manage the resources internally
10 | #SBATCH --ntasks-per-node=1
11 | #SBATCH --gpus-per-task=8
12 | #SBATCH --cpus-per-task=80
13 | 
14 | 
15 | # Load modules or your own conda environment here
16 | # e.g. conda activate ocp-models
17 | 
18 | ################# DON NOT CHANGE THINGS HERE UNLESS YOU KNOW WHAT YOU ARE DOING ###############
19 | # This script is a modification to the implementation suggest by gregSchwartz18 here:
20 | # https://github.com/ray-project/ray/issues/826#issuecomment-522116599
21 | redis_password=$(uuidgen)
22 | export redis_password
23 | 
24 | nodes=$(scontrol show hostnames $SLURM_JOB_NODELIST) # Getting the node names
25 | nodes_array=( $nodes )
26 | 
27 | node_1=${nodes_array[0]}
28 | ip=$(srun --nodes=1 --ntasks=1 -w $node_1 hostname --ip-address) # making redis-address
29 | port=6379
30 | ip_head=$ip:$port
31 | export ip_head
32 | echo "IP Head: $ip_head"
33 | 
34 | echo "STARTING HEAD at $node_1"
35 | srun --nodes=1 --ntasks=1 -w $node_1 start-head.sh $ip $redis_password &
36 | sleep 45
37 | 
38 | worker_num=$(($SLURM_JOB_NUM_NODES - 1)) #number of nodes other than the head node
39 | for ((  i=1; i<=$worker_num; i++ ))
40 | do
41 |   node_i=${nodes_array[$i]}
42 |   echo "STARTING WORKER $i at $node_i"
43 |   srun --nodes=1 --ntasks=1 -w $node_i start-worker.sh $ip_head $redis_password &
44 |   sleep 5
45 | done
46 | ##############################################################################################
47 | 
48 | #### call your code below
49 | # e.g. python path_to/run_tune.py --mode train --config-yml path_to/configs/s2ef/200k/forcenet/fn_forceonly.yml --run_dir path_to_run_dir
50 | exit
51 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/run_training.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script will run the actual training command with provided
 4 | # command line options. It is run by every rank and sets the per-rank
 5 | # environment variables needed for pytorch distributed initialization.
 6 | 
 7 | args=$@
 8 | id=${SLURM_JOB_NAME}-n${SLURM_NTASKS}-${SLURM_JOB_ID}
 9 | 
10 | export WORLD_SIZE=$SLURM_NTASKS
11 | export RANK=$SLURM_PROCID
12 | export LOCAL_RANK=$SLURM_LOCALID
13 | 
14 | python main.py --mode train \
15 |     --distributed \
16 |     --local_rank $LOCAL_RANK \
17 |     --identifier $id $args
18 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/train_cgpu.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -C gpu
 3 | #SBATCH -J ocp-cgpu
 4 | #SBATCH --ntasks-per-node=2
 5 | #SBATCH --gpus-per-task=1
 6 | #SBATCH --cpus-per-task=10
 7 | #SBATCH --time 4:00:00
 8 | #SBATCH -o logs/slurm-%x-%j.out
 9 | 
10 | args=$@
11 | 
12 | # Default settings
13 | : "${OCP_CONFIG:=configs/mlperf_hpc.yml}"
14 | 
15 | # Setup software
16 | conda activate ocp-dev
17 | module load cuda/11.1.1
18 | 
19 | # Distributed config
20 | export MASTER_ADDR=$(hostname)
21 | export MASTER_PORT=29504
22 | export NCCL_DEBUG=WARN
23 | export NCCL_SOCKET_IFNAME=eth
24 | export NCCL_IB_HCA=mlx5_0:1,mlx5_2:1,mlx5_4:1,mlx5_6:1
25 | 
26 | set -x
27 | srun -u -l scripts/run_training.sh --config-yml $OCP_CONFIG $args
28 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/train_cgpu_shifter.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -C gpu
 3 | #SBATCH -J ocp-cgpu
 4 | #SBATCH --image=sfarrell/mlperf-ocp:latest
 5 | #SBATCH --ntasks-per-node=2
 6 | #SBATCH --gpus-per-task=1
 7 | #SBATCH --cpus-per-task=10
 8 | #SBATCH --time 4:00:00
 9 | #SBATCH -o logs/slurm-%x-%j.out
10 | 
11 | args=$@
12 | 
13 | # Default settings
14 | : "${OCP_CONFIG:=configs/mlperf_hpc.yml}"
15 | 
16 | # Distributed config
17 | export MASTER_ADDR=$(hostname)
18 | export MASTER_PORT=29504
19 | export NCCL_DEBUG=WARN
20 | export NCCL_SOCKET_IFNAME=eth
21 | export NCCL_IB_HCA=mlx5_0:1,mlx5_2:1,mlx5_4:1,mlx5_6:1
22 | 
23 | set -x
24 | srun -l -u shifter scripts/run_training.sh --config-yml $OCP_CONFIG $args
25 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/train_pm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -C gpu
 3 | #SBATCH -J ocp-pm
 4 | #SBATCH --ntasks-per-node=4
 5 | #SBATCH --gpus-per-task=1
 6 | #SBATCH --cpus-per-task=32
 7 | #SBATCH --time 4:00:00
 8 | #SBATCH -o logs/slurm-%x-%j.out
 9 | 
10 | args=$@
11 | 
12 | # Default settings
13 | : "${OCP_CONFIG:=configs/mlperf_hpc_pm.yml}"
14 | 
15 | # Setup software
16 | module purge
17 | source $CONDA_INIT_SCRIPT
18 | conda activate ocp-dev
19 | module load cuda/11.1.1
20 | 
21 | # Distributed config
22 | export MASTER_ADDR=$(hostname)
23 | export MASTER_PORT=29504
24 | export NCCL_IB_DISABLE=1
25 | export NCCL_DEBUG=WARN
26 | export NCCL_SOCKET_IFNAME=hsn
27 | 
28 | set -x
29 | srun -l -u scripts/run_training.sh --config-yml $OCP_CONFIG $args
30 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/train_pm_shifter.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #SBATCH -C gpu
 3 | #SBATCH -J ocp-pm
 4 | #SBATCH -A nstaff_g
 5 | #SBATCH -q early_science
 6 | #SBATCH --image=sfarrell/mlperf-ocp:latest
 7 | #SBATCH --ntasks-per-node=4
 8 | #SBATCH --gpus-per-task=1
 9 | #SBATCH --cpus-per-task=32
10 | #SBATCH --gpu-bind=none
11 | #SBATCH --time 4:00:00
12 | #SBATCH -o logs/slurm-%x-%j.out
13 | 
14 | args=$@
15 | 
16 | # Default settings
17 | : "${OCP_CONFIG:=configs/mlperf_hpc_pm.yml}"
18 | 
19 | # Distributed config
20 | export MASTER_ADDR=$(hostname)
21 | export MASTER_PORT=29504
22 | export NCCL_DEBUG=WARN
23 | export NCCL_SOCKET_IFNAME=hsn
24 | 
25 | set -x
26 | srun -l -u shifter scripts/run_training.sh --config-yml $OCP_CONFIG $args
27 | 


--------------------------------------------------------------------------------
/open_catalyst/scripts/uncompress.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Uncompresses downloaded S2EF datasets to be used by the LMDB preprocessing
 3 | script - preprocess_ef.py
 4 | """
 5 | 
 6 | import argparse
 7 | import glob
 8 | import lzma
 9 | import multiprocessing as mp
10 | import os
11 | 
12 | from tqdm import tqdm
13 | 
14 | 
15 | def read_lzma(inpfile, outfile):
16 |     with open(inpfile, "rb") as f:
17 |         contents = lzma.decompress(f.read())
18 |         with open(outfile, "wb") as op:
19 |             op.write(contents)
20 | 
21 | 
22 | def decompress_list_of_files(ip_op_pair):
23 |     ip_file, op_file = ip_op_pair
24 |     read_lzma(ip_file, op_file)
25 | 
26 | 
27 | def get_parser():
28 |     parser = argparse.ArgumentParser()
29 |     parser.add_argument(
30 |         "--ipdir", type=str, help="Path to compressed dataset directory"
31 |     )
32 |     parser.add_argument(
33 |         "--opdir", type=str, help="Directory path to uncompress files to"
34 |     )
35 |     parser.add_argument(
36 |         "--num-workers", type=int, help="# of processes to parallelize across"
37 |     )
38 |     return parser
39 | 
40 | 
41 | def main(args):
42 |     os.makedirs(args.opdir, exist_ok=True)
43 | 
44 |     filelist = glob.glob(os.path.join(args.ipdir, "*txt.xz")) + glob.glob(
45 |         os.path.join(args.ipdir, "*extxyz.xz")
46 |     )
47 |     ip_op_pairs = []
48 |     for i in filelist:
49 |         fname_base = os.path.basename(i)
50 |         ip_op_pairs.append((i, os.path.join(args.opdir, fname_base[:-3])))
51 | 
52 |     pool = mp.Pool(args.num_workers)
53 |     list(
54 |         tqdm(
55 |             pool.imap(decompress_list_of_files, ip_op_pairs),
56 |             total=len(ip_op_pairs),
57 |             desc=f"Uncompressing {args.ipdir}",
58 |         )
59 |     )
60 | 
61 | 
62 | if __name__ == "__main__":
63 |     parser = get_parser()
64 |     args = parser.parse_args()
65 |     main(args)
66 | 


--------------------------------------------------------------------------------
/open_catalyst/setup.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | from setuptools import find_packages, setup
 9 | 
10 | setup(
11 |     name="ocp-models",
12 |     version="0.0.3",
13 |     description="Machine learning models for use in catalysis as part of the Open Catalyst Project",
14 |     url="https://github.com/Open-Catalyst-Project/ocp",
15 |     packages=find_packages(),
16 |     include_package_data=True,
17 | )
18 | 


--------------------------------------------------------------------------------
/open_catalyst/submit.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | conda activate ocp-models
 4 | export NCCL_SOCKET_IFNAME=eth
 5 | id=cgpu-005-n64
 6 | 
 7 | set -x
 8 | python main.py --config-yml configs/mlperf_hpc.yml \
 9 |     --mode train --distributed --submit --amp \
10 |     --identifier $id \
11 |     --num-gpus 8 \
12 |     --num-workers 8 \
13 |     --num-nodes 8 \
14 |     --slurm-timeout 8
15 | 


--------------------------------------------------------------------------------
/open_catalyst/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 | 
4 | This source code is licensed under the MIT license found in the
5 | LICENSE file in the root directory of this source tree.
6 | """
7 | 


--------------------------------------------------------------------------------
/open_catalyst/tests/models/test_dimenet.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import os
 9 | import random
10 | 
11 | import numpy as np
12 | import pytest
13 | import torch
14 | from ase.io import read
15 | from torch_geometric.data import Data
16 | 
17 | from ocpmodels.common.transforms import RandomRotate
18 | from ocpmodels.datasets import data_list_collater
19 | from ocpmodels.models import DimeNet
20 | from ocpmodels.preprocessing import AtomsToGraphs
21 | 
22 | 
23 | @pytest.fixture(scope="class")
24 | def load_data(request):
25 |     atoms = read(
26 |         os.path.join(os.path.dirname(os.path.abspath(__file__)), "atoms.json"),
27 |         index=0,
28 |         format="json",
29 |     )
30 |     a2g = AtomsToGraphs(
31 |         max_neigh=200,
32 |         radius=6,
33 |         r_energy=True,
34 |         r_forces=True,
35 |         r_distances=True,
36 |     )
37 |     data_list = a2g.convert_all([atoms])
38 |     request.cls.data = data_list[0]
39 | 
40 | 
41 | @pytest.fixture(scope="class")
42 | def load_model(request):
43 |     torch.manual_seed(4)
44 |     model = DimeNet(
45 |         None,
46 |         32,
47 |         1,
48 |         cutoff=6.0,
49 |         regress_forces=True,
50 |         use_pbc=False,
51 |     )
52 |     request.cls.model = model
53 | 
54 | 
55 | @pytest.mark.usefixtures("load_data")
56 | @pytest.mark.usefixtures("load_model")
57 | class TestDimeNet:
58 |     def test_rotation_invariance(self):
59 |         random.seed(1)
60 |         data = self.data
61 | 
62 |         # Sampling a random rotation within [-180, 180] for all axes.
63 |         transform = RandomRotate([-180, 180], [0, 1, 2])
64 |         data_rotated, rot, inv_rot = transform(data.clone())
65 |         assert not np.array_equal(data.pos, data_rotated.pos)
66 | 
67 |         # Pass it through the model.
68 |         batch = data_list_collater([data, data_rotated])
69 |         out = self.model(batch)
70 | 
71 |         # Compare predicted energies and forces (after inv-rotation).
72 |         energies = out[0].detach()
73 |         np.testing.assert_almost_equal(energies[0], energies[1], decimal=5)
74 | 
75 |         forces = out[1].detach()
76 |         np.testing.assert_array_almost_equal(
77 |             forces[: forces.shape[0] // 2],
78 |             torch.matmul(forces[forces.shape[0] // 2 :], inv_rot),
79 |             decimal=5,
80 |         )
81 | 
82 |     def test_energy_force_shape(self):
83 |         data = self.data
84 | 
85 |         # Pass it through the model.
86 |         out = self.model(data_list_collater([data]))
87 | 
88 |         # Compare shape of predicted energies, forces.
89 |         energy = out[0].detach()
90 |         np.testing.assert_equal(energy.shape, (1, 1))
91 | 
92 |         forces = out[1].detach()
93 |         np.testing.assert_equal(forces.shape, (data.pos.shape[0], 3))
94 | 


--------------------------------------------------------------------------------
/open_catalyst/tests/models/test_dimenetpp.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import logging
 9 | import os
10 | import random
11 | 
12 | import numpy as np
13 | import pytest
14 | import torch
15 | from ase.io import read
16 | from torch_geometric.data import Data
17 | 
18 | from ocpmodels.common.transforms import RandomRotate
19 | from ocpmodels.datasets import data_list_collater
20 | from ocpmodels.models import DimeNetPlusPlus
21 | from ocpmodels.preprocessing import AtomsToGraphs
22 | 
23 | 
24 | @pytest.fixture(scope="class")
25 | def load_data(request):
26 |     atoms = read(
27 |         os.path.join(os.path.dirname(os.path.abspath(__file__)), "atoms.json"),
28 |         index=0,
29 |         format="json",
30 |     )
31 |     a2g = AtomsToGraphs(
32 |         max_neigh=200,
33 |         radius=6,
34 |         r_energy=True,
35 |         r_forces=True,
36 |         r_distances=True,
37 |     )
38 |     data_list = a2g.convert_all([atoms])
39 |     request.cls.data = data_list[0]
40 | 
41 | 
42 | @pytest.fixture(scope="class")
43 | def load_model(request):
44 |     torch.manual_seed(4)
45 |     model = DimeNetPlusPlus(
46 |         None,
47 |         32,
48 |         1,
49 |         cutoff=6.0,
50 |         regress_forces=True,
51 |         use_pbc=False,
52 |     )
53 |     request.cls.model = model
54 | 
55 | 
56 | @pytest.mark.usefixtures("load_data")
57 | @pytest.mark.usefixtures("load_model")
58 | class TestDimeNet:
59 |     def test_rotation_invariance(self):
60 |         random.seed(1)
61 |         data = self.data
62 | 
63 |         # Sampling a random rotation within [-180, 180] for all axes.
64 |         transform = RandomRotate([-180, 180], [0, 1, 2])
65 |         data_rotated, rot, inv_rot = transform(data.clone())
66 |         assert not np.array_equal(data.pos, data_rotated.pos)
67 | 
68 |         # Pass it through the model.
69 |         batch = data_list_collater([data, data_rotated])
70 |         out = self.model(batch)
71 | 
72 |         # Compare predicted energies and forces (after inv-rotation).
73 |         energies = out[0].detach()
74 |         np.testing.assert_almost_equal(energies[0], energies[1], decimal=5)
75 | 
76 |         forces = out[1].detach()
77 |         logging.info(forces)
78 |         np.testing.assert_array_almost_equal(
79 |             forces[: forces.shape[0] // 2],
80 |             torch.matmul(forces[forces.shape[0] // 2 :], inv_rot),
81 |             decimal=5,
82 |         )
83 | 
84 |     def test_energy_force_shape(self):
85 |         data = self.data
86 | 
87 |         # Pass it through the model.
88 |         out = self.model(data_list_collater([data]))
89 | 
90 |         # Compare shape of predicted energies, forces.
91 |         energy = out[0].detach()
92 |         np.testing.assert_equal(energy.shape, (1, 1))
93 | 
94 |         forces = out[1].detach()
95 |         np.testing.assert_equal(forces.shape, (data.pos.shape[0], 3))
96 | 


--------------------------------------------------------------------------------
/open_catalyst/tests/models/test_forcenet.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import os
 9 | import random
10 | 
11 | import numpy as np
12 | import pytest
13 | import torch
14 | from ase.io import read
15 | from torch_geometric.data import Data
16 | 
17 | from ocpmodels.common.transforms import RandomRotate
18 | from ocpmodels.datasets import data_list_collater
19 | from ocpmodels.models import ForceNet
20 | from ocpmodels.preprocessing import AtomsToGraphs
21 | 
22 | 
23 | @pytest.fixture(scope="class")
24 | def load_data(request):
25 |     atoms = read(
26 |         os.path.join(os.path.dirname(os.path.abspath(__file__)), "atoms.json"),
27 |         index=0,
28 |         format="json",
29 |     )
30 |     a2g = AtomsToGraphs(
31 |         max_neigh=200,
32 |         radius=6,
33 |         r_energy=True,
34 |         r_forces=True,
35 |         r_distances=True,
36 |     )
37 |     data_list = a2g.convert_all([atoms])
38 |     request.cls.data = data_list[0]
39 | 
40 | 
41 | @pytest.fixture(scope="class")
42 | def load_model(request):
43 |     model = ForceNet(
44 |         None,
45 |         32,
46 |         1,
47 |         cutoff=6.0,
48 |     )
49 |     request.cls.model = model
50 | 
51 | 
52 | @pytest.mark.usefixtures("load_data")
53 | @pytest.mark.usefixtures("load_model")
54 | class TestForceNet:
55 |     def test_energy_force_shape(self):
56 |         data = self.data
57 | 
58 |         # Pass it through the model.
59 |         out = self.model(data_list_collater([data]))
60 | 
61 |         # Compare shape of predicted energies, forces.
62 |         energy = out[0].detach()
63 |         np.testing.assert_equal(energy.shape, (1, 1))
64 | 
65 |         forces = out[1].detach()
66 |         np.testing.assert_equal(forces.shape, (data.pos.shape[0], 3))
67 | 


--------------------------------------------------------------------------------
/open_catalyst/tests/models/test_schnet.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import os
 9 | import random
10 | 
11 | import numpy as np
12 | import pytest
13 | import torch
14 | from ase.io import read
15 | from torch_geometric.data import Batch, Data
16 | 
17 | from ocpmodels.common.transforms import RandomRotate
18 | from ocpmodels.datasets import data_list_collater
19 | from ocpmodels.models import SchNet
20 | from ocpmodels.preprocessing import AtomsToGraphs
21 | 
22 | 
23 | @pytest.fixture(scope="class")
24 | def load_data(request):
25 |     atoms = read(
26 |         os.path.join(os.path.dirname(os.path.abspath(__file__)), "atoms.json"),
27 |         index=0,
28 |         format="json",
29 |     )
30 |     a2g = AtomsToGraphs(
31 |         max_neigh=200,
32 |         radius=6,
33 |         r_energy=True,
34 |         r_forces=True,
35 |         r_distances=True,
36 |     )
37 |     data_list = a2g.convert_all([atoms])
38 |     request.cls.data = data_list[0]
39 | 
40 | 
41 | @pytest.fixture(scope="class")
42 | def load_model(request):
43 |     torch.manual_seed(4)
44 |     model = SchNet(None, 32, 1, cutoff=6.0, regress_forces=True, use_pbc=True)
45 |     request.cls.model = model
46 | 
47 | 
48 | @pytest.mark.usefixtures("load_data")
49 | @pytest.mark.usefixtures("load_model")
50 | class TestSchNet:
51 |     def test_rotation_invariance(self):
52 |         random.seed(1)
53 |         data = self.data
54 | 
55 |         # Sampling a random rotation within [-180, 180] for all axes.
56 |         transform = RandomRotate([-180, 180], [0, 1, 2])
57 |         data_rotated, rot, inv_rot = transform(data.clone())
58 |         assert not np.array_equal(data.pos, data_rotated.pos)
59 | 
60 |         # Pass it through the model.
61 |         batch = data_list_collater([data, data_rotated])
62 |         out = self.model(batch)
63 | 
64 |         # Compare predicted energies and forces (after inv-rotation).
65 |         energies = out[0].detach()
66 |         np.testing.assert_almost_equal(energies[0], energies[1], decimal=5)
67 | 
68 |         forces = out[1].detach()
69 |         np.testing.assert_array_almost_equal(
70 |             forces[: forces.shape[0] // 2],
71 |             torch.matmul(forces[forces.shape[0] // 2 :], inv_rot),
72 |             decimal=5,
73 |         )
74 | 
75 |     def test_energy_force_shape(self):
76 |         data = self.data
77 | 
78 |         # Pass it through the model.
79 |         out = self.model(data_list_collater([data]))
80 | 
81 |         # Compare shape of predicted energies, forces.
82 |         energy = out[0].detach()
83 |         np.testing.assert_equal(energy.shape, (1, 1))
84 | 
85 |         forces = out[1].detach()
86 |         np.testing.assert_equal(forces.shape, (data.pos.shape[0], 3))
87 | 


--------------------------------------------------------------------------------
/open_catalyst/tests/preprocessing/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates.
3 | 
4 | This source code is licensed under the MIT license found in the
5 | LICENSE file in the root directory of this source tree.
6 | """
7 | 


--------------------------------------------------------------------------------
/open_catalyst/tests/preprocessing/test_pbc.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright (c) Facebook, Inc. and its affiliates.
 3 | 
 4 | This source code is licensed under the MIT license found in the
 5 | LICENSE file in the root directory of this source tree.
 6 | """
 7 | 
 8 | import os
 9 | 
10 | import ase
11 | import numpy as np
12 | import pytest
13 | from ase.io import read
14 | from pymatgen.io.ase import AseAtomsAdaptor
15 | 
16 | from ocpmodels.common.utils import get_pbc_distances
17 | from ocpmodels.datasets import data_list_collater
18 | from ocpmodels.preprocessing import AtomsToGraphs
19 | 
20 | 
21 | @pytest.fixture(scope="class")
22 | def load_data(request):
23 |     atoms = read(
24 |         os.path.join(os.path.dirname(os.path.abspath(__file__)), "atoms.json"),
25 |         index=0,
26 |         format="json",
27 |     )
28 |     a2g = AtomsToGraphs(
29 |         max_neigh=12,
30 |         radius=6,
31 |         r_energy=True,
32 |         r_forces=True,
33 |         r_distances=True,
34 |     )
35 |     data_list = a2g.convert_all([atoms])
36 |     request.cls.data = data_list[0]
37 | 
38 | 
39 | @pytest.mark.usefixtures("load_data")
40 | class TestPBC:
41 |     def test_pbc_distances(self):
42 |         data = self.data
43 |         batch = data_list_collater([data] * 5)
44 |         out = get_pbc_distances(
45 |             batch.pos,
46 |             batch.edge_index,
47 |             batch.cell,
48 |             batch.cell_offsets,
49 |             batch.neighbors,
50 |         )
51 |         edge_index, pbc_distances = out["edge_index"], out["distances"]
52 | 
53 |         np.testing.assert_array_equal(
54 |             batch.edge_index,
55 |             edge_index,
56 |         )
57 |         np.testing.assert_array_almost_equal(batch.distances, pbc_distances)
58 | 


--------------------------------------------------------------------------------
/openfold/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 NVIDIA CORPORATION
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:23.04-py3
16 | 
17 | FROM ${FROM_IMAGE_NAME}
18 | 
19 | ENV DEBIAN_FRONTEND=noninteractive
20 | 
21 | # Install pip requirements:
22 | RUN pip install \
23 |     biopython==1.79 \
24 |     Pympler==1.0.1 \
25 |     dacite==1.8.0 \
26 |     "git+https://github.com/mlcommons/logging.git@2.1.0" \
27 |     "git+https://github.com/NVIDIA/mlperf-common.git"
28 | 
29 | # Build and install Kalign from source:
30 | RUN wget -q -P /workspace/downloads https://github.com/TimoLassmann/kalign/archive/refs/tags/v3.3.5.tar.gz \
31 |     && tar -xzf /workspace/downloads/v3.3.5.tar.gz --directory /workspace \
32 |     && rm -r /workspace/downloads \
33 |     && ls /workspace \
34 |     && cd /workspace/kalign-3.3.5 \
35 |     && mkdir build \
36 |     && cd build \
37 |     && cmake .. \
38 |     && make -j \
39 |     && make install \
40 |     && rm -r /workspace/kalign-3.3.5
41 | 
42 | # Copy OpenFold source code into the docker image:
43 | COPY . /workspace/openfold
44 | WORKDIR /workspace/openfold
45 | 
46 | # Install OpenFold source code package in editable mode:
47 | RUN pip install -e .
48 | 


--------------------------------------------------------------------------------
/openfold/NOTICE:
--------------------------------------------------------------------------------
1 | This repository defines the reference implementation for the MLPerf HPC OpenFold benchmark.
2 | 
3 | This repository includes software from https://github.com/deepmind/alphafold licensed under the Apache-2.0 License.
4 | 
5 | This repository includes software from https://github.com/aqlaboratory/openfold licensed under the Apache-2.0 License.
6 | 


--------------------------------------------------------------------------------
/openfold/openfold/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/hpc/2c627d457004eff77a014205b3151ed48a6fa149/openfold/openfold/__init__.py


--------------------------------------------------------------------------------
/openfold/openfold/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/hpc/2c627d457004eff77a014205b3151ed48a6fa149/openfold/openfold/data/__init__.py


--------------------------------------------------------------------------------
/openfold/openfold/data/alignments.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 NVIDIA CORPORATION
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import json
16 | from pathlib import Path
17 | from typing import Dict
18 | 
19 | 
20 | def load_alignments_super_index(
21 |     alignments_super_index_filepath: Path,
22 |     verbose: bool = False,
23 |     pprefix: str = "",
24 | ) -> Dict[str, dict]:
25 |     if verbose:
26 |         print(f"{pprefix}Loading {repr(alignments_super_index_filepath)}...")
27 |     with open(alignments_super_index_filepath) as f:
28 |         alignments_super_index = json.load(f)
29 |     if verbose:
30 |         print(
31 |             f"{pprefix}alignments_super_index ({len(alignments_super_index)})"
32 |             f" loaded from {repr(alignments_super_index_filepath)} successfully!"
33 |         )
34 |     return alignments_super_index
35 | 
36 | 
37 | def load_alignments(
38 |     alignments_super_index: Dict[str, dict],
39 |     alignments_dirpath: Path,
40 |     key: str,
41 | ) -> dict:
42 |     alignments_index = alignments_super_index[key]
43 |     alignments_db_path = alignments_dirpath / alignments_index["db"]
44 |     alignments = {}
45 |     with open(alignments_db_path, "rb") as f:
46 |         for file_index in alignments_index["files"]:
47 |             filename, start, size = file_index
48 |             f.seek(start)
49 |             content = f.read(size).decode("utf-8")
50 |             alignments[filename] = content
51 |     return alignments
52 | 


--------------------------------------------------------------------------------
/openfold/openfold/data/resources/README.md:
--------------------------------------------------------------------------------
1 | # Resources
2 | 
3 | ## 1. `stereo_chemical_props.txt`
4 | 
5 | source: https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
6 | 


--------------------------------------------------------------------------------
/openfold/openfold/data/resources/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/hpc/2c627d457004eff77a014205b3151ed48a6fa149/openfold/openfold/data/resources/__init__.py


--------------------------------------------------------------------------------
/openfold/openfold/data/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/hpc/2c627d457004eff77a014205b3151ed48a6fa149/openfold/openfold/data/tools/__init__.py


--------------------------------------------------------------------------------
/openfold/openfold/log_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 NVIDIA CORPORATION
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import json
16 | from pathlib import Path
17 | from typing import List
18 | 
19 | import pandas as pd
20 | 
21 | 
22 | def save_logs(logs: List[dict], outpath: Path, append: bool) -> None:
23 |     outpath.parent.mkdir(parents=True, exist_ok=True)
24 |     lines = []
25 |     for log in logs:
26 |         line = json.dumps(log)
27 |         lines.append(line)
28 |     outstr = "\n".join(lines) + "\n"
29 |     mode = "a" if append else "w"
30 |     with open(outpath, mode) as f:
31 |         f.write(outstr)
32 | 
33 | 
34 | def read_logs(
35 |     filepath: Path,
36 |     drop_overridden_iterations: bool = True,
37 | ) -> pd.DataFrame:
38 |     with open(filepath) as f:
39 |         logs = f.read().strip().split("\n")
40 |     logs = [json.loads(log) for log in logs]
41 |     logs_df = pd.DataFrame(logs)
42 |     if drop_overridden_iterations:
43 |         logs_df = logs_df.drop_duplicates("iteration", keep="last")
44 |         logs_df = logs_df.reset_index(drop=True).copy()
45 |     return logs_df
46 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlcommons/hpc/2c627d457004eff77a014205b3151ed48a6fa149/openfold/openfold/model/__init__.py


--------------------------------------------------------------------------------
/openfold/openfold/model/backbone_update.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import torch
18 | import torch.nn as nn
19 | 
20 | from openfold.model.linear import Linear
21 | 
22 | 
23 | class BackboneUpdate(nn.Module):
24 |     """Backbone Update module.
25 | 
26 |     Supplementary '1.8.3 Backbone update': Algorithm 23.
27 | 
28 |     Args:
29 |         c_s: Single representation dimension (channels).
30 | 
31 |     """
32 | 
33 |     def __init__(self, c_s: int) -> None:
34 |         super(BackboneUpdate, self).__init__()
35 |         self.linear = Linear(c_s, 6, bias=True, init="final")
36 | 
37 |     def forward(self, s: torch.Tensor) -> torch.Tensor:
38 |         return self.linear(s)
39 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/dropout.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from typing import Tuple, Union
18 | 
19 | import torch
20 | import torch.nn as nn
21 | import torch.nn.functional as F
22 | 
23 | 
24 | class Dropout(nn.Module):
25 |     """Dropout module.
26 | 
27 |     Implementation of dropout with the ability to share the dropout mask
28 |     along a particular dimension.
29 | 
30 |     If not in training mode, this module computes the identity function.
31 | 
32 |     Supplementary '1.11.6 Dropout details'.
33 | 
34 |     Args:
35 |         p: Dropout rate (probability of an element to be zeroed).
36 |         share_dim: Dimension(s) along which the dropout mask is shared.
37 |         inplace: If set to `True`, will do this operation in-place.
38 | 
39 |     """
40 | 
41 |     def __init__(
42 |         self,
43 |         p: float,
44 |         share_dim: Union[int, Tuple[int, ...]] = (),
45 |         inplace: bool = False,
46 |     ) -> None:
47 |         super(Dropout, self).__init__()
48 |         assert 0.0 <= p <= 1.0
49 |         self.p = p
50 |         if type(share_dim) == int:
51 |             share_dim = (share_dim,)
52 |         else:
53 |             assert isinstance(share_dim, tuple)
54 |         self.share_dim = share_dim
55 |         self.inplace = inplace
56 | 
57 |     def forward(self, x: torch.Tensor) -> torch.Tensor:
58 |         shape = list(x.shape)
59 |         for d in self.share_dim:
60 |             shape[d] = 1
61 |         mask = x.new_ones(shape)
62 |         mask = F.dropout(
63 |             input=mask,
64 |             p=self.p,
65 |             training=self.training,
66 |             inplace=self.inplace,
67 |         )
68 |         x *= mask
69 |         return x
70 | 
71 | 
72 | class DropoutRowwise(Dropout):
73 |     """Dropout Rowwise module."""
74 | 
75 |     def __init__(self, p: float) -> None:
76 |         super(DropoutRowwise, self).__init__(p=p, share_dim=-3)
77 | 
78 | 
79 | class DropoutColumnwise(Dropout):
80 |     """Dropout Columnwise module."""
81 | 
82 |     def __init__(self, p: float) -> None:
83 |         super(DropoutColumnwise, self).__init__(p=p, share_dim=-2)
84 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/extra_msa_embedder.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import torch
18 | import torch.nn as nn
19 | 
20 | from openfold.model.linear import Linear
21 | 
22 | 
23 | class ExtraMSAEmbedder(nn.Module):
24 |     """Extra MSA Embedder module.
25 | 
26 |     Embeds the "extra_msa_feat" feature.
27 | 
28 |     Supplementary '1.4 AlphaFold Inference': Algorithm 2, line 15.
29 | 
30 |     Args:
31 |         emsa_dim: Input `extra_msa_feat` dimension (channels).
32 |         c_e: Output extra MSA representation dimension (channels).
33 | 
34 |     """
35 | 
36 |     def __init__(
37 |         self,
38 |         emsa_dim: int,
39 |         c_e: int,
40 |     ) -> None:
41 |         super(ExtraMSAEmbedder, self).__init__()
42 |         self.linear = Linear(emsa_dim, c_e, bias=True, init="default")
43 | 
44 |     def forward(
45 |         self,
46 |         extra_msa_feat: torch.Tensor,
47 |     ) -> torch.Tensor:
48 |         """Extra MSA Embedder forward pass.
49 | 
50 |         Args:
51 |             extra_msa_feat: [batch, N_extra_seq, N_res, emsa_dim]
52 | 
53 |         Returns:
54 |             extra_msa_embedding: [batch, N_extra_seq, N_res, c_e]
55 | 
56 |         """
57 |         return self.linear(extra_msa_feat)
58 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/layer_norm.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import torch
18 | import torch.nn as nn
19 | import torch.nn.functional as F
20 | 
21 | 
22 | class LayerNorm(nn.Module):
23 |     """Layer Normalization module.
24 | 
25 |     Supplementary '1.11.4 Parameters initialization': Layer normalization.
26 | 
27 |     Args:
28 |         in_channels: Last dimension of the input tensor.
29 |         eps: A value added to the denominator for numerical stability.
30 | 
31 |     """
32 | 
33 |     def __init__(
34 |         self,
35 |         in_channels: int,
36 |         eps: float = 1e-5,
37 |     ) -> None:
38 |         super(LayerNorm, self).__init__()
39 |         self.normalized_shape = (in_channels,)
40 |         self.eps = eps
41 |         self.weight = nn.Parameter(torch.ones(in_channels))
42 |         self.bias = nn.Parameter(torch.zeros(in_channels))
43 | 
44 |     def forward(self, x: torch.Tensor) -> torch.Tensor:
45 |         return F.layer_norm(
46 |             input=x,
47 |             normalized_shape=self.normalized_shape,
48 |             weight=self.weight,
49 |             bias=self.bias,
50 |             eps=self.eps,
51 |         )
52 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/msa_column_attention.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from typing import Optional
18 | 
19 | import torch
20 | import torch.nn as nn
21 | 
22 | from openfold.model.attention import SelfAttentionWithGate
23 | from openfold.model.layer_norm import LayerNorm
24 | 
25 | 
26 | class MSAColumnAttention(nn.Module):
27 |     """MSA Column Attention module.
28 | 
29 |     Supplementary '1.6.2 MSA column-wise gated self-attention': Algorithm 8.
30 | 
31 |     Args:
32 |         c_m: MSA representation dimension (channels).
33 |         c_hidden: Per-head hidden dimension (channels).
34 |         num_heads: Number of attention heads.
35 |         inf: Safe infinity value.
36 |         chunk_size: Optional chunk size for a batch-like dimension.
37 | 
38 |     """
39 | 
40 |     def __init__(
41 |         self,
42 |         c_m: int,
43 |         c_hidden: int,
44 |         num_heads: int,
45 |         inf: float,
46 |         chunk_size: Optional[int],
47 |     ) -> None:
48 |         super(MSAColumnAttention, self).__init__()
49 |         self.layer_norm_m = LayerNorm(c_m)
50 |         self.mha = SelfAttentionWithGate(
51 |             c_qkv=c_m,
52 |             c_hidden=c_hidden,
53 |             num_heads=num_heads,
54 |             inf=inf,
55 |             chunk_size=chunk_size,
56 |         )
57 | 
58 |     def forward(
59 |         self,
60 |         m: torch.Tensor,
61 |         mask: torch.Tensor,
62 |     ) -> torch.Tensor:
63 |         """MSA Column Attention forward pass.
64 | 
65 |         Args:
66 |             m: [batch, N_seq, N_res, c_m] MSA representation
67 |             mask: [batch, N_seq, N_res] MSA mask
68 | 
69 |         Returns:
70 |             m_update: [batch, N_seq, N_res, c_m] MSA representation update
71 | 
72 |         """
73 |         m = m.transpose(-2, -3)
74 |         # m: [batch, N_res, N_seq, c_m]
75 | 
76 |         mask = mask.transpose(-1, -2)
77 |         # mask: [batch, N_res, N_seq]
78 | 
79 |         mask = mask.unsqueeze(-2).unsqueeze(-3)
80 |         # mask: [batch, N_res, 1, 1, N_seq]
81 | 
82 |         m = self.layer_norm_m(m)
83 |         m = self.mha(
84 |             input_qkv=m,
85 |             mask=mask,
86 |             bias=None,
87 |         )
88 |         # m: [batch, N_res, N_seq, c_m]
89 | 
90 |         m = m.transpose(-2, -3)
91 |         # m: [batch, N_seq, N_res, c_m]
92 | 
93 |         return m
94 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/msa_transition.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import torch
18 | import torch.nn as nn
19 | 
20 | from openfold.model.layer_norm import LayerNorm
21 | from openfold.model.linear import Linear
22 | 
23 | 
24 | class MSATransition(nn.Module):
25 |     """MSA Transition module.
26 | 
27 |     Supplementary '1.6.3 MSA transition': Algorithm 9.
28 | 
29 |     Args:
30 |         c_m: MSA (or Extra MSA) representation dimension (channels).
31 |         n: `c_m` multiplier to obtain hidden dimension (channels).
32 | 
33 |     """
34 | 
35 |     def __init__(
36 |         self,
37 |         c_m: int,
38 |         n: int,
39 |     ) -> None:
40 |         super(MSATransition, self).__init__()
41 |         self.layer_norm = LayerNorm(c_m)
42 |         self.linear_1 = Linear(c_m, n * c_m, bias=True, init="relu")
43 |         self.linear_2 = Linear(n * c_m, c_m, bias=True, init="final")
44 | 
45 |     def forward(
46 |         self,
47 |         m: torch.Tensor,
48 |         mask: torch.Tensor,
49 |     ) -> torch.Tensor:
50 |         """MSA Transition forward pass.
51 | 
52 |         Args:
53 |             m: [batch, N_seq, N_res, c_m] MSA representation
54 |             mask: [batch, N_seq, N_res] MSA mask
55 | 
56 |         Returns:
57 |             m_update: [batch, N_seq, N_res, c_m] MSA representation update
58 | 
59 |         """
60 |         # DeepMind forgets to apply the MSA mask here.
61 |         m = self.layer_norm(m)
62 |         m = self.linear_1(m)
63 |         m = torch.relu(m)
64 |         m = self.linear_2(m)
65 |         return m
66 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/pair_transition.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import torch
18 | import torch.nn as nn
19 | 
20 | from openfold.model.layer_norm import LayerNorm
21 | from openfold.model.linear import Linear
22 | 
23 | 
24 | class PairTransition(nn.Module):
25 |     """Pair Transition module.
26 | 
27 |     Supplementary '1.6.7 Transition in the pair stack': Algorithm 15.
28 | 
29 |     Args:
30 |         c_z: Pair or template representation dimension (channels).
31 |         n: `c_z` multiplier to obtain hidden dimension (channels).
32 | 
33 |     """
34 | 
35 |     def __init__(
36 |         self,
37 |         c_z: int,
38 |         n: int,
39 |     ) -> None:
40 |         super(PairTransition, self).__init__()
41 |         self.layer_norm = LayerNorm(c_z)
42 |         self.linear_1 = Linear(c_z, n * c_z, bias=True, init="relu")
43 |         self.linear_2 = Linear(n * c_z, c_z, bias=True, init="final")
44 | 
45 |     def forward(
46 |         self,
47 |         z: torch.Tensor,
48 |         mask: torch.Tensor,
49 |     ) -> torch.Tensor:
50 |         """Pair Transition forward pass.
51 | 
52 |         Args:
53 |             z: [batch, N_res, N_res, c_z] pair representation
54 |             mask: [batch, N_res, N_res] pair mask
55 | 
56 |         Returns:
57 |             z_update: [batch, N_res, N_res, c_z] pair representation update
58 | 
59 |         """
60 |         # DeepMind forgets to apply the MSA mask here.
61 |         z = self.layer_norm(z)
62 |         z = self.linear_1(z)
63 |         z = torch.relu(z)
64 |         z = self.linear_2(z)
65 |         return z
66 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/single_transition.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import torch
18 | import torch.nn as nn
19 | 
20 | from openfold.model.layer_norm import LayerNorm
21 | from openfold.model.linear import Linear
22 | 
23 | 
24 | class SingleTransition(nn.Module):
25 |     """Single Transition module.
26 | 
27 |     Supplementary '1.8 Structure module': Algorithm 20, lines 8-9.
28 | 
29 |     Args:
30 |         c_s: Single representation dimension (channels).
31 |         dropout_rate: Dropout rate.
32 | 
33 |     """
34 | 
35 |     def __init__(
36 |         self,
37 |         c_s: int,
38 |         dropout_rate: float,
39 |     ) -> None:
40 |         super(SingleTransition, self).__init__()
41 |         self.linear_1 = Linear(c_s, c_s, bias=True, init="relu")
42 |         self.linear_2 = Linear(c_s, c_s, bias=True, init="relu")
43 |         self.linear_3 = Linear(c_s, c_s, bias=True, init="final")
44 |         self.dropout = nn.Dropout(dropout_rate)
45 |         self.layer_norm = LayerNorm(c_s)
46 | 
47 |     def forward(self, s: torch.Tensor) -> torch.Tensor:
48 |         s = s + self.linear_3(torch.relu(self.linear_2(torch.relu(self.linear_1(s)))))
49 |         s = self.layer_norm(self.dropout(s))
50 |         return s
51 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/template_angle_embedder.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import torch
18 | import torch.nn as nn
19 | 
20 | from openfold.model.linear import Linear
21 | 
22 | 
23 | class TemplateAngleEmbedder(nn.Module):
24 |     """Template Angle Embedder module.
25 | 
26 |     Embeds the "template_angle_feat" feature.
27 | 
28 |     Supplementary '1.4 AlphaFold Inference': Algorithm 2, line 7.
29 | 
30 |     Args:
31 |         ta_dim: Input `template_angle_feat` dimension (channels).
32 |         c_m: Output MSA representation dimension (channels).
33 | 
34 |     """
35 | 
36 |     def __init__(
37 |         self,
38 |         ta_dim: int,
39 |         c_m: int,
40 |     ) -> None:
41 |         super(TemplateAngleEmbedder, self).__init__()
42 |         self.linear_1 = Linear(ta_dim, c_m, bias=True, init="relu")
43 |         self.linear_2 = Linear(c_m, c_m, bias=True, init="relu")
44 | 
45 |     def forward(
46 |         self,
47 |         template_angle_feat: torch.Tensor,
48 |     ) -> torch.Tensor:
49 |         """Template Angle Embedder forward pass.
50 | 
51 |         Args:
52 |             template_angle_feat: [batch, N_templ, N_res, ta_dim]
53 | 
54 |         Returns:
55 |             template_angle_embedding: [batch, N_templ, N_res, c_m]
56 | 
57 |         """
58 |         return self.linear_2(torch.relu(self.linear_1(template_angle_feat)))
59 | 


--------------------------------------------------------------------------------
/openfold/openfold/model/template_pair_embedder.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2021 DeepMind Technologies Limited
 2 | # Copyright 2022 AlQuraishi Laboratory
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | import torch
18 | import torch.nn as nn
19 | 
20 | from openfold.model.linear import Linear
21 | 
22 | 
23 | class TemplatePairEmbedder(nn.Module):
24 |     """Template Pair Embedder module.
25 | 
26 |     Embeds the "template_pair_feat" feature.
27 | 
28 |     Supplementary '1.4 AlphaFold Inference': Algorithm 2, line 9.
29 | 
30 |     Args:
31 |         tp_dim: Input `template_pair_feat` dimension (channels).
32 |         c_t: Output template representation dimension (channels).
33 | 
34 |     """
35 | 
36 |     def __init__(
37 |         self,
38 |         tp_dim: int,
39 |         c_t: int,
40 |     ) -> None:
41 |         super(TemplatePairEmbedder, self).__init__()
42 |         self.tp_dim = tp_dim
43 |         self.c_t = c_t
44 |         self.linear = Linear(tp_dim, c_t, bias=True, init="relu")
45 | 
46 |     def forward(
47 |         self,
48 |         template_pair_feat: torch.Tensor,
49 |     ) -> torch.Tensor:
50 |         """Template Pair Embedder forward pass.
51 | 
52 |         Args:
53 |             template_pair_feat: [batch, N_res, N_res, tp_dim]
54 | 
55 |         Returns:
56 |             template_pair_embedding: [batch, N_res, N_res, c_t]
57 | 
58 |         """
59 |         return self.linear(template_pair_feat)
60 | 


--------------------------------------------------------------------------------
/openfold/openfold/numpy_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 NVIDIA CORPORATION
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Callable
16 | 
17 | import numpy as np
18 | 
19 | from openfold.helpers import map_tree_leaves
20 | 
21 | NUMPY_SEED_MODULUS = 0xFFFF_FFFF + 1
22 | 
23 | 
24 | def map_array_tree(fn: Callable, tree: dict) -> dict:
25 |     """Maps array tree using given function."""
26 |     return map_tree_leaves(fn=fn, tree=tree, leaf_type=np.ndarray)
27 | 


--------------------------------------------------------------------------------
/openfold/openfold/swa.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 NVIDIA CORPORATION
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import torch
16 | import torch.nn as nn
17 | 
18 | from openfold.model.alphafold import AlphaFold
19 | 
20 | 
21 | class AlphaFoldSWA(nn.Module):
22 |     """AlphaFold SWA (Stochastic Weight Averaging) module wrapper."""
23 | 
24 |     def __init__(self, alphafold: AlphaFold, enabled: bool, decay_rate: float) -> None:
25 |         super(AlphaFoldSWA, self).__init__()
26 |         if enabled:
27 |             self.averaged_model = torch.optim.swa_utils.AveragedModel(
28 |                 model=alphafold,
29 |                 avg_fn=swa_avg_fn(decay_rate=decay_rate),
30 |             )
31 |             self.enabled = True
32 |         else:
33 |             self.averaged_model = None
34 |             self.enabled = False
35 | 
36 |     def update(self, alphafold: AlphaFold) -> None:
37 |         if self.enabled:
38 |             self.averaged_model.update_parameters(model=alphafold)
39 | 
40 |     def forward(self, batch):
41 |         if not self.enabled:
42 |             raise RuntimeError("AlphaFoldSWA is not enabled")
43 |         return self.averaged_model(batch)
44 | 
45 | 
46 | class swa_avg_fn:
47 |     """Averaging function for EMA with configurable decay rate
48 |     (Supplementary '1.11.7 Evaluator setup')."""
49 | 
50 |     def __init__(self, decay_rate: float) -> None:
51 |         self._decay_rate = decay_rate
52 | 
53 |     def __call__(
54 |         self,
55 |         averaged_model_parameter: torch.Tensor,
56 |         model_parameter: torch.Tensor,
57 |         num_averaged: torch.Tensor,
58 |     ) -> torch.Tensor:
59 |         # for decay_rate = 0.999:
60 |         # return averaged_model_parameter * 0.999 + model_parameter * 0.001
61 |         # avg * 0.999 + m * 0.001
62 |         # 999*avg/1000 + m/1000
63 |         # (999*avg + avg - avg)/1000 + m/1000
64 |         # (1000*avg - avg)/1000 + m/1000
65 |         # 1000*avg/1000 - avg/1000 + m/1000
66 |         # avg + (m - avg)/1000
67 |         # avg + (m - avg)*0.001
68 |         return averaged_model_parameter + (
69 |             model_parameter - averaged_model_parameter
70 |         ) * (1.0 - self._decay_rate)
71 | 


--------------------------------------------------------------------------------
/openfold/openfold/torch_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 NVIDIA CORPORATION
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import os
16 | from typing import Callable, List
17 | 
18 | import torch
19 | 
20 | from openfold.helpers import map_tree_leaves
21 | 
22 | TORCH_SEED_MODULUS = 0xFFFF_FFFF_FFFF_FFFF + 1
23 | 
24 | 
25 | def enable_tf32() -> None:
26 |     os.environ["TORCH_ALLOW_TF32_CUBLAS_OVERRIDE"] = "1"
27 |     torch.backends.cuda.matmul.allow_tf32 = True
28 |     torch.backends.cudnn.allow_tf32 = True
29 | 
30 | 
31 | def disable_tf32() -> None:
32 |     os.environ["TORCH_ALLOW_TF32_CUBLAS_OVERRIDE"] = "0"
33 |     torch.backends.cuda.matmul.allow_tf32 = False
34 |     torch.backends.cudnn.allow_tf32 = False
35 | 
36 | 
37 | def is_autocast_fp16_enabled() -> bool:
38 |     return (
39 |         torch.is_autocast_enabled() and torch.get_autocast_gpu_dtype() == torch.float16
40 |     )
41 | 
42 | 
43 | def map_tensor_tree(fn: Callable, tree: dict) -> dict:
44 |     """Maps tensor tree using given function."""
45 |     return map_tree_leaves(fn=fn, tree=tree, leaf_type=torch.Tensor)
46 | 
47 | 
48 | def collate(samples: List[dict]) -> dict:
49 |     """Converts list of samples into a batch dict."""
50 |     assert isinstance(samples, list)
51 |     assert len(samples) > 0
52 |     sample0 = samples[0]
53 |     assert isinstance(sample0, dict)
54 |     batch = {}
55 |     for key in list(sample0.keys()):
56 |         batch[key] = [sample[key] for sample in samples]
57 |         if isinstance(sample0[key], torch.Tensor):
58 |             batch[key] = torch.stack(batch[key], dim=0)
59 |     return batch
60 | 


--------------------------------------------------------------------------------
/openfold/scripts/activate_local_openfold_venv.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # Usage: source scripts/activate_local_openfold_venv.sh /path/to/openfold-venv
18 | #
19 | # Exit: conda deactivate
20 | 
21 | # Setup text effects:
22 | GREEN=$(tput setaf 2)
23 | BOLD=$(tput bold)
24 | NORMAL=$(tput sgr0)
25 | 
26 | # Read input argument:
27 | PREFIX_PATH=$1
28 | 
29 | # Activate conda environment:
30 | source $PREFIX_PATH/conda/etc/profile.d/conda.sh && \
31 | conda activate openfold-venv && \
32 | echo -e "${GREEN}${BOLD}openfold-venv activated!${NORMAL}"
33 | 


--------------------------------------------------------------------------------
/openfold/scripts/build_local_openfold_venv.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # Usage: bash scripts/build_local_openfold_venv.sh /path/to/openfold-venv
18 | 
19 | set -e  # immediately exit on first error
20 | 
21 | # Setup text effects:
22 | RED=$(tput setaf 1)
23 | GREEN=$(tput setaf 2)
24 | BOLD=$(tput bold)
25 | NORMAL=$(tput sgr0)
26 | 
27 | # Read input argument:
28 | PREFIX_PATH=$1
29 | if [ -z $PREFIX_PATH ]; then
30 |     echo "${BOLD}${RED}Input error:${NORMAL} missing path!"
31 |     echo "Please, specify venv location!"
32 |     exit 1
33 | fi
34 | 
35 | # Check if prefix path already exists:
36 | if [ -f $PREFIX_PATH ] || [ -d $PREFIX_PATH ] ; then
37 |     echo "${BOLD}${RED}Build error:${NORMAL} ${BOLD}$PREFIX_PATH${NORMAL} already exists!"
38 |     echo "Remove ${BOLD}$PREFIX_PATH${NORMAL} manually or set different location."
39 |     exit 1
40 | fi
41 | 
42 | echo "Building ${GREEN}${BOLD}$PREFIX_PATH${NORMAL}..."
43 | 
44 | # Install conda to specified prefix path:
45 | wget -4 https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
46 |     && bash Miniconda3-latest-Linux-x86_64.sh -b -p $PREFIX_PATH/conda \
47 |     && rm Miniconda3-latest-Linux-x86_64.sh
48 | 
49 | # Create conda environment:
50 | $PREFIX_PATH/conda/bin/conda create --name=openfold-venv -y python==3.8.*
51 | 
52 | # Activate conda environment:
53 | source scripts/activate_local_openfold_venv.sh $PREFIX_PATH
54 | 
55 | # Install requirements:
56 | echo "Installing requirements..."
57 | conda install -y \
58 |     pytorch::pytorch==2.0.* \
59 |     conda-forge::numpy==1.22.2 \
60 |     conda-forge::pandas==1.5.2 \
61 |     conda-forge::scipy==1.10.1 \
62 |     conda-forge::tqdm==4.65.0 \
63 |     conda-forge::psutil==5.9.4 \
64 |     conda-forge::biopython==1.79 \
65 |     conda-forge::Pympler==1.0.1 \
66 |     bioconda::kalign3==3.3.*
67 | 
68 | pip install dacite==1.8.0 \
69 |     "git+https://github.com/mlcommons/logging.git@2.1.0" \
70 |     "git+https://github.com/NVIDIA/mlperf-common.git"
71 | 
72 | # Install OpenFold source code package in editable mode:
73 | pip install -e .
74 | 
75 | echo "${GREEN}${BOLD}$0 finished successfully!${NORMAL}"
76 | 


--------------------------------------------------------------------------------
/openfold/scripts/deactivate_local_openfold_venv.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2023 NVIDIA CORPORATION
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # Usage: source scripts/deactivate_local_openfold_venv.sh
18 | 
19 | # Setup text effects:
20 | CYAN=$(tput setaf 6)
21 | BOLD=$(tput bold)
22 | NORMAL=$(tput sgr0)
23 | 
24 | # Deactivate conda environment:
25 | conda deactivate && \
26 | echo -e "${CYAN}${BOLD}openfold-venv deactivated!${NORMAL}"
27 | 


--------------------------------------------------------------------------------
/openfold/scripts/download_open_protein_set.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2021 DeepMind Technologies Limited
 4 | # Copyright 2022 OpenFold Consortium
 5 | # Copyright 2023 NVIDIA CORPORATION
 6 | #
 7 | # Licensed under the Apache License, Version 2.0 (the "License");
 8 | # you may not use this file except in compliance with the License.
 9 | # You may obtain a copy of the License at
10 | #
11 | #     http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | #
19 | # Usage: bash download_open_protein_set.sh /path/to/data/open_protein_set/original
20 | 
21 | set -e
22 | 
23 | if [[ $# -eq 0 ]]; then
24 |     echo "Error: download directory must be provided as an input argument."
25 |     exit 1
26 | fi
27 | 
28 | if ! command -v aws &> /dev/null ; then
29 |     echo "Error: AWS CLI could not be found. Check https://aws.amazon.com/cli/ and install AWS CLI."
30 |     exit 1
31 | fi
32 | 
33 | DOWNLOAD_DIR="${1}/"
34 | mkdir -p "${DOWNLOAD_DIR}"
35 | 
36 | # download root files:
37 | aws s3 cp --no-sign-request s3://openfold/LICENSE "${DOWNLOAD_DIR}"
38 | aws s3 cp --no-sign-request s3://openfold/duplicate_pdb_chains.txt "${DOWNLOAD_DIR}"
39 | 
40 | # download pdb directory:
41 | mkdir -p "${DOWNLOAD_DIR}/pdb"
42 | aws s3 cp --no-sign-request s3://openfold/pdb "${DOWNLOAD_DIR}/pdb" --recursive
43 | 


--------------------------------------------------------------------------------
/openfold/scripts/download_pdb_mmcif.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Copyright 2021 DeepMind Technologies Limited
 4 | # Copyright 2023 NVIDIA CORPORATION
 5 | #
 6 | # Licensed under the Apache License, Version 2.0 (the "License");
 7 | # you may not use this file except in compliance with the License.
 8 | # You may obtain a copy of the License at
 9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 | # Usage: bash download_pdb_mmcif.sh /path/to/data/pdb_mmcif/original
19 | 
20 | set -e
21 | 
22 | if [[ $# -eq 0 ]]; then
23 |     echo "Error: download directory must be provided as an input argument."
24 |     exit 1
25 | fi
26 | 
27 | if ! command -v aria2c &> /dev/null ; then
28 |     echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)."
29 |     exit 1
30 | fi
31 | 
32 | if ! command -v rsync &> /dev/null ; then
33 |     echo "Error: rsync could not be found. Please install rsync."
34 |     exit 1
35 | fi
36 | 
37 | DOWNLOAD_DIR="$1"
38 | DOWNLOAD_RAW_DIR="${DOWNLOAD_DIR}/raw"
39 | 
40 | echo "Running rsync to fetch all mmCIF files (note that the rsync progress estimate might be inaccurate)..."
41 | echo "If the download speed is too slow, try changing the mirror to:"
42 | echo "  * rsync.ebi.ac.uk::pub/databases/pdb/data/structures/divided/mmCIF/ (Europe)"
43 | echo "  * ftp.pdbj.org::ftp_data/structures/divided/mmCIF/ (Asia)"
44 | echo "or see https://www.wwpdb.org/ftp/pdb-ftp-sites for more download options."
45 | mkdir -p "${DOWNLOAD_RAW_DIR}"
46 | rsync --recursive --links --perms --times --compress --info=progress2 --delete --port=33444 \
47 |   rsync.rcsb.org::ftp_data/structures/divided/mmCIF/ \
48 |   "${DOWNLOAD_RAW_DIR}"
49 | 
50 | aria2c "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" --dir="${DOWNLOAD_DIR}"
51 | 
52 | aria2c "https://cdn.rcsb.org/resources/sequence/clusters/clusters-by-entity-40.txt" --dir="${DOWNLOAD_DIR}"
53 | 


--------------------------------------------------------------------------------
/openfold/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 NVIDIA CORPORATION
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from setuptools import find_packages, setup
16 | 
17 | setup(
18 |     name="openfold",
19 |     version="1.0.0",
20 |     packages=find_packages(),
21 |     include_package_data=True,
22 |     package_data={
23 |         "openfold": [
24 |             "data/resources/stereo_chemical_props.txt",
25 |         ],
26 |     },
27 | )
28 | 


--------------------------------------------------------------------------------