├── .devcontainer
├── devcontainer.json
├── initializeCommand.sh
└── postCreateCommand.sh
├── .dockerignore
├── .github
├── ISSUE_TEMPLATE
│ ├── bug-report.yml
│ └── feature-request.yml
├── codecov.yml
├── copy-pr-bot.yaml
├── dependabot.yml
├── pull_request_template.md
└── workflows
│ ├── approvals.yml
│ ├── bionemo-subpackage-ci.yml
│ ├── blossom-ci.yml
│ ├── gh-docs-deploy.yml
│ ├── internal_tools.yml
│ ├── trufflehog.yml
│ └── unit-tests.yml
├── .gitignore
├── .gitmodules
├── .nspect-allowlist.toml
├── .pre-commit-config.yaml
├── .secrets-nb.baseline
├── .secrets.baseline
├── .vscode
└── settings.json
├── CODE-REVIEW.md
├── CODEOWNERS
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── license.txt
└── third_party.txt
├── README.md
├── SECURITY.md
├── VERSION
├── ci
├── benchmarks
│ ├── partial-conv
│ │ ├── amplify_pretrain.yaml
│ │ ├── esm2_pretrain.yaml
│ │ ├── evo2_pretrain.yaml
│ │ └── geneformer_pretrain.yaml
│ └── perf
│ │ ├── amplify_pretrain.yaml
│ │ ├── esm2_pretrain.yaml
│ │ ├── evo2_pretrain.yaml
│ │ └── geneformer_pretrain.yaml
├── docker
│ ├── Dockerfile.uv
│ └── entrypoint.sh
└── scripts
│ ├── build_docker_image.sh
│ ├── run_pytest.sh
│ ├── static_checks.sh
│ └── utils.sh
├── docker_build_patches
└── decord_ffmpeg6_fix.patch
├── docs
├── Dockerfile
├── README.md
├── conftest.py
├── docs
│ ├── SUMMARY.md
│ ├── assets
│ │ ├── css
│ │ │ ├── chatbot.css
│ │ │ ├── color-schemes.css
│ │ │ ├── custom-material.css
│ │ │ ├── fonts.css
│ │ │ └── jupyter-themes.css
│ │ ├── images
│ │ │ ├── amplify
│ │ │ │ ├── training_loss.png
│ │ │ │ ├── validation_loss.png
│ │ │ │ └── validation_ppl.png
│ │ │ ├── esm2
│ │ │ │ ├── esm2_device_scaling.png
│ │ │ │ ├── esm2_device_scaling.svg
│ │ │ │ ├── esm2_model_scaling.png
│ │ │ │ ├── esm2_model_scaling.svg
│ │ │ │ ├── esm2_peft_memory_usage.png
│ │ │ │ ├── esm2_peft_time.png
│ │ │ │ ├── esm2_pretrain_convergence.png
│ │ │ │ ├── esm2_pretrain_convergence.svg
│ │ │ │ ├── esm2_single_node_training_perf.png
│ │ │ │ └── esm2_single_node_training_perf.svg
│ │ │ ├── evo2
│ │ │ │ ├── evo2_bionemo_1b_6950steps.png
│ │ │ │ ├── evo2_bionemo_7bnv_28ksteps.png
│ │ │ │ ├── evo2_performance_by_cluster_size.png
│ │ │ │ ├── evo2_savanna_1b_6950steps.png
│ │ │ │ ├── evo2_savanna_7b_28ksteps.png
│ │ │ │ ├── evo2_vs_7b_40b_performance_vs_context_length.png
│ │ │ │ ├── evo2_vs_llama2_performance_vs_context_length.png
│ │ │ │ └── evo2_zeroshot_brca1_stripplot.png
│ │ │ ├── favicon.png
│ │ │ ├── geneformer
│ │ │ │ ├── F1-score-models.png
│ │ │ │ ├── Geneformer_steven_106m_train.png
│ │ │ │ ├── Geneformer_steven_106m_val.png
│ │ │ │ ├── accuracy-models-04-18-2025.png
│ │ │ │ ├── average-accuracy-models.png
│ │ │ │ ├── f1-score-models-04-18-2025.png
│ │ │ │ ├── geneformer_106m_train_loss.png
│ │ │ │ ├── geneformer_106m_val_loss.png
│ │ │ │ ├── geneformer_10m_training_loss.png
│ │ │ │ ├── geneformer_10m_val_loss.png
│ │ │ │ ├── loss_curve_new_v_old_geneformer_64_node_10M.png
│ │ │ │ └── model_tflops_per_gpu_chart_geneformer.png
│ │ │ ├── logo-icon-black.svg
│ │ │ ├── logo-white.svg
│ │ │ ├── megatron_background
│ │ │ │ ├── README.md
│ │ │ │ ├── data_parallelism.png
│ │ │ │ ├── execution_schedulers.png
│ │ │ │ ├── fsdp_slide1.png
│ │ │ │ ├── fsdp_slide2.png
│ │ │ │ ├── pipeline_parallelism.png
│ │ │ │ ├── sp_korthikanti_2022_fig5.png
│ │ │ │ ├── tensor_and_pipeline_parallelism.png
│ │ │ │ └── tensor_parallelism.png
│ │ │ ├── sub_package_graphs
│ │ │ │ ├── dependency_file_imports.png
│ │ │ │ ├── dependency_graph_pyproject.png
│ │ │ │ └── dependency_graph_tach.png
│ │ │ └── wandb_tips_tricks
│ │ │ │ └── trainer_global_step.png
│ │ ├── javascript
│ │ │ └── chatbot.js
│ │ └── old_images
│ │ │ ├── .gitkeep
│ │ │ ├── MMB_molecule_generation_1.png
│ │ │ ├── MMB_molecule_generation_2.png
│ │ │ ├── MMB_molecule_generation_3.png
│ │ │ ├── MMB_molecule_generation_4.png
│ │ │ ├── MMB_molecule_generation_5.png
│ │ │ ├── MolMIM_model.png
│ │ │ ├── MolMIM_molecule_generation_1.png
│ │ │ ├── MolMIM_molecule_generation_2.png
│ │ │ ├── bcp_snapshot_.png
│ │ │ ├── bcp_snapshot_1.png
│ │ │ ├── bcp_snapshot_2.png
│ │ │ ├── bcp_snapshot_3.png
│ │ │ ├── bionemo_overview_1.png
│ │ │ ├── bionemo_overview_2.png
│ │ │ ├── cellxgene
│ │ │ ├── num_cells_by_assay.png
│ │ │ ├── num_cells_by_dataset.png
│ │ │ ├── num_genes_measured_by_assay.png
│ │ │ ├── pct_cells_by_age.png
│ │ │ ├── pct_cells_by_ethnicity_category.png
│ │ │ ├── pct_cells_by_sex.png
│ │ │ ├── pct_cells_by_tissue_category.png
│ │ │ └── top9_datasets_tissue_distribution.png
│ │ │ ├── diffdock_1.png
│ │ │ ├── diffdock_2.png
│ │ │ ├── diffdock_3.png
│ │ │ ├── diffdock_4.png
│ │ │ ├── diffdock_fw_overview.png
│ │ │ ├── equidock_1.png
│ │ │ ├── equidock_2.png
│ │ │ ├── equidock_3.png
│ │ │ ├── equidock_4.png
│ │ │ ├── esm1nv_1.png
│ │ │ ├── esm1nv_2.png
│ │ │ ├── esm1nv_3.png
│ │ │ ├── esm1nv_4.png
│ │ │ ├── mmb_1.png
│ │ │ ├── mmb_2.png
│ │ │ ├── mmb_3.png
│ │ │ ├── mmb_4.png
│ │ │ ├── mmb_5.png
│ │ │ ├── molmim-embedding.png
│ │ │ ├── molmim-hidden-state.png
│ │ │ ├── molmim-predictive-modeling.png
│ │ │ ├── sc_fm
│ │ │ ├── F1-score-models.png
│ │ │ ├── average-accuracy-models.png
│ │ │ ├── geneformer-106m-240530-val-train-loss.png
│ │ │ ├── geneformer-10m-240530-val-train-loss.png
│ │ │ ├── geneformer-240530-val-comparison.png
│ │ │ └── model_tflops_per_gpu_chart_tight_layout.png
│ │ │ ├── wandai_charts.png
│ │ │ └── wandb-dashboard.png
│ ├── index.md
│ ├── main
│ │ ├── SUMMARY.md
│ │ ├── about
│ │ │ ├── SUMMARY.md
│ │ │ ├── background
│ │ │ │ ├── SUMMARY.md
│ │ │ │ ├── megatron_datasets.md
│ │ │ │ └── nemo2.md
│ │ │ ├── overview.md
│ │ │ └── releasenotes-fw.md
│ │ ├── contributing
│ │ │ ├── Writing Documentation
│ │ │ │ ├── index.md
│ │ │ │ ├── jupyter-notebooks.ipynb
│ │ │ │ └── mkdocs.md
│ │ │ ├── code-review.md
│ │ │ ├── contributing.md
│ │ │ └── sub-package_dependency_graph.md
│ │ ├── datasets
│ │ │ ├── CELLxGENE.md
│ │ │ ├── index.md
│ │ │ └── uniprot.md
│ │ ├── developer-guide
│ │ │ └── SUMMARY.md
│ │ ├── examples
│ │ │ ├── .gitignore
│ │ │ ├── SUMMARY.md
│ │ │ └── conftest.py
│ │ ├── getting-started
│ │ │ ├── SUMMARY.md
│ │ │ ├── access-startup.md
│ │ │ ├── development.md
│ │ │ ├── index.md
│ │ │ ├── initialization-guide.md
│ │ │ ├── pre-reqs.md
│ │ │ ├── training-models.md
│ │ │ └── using-slurm.md
│ │ ├── index.md
│ │ └── references
│ │ │ ├── API_reference
│ │ │ └── index.md
│ │ │ └── FAQ.md
│ └── models
│ │ ├── ESM-2
│ │ ├── SUMMARY.md
│ │ ├── index.md
│ │ └── pre-training.md
│ │ ├── amplify.md
│ │ ├── evo2.md
│ │ ├── geneformer.md
│ │ └── index.md
├── mkdocs.yml
├── overrides
│ ├── .icons
│ │ └── nvidia
│ │ │ └── nvidia-logo.svg
│ └── main.html
├── requirements.txt
└── scripts
│ └── gen_ref_pages.py
├── internal
├── Pypi_publish.md
├── README_justfile.md
├── infra-bionemo
│ ├── LICENSE
│ ├── README.md
│ ├── pyproject.toml
│ ├── setup.py
│ ├── src
│ │ └── infra_bionemo
│ │ │ ├── __init__.py
│ │ │ ├── license_check.py
│ │ │ └── new_project
│ │ │ ├── __init__.py
│ │ │ ├── api.py
│ │ │ ├── exe
│ │ │ ├── __init__.py
│ │ │ ├── bionemo_subpackage.py
│ │ │ ├── namespace.py
│ │ │ └── simple.py
│ │ │ ├── templates.py
│ │ │ └── utils.py
│ └── tests
│ │ ├── conftest.py
│ │ └── test_infra_bionemo
│ │ ├── test_license_check.py
│ │ └── test_new_project
│ │ ├── test_api.py
│ │ ├── test_cli_tools.py
│ │ └── test_utils.py
└── scripts
│ ├── README.md
│ ├── build_dev_image.sh
│ ├── run_dev.sh
│ └── setup_env_file.sh
├── justfile
├── license_header
├── pyproject.toml
├── requirements-cve.txt
├── requirements-dev.txt
├── requirements-test.txt
├── scripts
├── gpt-pretrain.py
└── protein
│ └── esm2
│ └── esm2_dataset_perplexity.py
├── sub-packages
├── bionemo-amplify
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── src
│ │ └── bionemo
│ │ │ └── amplify
│ │ │ ├── __init__.py
│ │ │ ├── convert.py
│ │ │ ├── datamodule.py
│ │ │ ├── dataset.py
│ │ │ ├── hf_rotary.py
│ │ │ ├── infer_amplify.py
│ │ │ ├── model.py
│ │ │ ├── tokenizer.py
│ │ │ └── train_amplify.py
│ └── tests
│ │ └── bionemo
│ │ └── amplify
│ │ ├── __init__.py
│ │ ├── test_convert.py
│ │ ├── test_datamodule.py
│ │ ├── test_dataset.py
│ │ ├── test_hf_rotary.py
│ │ ├── test_infer_amplify.py
│ │ ├── test_model.py
│ │ ├── test_tokenizer.py
│ │ └── test_train_amplify.py
├── bionemo-core
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── src
│ │ └── bionemo
│ │ │ └── core
│ │ │ ├── __init__.py
│ │ │ ├── api.py
│ │ │ ├── data
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── api.py
│ │ │ ├── load.py
│ │ │ ├── multi_epoch_dataset.py
│ │ │ ├── permute.py
│ │ │ ├── resamplers.py
│ │ │ ├── resource.py
│ │ │ └── resources
│ │ │ │ ├── esm2.yaml
│ │ │ │ ├── evo2.yaml
│ │ │ │ ├── geneformer.yaml
│ │ │ │ ├── scdl.yaml
│ │ │ │ └── single_cell.yaml
│ │ │ ├── model
│ │ │ ├── __init__.py
│ │ │ └── config.py
│ │ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── batching_utils.py
│ │ │ ├── dtypes.py
│ │ │ └── random_utils.py
│ └── tests
│ │ └── bionemo
│ │ └── core
│ │ ├── data
│ │ ├── test_load.py
│ │ ├── test_load_notebook.ipynb
│ │ ├── test_multi_epoch_dataset.py
│ │ ├── test_permute.py
│ │ ├── test_resamplers.py
│ │ └── test_resource.py
│ │ └── utils
│ │ └── test_dtypes.py
├── bionemo-esm2
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── examples
│ │ ├── finetune.ipynb
│ │ ├── inference.ipynb
│ │ ├── mutant-design.ipynb
│ │ └── pretrain.md
│ ├── pyproject.toml
│ ├── src
│ │ └── bionemo
│ │ │ └── esm2
│ │ │ ├── __init__.py
│ │ │ ├── api.py
│ │ │ ├── data
│ │ │ ├── __init__.py
│ │ │ ├── datamodule.py
│ │ │ ├── dataset.py
│ │ │ └── tokenizer
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── special_tokens_map.json
│ │ │ │ ├── tokenizer_config.json
│ │ │ │ └── vocab.txt
│ │ │ ├── model
│ │ │ ├── __init__.py
│ │ │ ├── convert.py
│ │ │ ├── embedding.py
│ │ │ ├── finetune
│ │ │ │ ├── __init__.py
│ │ │ │ ├── datamodule.py
│ │ │ │ ├── dataset.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── peft.py
│ │ │ │ ├── sequence_model.py
│ │ │ │ └── token_model.py
│ │ │ └── model.py
│ │ │ ├── run
│ │ │ ├── __init__.py
│ │ │ ├── config_models.py
│ │ │ ├── main.py
│ │ │ └── recipes.py
│ │ │ ├── scripts
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── finetune_esm2.py
│ │ │ ├── infer_esm2.py
│ │ │ └── train_esm2.py
│ │ │ └── testing
│ │ │ ├── __init__.py
│ │ │ └── compare.py
│ └── tests
│ │ └── bionemo
│ │ └── esm2
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── data
│ │ ├── __init__.py
│ │ ├── test_datamodule.py
│ │ ├── test_dataset.py
│ │ └── test_tokenizer.py
│ │ ├── model
│ │ ├── __init__.py
│ │ ├── finetune
│ │ │ ├── __init__.py
│ │ │ ├── test_datamodule.py
│ │ │ ├── test_dataset.py
│ │ │ ├── test_sequence_model.py
│ │ │ └── test_token_model.py
│ │ ├── test_convert.py
│ │ ├── test_embedding.py
│ │ ├── test_model.py
│ │ └── test_stop_and_go.py
│ │ └── scripts
│ │ ├── __init__.py
│ │ ├── test_finetune_esm2.py
│ │ ├── test_infer_esm2.py
│ │ ├── test_pydantic_train.py
│ │ └── test_train_esm2.py
├── bionemo-evo2
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── assets
│ │ └── 1b_finetuning_train_curve_500_steps_256gbs.png
│ ├── examples
│ │ ├── .gitignore
│ │ ├── configs
│ │ │ ├── README.md
│ │ │ ├── full_pretrain_longphase_config.yaml
│ │ │ ├── full_pretrain_shortphase_config.yaml
│ │ │ ├── test_preproc_config.yaml
│ │ │ └── test_promotors_dataset_config.yaml
│ │ ├── fine-tuning-tutorial.ipynb
│ │ └── zeroshot_brca1.ipynb
│ ├── pyproject.toml
│ ├── src
│ │ └── bionemo
│ │ │ └── evo2
│ │ │ ├── __init__.py
│ │ │ ├── data
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── fasta_dataset.py
│ │ │ ├── preprocess.py
│ │ │ ├── tokenizer.py
│ │ │ └── transcript_extraction.py
│ │ │ ├── run
│ │ │ ├── __init__.py
│ │ │ ├── infer.py
│ │ │ ├── predict.py
│ │ │ └── train.py
│ │ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── checkpoint
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── convert_checkpoint_model_parallel_evo2.py
│ │ │ ├── convert_to_nemo.py
│ │ │ ├── convert_zero3_to_zero1.py
│ │ │ ├── params.py
│ │ │ └── zero3_conversion_lib.py
│ │ │ └── config.py
│ └── tests
│ │ └── bionemo
│ │ └── evo2
│ │ ├── data
│ │ ├── test_fasta_dataset.py
│ │ ├── test_preprocess.py
│ │ └── test_tokenizer.py
│ │ ├── run
│ │ ├── test_infer.py
│ │ ├── test_inference.py
│ │ ├── test_predict.py
│ │ └── test_train.py
│ │ ├── test_evo2.py
│ │ └── test_hyena_operators.py
├── bionemo-example_model
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── src
│ │ └── bionemo
│ │ │ └── example_model
│ │ │ ├── lightning
│ │ │ ├── __init__.py
│ │ │ └── lightning_basic.py
│ │ │ └── training_scripts
│ │ │ ├── finetune_mnist.py
│ │ │ ├── predict_mnist.py
│ │ │ └── pretrain_mnist.py
│ └── tests
│ │ └── bionemo
│ │ └── example_model
│ │ └── lightning
│ │ └── test_lightning_basic.py
├── bionemo-fw
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── src
│ │ └── bionemo
│ │ │ └── fw
│ │ │ ├── __init__.py
│ │ │ └── dependency_graph.py
│ └── tests
│ │ ├── __init__.py
│ │ └── bionemo
│ │ └── fw
│ │ ├── test_dependency_graph.py
│ │ └── test_sub_package_imports.py
├── bionemo-geneformer
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── examples
│ │ ├── .gitignore
│ │ ├── geneformer-celltype-classification.ipynb
│ │ ├── geneformer-gene-embedding-GRN.ipynb
│ │ └── geneformer_cellxgene_tutorial.ipynb
│ ├── pyproject.toml
│ ├── scripts
│ │ ├── README.md
│ │ └── geneformer_mlm_loss_eval.py
│ ├── src
│ │ └── bionemo
│ │ │ └── geneformer
│ │ │ ├── __init__.py
│ │ │ ├── api.py
│ │ │ ├── data
│ │ │ ├── __init__.py
│ │ │ ├── preprocess.py
│ │ │ └── singlecell
│ │ │ │ ├── __init__.py
│ │ │ │ ├── datamodule.py
│ │ │ │ ├── dataset.py
│ │ │ │ ├── preprocess.py
│ │ │ │ └── utils.py
│ │ │ ├── model
│ │ │ ├── __init__.py
│ │ │ └── finetune_token_regressor.py
│ │ │ ├── run
│ │ │ ├── __init__.py
│ │ │ ├── config_models.py
│ │ │ ├── main.py
│ │ │ └── recipes.py
│ │ │ ├── scripts
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── celltype_classification_bench
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bench.py
│ │ │ │ └── download.py
│ │ │ ├── infer_geneformer.py
│ │ │ └── train_geneformer.py
│ │ │ ├── tokenizer
│ │ │ ├── __init__.py
│ │ │ └── gene_tokenizer.py
│ │ │ └── utils
│ │ │ ├── __init__.py
│ │ │ └── callbacks.py
│ └── tests
│ │ └── bionemo
│ │ └── geneformer
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── scripts
│ │ ├── __init__.py
│ │ ├── test_pydantic_train.py
│ │ └── test_train_geneformer.py
│ │ ├── test_celltype_bench.py
│ │ ├── test_dataset.py
│ │ ├── test_model.py
│ │ ├── test_stop_and_go.py
│ │ └── test_transformer_specs.py
├── bionemo-geometric
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── requirements.txt
│ ├── src
│ │ └── bionemo
│ │ │ └── geometric
│ │ │ ├── __init__.py
│ │ │ ├── atom_featurizers.py
│ │ │ ├── base_featurizer.py
│ │ │ ├── bond_featurizers.py
│ │ │ ├── data
│ │ │ └── electronic_data.csv
│ │ │ └── molecule_featurizers.py
│ └── tests
│ │ └── bionemo
│ │ └── geometric
│ │ ├── test_atom_featurizers.py
│ │ ├── test_bionemo_geometric.py
│ │ ├── test_bond_featurizers.py
│ │ └── test_molecule_featurizers.py
├── bionemo-llm
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── src
│ │ └── bionemo
│ │ │ └── llm
│ │ │ ├── __init__.py
│ │ │ ├── api.py
│ │ │ ├── data
│ │ │ ├── __init__.py
│ │ │ ├── collate.py
│ │ │ ├── datamodule.py
│ │ │ ├── label2id_tokenizer.py
│ │ │ ├── masking.py
│ │ │ └── types.py
│ │ │ ├── lightning.py
│ │ │ ├── model
│ │ │ ├── __init__.py
│ │ │ ├── biobert
│ │ │ │ ├── __init__.py
│ │ │ │ ├── lightning.py
│ │ │ │ ├── model.py
│ │ │ │ ├── testing_utils.py
│ │ │ │ └── transformer_specs.py
│ │ │ ├── config.py
│ │ │ ├── layers.py
│ │ │ ├── loss.py
│ │ │ └── lr_scheduler.py
│ │ │ ├── run
│ │ │ ├── __init__.py
│ │ │ └── config_models.py
│ │ │ ├── train.py
│ │ │ └── utils
│ │ │ ├── __init__.py
│ │ │ ├── callbacks.py
│ │ │ ├── datamodule_utils.py
│ │ │ ├── iomixin_utils.py
│ │ │ ├── logger_utils.py
│ │ │ ├── megatron_utils.py
│ │ │ ├── remote.py
│ │ │ └── weight_utils.py
│ └── tests
│ │ ├── __init__.py
│ │ └── bionemo
│ │ └── llm
│ │ ├── __init__.py
│ │ ├── data
│ │ ├── test_collate.py
│ │ ├── test_datamodule.py
│ │ └── test_masking.py
│ │ ├── model
│ │ ├── biobert
│ │ │ └── test_transformer_specs.py
│ │ ├── test_loss.py
│ │ └── test_lr_scheduler.py
│ │ ├── test_lightning.py
│ │ └── utils
│ │ ├── __init__.py
│ │ ├── test_callbacks.py
│ │ ├── test_datamodule_utils.py
│ │ ├── test_iomixin_utils.py
│ │ ├── test_logger_utils.py
│ │ └── test_megatron_utils.py
├── bionemo-moco
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── blog.md
│ ├── documentation.md
│ ├── environment
│ │ ├── Instructions.md
│ │ ├── clone_bionemo_moco.sh
│ │ ├── moco_env.yaml
│ │ └── setup.sh
│ ├── examples
│ │ ├── continuous_data_interpolant_tutorial_cfm.ipynb
│ │ ├── continuous_data_interpolant_tutorial_ddpm.ipynb
│ │ ├── continuous_data_interpolant_tutorial_vdm.ipynb
│ │ ├── discrete_data_interpolant_tutorial.ipynb
│ │ └── ot_sampler_tutorial.ipynb
│ ├── figures
│ │ └── model_figure.png
│ ├── pyproject.toml
│ ├── scripts
│ │ ├── README.md
│ │ ├── clean_documentation.py
│ │ └── create_documentation.sh
│ ├── src
│ │ └── bionemo
│ │ │ └── moco
│ │ │ ├── __init__.py
│ │ │ ├── distributions
│ │ │ ├── __init__.py
│ │ │ ├── prior
│ │ │ │ ├── __init__.py
│ │ │ │ ├── continuous
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── gaussian.py
│ │ │ │ │ ├── harmonic.py
│ │ │ │ │ └── utils.py
│ │ │ │ ├── discrete
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── custom.py
│ │ │ │ │ ├── mask.py
│ │ │ │ │ └── uniform.py
│ │ │ │ └── distribution.py
│ │ │ └── time
│ │ │ │ ├── __init__.py
│ │ │ │ ├── beta.py
│ │ │ │ ├── distribution.py
│ │ │ │ ├── logit_normal.py
│ │ │ │ ├── uniform.py
│ │ │ │ └── utils.py
│ │ │ ├── interpolants
│ │ │ ├── __init__.py
│ │ │ ├── base_interpolant.py
│ │ │ ├── batch_augmentation.py
│ │ │ ├── continuous_time
│ │ │ │ ├── __init__.py
│ │ │ │ ├── continuous
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── continuous_flow_matching.py
│ │ │ │ │ ├── data_augmentation
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── augmentation_types.py
│ │ │ │ │ │ ├── equivariant_ot_sampler.py
│ │ │ │ │ │ ├── kabsch_augmentation.py
│ │ │ │ │ │ └── ot_sampler.py
│ │ │ │ │ └── vdm.py
│ │ │ │ └── discrete
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── discrete_flow_matching.py
│ │ │ │ │ └── mdlm.py
│ │ │ └── discrete_time
│ │ │ │ ├── __init__.py
│ │ │ │ ├── continuous
│ │ │ │ ├── __init__.py
│ │ │ │ └── ddpm.py
│ │ │ │ ├── discrete
│ │ │ │ ├── __init__.py
│ │ │ │ └── d3pm.py
│ │ │ │ └── utils.py
│ │ │ ├── schedules
│ │ │ ├── __init__.py
│ │ │ ├── inference_time_schedules.py
│ │ │ ├── noise
│ │ │ │ ├── __init__.py
│ │ │ │ ├── continuous_noise_transforms.py
│ │ │ │ ├── continuous_snr_transforms.py
│ │ │ │ └── discrete_noise_schedules.py
│ │ │ └── utils.py
│ │ │ └── testing
│ │ │ ├── __init__.py
│ │ │ └── parallel_test_utils.py
│ └── tests
│ │ └── bionemo
│ │ └── moco
│ │ ├── distributions
│ │ ├── prior
│ │ │ ├── continuous
│ │ │ │ ├── test_gaussian.py
│ │ │ │ └── test_harmonic.py
│ │ │ └── discrete
│ │ │ │ ├── test_custom.py
│ │ │ │ ├── test_mask.py
│ │ │ │ └── test_uniform.py
│ │ └── time
│ │ │ └── test_time_distribution.py
│ │ ├── interpolants
│ │ ├── continuous_time
│ │ │ ├── continuous
│ │ │ │ ├── test_continuous_flow_matching.py
│ │ │ │ ├── test_continuous_flow_matching_parallel.py
│ │ │ │ ├── test_optimal_transport.py
│ │ │ │ ├── test_vdm.py
│ │ │ │ └── test_vdm_parallel.py
│ │ │ └── discrete
│ │ │ │ ├── test_discrete_flow_matching.py
│ │ │ │ ├── test_discrete_flow_matching_parallel.py
│ │ │ │ ├── test_mdlm.py
│ │ │ │ └── test_mdlm_parallel.py
│ │ └── discrete_time
│ │ │ ├── continuous
│ │ │ ├── test_ddpm.py
│ │ │ └── test_ddpm_parallel.py
│ │ │ └── discrete
│ │ │ ├── test_d3pm.py
│ │ │ └── test_d3pm_parallel.py
│ │ ├── schedules
│ │ ├── noise
│ │ │ ├── test_continuous_noise_transforms.py
│ │ │ ├── test_continuous_snr_transforms.py
│ │ │ └── test_discrete_noise_schedule.py
│ │ └── test_inference_schedules.py
│ │ └── test_env.py
├── bionemo-noodles
│ ├── .gitignore
│ ├── Cargo.lock
│ ├── Cargo.toml
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── requirements.txt
│ ├── rust
│ │ └── src
│ │ │ └── lib.rs
│ ├── src
│ │ └── bionemo
│ │ │ └── noodles
│ │ │ ├── __init__.py
│ │ │ └── nvfaidx.py
│ └── tests
│ │ └── bionemo
│ │ └── noodles
│ │ ├── data
│ │ ├── bad_index.fasta
│ │ ├── bad_index.fasta.fai
│ │ ├── dupes.fasta
│ │ ├── sample.fasta
│ │ └── sample.fasta.fai
│ │ ├── test_nvfaidx.py
│ │ └── test_sequence_ops.py
├── bionemo-scdl
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── assets
│ │ ├── disk_space.png
│ │ └── throughput.png
│ ├── examples
│ │ └── example_notebook.ipynb
│ ├── pyproject.toml
│ ├── src
│ │ └── bionemo
│ │ │ └── scdl
│ │ │ ├── __init__.py
│ │ │ ├── api
│ │ │ ├── __init__.py
│ │ │ └── single_cell_row_dataset.py
│ │ │ ├── index
│ │ │ ├── __init__.py
│ │ │ └── row_feature_index.py
│ │ │ ├── io
│ │ │ ├── __init__.py
│ │ │ ├── single_cell_collection.py
│ │ │ └── single_cell_memmap_dataset.py
│ │ │ ├── scripts
│ │ │ ├── __init__.py
│ │ │ └── convert_h5ad_to_scdl.py
│ │ │ └── util
│ │ │ ├── __init__.py
│ │ │ ├── async_worker_queue.py
│ │ │ ├── filecopyutil.py
│ │ │ └── torch_dataloader_utils.py
│ └── tests
│ │ └── bionemo
│ │ └── scdl
│ │ ├── conftest.py
│ │ ├── index
│ │ └── test_row_feature_index.py
│ │ ├── io
│ │ ├── test_single_cell_collection.py
│ │ └── test_single_cell_memmap_dataset.py
│ │ └── util
│ │ ├── test_async_worker_queue.py
│ │ └── test_torch_dataloader_utils.py
├── bionemo-size-aware-batching
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── requirements.txt
│ ├── src
│ │ └── bionemo
│ │ │ └── size_aware_batching
│ │ │ ├── __init__.py
│ │ │ ├── sampler.py
│ │ │ └── utils.py
│ └── tests
│ │ └── bionemo
│ │ └── size_aware_batching
│ │ ├── conftest.py
│ │ ├── test_sampler.py
│ │ └── test_utils.py
├── bionemo-testing
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── src
│ │ └── bionemo
│ │ │ └── testing
│ │ │ ├── __init__.py
│ │ │ ├── callbacks.py
│ │ │ ├── data
│ │ │ ├── __init__.py
│ │ │ ├── esm2.py
│ │ │ ├── fasta.py
│ │ │ ├── load.py
│ │ │ └── resource.py
│ │ │ ├── harnesses
│ │ │ ├── __init__.py
│ │ │ ├── mode.py
│ │ │ └── stop_and_go.py
│ │ │ ├── lightning.py
│ │ │ ├── megatron_dataset_compatibility.py
│ │ │ ├── megatron_parallel_state_utils.py
│ │ │ ├── subprocess_utils.py
│ │ │ ├── testing_callbacks.py
│ │ │ ├── torch.py
│ │ │ └── utils.py
│ └── tests
│ │ └── bionemo
│ │ └── testing
│ │ ├── data
│ │ └── test_fasta.py
│ │ ├── test_megatron_dataset_compatibility.py
│ │ └── test_megatron_parallel_state_utils.py
└── bionemo-webdatamodule
│ ├── LICENSE
│ ├── README.md
│ ├── VERSION
│ ├── pyproject.toml
│ ├── src
│ └── bionemo
│ │ └── webdatamodule
│ │ ├── __init__.py
│ │ ├── datamodule.py
│ │ └── utils.py
│ └── tests
│ └── bionemo
│ └── webdatamodule
│ ├── __init__.py
│ ├── conftest.py
│ └── test_datamodule.py
├── tach.toml
└── uv.lock
/.devcontainer/initializeCommand.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Create the mounted config directories if they don't already exist
3 |
4 | mkdir -p ~/.aws
5 | mkdir -p ~/.ngc
6 | mkdir -p ~/.cache
7 | mkdir -p ~/.ssh
8 | [ ! -f ~/.netrc ] && touch ~/.netrc
9 |
10 | # Create the ~/.bash_history_devcontainer file if it doesn't exist
11 | [ ! -f ~/.bash_history_devcontainer ] && touch ~/.bash_history_devcontainer
12 |
13 | exit 0
14 |
--------------------------------------------------------------------------------
/.devcontainer/postCreateCommand.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Install every directory under ./3rdparty and every ./sub-packages/bionemo-*
3 | # directory as an editable package, without resolving dependencies and without
4 | # build isolation.
5 | 
6 | for sub in ./3rdparty/*/ ./sub-packages/bionemo-*/; do
7 |     # Quote the path so a directory name containing whitespace or glob
8 |     # characters reaches uv as one argument instead of being word-split.
9 |     uv pip install --no-deps --no-build-isolation --editable "$sub"
10 | done
11 | 
--------------------------------------------------------------------------------
/.github/codecov.yml:
--------------------------------------------------------------------------------
1 | codecov:
2 | require_ci_to_pass: false
3 |
4 | coverage:
5 | status:
6 | project:
7 | default:
8 | target: auto
9 | threshold: 5
10 |
11 | comment:
12 | layout: "diff, flags, files"
13 | behavior: default
14 | require_changes: false # if true: only post the comment if coverage changes
15 |
--------------------------------------------------------------------------------
/.github/copy-pr-bot.yaml:
--------------------------------------------------------------------------------
1 | enabled: true
2 | auto_sync_draft: false
3 | auto_sync_ready: true
4 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | updates:
4 | - package-ecosystem: "gitsubmodule"
5 | schedule:
6 | interval: "weekly"
7 | directory: "/"
8 | target-branch: "main"
9 | open-pull-requests-limit: 2
10 | reviewers:
11 | - "pstjohn"
12 | - "jstjohn"
13 | - package-ecosystem: "docker"
14 | directory: "/"
15 | target-branch: "main"
16 | open-pull-requests-limit: 1
17 | schedule:
18 | interval: "weekly"
19 | reviewers:
20 | - "pstjohn"
21 | - "dorotat-nv"
22 | - "trvachov"
23 |
--------------------------------------------------------------------------------
/.github/workflows/gh-docs-deploy.yml:
--------------------------------------------------------------------------------
1 | name: gh-pages-docs-deploy
2 |
3 | on:
4 | push:
5 | branches: [main]
6 | pull_request:
7 | branches: [main]
8 | merge_group:
9 | types: [checks_requested]
10 |
11 | jobs:
12 | build-and-deploy:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v4
16 | - uses: actions/setup-python@v5
17 | with:
18 | python-version: 3.x
19 | - name: Cache dependencies
20 | uses: actions/cache@v4
21 | with:
22 | path: ~/.cache/pip
23 | key: ${{ runner.os }}-pip-${{ hashFiles('docs/requirements.txt') }}
24 | restore-keys: |
25 | ${{ runner.os }}-pip-
26 | - name: Install dependencies
27 | run: |
28 | python -m pip install --upgrade pip
29 | pip install -r docs/requirements.txt
30 | - name: Build site
31 | run: mkdocs build
32 | working-directory: docs
33 | - name: Configure Git Credentials
34 | if: github.event_name == 'push'
35 | run: |
36 | git config user.name github-actions[bot]
37 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com
38 | - name: Deploy
39 | if: github.event_name == 'push'
40 | run: mkdocs gh-deploy --force
41 | working-directory: docs
42 |
--------------------------------------------------------------------------------
/.github/workflows/internal_tools.yml:
--------------------------------------------------------------------------------
1 | name: Install internal tools Python packages & run test suite
2 |
3 | on:
4 | pull_request:
5 | branches: [main]
6 | paths:
7 | - internal/infra-bionemo/**
8 |
9 | jobs:
10 | infra-bionemo:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - uses: actions/checkout@v4
14 | with:
15 | fetch-depth: 0
16 | submodules: "recursive"
17 | - uses: actions/setup-python@v5
18 | with:
19 | python-version: "3.10"
20 | cache: "pip"
21 | - run: pip install -r requirements-dev.txt -r requirements-test.txt
22 | - run: pip install internal/infra-bionemo
23 | - run: cd internal/infra-bionemo && pytest -v --cov=infra_bionemo --cov-report=term .
24 |
--------------------------------------------------------------------------------
/.github/workflows/trufflehog.yml:
--------------------------------------------------------------------------------
1 | name: TruffleHog Scan
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | merge_group:
9 |
10 | permissions:
11 | contents: read
12 | id-token: write
13 | issues: write
14 | pull-requests: write
15 |
16 | jobs:
17 | TruffleHog:
18 | runs-on: ubuntu-latest
19 | defaults:
20 | run:
21 | shell: bash
22 | steps:
23 | - name: Checkout code
24 | uses: actions/checkout@v4
25 | with:
26 | fetch-depth: 0
27 |
28 | - name: TruffleHog OSS
29 | if: github.event_name != 'merge_group'
30 | id: trufflehog
31 | uses: trufflesecurity/trufflehog@main
32 | continue-on-error: true
33 | with:
34 | extra_args: --results=verified,unknown
35 |
36 | - name: Scan Results Status
37 | if: steps.trufflehog.outcome == 'failure'
38 | run: exit 1
39 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "3rdparty/Megatron-LM"]
2 | path = 3rdparty/Megatron-LM
3 | url = https://github.com/NVIDIA/Megatron-LM.git
4 | [submodule "3rdparty/NeMo"]
5 | path = 3rdparty/NeMo
6 | url = https://github.com/NVIDIA/NeMo.git
7 |
--------------------------------------------------------------------------------
/.nspect-allowlist.toml:
--------------------------------------------------------------------------------
1 | version = "1.0.0"
2 |
3 | [oss]
4 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v2.3.0
4 | hooks:
5 | - id: end-of-file-fixer
6 | - id: trailing-whitespace
7 | - id: check-yaml
8 | exclude: "mkdocs.yml"
9 | - repo: https://github.com/astral-sh/ruff-pre-commit
10 | rev: v0.9.10
11 | hooks:
12 | - id: ruff
13 | # 1. Attempt to automatically fix any lint issues.
14 | args: ["--fix"]
15 | - id: ruff-format
16 | - repo: https://github.com/Yelp/detect-secrets
17 | rev: v1.5.0
18 | hooks:
19 | - id: detect-secrets
20 | name: detect-secrets (everything but notebooks)
21 | args: ['--baseline', '.secrets.baseline', '--exclude-files', '(.*\.ipynb|.*\.baseline)$', ]
22 | exclude: package.lock.json
23 | - id: detect-secrets
24 | name: detect-secrets (notebooks only)
25 | args: ['--baseline', '.secrets-nb.baseline', '--exclude-files', '^.(?!.*\.ipynb)', '--exclude-lines', '"(hash|id|image/\w+)":.*|<.*at 0x[0-9a-f]+>|object at 0x[0-9a-f]+', ]
26 | - repo: local
27 | hooks:
28 | - id: license-header-check
29 | name: Run license-check script
30 | entry: python internal/infra-bionemo/src/infra_bionemo/license_check.py -c scripts -c sub-packages -c docs -c internal --license-header ./license_header --modify
31 | language: python
32 | additional_dependencies: ["click==8.1.7"]
33 | pass_filenames: false
34 | always_run: true
35 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "cSpell.words": [
3 | "adata",
4 | "allclose",
5 | "bionemo",
6 | "boto",
7 | "botocore",
8 | "docstrings",
9 | "dtype",
10 | "Knowledgebase",
11 | "NBVAL",
12 | "nemo",
13 | "ngcsdk",
14 | "pbss",
15 | "platformdirs",
16 | "pretraining",
17 | "pydantic",
18 | "rampup",
19 | "Resampler",
20 | "resamplers",
21 | "singlecell",
22 | "tqdm",
23 | "uniref",
24 | "upsampling"
25 | ],
26 | "editor.rulers": [
27 | 120
28 | ],
29 | "autoDocstring.docstringFormat": "google-notypes"
30 | }
31 |
--------------------------------------------------------------------------------
/CODE-REVIEW.md:
--------------------------------------------------------------------------------
1 | docs/docs/main/contributing/code-review.md
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | docs/docs/main/contributing/contributing.md
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | ## Security
2 |
3 | NVIDIA is dedicated to the security and trust of our software products and services, including all source code repositories managed through our organization.
4 |
5 | If you need to report a security issue, please use the appropriate contact points outlined below. **Please do not report security vulnerabilities through GitHub/GitLab.**
6 |
7 | ## Reporting Potential Security Vulnerability in an NVIDIA Product
8 |
9 | To report a potential security vulnerability in any NVIDIA product:
10 | - Web: [Security Vulnerability Submission Form](https://www.nvidia.com/object/submit-security-vulnerability.html)
11 | - E-Mail: psirt@nvidia.com
12 | - We encourage you to use the following PGP key for secure email communication: [NVIDIA public PGP Key for communication](https://www.nvidia.com/en-us/security/pgp-key)
13 | - Please include the following information:
14 | - Product/Driver name and version/branch that contains the vulnerability
15 | - Type of vulnerability (code execution, denial of service, buffer overflow, etc.)
16 | - Instructions to reproduce the vulnerability
17 | - Proof-of-concept or exploit code
18 | - Potential impact of the vulnerability, including how an attacker could exploit the vulnerability
19 |
20 | While NVIDIA currently does not have a bug bounty program, we do offer acknowledgement when an externally reported security issue is addressed under our coordinated vulnerability disclosure policy. Please visit our [Product Security Incident Response Team (PSIRT)](https://www.nvidia.com/en-us/security/psirt-policies/) policies page for more information.
21 |
22 | ## NVIDIA Product Security
23 |
24 | For all security-related concerns, please visit NVIDIA's Product Security portal at https://www.nvidia.com/en-us/security
25 |
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 2.6
2 |
--------------------------------------------------------------------------------
/ci/docker/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | # Get host UID/GID from environment variables (default 1000 when unset or empty). Pass them explicitly, e.g.
5 | #   docker run -e HOST_UID=$(id -u) -e HOST_GID=$(id -g) ...
6 | HOST_UID=${HOST_UID:-1000}
7 | HOST_GID=${HOST_GID:-1000}
8 |
9 | # Update the UID/GID of the container user; values are quoted so they are never word-split or glob-expanded
10 | groupmod -g "$HOST_GID" bionemo > /dev/null
11 | usermod -u "$HOST_UID" bionemo > /dev/null
12 |
13 | # Execute the main container command as the bionemo user (exec replaces this shell)
14 | exec gosu bionemo "$@"
15 |
--------------------------------------------------------------------------------
/ci/scripts/static_checks.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Runs the repository's static checks (ruff, tach, pre-commit) from the git
4 | # top-level directory. Every check runs even if an earlier one fails; the
5 | # script exits 1 if any check failed and 0 otherwise.
6 |
7 | set -xueo pipefail
8 |
9 | REPOSITORY_ROOT=$(git rev-parse --show-toplevel)
10 | # Quoted so a checkout path containing spaces or glob characters still works.
11 | cd "${REPOSITORY_ROOT}"
12 |
13 | # Each check is run as `cmd || CODE=$?` so that `set -e` does not abort the
14 | # script on a failing check and the exit code is kept for the summary below.
15 | echo "Running ruff checks"
16 | E_RUFF_CHECK=0
17 | ruff check scripts/ sub-packages/ docs/ || E_RUFF_CHECK="$?"
18 |
19 | echo "Running tach checks"
20 | E_TACH_CHECK=0
21 | tach check || E_TACH_CHECK="$?"
22 |
23 | echo "Running pre-commit checks"
24 | E_PRE_COMMIT=0
25 | pre-commit run --all-files --show-diff-on-failure --color always || E_PRE_COMMIT="$?"
26 |
27 | # Report every failing check, then exit non-zero if there was at least one.
28 | ANY_FAILURE=0
29 | if [[ "${E_PRE_COMMIT}" != "0" ]]; then
30 |     ANY_FAILURE=1
31 |     echo "ERROR: pre-commit hooks failed! (exit: ${E_PRE_COMMIT})"
32 | fi
33 | if [[ "${E_RUFF_CHECK}" != "0" ]]; then
34 |     ANY_FAILURE=1
35 |     echo "ERROR: ruff check failed! (exit: ${E_RUFF_CHECK})"
36 | fi
37 | if [[ "${E_TACH_CHECK}" != "0" ]]; then
38 |     ANY_FAILURE=1
39 |     echo "ERROR: tach check failed! (exit: ${E_TACH_CHECK})"
40 | fi
41 | # ANY_FAILURE is only ever 0 or 1, so it doubles as the exit status.
42 | exit "${ANY_FAILURE}"
43 |
--------------------------------------------------------------------------------
/docs/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM squidfunk/mkdocs-material:latest
2 |
3 | # Install plugins. --no-cache keeps the apk package index out of the image layer.
4 | RUN apk add --no-cache gcc python3-dev musl-dev linux-headers
5 | COPY docs/requirements.txt /tmp/
6 | RUN pip install --disable-pip-version-check --no-cache-dir -r /tmp/requirements.txt
7 |
--------------------------------------------------------------------------------
/docs/conftest.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | # FIXME: remove this notebook from the ignore list once this issue is fixed: https://github.com/NVIDIA/bionemo-framework/issues/778
18 | collect_ignore = ["docs/user-guide/examples/bionemo-geneformer/geneformer_cellxgene_tutorial.ipynb"]
19 |
--------------------------------------------------------------------------------
/docs/docs/SUMMARY.md:
--------------------------------------------------------------------------------
1 | - [Home](index.md)
2 | - [BioNeMo Documentation](main/)
3 | - [Models](models/)
4 |
--------------------------------------------------------------------------------
/docs/docs/assets/css/chatbot.css:
--------------------------------------------------------------------------------
1 | /* Match the styles of the llm_bot Chatbot icon: cap the icon image size. */
2 | img.open-icon {
3 | max-height: 20px !important;
4 | max-width: 20px !important;
5 | }
6 |
7 | .open-icon {
8 | border: none;
9 | max-width: 22px;
10 | margin-right: 8px;
11 | }
12 |
13 | /* temporarily make invisible to test in production.
14 | turn on in going md-container > md-main > md-content > article. NOTE(review): the rule below sets display: block, so the button is currently shown; confirm this comment is still accurate. */
15 | #chatbot > button {
16 | display: block;
17 | }
18 |
--------------------------------------------------------------------------------
/docs/docs/assets/css/fonts.css:
--------------------------------------------------------------------------------
1 | @font-face {
2 |   font-family: "NVIDIA Sans";
3 |   font-style: normal;
4 |   src: url(https://brand-assets.cne.ngc.nvidia.com/assets/fonts/nvidia-sans/1.0.0/NVIDIASans_Lt.woff2);
5 |   font-weight: 300; /* Light. "light" is not a valid font-weight descriptor value and would be ignored. */
6 | }
7 |
8 | @font-face {
9 |   font-family: "NVIDIA Sans";
10 |   font-style: italic;
11 |   src: url(https://brand-assets.cne.ngc.nvidia.com/assets/fonts/nvidia-sans/1.0.0/NVIDIASans_LtIt.woff2);
12 |   font-weight: 300; /* Light italic. "light" is not a valid font-weight descriptor value and would be ignored. */
13 | }
14 |
15 | @font-face {
16 |   font-family: "NVIDIA Sans";
17 |   src: url(https://brand-assets.cne.ngc.nvidia.com/assets/fonts/nvidia-sans/1.0.0/NVIDIASans_Rg.woff2);
18 |   font-weight: 400; /* regular, upright */
19 |   font-style: normal;
20 | }
21 |
22 | @font-face {
23 |   font-family: "NVIDIA Sans";
24 |   src: url(https://brand-assets.cne.ngc.nvidia.com/assets/fonts/nvidia-sans/1.0.0/NVIDIASans_It.woff2);
25 |   font-weight: 400; /* regular, italic */
26 |   font-style: italic;
27 | }
28 |
29 | @font-face {
30 |   font-family: "NVIDIA Sans";
31 |   src: url(https://brand-assets.cne.ngc.nvidia.com/assets/fonts/nvidia-sans/1.0.0/NVIDIASans_Bd.woff2);
32 |   font-weight: 700; /* bold, upright */
33 |   font-style: normal;
34 | }
35 |
36 | @font-face {
37 |   font-family: "NVIDIA Sans";
38 |   src: url(https://brand-assets.cne.ngc.nvidia.com/assets/fonts/nvidia-sans/1.0.0/NVIDIASans_BdIt.woff2);
39 |   font-weight: 700; /* bold, italic */
40 |   font-style: italic;
41 | }
42 |
--------------------------------------------------------------------------------
/docs/docs/assets/css/jupyter-themes.css:
--------------------------------------------------------------------------------
1 | /* theme: light. Sets the --jp-* colour variables on .jupyter-notebook when <body> has data-md-color-scheme="light". */
2 | body[data-md-color-scheme="light"] .jupyter-notebook {
3 | --jp-cell-editor-background: #f7f7f7;
4 | --jp-cell-editor-border-color: #cfcfcf;
5 | --jp-cell-prompt-fg-color: #303030;
6 | --jp-cell-prompt-bg-color: #f0f0f0;
7 | --jp-notebook-background: #ffffff;
8 | --jp-layout-color1: #ffffff;
9 | --jp-content-font-color1: #000000;
10 | }
11 |
12 | /* theme: dark. Sets the same --jp-* colour variables on .jupyter-notebook when <body> has data-md-color-scheme="dark". */
13 | body[data-md-color-scheme="dark"] .jupyter-notebook {
14 | --jp-cell-editor-background: #2b2b2b;
15 | --jp-cell-editor-border-color: #464646;
16 | --jp-cell-prompt-fg-color: #d7d7d7;
17 | --jp-cell-prompt-bg-color: #333333;
18 | --jp-notebook-background: #1e1e1e;
19 | --jp-layout-color1: #1e1e1e;
20 | --jp-content-font-color1: #d4d4d4;
21 | }
22 |
--------------------------------------------------------------------------------
/docs/docs/assets/images/amplify/training_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/amplify/training_loss.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/amplify/validation_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/amplify/validation_loss.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/amplify/validation_ppl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/amplify/validation_ppl.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/esm2/esm2_device_scaling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/esm2/esm2_device_scaling.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/esm2/esm2_model_scaling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/esm2/esm2_model_scaling.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/esm2/esm2_peft_memory_usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/esm2/esm2_peft_memory_usage.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/esm2/esm2_peft_time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/esm2/esm2_peft_time.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/esm2/esm2_pretrain_convergence.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/esm2/esm2_pretrain_convergence.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/esm2/esm2_single_node_training_perf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/esm2/esm2_single_node_training_perf.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/evo2/evo2_bionemo_1b_6950steps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/evo2/evo2_bionemo_1b_6950steps.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/evo2/evo2_bionemo_7bnv_28ksteps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/evo2/evo2_bionemo_7bnv_28ksteps.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/evo2/evo2_performance_by_cluster_size.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/evo2/evo2_performance_by_cluster_size.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/evo2/evo2_savanna_1b_6950steps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/evo2/evo2_savanna_1b_6950steps.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/evo2/evo2_savanna_7b_28ksteps.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/evo2/evo2_savanna_7b_28ksteps.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/evo2/evo2_vs_7b_40b_performance_vs_context_length.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/evo2/evo2_vs_7b_40b_performance_vs_context_length.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/evo2/evo2_vs_llama2_performance_vs_context_length.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/evo2/evo2_vs_llama2_performance_vs_context_length.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/evo2/evo2_zeroshot_brca1_stripplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/evo2/evo2_zeroshot_brca1_stripplot.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/favicon.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/F1-score-models.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/F1-score-models.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/Geneformer_steven_106m_train.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/Geneformer_steven_106m_train.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/Geneformer_steven_106m_val.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/Geneformer_steven_106m_val.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/accuracy-models-04-18-2025.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/accuracy-models-04-18-2025.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/average-accuracy-models.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/average-accuracy-models.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/f1-score-models-04-18-2025.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/f1-score-models-04-18-2025.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/geneformer_106m_train_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/geneformer_106m_train_loss.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/geneformer_106m_val_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/geneformer_106m_val_loss.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/geneformer_10m_training_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/geneformer_10m_training_loss.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/geneformer_10m_val_loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/geneformer_10m_val_loss.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/loss_curve_new_v_old_geneformer_64_node_10M.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/loss_curve_new_v_old_geneformer_64_node_10M.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/geneformer/model_tflops_per_gpu_chart_geneformer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/geneformer/model_tflops_per_gpu_chart_geneformer.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/megatron_background/README.md:
--------------------------------------------------------------------------------
1 | NOTE: these images are from https://nvidia.sharepoint.com/:p:/r/sites/PixelsManagementTeams/_layouts/15/doc2.aspx?sourcedoc=%7BD1FC26B6-A366-4D1E-8595-A9D3CD3A0D71%7D&file=Pixels_SW_Team_Meeting_2024_09_05.pptx&action=edit&mobileredirect=true&DefaultItemOpen=1
2 |
--------------------------------------------------------------------------------
/docs/docs/assets/images/megatron_background/data_parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/megatron_background/data_parallelism.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/megatron_background/execution_schedulers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/megatron_background/execution_schedulers.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/megatron_background/fsdp_slide1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/megatron_background/fsdp_slide1.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/megatron_background/fsdp_slide2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/megatron_background/fsdp_slide2.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/megatron_background/pipeline_parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/megatron_background/pipeline_parallelism.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/megatron_background/sp_korthikanti_2022_fig5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/megatron_background/sp_korthikanti_2022_fig5.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/megatron_background/tensor_and_pipeline_parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/megatron_background/tensor_and_pipeline_parallelism.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/megatron_background/tensor_parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/megatron_background/tensor_parallelism.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/sub_package_graphs/dependency_file_imports.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/sub_package_graphs/dependency_file_imports.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/sub_package_graphs/dependency_graph_pyproject.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/sub_package_graphs/dependency_graph_pyproject.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/sub_package_graphs/dependency_graph_tach.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/sub_package_graphs/dependency_graph_tach.png
--------------------------------------------------------------------------------
/docs/docs/assets/images/wandb_tips_tricks/trainer_global_step.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/images/wandb_tips_tricks/trainer_global_step.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/.gitkeep
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/MMB_molecule_generation_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/MMB_molecule_generation_1.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/MMB_molecule_generation_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/MMB_molecule_generation_2.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/MMB_molecule_generation_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/MMB_molecule_generation_3.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/MMB_molecule_generation_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/MMB_molecule_generation_4.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/MMB_molecule_generation_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/MMB_molecule_generation_5.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/MolMIM_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/MolMIM_model.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/MolMIM_molecule_generation_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/MolMIM_molecule_generation_1.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/MolMIM_molecule_generation_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/MolMIM_molecule_generation_2.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/bcp_snapshot_.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/bcp_snapshot_.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/bcp_snapshot_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/bcp_snapshot_1.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/bcp_snapshot_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/bcp_snapshot_2.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/bcp_snapshot_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/bcp_snapshot_3.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/bionemo_overview_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/bionemo_overview_1.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/bionemo_overview_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/bionemo_overview_2.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/cellxgene/num_cells_by_assay.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/cellxgene/num_cells_by_assay.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/cellxgene/num_cells_by_dataset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/cellxgene/num_cells_by_dataset.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/cellxgene/num_genes_measured_by_assay.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/cellxgene/num_genes_measured_by_assay.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/cellxgene/pct_cells_by_age.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/cellxgene/pct_cells_by_age.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/cellxgene/pct_cells_by_ethnicity_category.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/cellxgene/pct_cells_by_ethnicity_category.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/cellxgene/pct_cells_by_sex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/cellxgene/pct_cells_by_sex.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/cellxgene/pct_cells_by_tissue_category.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/cellxgene/pct_cells_by_tissue_category.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/cellxgene/top9_datasets_tissue_distribution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/cellxgene/top9_datasets_tissue_distribution.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/diffdock_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/diffdock_1.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/diffdock_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/diffdock_2.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/diffdock_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/diffdock_3.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/diffdock_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/diffdock_4.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/diffdock_fw_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/diffdock_fw_overview.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/equidock_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/equidock_1.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/equidock_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/equidock_2.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/equidock_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/equidock_3.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/equidock_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/equidock_4.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/esm1nv_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/esm1nv_1.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/esm1nv_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/esm1nv_2.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/esm1nv_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/esm1nv_3.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/esm1nv_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/esm1nv_4.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/mmb_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/mmb_1.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/mmb_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/mmb_2.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/mmb_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/mmb_3.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/mmb_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/mmb_4.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/mmb_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/mmb_5.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/molmim-embedding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/molmim-embedding.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/molmim-hidden-state.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/molmim-hidden-state.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/molmim-predictive-modeling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/molmim-predictive-modeling.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/sc_fm/F1-score-models.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/sc_fm/F1-score-models.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/sc_fm/average-accuracy-models.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/sc_fm/average-accuracy-models.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/sc_fm/geneformer-106m-240530-val-train-loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/sc_fm/geneformer-106m-240530-val-train-loss.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/sc_fm/geneformer-10m-240530-val-train-loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/sc_fm/geneformer-10m-240530-val-train-loss.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/sc_fm/geneformer-240530-val-comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/sc_fm/geneformer-240530-val-comparison.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/sc_fm/model_tflops_per_gpu_chart_tight_layout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/sc_fm/model_tflops_per_gpu_chart_tight_layout.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/wandai_charts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/wandai_charts.png
--------------------------------------------------------------------------------
/docs/docs/assets/old_images/wandb-dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/docs/docs/assets/old_images/wandb-dashboard.png
--------------------------------------------------------------------------------
/docs/docs/main/SUMMARY.md:
--------------------------------------------------------------------------------
1 | - [About](about/)
2 | - [Get Started](getting-started/)
3 | - [Developer Guide](developer-guide/)
4 | - [Tutorials](examples/)
5 | - [Data Sets](datasets/)
6 | - [Contributing](contributing/)
7 | - [References](references/)
8 |
--------------------------------------------------------------------------------
/docs/docs/main/about/SUMMARY.md:
--------------------------------------------------------------------------------
1 | - [Overview](overview.md)
2 | - [Background](background/)
3 | - [Release Notes](releasenotes-fw.md)
4 |
--------------------------------------------------------------------------------
/docs/docs/main/about/background/SUMMARY.md:
--------------------------------------------------------------------------------
1 | - [NeMo2 Parallelism](nemo2.md)
2 | - [Megatron Dataset Considerations](megatron_datasets.md)
3 |
--------------------------------------------------------------------------------
/docs/docs/main/contributing/Writing Documentation/mkdocs.md:
--------------------------------------------------------------------------------
1 | # MkDocs
2 |
3 | ## Build system
4 |
5 | BioNeMo 2 uses [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) to build its documentation.
6 | Docstrings are converted to automatically-generated API reference pages using `mkdocstrings`, and can be linked from
7 | markdown pages using [paths](https://mkdocstrings.github.io/usage/).
8 |
--------------------------------------------------------------------------------
/docs/docs/main/contributing/sub-package_dependency_graph.md:
--------------------------------------------------------------------------------
1 | ## Sub-Package Dependency Graph
2 |
3 | The script in `sub-packages/bionemo/fw/src/dependency_graph.py` generates a dependency graph for the BioNeMo sub-packages and verifies that the pyproject.toml and tach.toml files align and capture the dependencies needed for imports in the Python files. Additionally, it checks dependencies between BioNeMo sub-packages and creates visual representations of the dependencies in pyproject.toml files, in tach.toml, and in the source files.
4 |
5 | These are visualizations of the dependency graph from the pyproject.toml files:
6 |
7 |
8 |
9 | Similarly from the tach.toml file:
10 |
11 |
12 |
13 | And these are the dependencies from the file imports:
14 |
15 |
16 |
--------------------------------------------------------------------------------
/docs/docs/main/datasets/index.md:
--------------------------------------------------------------------------------
1 | # BioNeMo Framework: Available Datasets
2 |
3 | The BioNeMo Framework provides access to a variety of high-quality datasets for bioinformatics and cheminformatics research. These datasets cover a range of biological and chemical modalities, supporting various research applications. The following table lists the currently available datasets:
4 |
5 | | **Dataset** | **Modality** | **Uses** |
6 | | -------------------------------------------------------- | -------------- | ------------------------------------------------ |
7 | | [CELLxGENE](./CELLxGENE.md) | Single Cell | Single-Cell Gene Expression |
8 | | [UniProt](./uniprot.md) | Protein | Protein Sequence and Function Analysis |
9 |
10 | For more information about the datasets included in the BioNeMo Framework, refer to the Dataset Cards linked in the table above or the original sources referenced in the respective dataset descriptions.
11 |
--------------------------------------------------------------------------------
/docs/docs/main/developer-guide/SUMMARY.md:
--------------------------------------------------------------------------------
1 | - [bionemo-amplify](bionemo-amplify/bionemo-amplify-Overview.md)
2 | - [bionemo-core](bionemo-core/bionemo-core-Overview.md)
3 | - [bionemo-esm2](bionemo-esm2/bionemo-esm2-Overview.md)
4 | - [bionemo-evo2](bionemo-evo2/bionemo-evo2-Overview.md)
5 | - [bionemo-example-model](bionemo-example_model/bionemo-example_model-Overview.md)
6 | - [bionemo-fw](bionemo-fw/bionemo-fw-Overview.md)
7 | - [bionemo-geneformer](bionemo-geneformer/bionemo-geneformer-Overview.md)
8 | - [bionemo-geometric](bionemo-geometric/bionemo-geometric-Overview.md)
9 | - [bionemo-llm](bionemo-llm/bionemo-llm-Overview.md)
10 | - [bionemo-moco](bionemo-moco/bionemo-moco-Overview.md)
11 | - [bionemo-noodles](bionemo-noodles/bionemo-noodles-Overview.md)
12 | - [bionemo-scdl](bionemo-scdl/bionemo-scdl-Overview.md)
13 | - [bionemo-size-aware-batching](bionemo-size-aware-batching/bionemo-size-aware-batching-Overview.md)
14 | - [bionemo-testing](bionemo-testing/bionemo-testing-Overview.md)
15 | - [bionemo-webdatamodule](bionemo-webdatamodule/bionemo-webdatamodule-Overview.md)
16 |
--------------------------------------------------------------------------------
/docs/docs/main/examples/.gitignore:
--------------------------------------------------------------------------------
1 | # IMPORTANT: This directory is reserved for examples that are automatically
2 | # generated by mkdocs. Please do not manually add files here. To add
3 | # examples, read the sub-packages section in root level docs README.md.
4 |
5 | # Ignore everything in this directory
6 | /*
7 |
8 | # Except the SUMMARY.md file, this .gitignore & the conftest.py file
9 | !.gitignore
10 | !conftest.py
11 | !SUMMARY.md
12 |
--------------------------------------------------------------------------------
/docs/docs/main/examples/SUMMARY.md:
--------------------------------------------------------------------------------
1 | - [ESM-2](bionemo-esm2/)
2 | - [Evo2](bionemo-evo2/)
3 | - [Geneformer](bionemo-geneformer/)
4 | - [MoCo](bionemo-moco/)
5 | - [SCDL](bionemo-scdl/)
6 |
--------------------------------------------------------------------------------
/docs/docs/main/examples/conftest.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
def pytest_collectstart(collector):
    """Pytest hook: extend ``skip_compare`` on collectors of ``.ipynb`` files.

    When the collector's ``fspath`` is set and has the ``.ipynb`` extension,
    the ``text/html``, ``application/javascript`` and ``stderr`` entries are
    appended to ``collector.skip_compare``. Every other collector is left
    untouched.
    """
    notebook_path = collector.fspath
    if not notebook_path or notebook_path.ext != ".ipynb":
        return

    # NOTE(review): presumably read by the notebook-validation plugin to decide
    # which cell outputs are excluded from comparison - confirm.
    ignored_outputs = ("text/html", "application/javascript", "stderr")
    collector.skip_compare += ignored_outputs
24 |
--------------------------------------------------------------------------------
/docs/docs/main/getting-started/SUMMARY.md:
--------------------------------------------------------------------------------
1 | - [Hardware and Software Prerequisites](pre-reqs.md)
2 | - [Access and Startup](access-startup.md)
3 | - [Initialization Guide](initialization-guide.md)
4 | - [Development](development.md)
5 | - [Training Models](training-models.md)
6 |
--------------------------------------------------------------------------------
/docs/docs/main/references/API_reference/index.md:
--------------------------------------------------------------------------------
1 | # API reference
2 |
3 | The API reference contains detailed descriptions of all public functions and objects. It's the best place to look if you need information on a specific function.
4 |
--------------------------------------------------------------------------------
/docs/docs/models/ESM-2/SUMMARY.md:
--------------------------------------------------------------------------------
1 | - [Model Overview](index.md)
2 | - [Pre-trained Checkpoints](pre-training.md)
3 |
--------------------------------------------------------------------------------
/docs/docs/models/index.md:
--------------------------------------------------------------------------------
1 | # BioNeMo Framework: Available Models
2 |
3 | State-of-the-art models are continually integrated into the BioNeMo Framework. The BioNeMo Framework currently offers the following pre-trained models:
4 |
5 | | **Model** | **Modality** | **Uses** |
6 | | ----------------------------- | ------------ | ----------------------- |
7 | | [AMPLIFY](./amplify.md) | Protein | Representation Learning |
8 | | [ESM-2](./ESM-2/index.md) | Protein | Representation Learning |
9 | | [Evo2](./evo2.md) | DNA | Generative AI |
10 | | [Geneformer](./geneformer.md) | Single Cell | Representation Learning |
11 |
12 | For more information about the models included in BioNeMo Framework, refer to the Model Cards linked in the table above or the original publications referenced in the respective model descriptions.
13 |
--------------------------------------------------------------------------------
/docs/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 |
4 | {% block outdated %} You're not viewing the latest version.
5 |
6 | Click here to go to latest.
7 |
8 | {% endblock %} {% block styles %} {{ super() }}
9 |
10 | {% endblock %} {% block scripts %} {{ super() }}
11 |
12 | {% endblock %} {% block content %} {{ super() }}
13 |
14 | {% endblock %}
15 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | mkdocs-material
2 | mkdocs-macros-plugin
3 | mkdocs-minify-plugin
4 | mkdocstrings[python]
5 | mkdocs-gen-files
6 | pymdown-extensions
7 | mkdocs-jupyter
8 | mkdocs-include-dir-to-nav
9 | mkdocs-literate-nav
10 | mkdocs-site-urls
11 | mike
12 |
--------------------------------------------------------------------------------
/internal/README_justfile.md:
--------------------------------------------------------------------------------
1 | To get started, first download [`just`](https://github.com/casey/just). You can use [Homebrew](https://brew.sh/) on macOS and Linux:
2 | ```bash
3 | brew install just
4 | ```
5 |
6 | **Once you have `just`, you need to run the `just setup` command once _before_ you can run any other command.**
7 | Thus, if it's your first time, you will need to do this first:
8 | ```bash
9 | just setup
10 | just
11 | ```
12 |
13 | You can see all of the commands for the development cycle by running `just`. These commands are executable as
14 | `just X` for each command `X` listed:
15 | ```
16 | build-dev # Builds the development image.
17 | build-release # Builds the release image.
18 | run-dev cmd='bash' # Runs an interactive program in the development bionemo image.
19 | run-release cmd='bash' # Runs an interactive program in the release bionemo image.
20 | setup # Checks for installed programs (docker, git, etc.), their versions, and grabs the latest cache image.
21 | test # Executes pytest in the release image.
22 | ```
23 |
24 | You can combine `just` commands together. For example, run `just build-dev build-release` to build both images.
25 |
--------------------------------------------------------------------------------
/internal/infra-bionemo/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | # For guidance, see: https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
6 | [project]
7 | name = "infra-bionemo"
8 | version = "0.1.0"
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | description = "Internal library of utilities and programs for BioNeMo-related infrastructure."
11 | readme = "README.md"
12 | requires-python = ">=3.10"
13 | keywords = []
14 | license = {file = "LICENSE"}
15 | classifiers = [
16 | "Programming Language :: Python :: 3.10",
17 | "Private :: Do Not Upload",
18 | ]
19 | dependencies = [
20 | "click>=8.1.7,<9.0.0",
21 | "tomli>=2.0.2",
22 | "tomli_w>=1.1.0",
23 | ]
24 |
25 | [project.scripts]
26 | license-check = "infra_bionemo.license_check:entrypoint"
27 | create-bionemo-project = "infra_bionemo.new_project.exe.bionemo_subpackage:entrypoint"
28 | create-py-project = "infra_bionemo.new_project.exe.simple:entrypoint"
29 | create-namespaced-project = "infra_bionemo.new_project.exe.namespace:entrypoint"
30 |
31 | [tool.pytest.ini_options]
32 | testpaths = ["tests"]
33 | filterwarnings = [ "ignore::DeprecationWarning",]
34 |
35 | [tool.coverage.run]
36 | source = ["infra_bionemo"]
37 |
--------------------------------------------------------------------------------
/internal/infra-bionemo/setup.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from setuptools import setup
18 |
19 |
20 | if __name__ == "__main__":
21 | setup()
22 |
--------------------------------------------------------------------------------
/internal/infra-bionemo/src/infra_bionemo/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/internal/infra-bionemo/src/infra_bionemo/new_project/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/internal/infra-bionemo/src/infra_bionemo/new_project/exe/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/internal/infra-bionemo/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/internal/infra-bionemo/tests/test_infra_bionemo/test_new_project/test_utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | import io
18 |
19 | from pytest import raises
20 |
21 | from infra_bionemo.new_project.utils import ask_yes_or_no
22 |
23 |
def test_ask_yes_or_no(monkeypatch):
    """Exercise ``ask_yes_or_no`` with an empty prompt and with scripted stdin.

    An empty prompt must raise ``ValueError``. For the prompt
    ``"hello world?"``, each scripted stdin content below must produce a
    truthy or falsy result as listed.
    """
    # The empty prompt is rejected outright.
    with raises(ValueError):
        ask_yes_or_no("")

    # (text fed through stdin, whether the result must be truthy)
    # NOTE(review): the last case presumably forces one re-prompt before "y" - confirm.
    scenarios = (
        ("y", True),
        ("n", False),
        ("loop once\ny", True),
    )
    for typed, expect_truthy in scenarios:
        # Each scenario gets its own patch context so stdin is restored afterwards.
        with monkeypatch.context() as patch:
            patch.setattr("sys.stdin", io.StringIO(typed))
            answer = ask_yes_or_no("hello world?")
            if expect_truthy:
                assert answer
            else:
                assert not answer
39 |
--------------------------------------------------------------------------------
/internal/scripts/README.md:
--------------------------------------------------------------------------------
1 | # Scripts for commonly performed bionemo-framework actions.
2 |
3 | ## First Time Setup
4 |
5 | After cloning the repository, you need to run the setup script **first**:
6 |
7 | ```bash
8 | ./internal/scripts/setup_env_file.sh
9 | ```
10 |
11 | The script returns an exit code of 1 the first time it is run.
12 |
13 | ## Release Image Building
14 |
15 | To build the release image, run the following script:
16 |
17 | ```bash
18 | DOCKER_BUILDKIT=1 ./ci/scripts/build_docker_image.sh \
19 | -regular-docker-builder \
20 | -image-name "nvcr.io/nvidian/cvai_bnmo_trng/bionemo:bionemo2-$(git rev-parse HEAD)"
21 | ```
22 |
23 | ## Development Image Building
24 |
25 | To build the development image, run the following script:
26 |
27 | ```bash
28 | ./internal/scripts/build_dev_image.sh
29 | ```
30 |
31 | ## Interactive Shell in Development Image
32 |
33 | After building the development image, you can start a container from it and open a bash shell in it by executing:
34 |
35 | ```bash
36 | ./internal/scripts/run_dev.sh
37 | ```
38 |
39 | ## Testing Locally
40 |
41 | Inside the development container, run `./ci/scripts/static_checks.sh` to validate that code changes will pass the code
42 | formatting and license checks run during CI. In addition, run the longer `./ci/scripts/run_pytest.sh` script to run unit
43 | tests for all sub-packages.
44 |
--------------------------------------------------------------------------------
/internal/scripts/build_dev_image.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -euo pipefail  # abort on errors, unset variables, and failed pipeline stages
4 |
5 | COMMIT=$(git rev-parse HEAD)  # full SHA of the checked-out commit; used in the image tag and a label
6 | DATE=$(date --iso-8601=seconds -u)  # UTC build timestamp (GNU date option syntax)
7 |
8 | set -x  # echo the docker invocation below
9 | DOCKER_BUILDKIT=1 docker buildx build \
10 |   -t "nvcr.io/nvidian/cvai_bnmo_trng/bionemo:dev-bionemo2-${COMMIT}" \
11 |   --target="development" \
12 |   --load \
13 |   --cache-from nvcr.io/nvidia/clara/bionemo-framework:nightly \
14 |   --cache-to type=inline \
15 |   --label "com.nvidia.bionemo.git_sha=${COMMIT}" \
16 |   --label "com.nvidia.bionemo.created_at=${DATE}" \
17 |   -f ./Dockerfile \
18 |   .
19 |
--------------------------------------------------------------------------------
/license_header:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/requirements-cve.txt:
--------------------------------------------------------------------------------
1 | onnx>=1.16.0
2 | setuptools>=78.1.1 # Addresses CVE https://github.com/advisories/GHSA-5rjg-fvgr-3xxf
3 | aiohttp>=3.9.4
4 | jupyterlab>=3.6.8
5 | jupyter_server>=2.14.1 # https://github.com/advisories/GHSA-hrw6-wg82-cm62
6 | Werkzeug>=3.0.3
7 | nltk>=3.9.1
8 | pillow>=10.3.0
9 | tornado>=6.5.0 # Addresses CVE https://github.com/advisories/GHSA-7cx3-6m66-7c5m
10 | wandb>=0.19.1 # Addresses CVE GHSA-v778-237x-gjrc
11 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | ruff==0.9.10 # Needs to match the version of ruff used in .pre-commit-config.yaml.
2 | pre-commit==3.4.0
3 | virtualenv==20.26.6
4 | ipdb==0.13.11
5 | click==8.1.7
6 | tenacity==8.5.0
7 | tach>=0.9.0
8 | maturin==1.7.4
9 |
--------------------------------------------------------------------------------
/requirements-test.txt:
--------------------------------------------------------------------------------
1 | pytest-cov==4.1.0
2 | pytest-timeout==2.2.0
3 | pytest-dependency==0.5.1
4 | testbook==0.4.2
5 | requests_mock==1.11.0
6 | # For SwiftStack access
7 | awscli==1.33.33
8 | nbval==0.11.0
9 | # For NvFaidx equivalence tests
10 | pyfaidx==0.8.1.3
11 |
12 | # Temporary pin for pytorch-lightning until megatron callbacks in ProgressPrinter can get fixed.
13 | # See https://nvidia.slack.com/archives/C02A7LYGHK8/p1734727482697309
14 | pytorch-lightning<2.5.0
15 | lightning<2.5.0
16 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-amplify/README.md:
--------------------------------------------------------------------------------
1 | # bionemo-amplify
2 |
3 | To install, execute the following:
4 | ```bash
5 | pip install -e .
6 | ```
7 |
8 | To run unit tests, execute:
9 | ```bash
10 | pytest -v .
11 | ```
12 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-amplify/VERSION:
--------------------------------------------------------------------------------
1 | 0.0.1
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-amplify/pyproject.toml:
--------------------------------------------------------------------------------
1 |
2 | [build-system]
3 | requires = ["setuptools>=64", "wheel"]
4 | build-backend = "setuptools.build_meta"
5 |
6 | [project]
7 | name = "bionemo-amplify"
8 | readme = "README.md"
9 | description = "A BioNeMo sub-package for training AMPLIFY models."
10 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
11 | requires-python = ">=3.10"
12 | license = { file = "LICENSE" }
13 | dynamic = ["version"]
14 | dependencies = [
15 | # internal
16 | 'bionemo-core',
17 | 'bionemo-llm',
18 | 'bionemo-esm2',
19 | ]
20 |
21 | [project.optional-dependencies]
22 | test = [
23 | 'bionemo-testing'
24 | ]
25 | te = [
26 | # TE & Apex need to be installed after PyTorch, NVCC, and CUDA.
27 | # TODO(@pstjohn, @cspades): Figure out how to do this without post-installation.
28 | 'transformer_engine[pytorch]'
29 | ]
30 |
31 | [project.scripts]
32 | train_amplify = "bionemo.amplify.train_amplify:app"
33 | infer_amplify = "bionemo.amplify.infer_amplify:app"
34 |
35 | [tool.setuptools.packages.find]
36 | where = ["src"]
37 | include = ["bionemo.*"]
38 | namespaces = true
39 | exclude = ["test*."]
40 |
41 | [tool.uv]
42 | cache-keys = [{ git = true }]
43 |
44 | [tool.setuptools.dynamic]
45 | version = { file = "VERSION" }
46 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-amplify/src/bionemo/amplify/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-amplify/src/bionemo/amplify/tokenizer.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | import transformers
18 | from nemo.lightning.io import IOMixin
19 |
20 |
21 | class BioNeMoAMPLIFYTokenizer(transformers.PreTrainedTokenizerFast, IOMixin):  # noqa D101
22 |     def __init__(self):
23 |         """A wrapper to make AutoTokenizer serializable for the AMPLIFY tokenizer."""
24 |         other = transformers.AutoTokenizer.from_pretrained("chandar-lab/AMPLIFY_350M", use_fast=True)
25 |         self.__dict__.update(other.__dict__)  # adopt the loaded tokenizer's attributes as this instance's state
26 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-amplify/tests/bionemo/amplify/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-amplify/tests/bionemo/amplify/test_tokenizer.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | import pytest
18 | import torch
19 | from nemo.lightning import io
20 |
21 | from bionemo.amplify.tokenizer import BioNeMoAMPLIFYTokenizer
22 |
23 |
24 | @pytest.fixture
25 | def tokenizer():  # builds a new BioNeMoAMPLIFYTokenizer for each test that requests it
26 |     return BioNeMoAMPLIFYTokenizer()
27 |
28 |
29 | def test_tokenizer_serialization(tokenizer, tmp_path):
30 | tokenizer.io_dump(tmp_path / "tokenizer", yaml_attrs=[]) # BioNeMoESMTokenizer takes no __init__ arguments
31 | deserialized_tokenizer = io.load(tmp_path / "tokenizer", tokenizer.__class__)
32 |
33 | our_tokens = deserialized_tokenizer.encode("KAISQ", add_special_tokens=False)
34 | amplify_tokens = torch.tensor([17, 7, 2, 14, 10, 18])
35 | torch.testing.assert_close(torch.tensor(our_tokens), amplify_tokens)
36 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/VERSION:
--------------------------------------------------------------------------------
1 | 2.4.4
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-core"
7 | readme = "README.md"
8 | description = "BioNeMo core interfaces and PyTorch-related code."
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # bionemo sub-packages
15 | # bionemo-core **MUST NOT** depend on any other sub-packages !!!!!
16 | # external
17 | "numpy",
18 | "platformdirs",
19 | "torch>=2.2.1",
20 | 'nest_asyncio',
21 | 'ngcsdk',
22 | 'pooch',
23 | 'pydantic[email]>=2.7.0',
24 | 'pyyaml',
25 | 'tqdm',
26 | ]
27 |
28 | [project.scripts]
29 | download_bionemo_data = "bionemo.core.data.load:entrypoint"
30 |
31 | # Make sure that the resource yaml files are being packaged alongside the python files.
32 | [tool.setuptools.package-data]
33 | "bionemo.core" = ["**/*.yaml"]
34 |
35 | [tool.setuptools.packages.find]
36 | where = ["src"]
37 | include = ["bionemo.*"]
38 | namespaces = true
39 | exclude = ["test*."]
40 |
41 | [tool.setuptools.dynamic]
42 | version = { file = "VERSION" }
43 |
44 | [tool.uv]
45 | cache-keys = [{ git = true }]
46 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/src/bionemo/core/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import os
17 | from pathlib import Path
18 | from typing import Sequence
19 |
20 | import platformdirs
21 |
22 |
23 | __all__: Sequence[str] = ("BIONEMO_CACHE_DIR",)
24 |
25 |
26 | def _get_cache_dir() -> Path:
27 |     """Return $BIONEMO_CACHE_DIR if it is set, else the per-user platformdirs cache directory (created if needed)."""
28 |     if cache_dir := os.getenv("BIONEMO_CACHE_DIR"):  # an unset or empty variable falls through to the default
29 |         return Path(cache_dir)  # returned as given; this function does not create it
30 |
31 |     cache_dir = Path(platformdirs.user_cache_dir(appname="bionemo", appauthor="nvidia"))
32 |
33 |     try:
34 |         cache_dir.mkdir(exist_ok=True, parents=True)
35 |     except PermissionError as ex:  # re-raised with a hint about the override; other errors propagate unchanged
36 |         raise PermissionError(
37 |             f"Permission denied creating a cache directory at {cache_dir}. Please set BIONEMO_CACHE_DIR to a directory "
38 |             "you have write access to."
39 |         ) from ex
40 |     return cache_dir
41 |
42 |
43 | BIONEMO_CACHE_DIR = _get_cache_dir()
44 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/src/bionemo/core/api.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from typing import Sequence
18 |
19 | from bionemo.core.model.config import BionemoModelConfig, BionemoTrainableModelConfig, Model, ModelOutput
20 |
21 |
22 | __all__: Sequence[str] = (
23 | "BionemoModelConfig",
24 | "BionemoTrainableModelConfig",
25 | "Model",
26 | "ModelOutput",
27 | )
28 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/src/bionemo/core/data/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/src/bionemo/core/data/api.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from typing import Sequence
18 |
19 |
20 | __all__: Sequence[str] = ()
21 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/src/bionemo/core/data/resources/scdl.yaml:
--------------------------------------------------------------------------------
1 | - tag: sample
2 | ngc: nvidia/clara/scdl_sample_test:1.0
3 | ngc_registry: resource
4 | pbss: "s3://bionemo-ci/test-data/scdl_sample_test.tar.gz"
5 | sha256: 7a4237537bf535dfa00301ce8cc7073e0a23d5bc8aa902ad65db9f51b57a6df9 # pragma: allowlist secret
6 | owner: Polina Binder
7 | description: Sample test data for SCDL.
8 |
9 | - tag: sample_scdl_feature_ids
10 | ngc: nvidia/clara/scdl_sample_test_feature_ids:1.0
11 | ngc_registry: resource
12 | pbss: s3://bionemo-ci/test-data/scdl_sample_test_feat_ids.tar.gz
13 | sha256: 9020ba336dbfe33bddadba26ca0cde49958cbd73c5ad44f0960a5a4837c9db26 # pragma: allowlist secret
14 | owner: Savitha Srinivasan
15 | description: Sample test data for SCDL with feature IDs appended.
16 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/src/bionemo/core/model/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/src/bionemo/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-core/tests/bionemo/core/utils/test_dtypes.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | import pytest
18 | import torch
19 |
20 | from bionemo.core.utils.dtypes import get_autocast_dtype
21 |
22 |
23 | @pytest.mark.parametrize(
24 |     "precision, expected_dtype",
25 |     [
26 |         ("fp16", torch.float16),
27 |         ("bf16", torch.bfloat16),
28 |         ("fp32", torch.float32),
29 |         ("bf16-mixed", torch.bfloat16),  # "-mixed" names are expected to map to the same dtype as the base name
30 |         ("fp32-mixed", torch.float32),
31 |     ],
32 | )
33 | def test_get_autocast_dtype(precision: str, expected_dtype: torch.dtype):  # each listed name maps to its dtype
34 |     assert get_autocast_dtype(precision) == expected_dtype
35 |
36 |
37 | def test_unsupported_autocast_dtype():  # an unrecognized precision string must raise ValueError
38 |     with pytest.raises(ValueError):
39 |         get_autocast_dtype("unsupported")
40 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/README.md:
--------------------------------------------------------------------------------
1 | # bionemo-esm2
2 | ESM-2 is a protein language model with a BERT architecture, trained on millions of protein sequences from UniProt. ESM-2 learns the patterns and dependencies between amino acids that ultimately give rise to a protein's structure. ESM-2 is pretrained on a masked language model (MLM) objective. During pretraining, 15% of the residues in each input sequence are selected for perturbation; of those, 80% are replaced with a mask token, 10% are replaced with a random token, and 10% are left unchanged. The model is then trained to predict the original amino acids at the perturbed positions from the context of the surrounding amino acids.
3 |
4 | Despite pretraining on an MLM objective, the sequence representation learned by ESM-2 is highly transferable to downstream tasks. ESM-2 can be fine-tuned on a variety of tasks, including secondary structure prediction, as well as whole-sequence prediction of cellular localization, thermostability, solubility, and other protein properties.
5 |
6 | ### Setup
7 | To install, execute the following:
8 | ```bash
9 | pip install -e .
10 | ```
11 |
12 | To run unit tests, execute:
13 | ```bash
14 | pytest -v .
15 | ```
16 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/VERSION:
--------------------------------------------------------------------------------
1 | 2.4
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-esm2"
7 | readme = "README.md"
8 | description = "BioNeMo ESM2 model."
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # internal
15 | 'bionemo-core',
16 | 'bionemo-llm',
17 | # external
18 | ]
19 |
20 | [project.optional-dependencies]
21 | test = ['bionemo-testing']
22 | te = [
23 | # TE & Apex need to be installed after PyTorch, NVCC, and CUDA.
24 | # TODO(@pstjohn, @cspades): Figure out how to do this without post-installation.
25 | 'transformer_engine[pytorch]',
26 | ]
27 |
28 | [project.scripts]
29 | bionemo-esm2-train = "bionemo.esm2.run.main:main"
30 | bionemo-esm2-recipe = "bionemo.esm2.run.recipes:main"
31 | infer_esm2 = "bionemo.esm2.scripts.infer_esm2:infer_esm2_entrypoint"
32 | train_esm2 = "bionemo.esm2.scripts.train_esm2:train_esm2_entrypoint"
33 | finetune_esm2 = "bionemo.esm2.scripts.finetune_esm2:finetune_esm2_entrypoint"
34 | convert_esm2 = "bionemo.esm2.model.convert:app"
35 |
36 | # Make sure that the tokenizer files are included along with the python files during installation.
37 | [tool.setuptools.package-data]
38 | "bionemo.esm2" = ["data/tokenizer/*.json", "data/tokenizer/*.txt"]
39 |
40 | [tool.setuptools.packages.find]
41 | where = ["src"]
42 | include = ["bionemo.*"]
43 | namespaces = true
44 | exclude = ["test*."]
45 |
46 | [tool.setuptools.dynamic]
47 | version = { file = "VERSION" }
48 |
49 | [tool.uv]
50 | cache-keys = [{ git = true }]
51 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/api.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from typing import Sequence
18 |
19 | from bionemo.esm2.model.model import ESM2Config, ESM2GenericConfig, ESM2Model
20 |
21 |
22 | __all__: Sequence[str] = (
23 | "ESM2Config",
24 | "ESM2GenericConfig",
25 | "ESM2Model",
26 | )
27 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/data/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/data/tokenizer/README.md:
--------------------------------------------------------------------------------
1 | # Vendored tokenizer config for facebook/esm2_t33_650M_UR50D
2 |
3 | This directory contains the output of
4 |
5 | ```python
6 | from transformers import AutoTokenizer
7 | AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D").save_pretrained("...")
8 | ```
9 |
10 | for reproducible results and to reduce reliance on external API calls.
11 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/data/tokenizer/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import functools
17 | from importlib.resources import files
18 |
19 | import transformers
20 | from nemo.lightning.io import IOMixin
21 |
22 |
23 | class BioNeMoESMTokenizer(transformers.EsmTokenizer, IOMixin): # noqa D101
24 | def __init__(self):
25 | """A wrapper to make AutoTokenizer serializable for the ESM2 tokenizer, loaded from the files packaged in `bionemo.esm2.data.tokenizer`."""
26 | other = transformers.AutoTokenizer.from_pretrained(str(files("bionemo.esm2.data.tokenizer")), use_fast=True) # load from the package's own resource directory
27 | self.__dict__.update(dict(other.__dict__)) # adopt the loaded tokenizer's instance attributes; the parent __init__ is not called here
28 |
29 |
30 | @functools.cache
31 | def get_tokenizer() -> BioNeMoESMTokenizer:
32 | """Get the tokenizer for the ESM2 model; the result is cached, so repeated calls return the same instance."""
33 | return BioNeMoESMTokenizer()
34 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/data/tokenizer/special_tokens_map.json:
--------------------------------------------------------------------------------
1 | {
2 | "cls_token": "<cls>",
3 | "eos_token": "<eos>",
4 | "mask_token": "<mask>",
5 | "pad_token": "<pad>",
6 | "unk_token": "<unk>"
7 | }
8 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/data/tokenizer/tokenizer_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "added_tokens_decoder": {
3 | "0": {
4 | "content": "<cls>",
5 | "lstrip": false,
6 | "normalized": false,
7 | "rstrip": false,
8 | "single_word": false,
9 | "special": true
10 | },
11 | "1": {
12 | "content": "<pad>",
13 | "lstrip": false,
14 | "normalized": false,
15 | "rstrip": false,
16 | "single_word": false,
17 | "special": true
18 | },
19 | "2": {
20 | "content": "<eos>",
21 | "lstrip": false,
22 | "normalized": false,
23 | "rstrip": false,
24 | "single_word": false,
25 | "special": true
26 | },
27 | "3": {
28 | "content": "<unk>",
29 | "lstrip": false,
30 | "normalized": false,
31 | "rstrip": false,
32 | "single_word": false,
33 | "special": true
34 | },
35 | "32": {
36 | "content": "<mask>",
37 | "lstrip": false,
38 | "normalized": false,
39 | "rstrip": false,
40 | "single_word": false,
41 | "special": true
42 | }
43 | },
44 | "clean_up_tokenization_spaces": true,
45 | "cls_token": "<cls>",
46 | "eos_token": "<eos>",
47 | "mask_token": "<mask>",
48 | "model_max_length": 1000000000000000019884624838656,
49 | "pad_token": "<pad>",
50 | "tokenizer_class": "EsmTokenizer",
51 | "unk_token": "<unk>"
52 | }
53 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/data/tokenizer/vocab.txt:
--------------------------------------------------------------------------------
1 | <cls>
2 | <pad>
3 | <eos>
4 | <unk>
5 | L
6 | A
7 | G
8 | V
9 | S
10 | E
11 | R
12 | T
13 | I
14 | D
15 | P
16 | K
17 | Q
18 | N
19 | F
20 | Y
21 | M
22 | H
23 | W
24 | C
25 | X
26 | B
27 | U
28 | Z
29 | O
30 | .
31 | -
32 | <null_1>
33 | <mask>
34 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/model/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/model/finetune/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/run/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/scripts/README.md:
--------------------------------------------------------------------------------
1 | ## ESM2 Scripts Directory
2 | This is a collection of one-off scripts that can be run from the command line. See the `[project.scripts]` section
3 | of the `pyproject.toml` file for how these are generated.
4 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/src/bionemo/esm2/testing/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/tests/bionemo/esm2/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/tests/bionemo/esm2/data/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/tests/bionemo/esm2/model/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/tests/bionemo/esm2/model/finetune/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-esm2/tests/bionemo/esm2/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/VERSION:
--------------------------------------------------------------------------------
1 | 2.4
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/assets/1b_finetuning_train_curve_500_steps_256gbs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/sub-packages/bionemo-evo2/assets/1b_finetuning_train_curve_500_steps_256gbs.png
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/examples/.gitignore:
--------------------------------------------------------------------------------
1 | # ignore temp files made by this tutorial
2 | # chromosome files
3 | *.fa
4 | *.fa.gz
5 |
6 | # config files
7 | *.yaml
8 |
9 | # directories created during these notebook runs.
10 | nemo2_evo2_1b_8k/
11 | preprocessed_data/
12 | pretraining_demo/
13 | brca1_fasta_files/
14 | brca1/
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/examples/configs/README.md:
--------------------------------------------------------------------------------
1 | ## Example configs
2 | These configs are provided as examples to the user. Note that the files referenced in these configs can be downloaded from the [OpenGenome2 dataset on Hugging Face](https://huggingface.co/datasets/arcinstitute/opengenome2).
3 | * `full_pretrain_shortphase_config.yaml` was used to test full scale pre-training runs of evo2 at the 8k context length.
4 | * `full_pretrain_longphase_config.yaml` was used to test full scale context extension phase pre-training (starting from an 8k checkpoint and continuing to train at longer context lengths).
5 | * `test_preproc_config.yaml` was used to test our preprocessing scripts to generate .bin/.idx files that are used for pre-training from fasta file inputs.
6 | * `test_promotors_dataset_config.yaml` is a small dataset config that can be used to test pre-training on a small dataset.
7 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/examples/configs/test_preproc_config.yaml:
--------------------------------------------------------------------------------
1 | - datapaths: ["/workspace/bionemo2/data/mmseqs_results_rep_seq_distinct.fasta"]
2 | output_dir: "/workspace/bionemo2/data"
3 | output_prefix: promoters_ab_test_noodles_uint8_distinct
4 | # Datasplit
5 | train_split: 1.0 # because they do manual splits of first 1000 for validation, 2nd 1000 for test, and leftover for training
6 | valid_split: 0.0
7 | test_split: 0.0
8 | # Overwrite existing binaries. Otherwise, skip already preprocessed datasets.
9 | overwrite: True
10 | # Raw Preprocessing Transforms
11 | embed_reverse_complement: true
12 | random_reverse_complement: 0.0
13 | random_lineage_dropout: 0.1
14 | transcribe: "back_transcribe"
15 | force_uppercase: true
16 | indexed_dataset_dtype: "uint8"
17 | # Tokenizer Transforms
18 | append_eod: true
19 | enforce_sample_length: null
20 | ftfy: false
21 | # Tokenizer
22 | tokenizer_type: "Byte-Level"
23 | vocab_file: null
24 | vocab_size: null
25 | merges_file: null
26 | tokenizer_model_name: null
27 | pretrained_tokenizer_model: null
28 | special_tokens: null
29 | fast_hf_tokenizer: true
30 | # Compute
31 | workers: 1
32 | preproc_concurrency: 100000
33 | chunksize: 25
34 | # Filters
35 | drop_empty_sequences: true
36 | nnn_filter: true
37 | # RNG
38 | seed: 42
39 | # Evo2 Taxonomic Lineage Tags
40 | taxonomy_data:
41 | FP002272:
42 | kingdom: KINGDOM
43 | phylum: PHYLUM
44 | clazz: CLASS
45 | order: ORDER
46 | family: FAMILY
47 | genus: GENUS
48 | species: SPECIES
49 | FP000491:
50 | kingdom: king
51 | order: ord
52 | family: fam
53 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/examples/configs/test_promotors_dataset_config.yaml:
--------------------------------------------------------------------------------
1 | - dataset_prefix: /workspace/bionemo2/sub-packages/bionemo-evo2/tests/bionemo/evo2/data/test_datasets/test_promoters_uint8_distinct_byte-level_train
2 | dataset_split: train
3 | dataset_weight: 1.0
4 | - dataset_prefix: /workspace/bionemo2/sub-packages/bionemo-evo2/tests/bionemo/evo2/data/test_datasets/test_promoters_uint8_distinct_byte-level_val
5 | dataset_split: validation
6 | dataset_weight: 1.0
7 | - dataset_prefix: /workspace/bionemo2/sub-packages/bionemo-evo2/tests/bionemo/evo2/data/test_datasets/test_promoters_uint8_distinct_byte-level_test
8 | dataset_split: test
9 | dataset_weight: 1.0
10 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-evo2"
7 | readme = "README.md"
8 | description = "Library containing data preprocessing, training, and inference tooling for Evo2."
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # internal
15 | "bionemo-noodles",
16 | "bionemo-core",
17 | "bionemo-llm",
18 | # external
19 | ]
20 |
21 | [project.optional-dependencies]
22 | test = [
23 | 'bionemo-testing'
24 | ]
25 |
26 | [project.scripts]
27 | infer_evo2 = "bionemo.evo2.run.infer:main"
28 | train_evo2 = "bionemo.evo2.run.train:main"
29 | predict_evo2 = "bionemo.evo2.run.predict:main"
30 | preprocess_evo2 = "bionemo.evo2.data.preprocess:main"
31 | splice_evo2 = "bionemo.evo2.data.transcript_extraction:main"
32 | evo2_convert_to_nemo2 = "bionemo.evo2.utils.checkpoint.convert_to_nemo:main"
33 |
34 | [tool.setuptools.packages.find]
35 | where = ["src"]
36 | include = ["bionemo.*"]
37 | namespaces = true
38 | exclude = ["test*."]
39 |
40 | [tool.setuptools.dynamic]
41 | version = { file = "VERSION" }
42 |
43 | [tool.uv]
44 | cache-keys = [{ git = true }]
45 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/src/bionemo/evo2/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2024 Arc Institute. All rights reserved.
3 | # SPDX-FileCopyrightText: Copyright (c) 2024 Michael Poli. All rights reserved.
4 | # SPDX-FileCopyrightText: Copyright (c) 2024 Stanford University. All rights reserved
5 | # SPDX-License-Identifier: LicenseRef-Apache2
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/src/bionemo/evo2/data/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2024 Arc Institute. All rights reserved.
3 | # SPDX-FileCopyrightText: Copyright (c) 2024 Michael Poli. All rights reserved.
4 | # SPDX-FileCopyrightText: Copyright (c) 2024 Stanford University. All rights reserved
5 | # SPDX-License-Identifier: LicenseRef-Apache2
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/src/bionemo/evo2/run/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2024 Arc Institute. All rights reserved.
3 | # SPDX-FileCopyrightText: Copyright (c) 2024 Michael Poli. All rights reserved.
4 | # SPDX-FileCopyrightText: Copyright (c) 2024 Stanford University. All rights reserved
5 | # SPDX-License-Identifier: LicenseRef-Apache2
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2024 Arc Institute. All rights reserved.
3 | # SPDX-FileCopyrightText: Copyright (c) 2024 Michael Poli. All rights reserved.
4 | # SPDX-FileCopyrightText: Copyright (c) 2024 Stanford University. All rights reserved
5 | # SPDX-License-Identifier: LicenseRef-Apache2
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-evo2/src/bionemo/evo2/utils/checkpoint/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-FileCopyrightText: Copyright (c) 2024 Arc Institute. All rights reserved.
3 | # SPDX-FileCopyrightText: Copyright (c) 2024 Michael Poli. All rights reserved.
4 | # SPDX-FileCopyrightText: Copyright (c) 2024 Stanford University. All rights reserved
5 | # SPDX-License-Identifier: LicenseRef-Apache2
6 | #
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-example_model/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-example_model/VERSION:
--------------------------------------------------------------------------------
1 | ../../VERSION
--------------------------------------------------------------------------------
/sub-packages/bionemo-example_model/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | # UV doesn't seem to pick up on changes to requirements.txt files as a signal that it needs to re-lock a project's
7 | # dependencies. We should probably just move to listing requirements in these pyproject.toml files directly, and also
8 | # now include bionemo-* sub-packages explicitly.
9 | name = "bionemo-example_model"
10 | readme = "README.md"
11 | description = "BioNeMo example_model: Example model for documentation and tutorials. Do Not Distribute on PyPI !!"
12 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
13 | requires-python = ">=3.10"
14 | classifiers = ["Private :: Do Not Upload", "Programming Language :: Python :: 3.10"]
15 | license = { file = "LICENSE" }
16 | dynamic = ["version"]
17 | dependencies = [
18 | 'bionemo-core',
19 | 'bionemo-llm',
20 | 'megatron-core',
21 | 'nemo_toolkit',
22 | 'torchvision >= 0.15.1',
23 | ]
24 |
25 | [project.optional-dependencies]
26 | test = [
27 | "bionemo-testing"
28 | ]
29 |
30 | [tool.setuptools.packages.find]
31 | where = ["src"]
32 | include = ["bionemo.*"]
33 | namespaces = true
34 | exclude = ["test*."]
35 |
36 | [tool.setuptools.dynamic]
37 | version = { file = "VERSION" }
38 |
39 | [tool.uv]
40 | cache-keys = [{ git = true }]
41 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-example_model/src/bionemo/example_model/lightning/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-fw/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-fw/README.md:
--------------------------------------------------------------------------------
1 | # bionemo-fw
2 |
3 | The BioNeMo Framework (FW): a production grade framework for AI-enabled Drug Discovery.
4 |
5 | The `bionemo-fw` Python package contains framework-spanning code under the `bionemo.fw` namespace.
6 | All other namespaces of the BioNeMo Framework (`bionemo.*`) are dependencies of this package.
7 |
8 | ## Developer Setup
9 | After following the setup specified in the [README](https://github.com/NVIDIA/bionemo-framework/blob/main/README.md),
10 | you may install this project's code in your environment via executing:
11 | ```bash
12 | pip install -e .
13 | ```
14 |
15 | To run unit tests with code coverage, execute:
16 | ```bash
17 | pytest -v --cov=bionemo --cov-report=term .
18 | ```
19 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-fw/VERSION:
--------------------------------------------------------------------------------
1 | ../../VERSION
--------------------------------------------------------------------------------
/sub-packages/bionemo-fw/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-fw"
7 | readme = "README.md"
8 | description = "BioNeMo Framework (FW): Production grade framework for AI-enabled Drug Discovery. Consists of all independently installable bionemo feature packages too."
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | 'bionemo-core',
15 | 'bionemo-esm2',
16 | 'bionemo-geneformer',
17 | 'bionemo-geometric',
18 | 'bionemo-llm',
19 | 'bionemo-noodles',
20 | 'bionemo-scdl',
21 | 'bionemo-size-aware-batching',
22 | 'bionemo-webdatamodule',
23 | 'bionemo-amplify',
24 | #
25 | # NOTE: DO **NOT** INCLUDE:
26 | # bionemo-testing (test-time only dependency)
27 | # bionemo-example_model (documentation)
28 | # bionemo-fw (itself!)
29 | # external
30 | 'nltk',
31 | 'numba>=0.57.1',
32 | 'toml',
33 | 'zarr',
34 | ]
35 |
36 | [tool.setuptools.packages.find]
37 | where = ["src"]
38 | include = ["bionemo.*"]
39 | namespaces = true
40 | exclude = ["test*."]
41 |
42 | [tool.setuptools.dynamic]
43 | version = { file = "VERSION" }
44 |
45 | [tool.uv]
46 | cache-keys = [{ git = true }]
47 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-fw/src/bionemo/fw/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-fw/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-fw/tests/bionemo/fw/test_sub_package_imports.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | def test_import_bionemo_core():
18 | from bionemo import core as subpackage
19 |
20 | assert subpackage is not None
21 | del subpackage
22 |
23 |
24 | def test_import_bionemo_llm():
25 | from bionemo import core as subpackage
26 |
27 | assert subpackage is not None
28 | del subpackage
29 |
30 |
31 | def test_import_bionemo_geneformer():
32 | from bionemo import geneformer as subpackage
33 |
34 | assert subpackage is not None
35 | del subpackage
36 |
37 |
38 | def test_import_bionemo_esm2():
39 | from bionemo import esm2 as subpackage
40 |
41 | assert subpackage is not None
42 | del subpackage
43 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/README.md:
--------------------------------------------------------------------------------
1 | # bionemo-geneformer
2 |
3 | Geneformer is a foundational single-cell RNA (scRNA) language model using a BERT architecture trained on millions of single-cell RNA sequences. It captures gene co-expression patterns to learn cellular representations, enabling predictive tasks across biology and medicine. Geneformer is trained on a masked language model (MLM) objective, where expression rank-ordered "gene tokens" in single-cell RNA sequences are masked, replaced, or left unchanged, and the model learns to predict these masked genes based on context. This module provides Dataset classes, collators for expression rank ordering, and Config objects for constructing Geneformer-style models.
4 |
5 | ## Setup
6 | To install, execute the following from this directory (or point the install to this directory):
7 |
8 | ```bash
9 | pip install -e .
10 | ```
11 |
12 | To run unit tests, execute:
13 | ```bash
14 | pytest -v .
15 | ```
16 |
17 |
18 | ## Acquiring Data
19 | Datasets are expected to be in the form of AnnData (.h5ad) objects such as those downloaded from [Cell x Gene | CZI](https://chanzuckerberg.github.io/cellxgene-census/). They are then pre-processed with `sub-packages/bionemo-scdl/src/bionemo/scdl/scripts/convert_h5ad_to_scdl.py`.
20 |
21 | ## Geneformer-nv 10M and 106M
22 | Refer to the Dataset cards and Model cards to learn more about the pre-trained checkpoints provided for both the 10M and 106M variants of Geneformer-nv.
23 |
24 | ## See Also
25 | - [sc-DL pypi](https://pypi.org/project/bionemo-scdl/)
26 | - [sc-DL github](https://github.com/NVIDIA/bionemo-framework/tree/main/sub-packages/bionemo-scdl)
27 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/VERSION:
--------------------------------------------------------------------------------
1 | 2.4
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/examples/.gitignore:
--------------------------------------------------------------------------------
1 | **.png
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-geneformer"
7 | readme = "README.md"
8 | description = "BioNeMo Geneformer"
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # bionemo sub-packages
15 | 'bionemo-core',
16 | 'bionemo-llm',
17 | # external
18 | 'cellxgene_census',
19 | ]
20 |
21 | [project.optional-dependencies]
22 | test = [
23 | 'bionemo-testing'
24 | ]
25 | te = [
26 | # TE & Apex need to be installed after PyTorch, NVCC, and CUDA.
27 | # TODO(@pstjohn, @cspades): Figure out how to do this without post-installation.
28 | 'transformer_engine[pytorch]'
29 | ]
30 |
31 | [project.scripts]
32 | bionemo-geneformer-train= "bionemo.geneformer.run.main:main"
33 | bionemo-geneformer-recipe= "bionemo.geneformer.run.recipes:main"
34 | infer_geneformer = "bionemo.geneformer.scripts.infer_geneformer:geneformer_infer_entrypoint"
35 | train_geneformer = "bionemo.geneformer.scripts.train_geneformer:entrypoint"
36 | geneformer_mlm_loss_eval = "bionemo.geneformer.scripts.geneformer_mlm_loss_eval:entrypoint"
37 |
38 | [tool.setuptools.packages.find]
39 | where = ["src"]
40 | include = ["bionemo.*"]
41 | namespaces = true
42 | exclude = ["test*."]
43 |
44 | [tool.setuptools.dynamic]
45 | version = { file = "VERSION" }
46 |
47 | [tool.uv]
48 | cache-keys = [{ git = true }]
49 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/scripts/README.md:
--------------------------------------------------------------------------------
1 | # WARNING
2 | This folder contains one-off eval scripts that may not run and are not actively tested or kept up to date.
3 | Also, these scripts may depend on `bionemo-testing`, which is generally not allowed.
4 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/data/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/data/singlecell/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/data/singlecell/utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import numpy as np
17 |
18 |
def sample_or_truncate(
    gene_ids: np.ndarray,
    max_length: int,
    sample: bool = True,
) -> np.ndarray:
    """Limit an array of gene IDs to at most `max_length` entries.

    No padding is performed: inputs that already fit are handed back untouched.

    Args:
        gene_ids (np.ndarray): Array of gene IDs.
        max_length (int): Maximum number of entries to keep.
        sample (bool, optional): If True, keep a random subset of `max_length` entries
            (drawn without replacement via `np.random.permutation`, so the global NumPy
            RNG state is consumed and the result is in permuted order). If False, keep
            the first `max_length` entries. Defaults to True.

    Returns:
        np.ndarray: `gene_ids` itself when `len(gene_ids) <= max_length`; otherwise an
            array of `max_length` entries selected as described above.
    """
    num_genes = len(gene_ids)

    # Nothing to do when the input already fits.
    if num_genes <= max_length:
        return gene_ids

    # Deterministic path: keep the leading entries.
    if not sample:
        return gene_ids[:max_length]

    # Random path: the first `max_length` positions of a full permutation give a
    # uniformly random subset without replacement.
    kept_positions = np.random.permutation(num_genes)[:max_length]
    return gene_ids[kept_positions]
42 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/model/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/run/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/scripts/README.md:
--------------------------------------------------------------------------------
1 | ## Geneformer Scripts Directory
2 | This is a collection of one-off scripts that can be run from the command line. See the `[project.scripts]` section
3 | of the pyproject.toml file for how these are generated.
4 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/scripts/celltype_classification_bench/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/tokenizer/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/src/bionemo/geneformer/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/tests/bionemo/geneformer/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geneformer/tests/bionemo/geneformer/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geometric/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-geometric/README.md:
--------------------------------------------------------------------------------
1 | # bionemo-geometric
2 |
3 | To install, execute the following:
4 | ```bash
5 | pip install -e .
6 | ```
7 |
8 | To run unit tests, execute:
9 | ```bash
10 | pytest -v .
11 | ```
12 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geometric/VERSION:
--------------------------------------------------------------------------------
1 | ../../VERSION
--------------------------------------------------------------------------------
/sub-packages/bionemo-geometric/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-geometric"
7 | readme = "README.md"
8 | description = "BioNeMo component library for graph neural networks (GNNs) solving drug discovery problems."
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | 'bionemo-core',
15 | # Version pins for the pip package. If updating these, also update the image pins in requirements.txt.
16 | # Ideally we should allow these dependencies to float in the package definition.
17 | 'torch-cluster==1.6.3',
18 | 'torch-geometric==2.5.0',
19 | 'torch-scatter==2.1.2',
20 | 'torch_sparse==0.6.18',
21 | 'rdkit==2023.9.6',
22 | ]
23 |
24 | # Make sure that the data CSV files are being packaged alongside the python files.
25 | [tool.setuptools.package-data]
26 | "bionemo.geometric" = ["**/*.csv"]
27 |
28 | [tool.setuptools.packages.find]
29 | where = ["src"]
30 | include = ["bionemo.*"]
31 | namespaces = true
32 | exclude = ["test*."]
33 |
34 | [tool.setuptools.dynamic]
35 | version = { file = "VERSION" }
36 |
37 | [tool.uv]
38 | cache-keys = [{ git = true }]
39 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geometric/requirements.txt:
--------------------------------------------------------------------------------
1 | # Pinned versions installed in the Docker container.
2 | # If updating these, also update the version pins in `pyproject.toml` !!
3 | torch-cluster==1.6.3
4 | torch-geometric==2.5.0
5 | torch-scatter==2.1.2
6 | torch_sparse==0.6.18
7 | rdkit==2023.9.6
8 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geometric/src/bionemo/geometric/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-geometric/tests/bionemo/geometric/test_bionemo_geometric.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | # TODO: replace this "test" once bionemo-geometric has some real code!
18 | def test_import_geometric() -> None:
19 | from bionemo import geometric
20 |
21 | assert geometric is not None
22 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/README.md:
--------------------------------------------------------------------------------
1 | # bionemo-llm
2 |
3 | The BioNeMo Large Language Model (LLM) submodule contains common code used in submodules that train LLMs on biological
4 | datasets (currently `bionemo-esm2` and `bionemo-geneformer`). This includes data masking and collate functions, the
5 | bio-BERT common architecture code, loss functions, and other NeMo / Megatron-LM compatibility functions. Sub-packages
6 | should only depend on `bionemo-llm` if they need access to NeMo and Megatron-LM.
7 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/VERSION:
--------------------------------------------------------------------------------
1 | 2.4.5
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-llm"
7 | readme = "README.md"
8 | description = "BioNeMo Large Language Model Components using NeMo and Megatron"
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # bionemo sub-packages
15 | 'bionemo-core',
16 | # external
17 | 'lightning>=2.2.1',
18 | 'megatron-core',
19 | 'nemo_toolkit[nlp,eval]>=2.2.1',
20 | 'nemo-run',
21 | 'hatchling',
22 | ]
23 |
24 | [project.optional-dependencies]
25 | test = [
26 | 'bionemo-testing'
27 | ]
28 | te = [
29 | # TE & Apex need to be installed after PyTorch, NVCC, and CUDA.
30 | # TODO(@pstjohn, @cspades): Figure out how to do this without post-installation.
31 | 'transformer_engine[pytorch]'
32 | ]
33 |
34 | [tool.setuptools.packages.find]
35 | where = ["src"]
36 | include = ["bionemo.*"]
37 | namespaces = true
38 | exclude = ["test*."]
39 |
40 | [tool.setuptools.dynamic]
41 | version = { file = "VERSION" }
42 |
43 | [tool.uv]
44 | cache-keys = [{ git = true }]
45 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/src/bionemo/llm/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/src/bionemo/llm/data/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/src/bionemo/llm/model/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/src/bionemo/llm/model/biobert/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/src/bionemo/llm/run/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/src/bionemo/llm/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/tests/bionemo/llm/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-llm/tests/bionemo/llm/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/README.md:
--------------------------------------------------------------------------------
1 | # Modular Co-Design (MoCo) Interpolants
2 |
3 | MoCo enables abstracted interpolants for building and sampling from a variety of popular generative model frameworks. Specifically, MoCo supports interpolants for both continuous and discrete data types.
4 | [bionemo-moco on PyPI](https://pypi.org/project/bionemo-moco/)
5 |
6 | ### Continuous Data Interpolants
7 | MoCo currently supports the following continuous data interpolants:
8 | - DDPM (Denoising Diffusion Probabilistic Models)
9 | - VDM (Variational Diffusion Models)
10 | - CFM (Conditional Flow Matching)
11 |
12 | ### Discrete Data Interpolants
13 | MoCo also supports the following discrete data interpolants:
14 | - D3PM (Discrete Denoising Diffusion Probabilistic Models)
15 | - MDLM (Masked Diffusion Language Models)
16 | - DFM (Discrete Flow Matching)
17 |
18 | ### Useful Abstractions
19 | MoCo also provides useful wrappers for customizable time distributions and inference time schedules.
20 |
21 | ### Extendible
22 | If the desired interpolant or sampling method is not already supported, MoCo was designed to be easily extended.
23 |
24 | ## Installation
25 | For Conda environment setup, please refer to the `environment` directory for specific instructions.
26 |
27 | Once your environment is set up, you can install this project by running the following command:
28 |
29 | ```bash
30 | pip install -e .
31 | ```
32 | This will install the project in editable mode, allowing you to make changes and see them reflected immediately.
33 |
34 | ## Examples
35 | Please see examples of all interpolants in the [examples directory](https://github.com/NVIDIA/bionemo-framework/tree/main/sub-packages/bionemo-moco/examples).
36 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/VERSION:
--------------------------------------------------------------------------------
1 | 0.0.2.1
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/environment/Instructions.md:
--------------------------------------------------------------------------------
1 | Environment Setup
2 | ===============
3 |
4 | From the bionemo-moco directory run:
5 |
6 | ```bash
7 | bash environment/setup.sh
8 | ```
9 |
10 | This creates the conda environment, installs bionemo-moco and runs the tests.
11 |
12 | Local Code Setup
13 | ===============
14 | From the bionemo-moco directory run:
15 |
16 | ```bash
17 | bash environment/clone_bionemo_moco.sh
18 | ```
19 |
20 | This clones only the bionemo-moco subpackage. To install it in your local environment, use:
21 |
22 | ```bash
23 | pip install -e .
24 | ```
25 |
26 | inside the bionemo-moco directory.
27 |
28 | ```bash
29 | pip install --no-deps -e .
30 | ```
31 | can be used if you want to install bionemo-moco on top of your current torch version. The remaining required dependencies, jaxtyping and pot, can then be installed manually via pip.
32 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/environment/clone_bionemo_moco.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Fetch only the sub-packages/bionemo-moco directory of bionemo-framework
# (blobless, sparse clone), move it into the current working directory, and
# delete the temporary clone.

# Abort on the first failing command. Without this, a failed `git clone` or
# `cd bionemo-framework` would still be followed by `cd ..` and
# `rm -rf bionemo-framework`, executed from the wrong directory.
set -e

git clone --filter=blob:none --sparse https://github.com/NVIDIA/bionemo-framework.git
cd bionemo-framework
# Restrict the working tree to the MoCo sub-package only.
git sparse-checkout set sub-packages/bionemo-moco
mv sub-packages/bionemo-moco ..
cd ..
# The rest of the clone is no longer needed once the sub-package is moved out.
rm -rf bionemo-framework
7 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/environment/moco_env.yaml:
--------------------------------------------------------------------------------
1 | name: moco_bionemo
2 | channels:
3 | - conda-forge
4 | - pytorch
5 | - nvidia
6 |
7 | dependencies:
8 | - python=3.10
9 | - pytorch=2.2.1
10 | - pytorch-cuda=12.1
11 | - torchvision=0.17.1
12 | - torchaudio=2.2.1
13 |
14 | - pip:
15 | - ruff==0.0.292
16 | - black==23.1.0
17 | - pre-commit==3.4.0
18 | - virtualenv==20.26.3
19 | - ipdb==0.13.11
20 | - click==8.1.7
21 | - tenacity==8.5.0
22 | - tach>=0.9.0
23 | - pytest-cov==4.1.0
24 | - pytest-timeout==2.2.0
25 | - pytest-dependency==0.5.1
26 | - testbook==0.4.2
27 | - requests_mock==1.11.0
28 | - awscli==1.33.33
29 | - nbval==0.11.0
30 | - onnx>=1.16.0
31 | - setuptools>=70.0.0
32 | - aiohttp>=3.9.4
33 | - jupyterlab>=3.6.8
34 | - jupyter_server>=2.14.1 # Fix for GHSA-hrw6-wg82-cm62
35 | - Werkzeug>=3.0.3
36 | - nltk>=3.9.1
37 | - numpy>=1.24.4,<2
38 | - jaxtyping==0.2.34
39 | - pot>=0.9.5
40 | - scikit-learn>=1.6.0
41 | - matplotlib>=3.3.2
42 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/environment/setup.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Create the Conda environment described by environment/moco_env.yaml,
# install bionemo-moco into it in editable mode, and run the test suite.
# The YAML path is relative, so run this from the bionemo-moco directory.

# Set the path to your Conda environment YAML file
ENV_YAML="environment/moco_env.yaml"

# Extract the environment name from the YAML file
# (expects the first line to be `name: <env-name>`)
ENV_NAME=$(head -n 1 "$ENV_YAML" | cut -d':' -f2- | tr -d ' ')

# Load Conda to enable command
source "$(conda info --base)/etc/profile.d/conda.sh"

# Create the Conda environment from the YAML file
echo "Creating Conda environment $ENV_NAME from $ENV_YAML..."
conda env create -f "$ENV_YAML"

# Activate the Conda environment
echo "Activating Conda environment $ENV_NAME..."
conda activate "$ENV_NAME"

# Check if the environment was successfully activated
if [ "$CONDA_DEFAULT_ENV" == "$ENV_NAME" ]; then
    echo "Conda environment $ENV_NAME activated successfully."
    # Navigate to your project directory if needed
    # cd /path/to/your/project  # Uncomment and adjust this path as necessary
    # Install your project in editable mode using pip
    # The requirement must be quoted: unquoted, the shell parses `>=4.8.2` as an
    # output redirection, creating a file named "=4.8.2" and installing an
    # unconstrained pydoc-markdown.
    pip install "pydoc-markdown>=4.8.2"
    pip install pytest-cov==4.1.0 pytest-timeout==2.2.0 pytest-dependency==0.5.1
    pre-commit install
    echo "Installing bionemo-moco in editable mode using pip..."
    pip install -e .
    echo "Setup complete."
    # Run tests
    echo "Running tests..."
    pytest
    echo "Tests complete. You can now work within the $ENV_NAME environment."
else
    echo "Failed to activate Conda environment $ENV_NAME. Exiting..."
    exit 1
fi
40 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/figures/model_figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/sub-packages/bionemo-moco/figures/model_figure.png
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-moco"
7 | readme = "README.md"
8 | description = "BioNeMo Modular Co-Design: Making building Diffusion and Flow Matching generative models easier"
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # bionemo sub-packages
15 | # external
16 | 'torch>=2.2',
17 | 'numpy>=1.24.4,<2', #needed for notebooks
18 | 'jaxtyping>=0.2.34',
19 | 'pot>=0.9.5', #needed for optimal transport
20 | 'scikit-learn>=1.6.0', #needed for notebooks
21 | 'matplotlib>=3.3.2' #needed for notebooks
22 | ]
23 |
24 | [tool.setuptools.packages.find]
25 | where = ["src"]
26 | include = ["bionemo.*"]
27 | namespaces = true
28 | exclude = ["test*."]
29 |
30 | [tool.setuptools.dynamic]
31 | version = { file = "VERSION" }
32 |
33 | [tool.uv]
34 | cache-keys = [{ git = true }]
35 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/scripts/clean_documentation.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | import re
18 |
19 |
def _clean_markdown(lines: list[str]) -> str:
    """Post-process the lines of a pydoc-markdown render into the final markdown text.

    Args:
        lines: The lines of the generated document, each including its line terminator.

    Returns:
        The cleaned markdown as a single string.
    """
    # Delete lines that start with " * " and " * "
    # NOTE(review): both prefixes are identical as seen here; confirm the intended
    # indentation widths of the table-of-contents entries that should be dropped.
    lines = [line for line in lines if not line.startswith(" * ") and not line.startswith(" * ")]

    # Join the lines back into a string
    markdown = "".join(lines)

    # Replace dots with no space in anchor ids, so that the anchors keep matching
    # the links rewritten below (both sides must drop the dots).
    # NOTE(review): assumes anchors are rendered as `<a id="..."></a>` — confirm
    # against the pydoc-markdown output.
    markdown = re.sub(
        r'<a id="([^"]+)"></a>',
        lambda match: f'<a id="{match.group(1).replace(".", "")}"></a>',
        markdown,
    )

    # Replace dots with no space in links
    markdown = re.sub(
        r"\[([^\]]+)\]\(#([a-zA-Z0-9_\.]+)\)",
        lambda match: f"[{match.group(1)}](#{match.group(2).replace('.', '')})",
        markdown,
    )

    # Replace 'moco.' with 'bionemo.moco.'. The negative lookbehind leaves text that
    # is already fully qualified untouched, so re-running the script (or docstrings
    # that mention 'bionemo.moco.') never yields 'bionemo.bionemo.moco.'.
    markdown = re.sub(r"(?<!bionemo\.)moco\.", "bionemo.moco.", markdown)

    return markdown


def main() -> None:
    """Clean ``documentation.md`` (in the current working directory) in place."""
    with open("documentation.md", "r", encoding="utf-8") as file:
        lines = file.readlines()

    markdown = _clean_markdown(lines)

    with open("documentation.md", "w", encoding="utf-8") as file:
        file.write(markdown)


if __name__ == "__main__":
    main()
44 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/scripts/create_documentation.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Regenerate documentation.md from the sources under src/bionemo and then
# post-process it with scripts/clean_documentation.py.

# Stop if rendering fails: the redirection truncates documentation.md before
# pydoc-markdown runs, so the cleanup step must not run on a partial file.
set -e

pydoc-markdown -I src/bionemo --render-toc > documentation.md
python scripts/clean_documentation.py
3 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from .schedules.utils import TimeDirection
18 |
19 |
20 | __all__ = ["TimeDirection"]
21 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/distributions/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/distributions/prior/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from .continuous.gaussian import GaussianPrior
18 | from .discrete.custom import DiscreteCustomPrior
19 | from .discrete.mask import DiscreteMaskedPrior
20 | from .discrete.uniform import DiscreteUniformPrior
21 |
22 |
23 | __all__ = ["DiscreteCustomPrior", "DiscreteMaskedPrior", "DiscreteUniformPrior", "GaussianPrior"]
24 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/distributions/prior/continuous/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/distributions/prior/continuous/utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from typing import Optional
18 |
19 | from torch import Tensor
20 |
21 |
def remove_center_of_mass(data: Tensor, mask: Optional[Tensor] = None) -> Tensor:
    """Subtracts the center of mass (CoM) from the given data.

    The CoM is the mean over the node dimension (second to last). When a mask is
    given, only nodes with a non-zero mask entry contribute to the CoM.

    Args:
        data: The input data with shape (..., nodes, features).
        mask: An optional binary mask with shape (..., nodes) selecting the nodes that take part in the
            CoM calculation. Defaults to None, in which case every node contributes.

    Returns:
        The data with the CoM subtracted, with the same shape as ``data``. The CoM is subtracted from
        every node, including those that were masked out of the CoM calculation.
    """
    if mask is None:
        com = data.mean(dim=-2, keepdim=True)
    else:
        masked_data = data * mask.unsqueeze(-1)
        num_nodes = mask.sum(dim=-1, keepdim=True).unsqueeze(-1)
        # A fully masked-out sample would otherwise compute 0 / 0 = NaN. Its masked sum
        # is already zero, so dividing by 1 instead gives a zero CoM and leaves the
        # sample unchanged.
        num_nodes = num_nodes.masked_fill(num_nodes == 0, 1)
        com = masked_data.sum(dim=-2, keepdim=True) / num_nodes
    return data - com
39 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/distributions/prior/discrete/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/distributions/time/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from .beta import BetaTimeDistribution
18 | from .distribution import MixTimeDistribution, TimeDistribution
19 | from .logit_normal import LogitNormalTimeDistribution
20 | from .uniform import UniformTimeDistribution
21 |
22 |
23 | __all__ = [
24 | "BetaTimeDistribution",
25 | "LogitNormalTimeDistribution",
26 | "MixTimeDistribution",
27 | "TimeDistribution",
28 | "UniformTimeDistribution",
29 | ]
30 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/distributions/time/utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | import torch
17 |
18 |
def float_time_to_index(time: torch.Tensor, num_time_steps: int) -> torch.Tensor:
    """Map float times onto integer time-step indices.

    Values outside [0, 1] are clamped to that interval before being scaled to
    the index range [0, num_time_steps - 1] and rounded.

    Args:
        time (torch.Tensor): A tensor of float time values, nominally in the range [0, 1].
        num_time_steps (int): The number of discrete time steps.

    Returns:
        torch.Tensor: An int64 tensor of time indices, one per input time value.
    """
    last_index = num_time_steps - 1
    # Out-of-range times saturate at the first / last index.
    bounded = time.clamp(min=0.0, max=1.0)
    return (bounded * last_index).round().long()
36 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/interpolants/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from .continuous_time.continuous.continuous_flow_matching import ContinuousFlowMatcher
18 | from .continuous_time.continuous.data_augmentation.equivariant_ot_sampler import EquivariantOTSampler
19 | from .continuous_time.continuous.data_augmentation.kabsch_augmentation import KabschAugmentation
20 | from .continuous_time.continuous.data_augmentation.ot_sampler import OTSampler
21 | from .continuous_time.continuous.vdm import VDM
22 | from .continuous_time.discrete.discrete_flow_matching import DiscreteFlowMatcher
23 | from .continuous_time.discrete.mdlm import MDLM
24 | from .discrete_time.continuous.ddpm import DDPM
25 | from .discrete_time.discrete.d3pm import D3PM
26 |
27 |
28 | __all__ = [
29 | "D3PM",
30 | "DDPM",
31 | "MDLM",
32 | "VDM",
33 | "ContinuousFlowMatcher",
34 | "DiscreteFlowMatcher",
35 | "EquivariantOTSampler",
36 | "KabschAugmentation",
37 | "OTSampler",
38 | ]
39 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/interpolants/continuous_time/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/interpolants/continuous_time/continuous/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/interpolants/continuous_time/continuous/data_augmentation/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/interpolants/continuous_time/continuous/data_augmentation/augmentation_types.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from enum import Enum
18 |
19 |
class AugmentationType(Enum):
    """An enumeration representing the type of Optimal Transport / alignment augmentation that can be used in Continuous Flow Matching.

    - **EXACT_OT**: Standard mini-batch optimal transport, defined in https://arxiv.org/pdf/2302.00482.
    - **EQUIVARIANT_OT**: Mini-batch OT with an added roto-translation optimization; see https://arxiv.org/pdf/2306.15030 and https://arxiv.org/pdf/2312.07168 (Sec. 4.2).
    - **KABSCH**: Simple Kabsch alignment between each data and noise point, with no permutation; see https://arxiv.org/pdf/2410.22388 (Sec. 3.2).

    Each member names one strategy for pairing or aligning data and noise samples.
    """

    EXACT_OT = "exact_ot"
    EQUIVARIANT_OT = "equivariant_ot"
    KABSCH = "kabsch"
33 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/interpolants/continuous_time/discrete/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/interpolants/discrete_time/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/interpolants/discrete_time/continuous/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/interpolants/discrete_time/discrete/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/schedules/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/schedules/noise/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/schedules/utils.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from enum import Enum
18 |
19 |
20 | class TimeDirection(Enum):
21 | """Enum for the direction in which time runs along the noise schedule."""
22 |
23 | UNIFIED = "unified" # noise at t=0 --> data at t=1
24 | DIFFUSION = "diffusion" # noise at t=1 --> data at t=0
25 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-moco/src/bionemo/moco/testing/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "noodles_fasta_wrapper"
3 | version = "0.1.2" # Also update the VERSION file when you change the version!
4 | edition = "2021"
5 |
6 | [lib]
7 | crate-type = ["cdylib"]
8 | name = "noodles_fasta_wrapper" # The name of the library
9 | path = "rust/src/lib.rs" # Path to the library file
10 |
11 | [dependencies]
12 | pyo3 = { version = "0.18", features = ["extension-module"] }
13 | noodles-fasta = "0.45.0" # Update to the latest version of noodles
14 | noodles-core = "*" # NOTE(review): wildcard requirement, unlike the pinned noodles-fasta above; confirm this is intended
15 | memmap2 = "*" # NOTE(review): wildcard requirement; confirm this is intended
16 |
17 | [package.metadata.pyo3]
18 | name = "noodles_fasta_wrapper"
19 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/VERSION:
--------------------------------------------------------------------------------
1 | 0.1.2
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["maturin>=1.0,<2.0"]
3 | build-backend = "maturin"
4 |
5 | [project]
6 | name = "bionemo-noodles"
7 | readme = "README.md"
8 | description = "Python wrapper around [noodles](https://github.com/zaeleus/noodles)."
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # internal
15 | 'bionemo-core',
16 | # external
17 | 'pyfaidx',
18 | ]
19 |
20 | [project.optional-dependencies]
21 | test = [
22 | 'torch',
23 | ]
24 |
25 | [tool.maturin]
26 | bindings = "pyo3"
27 | compatibility = "manylinux_2_28"
28 | python-source = "src"
29 | # we could make this bionemo.noodles.fasta_wrapper, but that would require it to be its own namespaced package.
30 | module-name = "bionemo.noodles_fasta_wrapper"
31 | version = { file = "VERSION" }
32 |
33 | [tool.setuptools.packages.find] # NOTE(review): build-backend in this file is maturin; presumably this setuptools table is not consulted, confirm
34 | where = ["src"]
35 | include = ["bionemo.*"]
36 | namespaces = true
37 | exclude = ["test*."] # NOTE(review): glob ends in a literal "."; possibly meant "test*", confirm
38 |
39 | [tool.uv]
40 | cache-keys = [{ git = true }]
41 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/requirements.txt:
--------------------------------------------------------------------------------
1 | maturin
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/src/bionemo/noodles/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | from bionemo.noodles_fasta_wrapper import (
17 | PyFaidxRecord,
18 | PyIndexedMmapFastaReader,
19 | back_transcribe_sequence,
20 | complement_sequence,
21 | reverse_sequence,
22 | transcribe_sequence,
23 | )
24 |
25 |
26 | __all__ = ( # public API: every name imported above from bionemo.noodles_fasta_wrapper
27 | "PyFaidxRecord",
28 | "PyIndexedMmapFastaReader",
29 | "back_transcribe_sequence",
30 | "complement_sequence",
31 | "reverse_sequence",
32 | "transcribe_sequence",
33 | )
34 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/tests/bionemo/noodles/data/bad_index.fasta:
--------------------------------------------------------------------------------
1 | >chr1
2 | ACTGACTGACTG
3 | >chr2
4 | GGTCAAGGTCAA
5 | >chr3
6 | AGTCAAGGTCCA
7 | CGTCAAGGTCCC
8 | GGTCAAGGTCCG
9 | TGTCAAGGTCCT
10 | AGTCAAGGTCAA
11 | CGTCAAGGTCAC
12 | GGTCAAGGTCAG
13 | >chr4
14 | CCCCCCCCCCCC
15 | ACGT
16 | >chr5
17 | A
18 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/tests/bionemo/noodles/data/bad_index.fasta.fai:
--------------------------------------------------------------------------------
1 | this is not a valid fasta index!!!!!!
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/tests/bionemo/noodles/data/dupes.fasta:
--------------------------------------------------------------------------------
1 | >chr1 version|of|seq1
2 | ACTGACTGACTG
3 | >chr1 version|of|seq2
4 | GGTCAAGGTCAA
5 | >chr1 some|random|inputs
6 | AGTCAAGGTCCA
7 | CGTCAAGGTCCC
8 | GGTCAAGGTCCG
9 | TGTCAAGGTCCT
10 | AGTCAAGGTCAA
11 | CGTCAAGGTCAC
12 | GGTCAAGGTCAG
13 | >chr1 why|is|this|done
14 | CCCCCCCCCCCC
15 | ACGT
16 | >chr1 stop|violated|fasta|spec
17 | A
18 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/tests/bionemo/noodles/data/sample.fasta:
--------------------------------------------------------------------------------
1 | >chr1
2 | ACTGACTGACTG
3 | >chr2
4 | GGTCAAGGTCAA
5 | >chr3
6 | AGTCAAGGTCCA
7 | CGTCAAGGTCCC
8 | GGTCAAGGTCCG
9 | TGTCAAGGTCCT
10 | AGTCAAGGTCAA
11 | CGTCAAGGTCAC
12 | GGTCAAGGTCAG
13 | >chr4
14 | CCCCCCCCCCCC
15 | ACGT
16 | >chr5
17 | A
18 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-noodles/tests/bionemo/noodles/data/sample.fasta.fai:
--------------------------------------------------------------------------------
1 | chr1 12 6 12 13
2 | chr2 12 25 12 13
3 | chr3 84 44 12 13
4 | chr4 16 141 12 13
5 | chr5 1 165 1 2
6 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/VERSION:
--------------------------------------------------------------------------------
1 | 0.0.7
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/assets/disk_space.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/sub-packages/bionemo-scdl/assets/disk_space.png
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/assets/throughput.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/sub-packages/bionemo-scdl/assets/throughput.png
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-scdl"
7 | readme = "README.md"
8 | description = "SCDL, a Dataset class for Single Cell data."
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # bionemo sub-packages
15 | 'bionemo-core>=2.2.1',
16 | # external
17 | 'anndata>=0.11.0',
18 | 'pandas>=2.2.1',
19 | 'pyarrow>=16.0.0',
20 | 'scipy>=1.11.1',
21 | 'pydantic[email]',
22 | ]
23 |
24 | [project.scripts]
25 | convert_h5ad_to_scdl = "bionemo.scdl.scripts.convert_h5ad_to_scdl:main"
26 |
27 | [tool.setuptools.packages.find]
28 | where = ["src"]
29 | include = ["bionemo.*"]
30 | namespaces = true
31 | exclude = ["test*."] # NOTE(review): glob ends in a literal "."; possibly meant "test*", confirm
32 |
33 | [tool.setuptools.dynamic]
34 | version = { file = "VERSION" }
35 |
36 | [tool.uv]
37 | cache-keys = [{ git = true }]
38 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/src/bionemo/scdl/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/src/bionemo/scdl/api/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/src/bionemo/scdl/index/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/src/bionemo/scdl/io/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/src/bionemo/scdl/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-scdl/src/bionemo/scdl/util/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-size-aware-batching/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-size-aware-batching/VERSION:
--------------------------------------------------------------------------------
1 | 1.0.0
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-size-aware-batching/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-size-aware-batching"
7 | readme = "README.md"
8 | description = "Provides a simple way to create mini-batches in a memory consumption-aware manner, making it useful for tasks like training models on datasets with varying memory requirements."
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # bionemo sub-packages
15 | 'bionemo-core',
16 | # external
17 | ]
18 |
19 | [tool.setuptools.packages.find]
20 | where = ["src"]
21 | include = ["bionemo.*"]
22 | namespaces = true
23 | exclude = ["test*."] # NOTE(review): glob ends in a literal "."; possibly meant "test*", confirm
24 |
25 | [tool.setuptools.dynamic]
26 | version = { file = "VERSION" }
27 |
28 | [tool.uv]
29 | cache-keys = [{ git = true }]
30 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-size-aware-batching/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/bionemo-framework/9ac892bd46d49eec05df35965ee5b90b5b8fe763/sub-packages/bionemo-size-aware-batching/requirements.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-size-aware-batching/src/bionemo/size_aware_batching/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/README.md:
--------------------------------------------------------------------------------
1 | # bionemo-testing
2 |
3 | A package of test-time requirements and utilities for bionemo sub-packages. In particular, the `bionemo-testing` package
4 | handles downloading and caching data and other assets for running unit tests and example notebooks. For more information
5 | on test data handling, see [BioNeMo test data management](https://github.com/NVIDIA/bionemo-framework/blob/main/sub-packages/bionemo-testing/src/bionemo/testing/data/README.md)
6 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/VERSION:
--------------------------------------------------------------------------------
1 | 2.4.1
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=64", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "bionemo-testing"
7 | readme = "README.md"
8 | description = "Utilities aiding test creation for BioNeMo sub-packages."
9 | authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
10 | requires-python = ">=3.10"
11 | license = { file = "LICENSE" }
12 | dynamic = ["version"]
13 | dependencies = [
14 | # bionemo sub-packages
15 | 'bionemo-core',
16 | 'bionemo-llm>=2.4.5',
17 | # external
18 | 'email-validator',
19 | 'pytest',
20 | 'overrides',
21 | ]
22 |
23 | [tool.setuptools.packages.find]
24 | where = ["src"]
25 | include = ["bionemo.*"]
26 | namespaces = true
27 | exclude = ["test*."] # NOTE(review): glob ends in a literal "."; possibly meant "test*", confirm
28 |
29 | [tool.setuptools.dynamic]
30 | version = { file = "VERSION" }
31 |
32 | [tool.uv]
33 | cache-keys = [{ git = true }]
34 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/src/bionemo/testing/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/src/bionemo/testing/data/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/src/bionemo/testing/data/load.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from typing import Sequence
16 |
17 | from bionemo.core.data.load import default_ngc_client, default_pbss_client, entrypoint, load
18 |
19 |
20 | _ = entrypoint
21 | # This reference needs to stay so that ruff doesn't automatically remove the `entrypoint` import as unused.
22 | # We don't want to include it in __all__.
23 | # But older installations *may* be using the old CLI path (bionemo.core.data.load:entrypoint)
24 | # so this is here for backwards compatibility. NOTE(review): the path quoted above is the module imported from here; confirm which path is the legacy one.
25 |
26 |
27 | __all__: Sequence[str] = (
28 | "default_ngc_client",
29 | "default_pbss_client",
30 | "load",
31 | )
32 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/src/bionemo/testing/data/resource.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from typing import Sequence
16 |
17 | from bionemo.core.data.resource import Resource, get_all_resources
18 |
19 |
# Public names of this module; both are re-exported unchanged from bionemo.core.data.resource.
__all__: Sequence[str] = (
    "Resource",
    "get_all_resources",
)
24 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/src/bionemo/testing/harnesses/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-testing/src/bionemo/testing/harnesses/mode.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 |
17 | from enum import Enum, auto
18 |
19 |
class Mode(Enum):
    """Mode for stop-go testing.

    Member values are generated with ``auto()``, so they are assigned
    automatically in definition order. Refer to members by name
    (``Mode.STOP``) rather than relying on the underlying numbers.
    """

    # NOTE(review): the meanings below are inferred from the member names only --
    # confirm against the test harness that consumes this enum.
    STOP = auto()  # presumably: the run is interrupted partway through
    RESUME = auto()  # presumably: the run picks up again after a stop
    CONTINUOUS = auto()  # presumably: the run proceeds without interruption
26 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-webdatamodule/LICENSE:
--------------------------------------------------------------------------------
1 | ../../LICENSE/license.txt
--------------------------------------------------------------------------------
/sub-packages/bionemo-webdatamodule/VERSION:
--------------------------------------------------------------------------------
1 | 1.0.0
2 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-webdatamodule/pyproject.toml:
--------------------------------------------------------------------------------
# Build configuration: the package is built with the setuptools backend.
[build-system]
requires = ["setuptools>=64", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "bionemo-webdatamodule"
readme = "README.md"
description = "PyTorch Lightning Data Module for WebDataset files."
authors = [{ name = "BioNeMo Team", email = "bionemofeedback@nvidia.com" }]
requires-python = ">=3.10"
license = { file = "LICENSE" }
# The version is not hard-coded here; it is supplied by [tool.setuptools.dynamic] below.
dynamic = ["version"]
dependencies = [
    # bionemo sub-packages
    'bionemo-core',
    # external (pinned to an exact release)
    'webdataset==0.2.96',
]

# Package discovery: search under src/ for namespace packages matching "bionemo.*".
[tool.setuptools.packages.find]
where = ["src"]
include = ["bionemo.*"]
namespaces = true
# NOTE(review): this pattern ends with a literal ".", so it does not look like it can match any
# package name and is probably a no-op. Possibly "test*" (or "*.tests*") was intended -- confirm
# before changing, since discovery is already limited to src/ and "bionemo.*".
exclude = ["test*."]

# The package version is read from the VERSION file.
[tool.setuptools.dynamic]
version = { file = "VERSION" }

[tool.uv]
# NOTE(review): presumably ties uv's cache for this package to the git state of the checkout --
# confirm against the uv documentation for `cache-keys`.
cache-keys = [{ git = true }]
31 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-webdatamodule/src/bionemo/webdatamodule/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------
/sub-packages/bionemo-webdatamodule/tests/bionemo/webdatamodule/__init__.py:
--------------------------------------------------------------------------------
1 | # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2 | # SPDX-License-Identifier: LicenseRef-Apache2
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
--------------------------------------------------------------------------------