├── .DS_Store ├── .gitattributes ├── CMakeLists.txt ├── LICENSE ├── OUTPUT_TERMS_OF_USE.md ├── README.md ├── WEIGHTS_PROHIBITED_USE_POLICY.md ├── WEIGHTS_TERMS_OF_USE.md ├── dev-requirements.txt ├── docker ├── Dockerfile └── dockerignore ├── docs ├── contributing.md ├── header.jpg ├── input.md ├── installation.md ├── known_issues.md ├── output.md └── performance.md ├── environment.yml ├── example ├── 1_extract_chains.py ├── 2_pdb2jax.py ├── 3_generate_json.py ├── 4_extract_iptm-ipae-pae-interaction.py ├── pdb │ └── bcov_v4_r4_ems_p1-15H-GBL-16H-GBL-16H_0129_0001_0001_0002_000000002_0001_0001_19_34_H_.._ems_p1-16H-GBL-16H-GABBL-16H_0045_0001_0001_0002_0001_0001_0001_0001.pdb ├── subset_data.csv └── subset_data_with_metrics.csv ├── fetch_databases.sh ├── pyproject.toml ├── requirements.txt ├── run_af3score.py └── src └── alphafold3 ├── __init__.py ├── __pycache__ ├── __init__.cpython-311.pyc ├── build_data.cpython-311.pyc └── version.cpython-311.pyc ├── build_data.py ├── common ├── __pycache__ │ ├── base_config.cpython-311.pyc │ ├── folding_input.cpython-311.pyc │ └── resources.cpython-311.pyc ├── base_config.py ├── folding_input.py ├── resources.py └── testing │ ├── __pycache__ │ └── data.cpython-311.pyc │ └── data.py ├── constants ├── __pycache__ │ ├── atom_types.cpython-311.pyc │ ├── chemical_component_sets.cpython-311.pyc │ ├── chemical_components.cpython-311.pyc │ ├── mmcif_names.cpython-311.pyc │ ├── periodic_table.cpython-311.pyc │ ├── residue_names.cpython-311.pyc │ └── side_chains.cpython-311.pyc ├── atom_types.py ├── chemical_component_sets.py ├── chemical_components.py ├── converters │ ├── __pycache__ │ │ ├── ccd_pickle_gen.cpython-311.pyc │ │ └── chemical_component_sets_gen.cpython-311.pyc │ ├── ccd_pickle_gen.py │ ├── chemical_component_sets.pickle │ └── chemical_component_sets_gen.py ├── mmcif_names.py ├── periodic_table.py ├── residue_names.py └── side_chains.py ├── cpp.cc ├── data ├── __pycache__ │ ├── featurisation.cpython-311.pyc │ ├── 
msa.cpython-311.pyc │ ├── msa_config.cpython-311.pyc │ ├── msa_features.cpython-311.pyc │ ├── parsers.cpython-311.pyc │ ├── pipeline.cpython-311.pyc │ ├── structure_stores.cpython-311.pyc │ ├── template_realign.cpython-311.pyc │ └── templates.cpython-311.pyc ├── cpp │ ├── msa_profile_pybind.cc │ └── msa_profile_pybind.h ├── featurisation.py ├── msa.py ├── msa_config.py ├── msa_features.py ├── msa_identifiers.py ├── msa_store.py ├── parsers.py ├── pipeline.py ├── structure_stores.py ├── template_realign.py ├── template_store.py ├── templates.py └── tools │ ├── __pycache__ │ ├── hmmalign.cpython-311.pyc │ ├── hmmbuild.cpython-311.pyc │ ├── hmmsearch.cpython-311.pyc │ ├── jackhmmer.cpython-311.pyc │ ├── msa_tool.cpython-311.pyc │ ├── nhmmer.cpython-311.pyc │ ├── rdkit_utils.cpython-311.pyc │ └── subprocess_utils.cpython-311.pyc │ ├── hmmalign.py │ ├── hmmbuild.py │ ├── hmmsearch.py │ ├── jackhmmer.py │ ├── msa_tool.py │ ├── nhmmer.py │ ├── rdkit_utils.py │ └── subprocess_utils.py ├── jax ├── attention │ ├── __pycache__ │ │ ├── attention.cpython-311.pyc │ │ ├── attention_base.cpython-311.pyc │ │ ├── flash_attention.cpython-311.pyc │ │ └── xla_attention.cpython-311.pyc │ ├── attention.py │ ├── attention_base.py │ ├── attention_call_arg_specs.py │ ├── flash_attention.py │ └── xla_attention.py ├── common │ ├── __pycache__ │ │ ├── array_view.cpython-311.pyc │ │ ├── precision.cpython-311.pyc │ │ └── triton_utils.cpython-311.pyc │ ├── array_view.py │ ├── precision.py │ └── triton_utils.py ├── gated_linear_unit │ ├── __pycache__ │ │ ├── block.cpython-311.pyc │ │ ├── gated_linear_unit.cpython-311.pyc │ │ ├── gated_linear_unit_base.cpython-311.pyc │ │ ├── matmul_config.cpython-311.pyc │ │ └── matmul_ext.cpython-311.pyc │ ├── block.py │ ├── gated_linear_unit.py │ ├── gated_linear_unit_base.py │ ├── matmul_config.py │ └── matmul_ext.py └── geometry │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── rigid_matrix_vector.cpython-311.pyc │ ├── 
rotation_matrix.cpython-311.pyc │ ├── struct_of_array.cpython-311.pyc │ ├── utils.cpython-311.pyc │ └── vector.cpython-311.pyc │ ├── rigid_matrix_vector.py │ ├── rotation_matrix.py │ ├── struct_of_array.py │ ├── utils.py │ └── vector.py ├── model ├── __pycache__ │ ├── confidence_types.cpython-311.pyc │ ├── confidences.cpython-311.pyc │ ├── data3.cpython-311.pyc │ ├── data_constants.cpython-311.pyc │ ├── feat_batch.cpython-311.pyc │ ├── features.cpython-311.pyc │ ├── merging_features.cpython-311.pyc │ ├── mmcif_metadata.cpython-311.pyc │ ├── model_config.cpython-311.pyc │ ├── msa_pairing.cpython-311.pyc │ ├── params.cpython-311.pyc │ ├── post_processing.cpython-311.pyc │ └── protein_data_processing.cpython-311.pyc ├── atom_layout │ ├── __pycache__ │ │ └── atom_layout.cpython-311.pyc │ └── atom_layout.py ├── components │ ├── __pycache__ │ │ ├── base_model.cpython-311.pyc │ │ ├── haiku_modules.cpython-311.pyc │ │ ├── mapping.cpython-311.pyc │ │ └── utils.cpython-311.pyc │ ├── base_model.py │ ├── haiku_modules.py │ ├── mapping.py │ └── utils.py ├── confidence_types.py ├── confidences.py ├── data3.py ├── data_constants.py ├── diffusion │ ├── __pycache__ │ │ ├── atom_cross_attention.cpython-311.pyc │ │ ├── confidence_head.cpython-311.pyc │ │ ├── diffusion_head.cpython-311.pyc │ │ ├── diffusion_transformer.cpython-311.pyc │ │ ├── distogram_head.cpython-311.pyc │ │ ├── featurization.cpython-311.pyc │ │ ├── model.cpython-311.pyc │ │ ├── modules.cpython-311.pyc │ │ └── template_modules.cpython-311.pyc │ ├── atom_cross_attention.py │ ├── confidence_head.py │ ├── diffusion_head.py │ ├── diffusion_transformer.py │ ├── distogram_head.py │ ├── featurization.py │ ├── model.py │ ├── modules.py │ └── template_modules.py ├── feat_batch.py ├── features.py ├── merging_features.py ├── mkdssp_pybind.cc ├── mkdssp_pybind.h ├── mmcif_metadata.py ├── model_config.py ├── msa_pairing.py ├── params.py ├── pipeline │ ├── __pycache__ │ │ ├── inter_chain_bonds.cpython-311.pyc │ │ ├── 
pipeline.cpython-311.pyc │ │ └── structure_cleaning.cpython-311.pyc │ ├── inter_chain_bonds.py │ ├── pipeline.py │ └── structure_cleaning.py ├── post_processing.py ├── protein_data_processing.py └── scoring │ ├── __pycache__ │ ├── alignment.cpython-311.pyc │ ├── covalent_bond_cleaning.cpython-311.pyc │ └── scoring.cpython-311.pyc │ ├── alignment.py │ ├── covalent_bond_cleaning.py │ └── scoring.py ├── parsers └── cpp │ ├── cif_dict.pyi │ ├── cif_dict_lib.cc │ ├── cif_dict_lib.h │ ├── cif_dict_pybind.cc │ ├── cif_dict_pybind.h │ ├── fasta_iterator.pyi │ ├── fasta_iterator_lib.cc │ ├── fasta_iterator_lib.h │ ├── fasta_iterator_pybind.cc │ ├── fasta_iterator_pybind.h │ ├── msa_conversion.pyi │ ├── msa_conversion_pybind.cc │ └── msa_conversion_pybind.h ├── scripts ├── copy_to_ssd.sh └── gcp_mount_ssd.sh ├── structure ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-311.pyc │ ├── bioassemblies.cpython-311.pyc │ ├── bonds.cpython-311.pyc │ ├── chemical_components.cpython-311.pyc │ ├── mmcif.cpython-311.pyc │ ├── parsing.cpython-311.pyc │ ├── sterics.cpython-311.pyc │ ├── structure.cpython-311.pyc │ ├── structure_tables.cpython-311.pyc │ ├── table.cpython-311.pyc │ └── test_utils.cpython-311.pyc ├── bioassemblies.py ├── bonds.py ├── chemical_components.py ├── cpp │ ├── aggregation.pyi │ ├── aggregation_pybind.cc │ ├── aggregation_pybind.h │ ├── membership.pyi │ ├── membership_pybind.cc │ ├── membership_pybind.h │ ├── mmcif_altlocs.cc │ ├── mmcif_altlocs.h │ ├── mmcif_atom_site.pyi │ ├── mmcif_atom_site_pybind.cc │ ├── mmcif_atom_site_pybind.h │ ├── mmcif_layout.h │ ├── mmcif_layout.pyi │ ├── mmcif_layout_lib.cc │ ├── mmcif_layout_pybind.cc │ ├── mmcif_layout_pybind.h │ ├── mmcif_struct_conn.h │ ├── mmcif_struct_conn.pyi │ ├── mmcif_struct_conn_lib.cc │ ├── mmcif_struct_conn_pybind.cc │ ├── mmcif_struct_conn_pybind.h │ ├── mmcif_utils.pyi │ ├── mmcif_utils_pybind.cc │ ├── mmcif_utils_pybind.h │ ├── string_array.pyi │ ├── string_array_pybind.cc │ └── 
string_array_pybind.h ├── mmcif.py ├── parsing.py ├── sterics.py ├── structure.py ├── structure_tables.py ├── table.py └── test_utils.py ├── test_data ├── alphafold_run_outputs │ ├── run_alphafold_test_output_bucket_1024.pkl │ └── run_alphafold_test_output_bucket_default.pkl ├── featurised_example.json ├── featurised_example.pkl ├── miniature_databases │ ├── bfd-first_non_consensus_sequences__subsampled_1000.fasta │ ├── mgy_clusters__subsampled_1000.fa │ ├── nt_rna_2023_02_23_clust_seq_id_90_cov_80_rep_seq__subsampled_1000.fasta │ ├── pdb_seqres_2022_09_28__subsampled_1000.fasta │ ├── rfam_14_4_clustered_rep_seq__subsampled_1000.fasta │ ├── rnacentral_active_seq_id_90_cov_80_linclust__subsampled_1000.fasta │ ├── uniprot_all__subsampled_1000.fasta │ └── uniref90__subsampled_1000.fasta └── model_config.json └── version.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | src/alphafold3/constants/converters/ccd.pickle filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | cmake_minimum_required(VERSION 3.28) 12 | project( 13 | "${SKBUILD_PROJECT_NAME}" 14 | LANGUAGES CXX 15 | VERSION "${SKBUILD_PROJECT_VERSION}") 16 | 17 | include(FetchContent) 18 | set(CMAKE_CXX_STANDARD 20) 19 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 20 | set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) 21 | set(ABSL_PROPAGATE_CXX_STD ON) 22 | 23 | # Remove support for scan deps, which is only useful when using C++ modules. 24 | unset(CMAKE_CXX_SCANDEP_SOURCE) 25 | 26 | FetchContent_Declare( 27 | abseil-cpp 28 | GIT_REPOSITORY https://github.com/abseil/abseil-cpp 29 | GIT_TAG d7aaad83b488fd62bd51c81ecf16cd938532cc0a # 20240116.2 30 | EXCLUDE_FROM_ALL) 31 | 32 | FetchContent_Declare( 33 | pybind11 34 | GIT_REPOSITORY https://github.com/pybind/pybind11 35 | GIT_TAG 2e0815278cb899b20870a67ca8205996ef47e70f # v2.12.0 36 | EXCLUDE_FROM_ALL) 37 | 38 | FetchContent_Declare( 39 | pybind11_abseil 40 | GIT_REPOSITORY https://github.com/pybind/pybind11_abseil 41 | GIT_TAG bddf30141f9fec8e577f515313caec45f559d319 # HEAD @ 2024-08-07 42 | EXCLUDE_FROM_ALL) 43 | 44 | 45 | FetchContent_Declare( 46 | cifpp 47 | GIT_REPOSITORY https://github.com/pdb-redo/libcifpp 48 | GIT_TAG ac98531a2fc8daf21131faa0c3d73766efa46180 # v7.0.3 49 | # Don't `EXCLUDE_FROM_ALL` as necessary for build_data. 
50 | ) 51 | 52 | FetchContent_Declare( 53 | dssp 54 | GIT_REPOSITORY https://github.com/PDB-REDO/dssp 55 | GIT_TAG 57560472b4260dc41f457706bc45fc6ef0bc0f10 # v4.4.7 56 | EXCLUDE_FROM_ALL) 57 | 58 | FetchContent_MakeAvailable(pybind11 abseil-cpp pybind11_abseil cifpp dssp) 59 | 60 | find_package( 61 | Python3 62 | COMPONENTS Interpreter Development NumPy 63 | REQUIRED) 64 | 65 | include_directories(${PYTHON_INCLUDE_DIRS}) 66 | include_directories(src/) 67 | 68 | file(GLOB_RECURSE cpp_srcs src/alphafold3/*.cc) 69 | list(FILTER cpp_srcs EXCLUDE REGEX ".*\(_test\|_main\|_benchmark\).cc$") 70 | 71 | add_compile_definitions(NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION) 72 | 73 | pybind11_add_module(cpp ${cpp_srcs}) 74 | 75 | target_link_libraries( 76 | cpp 77 | PRIVATE absl::check 78 | absl::flat_hash_map 79 | absl::node_hash_map 80 | absl::strings 81 | absl::status 82 | absl::statusor 83 | absl::log 84 | pybind11_abseil::absl_casters 85 | Python3::NumPy 86 | dssp::dssp 87 | cifpp::cifpp) 88 | 89 | target_compile_definitions(cpp PRIVATE VERSION_INFO=${PROJECT_VERSION}) 90 | install(TARGETS cpp LIBRARY DESTINATION alphafold3) 91 | install( 92 | FILES LICENSE 93 | OUTPUT_TERMS_OF_USE.md 94 | WEIGHTS_PROHIBITED_USE_POLICY.md 95 | WEIGHTS_TERMS_OF_USE.md 96 | DESTINATION alphafold3) 97 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | FROM nvidia/cuda:12.6.0-base-ubuntu22.04 12 | 13 | # Some RUN statements are combined together to make Docker build run faster. 14 | # Get latest package listing, install software-properties-common, git, wget, 15 | # compilers and libraries. 16 | # git is required for pyproject.toml toolchain's use of CMakeLists.txt. 17 | # gcc, g++, make are required for compiling hmmer and AlphaFold 3 libraries. 18 | # zlib is a required dependency of AlphaFold 3. 19 | RUN apt update --quiet \ 20 | && apt install --yes --quiet software-properties-common \ 21 | && apt install --yes --quiet git wget gcc g++ make zlib1g-dev zstd 22 | 23 | # Get apt repository of specific Python versions. Then install Python. Tell APT 24 | # this isn't an interactive TTY to avoid timezone prompt when installing. 25 | RUN add-apt-repository ppa:deadsnakes/ppa \ 26 | && DEBIAN_FRONTEND=noninteractive apt install --yes --quiet python3.11 python3-pip python3.11-venv python3.11-dev 27 | RUN python3.11 -m venv /alphafold3_venv 28 | ENV PATH="/hmmer/bin:/alphafold3_venv/bin:$PATH" 29 | # Update pip to the latest version. Not necessary in Docker, but good to do when 30 | # this is used as a recipe for local installation since we rely on new pip 31 | # features for secure installs. 32 | RUN pip3 install --upgrade pip 33 | 34 | # Install HMMER. Do so before copying the source code, so that docker can cache 35 | # the image layer containing HMMER. 
36 | RUN mkdir /hmmer_build /hmmer && \ 37 | wget http://eddylab.org/software/hmmer/hmmer-3.4.tar.gz --directory-prefix /hmmer_build && \ 38 | (cd /hmmer_build && tar zxf hmmer-3.4.tar.gz && rm hmmer-3.4.tar.gz) && \ 39 | (cd /hmmer_build/hmmer-3.4 && ./configure --prefix /hmmer) && \ 40 | (cd /hmmer_build/hmmer-3.4 && make -j8) && \ 41 | (cd /hmmer_build/hmmer-3.4 && make install) && \ 42 | (cd /hmmer_build/hmmer-3.4/easel && make install) && \ 43 | rm -R /hmmer_build 44 | 45 | # Copy the AlphaFold 3 source code from the local machine to the container and 46 | # set the working directory to there. 47 | COPY . /app/alphafold 48 | WORKDIR /app/alphafold 49 | 50 | # Install the Python dependencies AlphaFold 3 needs. 51 | RUN pip3 install -r dev-requirements.txt 52 | RUN pip3 install --no-deps . 53 | # Build chemical components database (this binary was installed by pip). 54 | RUN build_data 55 | 56 | # To work around a known XLA issue causing the compilation time to greatly 57 | # increase, the following environment variable setting XLA flags must be enabled 58 | # when running AlphaFold 3: 59 | ENV XLA_FLAGS="--xla_gpu_enable_triton_gemm=false" 60 | # Memory settings used for folding up to 5,120 tokens on A100 80 GB. 61 | ENV XLA_PYTHON_CLIENT_PREALLOCATE=true 62 | ENV XLA_CLIENT_MEM_FRACTION=0.95 63 | 64 | CMD ["python3", "run_af3score.py"] 65 | -------------------------------------------------------------------------------- /docker/dockerignore: -------------------------------------------------------------------------------- 1 | dockerignore 2 | Dockerfile -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We welcome small patches related to bug fixes and documentation, but we do not 4 | plan to make any major changes to this repository. 
5 | 6 | ## Before You Begin 7 | 8 | ### Sign Our Contributor License Agreement 9 | 10 | Contributions to this project must be accompanied by a 11 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA). 12 | You (or your employer) retain the copyright to your contribution; this simply 13 | gives us permission to use and redistribute your contributions as part of the 14 | project. 15 | 16 | If you or your current employer have already signed the Google CLA (even if it 17 | was for a different project), you probably don't need to do it again. 18 | 19 | Visit [https://cla.developers.google.com/](https://cla.developers.google.com/) to see your current agreements or to 20 | sign a new one. 21 | 22 | ### Review Our Community Guidelines 23 | 24 | This project follows 25 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/). 26 | 27 | ## Contribution Process 28 | 29 | We won't accept pull requests directly, but if you send one, we will review it. 30 | If we send a fix based on your pull request, we will make sure to credit you in 31 | the release notes. 32 | -------------------------------------------------------------------------------- /docs/header.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/docs/header.jpg -------------------------------------------------------------------------------- /docs/known_issues.md: -------------------------------------------------------------------------------- 1 | # Known Issues 2 | 3 | ### Devices other than NVIDIA A100 or H100 4 | 5 | There are currently known unresolved numerical issues with using devices other 6 | than NVIDIA A100 and H100. For now, accuracy has only been validated for A100 7 | and H100 GPU device types. See 8 | [this Issue](https://github.com/google-deepmind/alphafold3/issues/59) for 9 | tracking. 
"""Split PDB complexes into per-chain mmCIF files and tabulate chain sequences.

Source file: example/1_extract_chains.py

For every ``*.pdb`` file in ``./pdb`` each chain of the first model is written
to ``./complex_chain_cifs/<complex>_chain_<chain id>.cif`` and the one-letter
sequences of all chains are collected in ``complex_chain_sequences.csv``.
"""
import multiprocessing as mp
import os
from pathlib import Path

import pandas as pd

# Biopython and tqdm are imported inside the functions that use them so that
# the pure helpers below stay importable without those packages installed.

# Three-letter to one-letter amino acid conversion.
protein_letters_3to1 = {
    'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E',
    'PHE': 'F', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
    'LYS': 'K', 'LEU': 'L', 'MET': 'M', 'ASN': 'N',
    'PRO': 'P', 'GLN': 'Q', 'ARG': 'R', 'SER': 'S',
    'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
    'MSE': 'M',  # Selenomethionine is typically treated as methionine
}


def get_sequence(chain):
    """Return the one-letter amino acid sequence of ``chain``.

    Only residues whose ``id[0]`` is a blank are used; every other residue is
    skipped. Residue names missing from ``protein_letters_3to1`` and residues
    whose name cannot be read are reported as ``'X'``.

    Args:
        chain: Iterable of residue objects exposing ``id`` and
            ``get_resname()``.

    Returns:
        The sequence as a string (empty for an empty chain).
    """
    letters = []
    for residue in chain:
        if residue.id[0] != ' ':
            continue
        try:
            resname = residue.get_resname().upper()
        except Exception:
            # Best effort: an unreadable residue still occupies a position.
            # ``Exception`` (not a bare except) lets Ctrl-C propagate.
            letters.append('X')
            continue
        letters.append(protein_letters_3to1.get(resname, 'X'))
    return "".join(letters)


def process_single_pdb(args):
    """Write each chain of one PDB file to its own mmCIF file.

    Args:
        args: ``(input_pdb, output_dir_cif)`` tuple; a single argument so the
            function can be mapped over a process pool.

    Returns:
        ``(base_name, chain_sequences)`` where ``chain_sequences`` maps chain
        ID to one-letter sequence, or ``(None, None)`` if the file could not
        be processed (the error is printed).
    """
    from Bio import PDB
    from Bio.PDB import MMCIFIO, Model, Structure

    input_pdb, output_dir_cif = args
    try:
        parser = PDB.PDBParser(QUIET=True)
        structure = parser.get_structure("structure", input_pdb)
        base_name = os.path.splitext(os.path.basename(input_pdb))[0]

        chain_sequences = {}
        # Only the first model of the structure is exported.
        for chain in structure[0]:
            chain_id = chain.id
            chain_sequences[chain_id] = get_sequence(chain)

            # Wrap a copy of the chain in a fresh structure/model so the
            # output file contains this chain only.
            new_structure = Structure.Structure("new_structure")
            new_model = Model.Model(0)
            new_structure.add(new_model)
            new_model.add(chain.copy())

            cif_io = MMCIFIO()
            cif_io.set_structure(new_structure)
            cif_output = os.path.join(
                output_dir_cif, f"{base_name}_chain_{chain_id}.cif")
            cif_io.save(cif_output)

        return base_name, chain_sequences
    except Exception as e:
        # Worker boundary: report and keep going with the remaining files.
        print(f"\nError processing {input_pdb}: {e}")
        return None, None


def chain_sort_key(chain_id):
    """Sort key for chain IDs: IDs starting with 'B' first, 'A' last."""
    if chain_id.startswith('B'):
        return ('0', chain_id)
    if chain_id.startswith('A'):
        return ('2', chain_id)
    return ('1', chain_id)


def build_sequence_table(sequences_dict):
    """Build the per-complex sequence table.

    Args:
        sequences_dict: Mapping ``complex name -> {chain ID: sequence}``.

    Returns:
        A DataFrame with a ``complex`` column followed by one
        ``chain_<ID>_seq`` column per chain ID (ordered by
        ``chain_sort_key``); chains a complex lacks are ``''``. An empty
        mapping gives an empty table that still has the ``complex`` column.
    """
    all_chain_ids = sorted(
        {chain_id for chains in sequences_dict.values() for chain_id in chains},
        key=chain_sort_key,
    )
    columns = ['complex'] + [f'chain_{chain_id}_seq' for chain_id in all_chain_ids]

    rows = []
    for complex_name, chain_data in sequences_dict.items():
        row = {'complex': complex_name}
        for chain_id in all_chain_ids:
            row[f'chain_{chain_id}_seq'] = chain_data.get(chain_id, '')
        rows.append(row)

    # Passing ``columns`` explicitly keeps the header when ``rows`` is empty.
    return pd.DataFrame(rows, columns=columns)


def main():
    """Process every PDB file in ./pdb and write complex_chain_sequences.csv."""
    from tqdm import tqdm

    input_dir = "./pdb"  # Input directory
    output_dir_cif = "./complex_chain_cifs"  # CIF output directory

    os.makedirs(output_dir_cif, exist_ok=True)

    # Sorted so the CSV row order does not depend on directory listing order.
    pdb_files = sorted(Path(input_dir).glob("*.pdb"))
    args = [(str(f), output_dir_cif) for f in pdb_files]

    with mp.Pool(processes=mp.cpu_count()) as pool:
        results = list(tqdm(
            pool.imap(process_single_pdb, args),
            total=len(pdb_files),
            desc="Processing PDB files"
        ))

    # Failed files come back as (None, None) and are left out.
    sequences_dict = {
        base_name: chain_sequences
        for base_name, chain_sequences in results
        if base_name is not None
    }

    df = build_sequence_table(sequences_dict)
    df.to_csv('complex_chain_sequences.csv', index=False)
    print("\nSequence information has been saved to complex_chain_sequences.csv")


if __name__ == "__main__":
    main()
"""Generate AlphaFold 3 input JSON files from the per-chain sequence table.

Source file: example/3_generate_json.py

Reads ``complex_chain_sequences.csv`` (one row per complex, one
``chain_<ID>_seq`` column per chain) and writes one ``<complex>.json`` per
row, pointing each chain at its per-chain mmCIF file as a template.
"""
import json
import os

import pandas as pd

_CHAIN_PREFIX = 'chain_'
_CHAIN_SUFFIX = '_seq'


def format_msa_sequence(sequence):
    """Return ``sequence`` as a one-record FASTA-style string named ``query``."""
    return f">query\n{sequence}\n"


def get_chain_sequences(row):
    """Get all non-empty chain sequences from one CSV row.

    Args:
        row: pandas Series whose index holds the CSV column names.

    Returns:
        List of ``(chain_id, sequence)`` tuples in column order, for every
        ``chain_<ID>_seq`` column whose value is neither NaN nor ``''``.
    """
    chain_sequences = []
    for col in row.index:
        if not (col.startswith(_CHAIN_PREFIX) and col.endswith(_CHAIN_SUFFIX)):
            continue
        sequence = row[col]
        if pd.isna(sequence) or sequence == '':
            continue
        # Strip prefix/suffix instead of splitting on '_' so that chain IDs
        # containing an underscore are kept whole.
        chain_id = col[len(_CHAIN_PREFIX):-len(_CHAIN_SUFFIX)]
        chain_sequences.append((chain_id, sequence))
    return chain_sequences


def _protein_entry(chain_id, sequence, cif_path):
    """Build the ``sequences`` entry for one protein chain."""
    indices = list(range(len(sequence)))
    return {
        "protein": {
            "id": chain_id,
            "sequence": sequence,
            "modifications": [],
            "unpairedMsa": format_msa_sequence(sequence),
            "pairedMsa": format_msa_sequence(sequence),
            "templates": [{
                "mmcifPath": cif_path,
                # The template is the chain itself: residue i maps to i.
                "queryIndices": indices,
                "templateIndices": list(indices),
            }],
        }
    }


def generate_json_files(csv_path, output_dir, cif_dir):
    """Generate one JSON input file per CSV row.

    Chains whose ``<complex>_chain_<ID>.cif`` file is missing from
    ``cif_dir`` are skipped with a warning; rows left without any chain
    produce no file.

    Args:
        csv_path: Path of the sequence table CSV.
        output_dir: Directory for the JSON files (created if needed).
        cif_dir: Directory holding the per-chain mmCIF files; it is written
            into the JSON as part of each ``mmcifPath``.

    Returns:
        Number of JSON files written.
    """
    os.makedirs(output_dir, exist_ok=True)

    df = pd.read_csv(csv_path)
    json_count = 0

    for _, row in df.iterrows():
        complex_name = row['complex']

        chain_sequences = get_chain_sequences(row)
        if not chain_sequences:
            print(f"Warning: {complex_name} has no valid chain sequences")
            continue

        sequences = []
        for chain_id, sequence in chain_sequences:
            cif_filename = f"{complex_name}_chain_{chain_id}.cif"
            cif_path = os.path.join(cif_dir, cif_filename)

            if not os.path.exists(cif_path):
                print(f"Warning: {cif_filename} does not exist")
                continue

            sequences.append(_protein_entry(chain_id, sequence, cif_path))

        if not sequences:
            print(f"Warning: {complex_name} has no valid sequence data")
            continue

        json_data = {
            "dialect": "alphafold3",
            "version": 1,
            "name": complex_name,
            "sequences": sequences,
            "modelSeeds": [10],
            "bondedAtomPairs": None,
            "userCCD": None
        }

        # No "_data" suffix: the JSON name has to match the H5 filename.
        output_filename = f"{complex_name}.json"
        output_path = os.path.join(output_dir, output_filename)

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=2)

        # Report the chains actually written, not the ones skipped above.
        chain_ids = [entry["protein"]["id"] for entry in sequences]
        print(f"Generated JSON file: {output_filename} (chains: {', '.join(chain_ids)})")
        json_count += 1

    print(f"\nComplete, generated {json_count} JSON files")
    return json_count


if __name__ == "__main__":
    csv_path = "./complex_chain_sequences.csv"  # CSV written by 1_extract_chains.py
    output_dir = "./complex_json_files"  # Output directory for JSON files
    # CIF directory written by 1_extract_chains.py. Made absolute because the
    # path is embedded in the JSON files as mmcifPath.
    cif_dir = os.path.abspath("./complex_chain_cifs")

    generate_json_files(csv_path, output_dir, cif_dir)
topo,scaff_class,description,kd_lb,kd_ub,low_conf,avid_doesnt_agree,avid_lb,avid_ub,lowest_conc,highest_conc,binder_4000_nm,binder_400_nm,target,binder_800_nm,one,SCORE:_x,af2_complex_rmsd,af2_monomer_rmsd,af2_target_rmsd,sasa,SCORE:_y,pae_binder,pae_interaction,pae_interaction1,pae_interaction2,pae_target,plddt_binder,plddt_target,plddt_total,time_x,ddg,contact_molecular_surface,ddg_norepack,ss_sc,score_per_res,mon_all_rmsd,mon_ca_rmsd,mon_plddt,global_lddt,interface_lddt,binder_lddt,time_y,rf2_pae_interaction,rf2_binder_lddt 2 | 3h,,bcov_v4_r4_ems_p1-15H-GBL-16H-GBL-16H_0129_0001_0001_0002_000000002_0001_0001_19_34_H_.._ems_p1-16H-GBL-16H-GABBL-16H_0045_0001_0001_0002_0001_0001_0001_0001,0.0,19.85538460557901,False,False,0.0,3991.816142372666,5.0,1000.0,True,True,IL7Ra,,1,SCORE:,1.885,0.562,0.357,1738.284,SCORE:,2.102,5.468999999999999,5.303999999999999,5.634,4.106,93.961,95.215,94.933,14.054,-64.37899999999999,504.437,-68.735,0.818,-3.851,1.719,0.591,94.812,0.980531,0.976447,0.981153,22.087951,21.828000000000003,0.989 3 | -------------------------------------------------------------------------------- /example/subset_data_with_metrics.csv: -------------------------------------------------------------------------------- 1 | topo,scaff_class,description,kd_lb,kd_ub,low_conf,avid_doesnt_agree,avid_lb,avid_ub,lowest_conc,highest_conc,binder_4000_nm,binder_400_nm,target,binder_800_nm,one,SCORE:_x,af2_complex_rmsd,af2_monomer_rmsd,af2_target_rmsd,sasa,SCORE:_y,pae_binder,pae_interaction,pae_interaction1,pae_interaction2,pae_target,plddt_binder,plddt_target,plddt_total,time_x,ddg,contact_molecular_surface,ddg_norepack,ss_sc,score_per_res,mon_all_rmsd,mon_ca_rmsd,mon_plddt,global_lddt,interface_lddt,binder_lddt,time_y,rf2_pae_interaction,rf2_binder_lddt,AF3Score_monomer_ca_plddt,AF3Score_monomer_pae,AF3Score_monomer_ptm,AF3Score_complex_ca_plddt,AF3Score_complex_pae,AF3Score_complex_ptm,AF3Score_complex_iptm,AF3Score_pae_interaction,AF3Score_ipae 2 | 
3h,,bcov_v4_r4_ems_p1-15H-GBL-16H-GBL-16H_0129_0001_0001_0002_000000002_0001_0001_19_34_H_.._ems_p1-16H-GBL-16H-GABBL-16H_0045_0001_0001_0002_0001_0001_0001_0001,0.0,19.85538460557901,False,False,0.0,3991.816142372666,5.0,1000.0,True,True,IL7Ra,,1,SCORE:,1.885,0.562,0.357,1738.284,SCORE:,2.102,5.468999999999999,5.303999999999999,5.634,4.106,93.961,95.215,94.933,14.054,-64.37899999999999,504.437,-68.735,0.818,-3.851,1.719,0.591,94.812,0.980531,0.976447,0.981153,22.087951,21.828000000000003,0.989,93.40089285714285,1.4386444708680144,0.8,95.07144578313253,4.306372477863261,0.9,0.89,5.620281273131014,2.4683658170914544 3 | -------------------------------------------------------------------------------- /fetch_databases.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2024 DeepMind Technologies Limited 3 | # 4 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | # 7 | # To request access to the AlphaFold 3 model parameters, follow the process set 8 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | # if received directly from Google. Use is subject to terms of use available at 10 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | 12 | set -euo pipefail 13 | 14 | readonly db_dir=${1:-$HOME/public_databases} 15 | 16 | for cmd in wget tar zstd ; do 17 | if ! command -v "${cmd}" > /dev/null 2>&1; then 18 | echo "${cmd} is not installed. Please install it." >&2; exit 1 
19 | fi 20 | done 21 | 22 | echo "Fetching databases to ${db_dir}" 23 | mkdir -p "${db_dir}" 24 | 25 | readonly SOURCE=https://storage.googleapis.com/alphafold-databases/v3.0 26 | 27 | echo "Start Fetching and Untarring 'pdb_2022_09_28_mmcif_files.tar'" 28 | wget --quiet --output-document=- \ 29 | "${SOURCE}/pdb_2022_09_28_mmcif_files.tar.zst" | \ 30 | tar --use-compress-program=zstd -xf - --directory="${db_dir}" & 31 | 32 | for NAME in mgy_clusters_2022_05.fa \ 33 | bfd-first_non_consensus_sequences.fasta \ 34 | uniref90_2022_05.fa uniprot_all_2021_04.fa \ 35 | pdb_seqres_2022_09_28.fasta \ 36 | rnacentral_active_seq_id_90_cov_80_linclust.fasta \ 37 | nt_rna_2023_02_23_clust_seq_id_90_cov_80_rep_seq.fasta \ 38 | rfam_14_9_clust_seq_id_90_cov_80_rep_seq.fasta ; do 39 | echo "Start Fetching '${NAME}'" 40 | wget --quiet --output-document=- "${SOURCE}/${NAME}.zst" | \ 41 | zstd --decompress > "${db_dir}/${NAME}" & 42 | done 43 | 44 | wait 45 | echo "Complete" 46 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "scikit_build_core", 4 | "pybind11", 5 | "cmake>=3.28", 6 | "ninja", 7 | "numpy", 8 | ] 9 | build-backend = "scikit_build_core.build" 10 | 11 | [project] 12 | name = "alphafold3" 13 | version = "3.0.0" 14 | requires-python = ">=3.11" 15 | readme = "README.md" 16 | license = {file = "LICENSE"} 17 | dependencies = [ 18 | "absl-py", 19 | "chex", 20 | "dm-haiku==0.0.13", 21 | "dm-tree", 22 | "jax==0.4.34", 23 | "jax[cuda12]==0.4.34", 24 | "jax-triton==0.2.0", 25 | "jaxtyping==0.2.34", 26 | "numpy", 27 | "rdkit==2024.3.5", 28 | "triton==3.1.0", 29 | "tqdm", 30 | "typeguard==2.13.3", 31 | "zstandard", 32 | ] 33 | 34 | [project.optional-dependencies] 35 | test = ["pytest>=6.0"] 36 | 37 | [tool.scikit-build] 38 | wheel.exclude = [ 39 | "**.pyx", 40 | "**/CMakeLists.txt", 41 | "**.cc", 42 | "**.h" 43 | 
] 44 | sdist.include = [ 45 | "LICENSE", 46 | "OUTPUT_TERMS_OF_USE.md", 47 | "WEIGHTS_PROHIBITED_USE_POLICY.md", 48 | "WEIGHTS_TERMS_OF_USE.md", 49 | ] 50 | 51 | [tool.cibuildwheel] 52 | build = "cp3*-manylinux_x86_64" 53 | manylinux-x86_64-image = "manylinux_2_28" 54 | 55 | [project.scripts] 56 | build_data = "alphafold3.build_data:build_data" 57 | -------------------------------------------------------------------------------- /src/alphafold3/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """An implementation of the inference pipeline of AlphaFold 3.""" 12 | -------------------------------------------------------------------------------- /src/alphafold3/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/__pycache__/build_data.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/__pycache__/build_data.cpython-311.pyc -------------------------------------------------------------------------------- 
/src/alphafold3/__pycache__/version.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/__pycache__/version.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/build_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Script for building intermediate data.""" 12 | 13 | from importlib import resources 14 | import pathlib 15 | import site 16 | 17 | import alphafold3.constants.converters 18 | from alphafold3.constants.converters import ccd_pickle_gen 19 | from alphafold3.constants.converters import chemical_component_sets_gen 20 | 21 | 22 | def build_data(): 23 | """Builds intermediate data.""" 24 | for site_path in site.getsitepackages(): 25 | path = pathlib.Path(site_path) / 'share/libcifpp/components.cif' 26 | print(path) 27 | if path.exists(): 28 | cif_path = path 29 | break 30 | else: 31 | raise ValueError('Could not find components.cif') 32 | 33 | out_root = resources.files(alphafold3.constants.converters) 34 | ccd_pickle_path = out_root.joinpath('ccd.pickle') 35 | chemical_component_sets_pickle_path = out_root.joinpath( 36 | 'chemical_component_sets.pickle' 37 | ) 38 | ccd_pickle_gen.main(['', str(cif_path), str(ccd_pickle_path)]) 39 | 
chemical_component_sets_gen.main( 40 | ['', str(chemical_component_sets_pickle_path)] 41 | ) 42 | -------------------------------------------------------------------------------- /src/alphafold3/common/__pycache__/base_config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/common/__pycache__/base_config.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/common/__pycache__/folding_input.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/common/__pycache__/folding_input.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/common/__pycache__/resources.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/common/__pycache__/resources.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/common/resources.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Load external resources, such as external tools or data resources.""" 12 | 13 | from collections.abc import Iterator 14 | import os 15 | import pathlib 16 | import typing 17 | from typing import BinaryIO, Final, Literal, TextIO 18 | 19 | from importlib import resources 20 | import alphafold3.common 21 | 22 | 23 | _DATA_ROOT: Final[pathlib.Path] = ( 24 | resources.files(alphafold3.common).joinpath('..').resolve() 25 | ) 26 | ROOT = _DATA_ROOT 27 | 28 | 29 | def filename(name: str | os.PathLike[str]) -> str: 30 | """Returns the absolute path to an external resource. 31 | 32 | Note that this calls resources.GetResourceFilename under the hood and hence 33 | causes par file unpacking, which might be unfriendly on diskless machines. 34 | 35 | 36 | Args: 37 | name: the name of the resource corresponding to its path relative to the 38 | root of the repository. 39 | """ 40 | return (_DATA_ROOT / name).as_posix() 41 | 42 | 43 | @typing.overload 44 | def open_resource( 45 | name: str | os.PathLike[str], mode: Literal['r', 'rt'] = 'rt' 46 | ) -> TextIO: 47 | ... 48 | 49 | 50 | @typing.overload 51 | def open_resource( 52 | name: str | os.PathLike[str], mode: Literal['rb'] 53 | ) -> BinaryIO: 54 | ... 55 | 56 | 57 | def open_resource( 58 | name: str | os.PathLike[str], mode: str = 'rb' 59 | ) -> TextIO | BinaryIO: 60 | """Returns an open file object for the named resource. 61 | 62 | Args: 63 | name: the name of the resource corresponding to its path relative to the 64 | root of the repository. 65 | mode: the mode to use when opening the file. 
66 | """ 67 | return (_DATA_ROOT / name).open(mode) 68 | 69 | 70 | def get_resource_dir(path: str | os.PathLike[str]) -> os.PathLike[str]: 71 | return _DATA_ROOT / path 72 | 73 | 74 | def walk(path: str) -> Iterator[tuple[str, list[str], list[str]]]: 75 | """Walks the directory tree of resources similar to os.walk.""" 76 | return os.walk((_DATA_ROOT / path).as_posix()) 77 | -------------------------------------------------------------------------------- /src/alphafold3/common/testing/__pycache__/data.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/common/testing/__pycache__/data.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/common/testing/data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Module that provides an abstraction for accessing test data.""" 12 | 13 | import os 14 | import pathlib 15 | from typing import Literal, overload 16 | 17 | from absl.testing import absltest 18 | 19 | 20 | class Data: 21 | """Provides an abstraction for accessing test data.""" 22 | 23 | def __init__(self, data_dir: os.PathLike[str] | str): 24 | """Initiailizes data wrapper, providing users with high level data access. 
25 | 26 | Args: 27 | data_dir: Directory containing test data. 28 | """ 29 | self._data_dir = pathlib.Path(data_dir) 30 | 31 | def path(self, data_name: str | os.PathLike[str] | None = None) -> str: 32 | """Returns the path to a given test data. 33 | 34 | Args: 35 | data_name: the name of the test data file relative to data_dir. If not 36 | set, this will return the absolute path to the data directory. 37 | """ 38 | data_dir_path = ( 39 | pathlib.Path(absltest.get_default_test_srcdir()) / self._data_dir 40 | ) 41 | 42 | if data_name: 43 | return str(data_dir_path / data_name) 44 | 45 | return str(data_dir_path) 46 | 47 | @overload 48 | def load( 49 | self, data_name: str | os.PathLike[str], mode: Literal['rt'] = 'rt' 50 | ) -> str: 51 | ... 52 | 53 | @overload 54 | def load( 55 | self, data_name: str | os.PathLike[str], mode: Literal['rb'] = 'rb' 56 | ) -> bytes: 57 | ... 58 | 59 | def load( 60 | self, data_name: str | os.PathLike[str], mode: str = 'rt' 61 | ) -> str | bytes: 62 | """Returns the contents of a given test data. 63 | 64 | Args: 65 | data_name: the name of the test data file relative to data_dir. 66 | mode: the mode in which to read the data file. Defaults to text ('rt'). 
67 | """ 68 | with open(self.path(data_name), mode=mode) as f: 69 | return f.read() 70 | -------------------------------------------------------------------------------- /src/alphafold3/constants/__pycache__/atom_types.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/atom_types.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/constants/__pycache__/chemical_component_sets.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/chemical_component_sets.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/constants/__pycache__/chemical_components.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/chemical_components.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/constants/__pycache__/mmcif_names.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/mmcif_names.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/constants/__pycache__/periodic_table.cpython-311.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/periodic_table.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/constants/__pycache__/residue_names.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/residue_names.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/constants/__pycache__/side_chains.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/side_chains.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/constants/chemical_component_sets.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Sets of chemical components.""" 12 | 13 | import pickle 14 | from typing import Final 15 | 16 | from alphafold3.common import resources 17 | 18 | 19 | _CCD_SETS_CCD_PICKLE_FILE = resources.filename( 20 | resources.ROOT / 'constants/converters/chemical_component_sets.pickle' 21 | ) 22 | 23 | _CCD_SET = pickle.load(open(_CCD_SETS_CCD_PICKLE_FILE, 'rb')) 24 | 25 | # Glycan (or 'Saccharide') ligands. 26 | # _chem_comp.type containing 'saccharide' and 'linking' (when lower-case). 27 | GLYCAN_LINKING_LIGANDS: Final[frozenset[str]] = _CCD_SET['glycans_linking'] 28 | 29 | # _chem_comp.type containing 'saccharide' and not 'linking' (when lower-case). 30 | GLYCAN_OTHER_LIGANDS: Final[frozenset[str]] = _CCD_SET['glycans_other'] 31 | 32 | # Each of these molecules appears in over 1k PDB structures, are used to 33 | # facilitate crystallization conditions, but do not have biological relevance. 
34 | COMMON_CRYSTALLIZATION_AIDS: Final[frozenset[str]] = frozenset({ 35 | 'SO4', 'GOL', 'EDO', 'PO4', 'ACT', 'PEG', 'DMS', 'TRS', 'PGE', 'PG4', 'FMT', 36 | 'EPE', 'MPD', 'MES', 'CD', 'IOD', 37 | }) # pyformat: disable 38 | -------------------------------------------------------------------------------- /src/alphafold3/constants/converters/__pycache__/ccd_pickle_gen.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/converters/__pycache__/ccd_pickle_gen.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/constants/converters/__pycache__/chemical_component_sets_gen.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/converters/__pycache__/chemical_component_sets_gen.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/constants/converters/ccd_pickle_gen.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Reads Chemical Components gz file and generates a CCD pickle file.""" 12 | 13 | from collections.abc import Sequence 14 | import gzip 15 | import pickle 16 | import sys 17 | 18 | from alphafold3.cpp import cif_dict 19 | import tqdm 20 | 21 | 22 | def main(argv: Sequence[str]) -> None: 23 | if len(argv) != 3: 24 | raise ValueError('Must specify input_file components.cif and output_file') 25 | 26 | _, input_file, output_file = argv 27 | 28 | print(f'Parsing {input_file}', flush=True) 29 | if input_file.endswith('.gz'): 30 | opener = gzip.open 31 | else: 32 | opener = open 33 | 34 | with opener(input_file, 'rb') as f: 35 | whole_file = f.read() 36 | result = { 37 | key: {k: tuple(v) for k, v in value.items()} 38 | for key, value in tqdm.tqdm( 39 | cif_dict.parse_multi_data_cif(whole_file).items() 40 | ) 41 | } 42 | assert len(result) == whole_file.count(b'data_') 43 | 44 | print(f'Writing {output_file}', flush=True) 45 | with open(output_file, 'wb') as f: 46 | pickle.dump(result, f, protocol=pickle.HIGHEST_PROTOCOL) 47 | print('Done', flush=True) 48 | 49 | if __name__ == '__main__': 50 | main(sys.argv) 51 | -------------------------------------------------------------------------------- /src/alphafold3/constants/converters/chemical_component_sets.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/converters/chemical_component_sets.pickle -------------------------------------------------------------------------------- /src/alphafold3/constants/converters/chemical_component_sets_gen.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC 
BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Script for updating chemical_component_sets.py.""" 12 | 13 | from collections.abc import Mapping, Sequence 14 | import pathlib 15 | import pickle 16 | import re 17 | import sys 18 | 19 | from alphafold3.common import resources 20 | import tqdm 21 | 22 | 23 | _CCD_PICKLE_FILE = resources.filename( 24 | 'constants/converters/ccd.pickle' 25 | ) 26 | 27 | 28 | def find_ions_and_glycans_in_ccd( 29 | ccd: Mapping[str, Mapping[str, Sequence[str]]], 30 | ) -> dict[str, frozenset[str]]: 31 | """Finds glycans and ions in all version of CCD.""" 32 | glycans_linking = [] 33 | glycans_other = [] 34 | ions = [] 35 | for name, comp in tqdm.tqdm(ccd.items()): 36 | if name == 'UNX': 37 | continue # Skip "unknown atom or ion". 38 | comp_type = comp['_chem_comp.type'][0].lower() 39 | # Glycans have the type 'saccharide'. 40 | if re.findall(r'\bsaccharide\b', comp_type): 41 | # Separate out linking glycans from others. 42 | if 'linking' in comp_type: 43 | glycans_linking.append(name) 44 | else: 45 | glycans_other.append(name) 46 | 47 | # Ions have the word 'ion' in their name. 
48 | comp_name = comp['_chem_comp.name'][0].lower() 49 | if re.findall(r'\bion\b', comp_name): 50 | ions.append(name) 51 | result = dict( 52 | glycans_linking=frozenset(glycans_linking), 53 | glycans_other=frozenset(glycans_other), 54 | ions=frozenset(ions), 55 | ) 56 | 57 | return result 58 | 59 | 60 | def main(argv: Sequence[str]) -> None: 61 | if len(argv) != 2: 62 | raise ValueError( 63 | 'Directory to write to must be specified as a command-line arguments.' 64 | ) 65 | 66 | print(f'Loading {_CCD_PICKLE_FILE}', flush=True) 67 | with open(_CCD_PICKLE_FILE, 'rb') as f: 68 | ccd: Mapping[str, Mapping[str, Sequence[str]]] = pickle.load(f) 69 | output_path = pathlib.Path(argv[1]) 70 | output_path.parent.mkdir(exist_ok=True) 71 | print('Finding ions and glycans', flush=True) 72 | result = find_ions_and_glycans_in_ccd(ccd) 73 | print(f'writing to {output_path}', flush=True) 74 | with output_path.open('wb') as f: 75 | pickle.dump(result, f) 76 | print('Done', flush=True) 77 | 78 | 79 | if __name__ == '__main__': 80 | main(sys.argv) 81 | -------------------------------------------------------------------------------- /src/alphafold3/constants/side_chains.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
# Use is subject to terms of use available at
# https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md

"""Constants associated with side chains."""

from collections.abc import Mapping, Sequence
import itertools

# Format: The list for each AA type contains chi1, chi2, chi3, chi4 in
# this order (or a relevant subset from chi1 onwards). ALA and GLY don't have
# chi angles so their chi angle lists are empty.
CHI_ANGLES_ATOMS: Mapping[str, Sequence[tuple[str, ...]]] = {
    'ALA': [],
    # Chi5 in arginine is always 0 +- 5 degrees, so ignore it.
    'ARG': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD'),
        ('CB', 'CG', 'CD', 'NE'),
        ('CG', 'CD', 'NE', 'CZ'),
    ],
    'ASN': [('N', 'CA', 'CB', 'CG'), ('CA', 'CB', 'CG', 'OD1')],
    'ASP': [('N', 'CA', 'CB', 'CG'), ('CA', 'CB', 'CG', 'OD1')],
    'CYS': [('N', 'CA', 'CB', 'SG')],
    'GLN': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD'),
        ('CB', 'CG', 'CD', 'OE1'),
    ],
    'GLU': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD'),
        ('CB', 'CG', 'CD', 'OE1'),
    ],
    'GLY': [],
    'HIS': [('N', 'CA', 'CB', 'CG'), ('CA', 'CB', 'CG', 'ND1')],
    'ILE': [('N', 'CA', 'CB', 'CG1'), ('CA', 'CB', 'CG1', 'CD1')],
    'LEU': [('N', 'CA', 'CB', 'CG'), ('CA', 'CB', 'CG', 'CD1')],
    'LYS': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD'),
        ('CB', 'CG', 'CD', 'CE'),
        ('CG', 'CD', 'CE', 'NZ'),
    ],
    'MET': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'SD'),
        ('CB', 'CG', 'SD', 'CE'),
    ],
    'PHE': [('N', 'CA', 'CB', 'CG'), ('CA', 'CB', 'CG', 'CD1')],
    'PRO': [('N', 'CA', 'CB', 'CG'), ('CA', 'CB', 'CG', 'CD')],
    'SER': [('N', 'CA', 'CB', 'OG')],
    'THR': [('N', 'CA', 'CB', 'OG1')],
    'TRP': [('N', 'CA', 'CB', 'CG'), ('CA', 'CB', 'CG', 'CD1')],
    'TYR': [('N', 'CA', 'CB', 'CG'), ('CA', 'CB', 'CG', 'CD1')],
    'VAL': [('N', 'CA', 'CB', 'CG1')],
}


def _build_chi_groups_for_atom() -> dict[tuple[str, str], list[tuple[int, int]]]:
  """Inverts CHI_ANGLES_ATOMS into a per-atom lookup.

  Built inside a function so that the loop variables do not end up as
  module-level names.
  """
  groups: dict[tuple[str, str], list[tuple[int, int]]] = {}
  for res_name, chi_groups in CHI_ANGLES_ATOMS.items():
    for chi_group_i, chi_group in enumerate(chi_groups):
      for atom_i, atom in enumerate(chi_group):
        groups.setdefault((res_name, atom), []).append((chi_group_i, atom_i))
  return groups


# Mapping from (residue_name, atom_name) pairs to the atom's chi group index
# and atom index within that group. An atom that takes part in several chi
# angles has one (chi_group_index, atom_index) entry per angle.
CHI_GROUPS_FOR_ATOM: Mapping[tuple[str, str], Sequence[tuple[int, int]]] = (
    _build_chi_groups_for_atom()
)

MAX_NUM_CHI_ANGLES: int = 4
ATOMS_PER_CHI_ANGLE: int = 4

# The set of atoms for each AA type that are involved in chi angle
# calculations.
CHI_ATOM_SETS: Mapping[str, set[str]] = {
    residue_name: set(itertools.chain(*atoms))
    for residue_name, atoms in CHI_ANGLES_ATOMS.items()
}

# If chi angles given in fixed-length array, this matrix determines how to mask
# them for each AA type. The rows are ordered alphabetically by three-letter
# residue code (ALA ... VAL), i.e. in the key order of CHI_ANGLES_ATOMS.
# One row per residue type, in the key order of CHI_ANGLES_ATOMS (named in the
# trailing comment of each row). Each row has one entry per chi angle, chi1
# first: 1.0 if the residue has that chi angle, 0.0 otherwise.
CHI_ANGLES_MASK: Sequence[Sequence[float]] = (
    (0.0, 0.0, 0.0, 0.0),  # ALA
    (1.0, 1.0, 1.0, 1.0),  # ARG
    (1.0, 1.0, 0.0, 0.0),  # ASN
    (1.0, 1.0, 0.0, 0.0),  # ASP
    (1.0, 0.0, 0.0, 0.0),  # CYS
    (1.0, 1.0, 1.0, 0.0),  # GLN
    (1.0, 1.0, 1.0, 0.0),  # GLU
    (0.0, 0.0, 0.0, 0.0),  # GLY
    (1.0, 1.0, 0.0, 0.0),  # HIS
    (1.0, 1.0, 0.0, 0.0),  # ILE
    (1.0, 1.0, 0.0, 0.0),  # LEU
    (1.0, 1.0, 1.0, 1.0),  # LYS
    (1.0, 1.0, 1.0, 0.0),  # MET
    (1.0, 1.0, 0.0, 0.0),  # PHE
    (1.0, 1.0, 0.0, 0.0),  # PRO
    (1.0, 0.0, 0.0, 0.0),  # SER
    (1.0, 0.0, 0.0, 0.0),  # THR
    (1.0, 1.0, 0.0, 0.0),  # TRP
    (1.0, 1.0, 0.0, 0.0),  # TYR
    (1.0, 0.0, 0.0, 0.0),  # VAL
)

--------------------------------------------------------------------------------
/src/alphafold3/cpp.cc:
--------------------------------------------------------------------------------
// Copyright 2024 DeepMind Technologies Limited
//
// AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
// this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
//
// To request access to the AlphaFold 3 model parameters, follow the process set
// out at https://github.com/google-deepmind/alphafold3. You may only use these
// if received directly from Google.
// Use is subject to terms of use available at
// https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md

#include "alphafold3/data/cpp/msa_profile_pybind.h"
#include "alphafold3/model/mkdssp_pybind.h"
#include "alphafold3/parsers/cpp/cif_dict_pybind.h"
#include "alphafold3/parsers/cpp/fasta_iterator_pybind.h"
#include "alphafold3/parsers/cpp/msa_conversion_pybind.h"
#include "alphafold3/structure/cpp/aggregation_pybind.h"
#include "alphafold3/structure/cpp/membership_pybind.h"
#include "alphafold3/structure/cpp/mmcif_atom_site_pybind.h"
#include "alphafold3/structure/cpp/mmcif_layout_pybind.h"
#include "alphafold3/structure/cpp/mmcif_struct_conn_pybind.h"
#include "alphafold3/structure/cpp/mmcif_utils_pybind.h"
#include "alphafold3/structure/cpp/string_array_pybind.h"
#include "pybind11/pybind11.h"

namespace alphafold3 {
namespace {

// Include all modules as submodules to simplify building.
// Defines the Python extension module `cpp`. Each Register* function receives
// a freshly created submodule and adds its bindings to it, so Python code
// reaches them as e.g. `cpp.cif_dict` or `cpp.msa_profile`.
PYBIND11_MODULE(cpp, m) {
  RegisterModuleCifDict(m.def_submodule("cif_dict"));
  RegisterModuleFastaIterator(m.def_submodule("fasta_iterator"));
  RegisterModuleMsaConversion(m.def_submodule("msa_conversion"));
  RegisterModuleMmcifLayout(m.def_submodule("mmcif_layout"));
  RegisterModuleMmcifStructConn(m.def_submodule("mmcif_struct_conn"));
  RegisterModuleMembership(m.def_submodule("membership"));
  RegisterModuleMmcifUtils(m.def_submodule("mmcif_utils"));
  RegisterModuleAggregation(m.def_submodule("aggregation"));
  RegisterModuleStringArray(m.def_submodule("string_array"));
  RegisterModuleMmcifAtomSite(m.def_submodule("mmcif_atom_site"));
  RegisterModuleMkdssp(m.def_submodule("mkdssp"));
  RegisterModuleMsaProfile(m.def_submodule("msa_profile"));
}

}  // namespace
}  // namespace alphafold3

--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/featurisation.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/featurisation.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/msa.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/msa.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/msa_config.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/msa_config.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/__pycache__/msa_features.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/msa_features.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/__pycache__/parsers.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/parsers.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/__pycache__/pipeline.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/pipeline.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/__pycache__/structure_stores.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/structure_stores.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/__pycache__/template_realign.cpython-311.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/template_realign.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/__pycache__/templates.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/templates.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/cpp/msa_profile_pybind.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 DeepMind Technologies Limited 2 | // 3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | // 6 | // To request access to the AlphaFold 3 model parameters, follow the process set 7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | // if received directly from Google. 
Use is subject to terms of use available at 9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | #include 12 | 13 | #include "absl/strings/str_cat.h" 14 | #include "pybind11/cast.h" 15 | #include "pybind11/numpy.h" 16 | #include "pybind11/pybind11.h" 17 | 18 | namespace { 19 | 20 | namespace py = pybind11; 21 | 22 | py::array_t ComputeMsaProfile( 23 | const py::array_t& msa, int num_residue_types) { 24 | if (msa.size() == 0) { 25 | throw py::value_error("The MSA must be non-empty."); 26 | } 27 | if (msa.ndim() != 2) { 28 | throw py::value_error(absl::StrCat("The MSA must be rectangular, got ", 29 | msa.ndim(), "-dimensional MSA array.")); 30 | } 31 | const int msa_depth = msa.shape()[0]; 32 | const int sequence_length = msa.shape()[1]; 33 | 34 | py::array_t profile({sequence_length, num_residue_types}); 35 | std::fill(profile.mutable_data(), profile.mutable_data() + profile.size(), 36 | 0.0f); 37 | auto profile_unchecked = profile.mutable_unchecked<2>(); 38 | 39 | const double normalized_count = 1.0 / msa_depth; 40 | const int* msa_it = msa.data(); 41 | for (int row_index = 0; row_index < msa_depth; ++row_index) { 42 | for (int column_index = 0; column_index < sequence_length; ++column_index) { 43 | const int residue_code = *(msa_it++); 44 | if (residue_code < 0 || residue_code >= num_residue_types) { 45 | throw py::value_error( 46 | absl::StrCat("All residue codes must be positive and smaller than " 47 | "num_residue_types ", 48 | num_residue_types, ", got ", residue_code)); 49 | } 50 | profile_unchecked(column_index, residue_code) += normalized_count; 51 | } 52 | } 53 | return profile; 54 | } 55 | 56 | constexpr char kComputeMsaProfileDoc[] = R"( 57 | Computes MSA profile for the given encoded MSA. 58 | 59 | Args: 60 | msa: A Numpy array of shape (num_msa, num_res) with the integer coded MSA. 61 | num_residue_types: Integer that determines the number of unique residue types. 
62 | This will determine the shape of the output profile. 63 | 64 | Returns: 65 | A float Numpy array of shape (num_res, num_residue_types) with residue 66 | frequency (residue type count normalized by MSA depth) for every column of the 67 | MSA. 68 | )"; 69 | 70 | } // namespace 71 | 72 | namespace alphafold3 { 73 | 74 | void RegisterModuleMsaProfile(pybind11::module m) { 75 | m.def("compute_msa_profile", &ComputeMsaProfile, py::arg("msa"), 76 | py::arg("num_residue_types"), py::doc(kComputeMsaProfileDoc + 1)); 77 | } 78 | 79 | } // namespace alphafold3 80 | -------------------------------------------------------------------------------- /src/alphafold3/data/cpp/msa_profile_pybind.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. 
#ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_DATA_PYTHON_MSA_PROFILE_PYBIND_H_
#define ALPHAFOLD3_SRC_ALPHAFOLD3_DATA_PYTHON_MSA_PROFILE_PYBIND_H_

#include "pybind11/pybind11.h"

namespace alphafold3 {

// Registers the MSA profile bindings (the Python-visible function
// `compute_msa_profile`) on the given pybind11 module `m`.
void RegisterModuleMsaProfile(pybind11::module m);

}  // namespace alphafold3

#endif  // ALPHAFOLD3_SRC_ALPHAFOLD3_DATA_PYTHON_MSA_PROFILE_PYBIND_H_
def validate_fold_input(fold_input: folding_input.Input):
  """Checks that the fold input carries the data featurisation relies on.

  Protein chains need an unpaired MSA, a paired MSA and templates; RNA chains
  need an unpaired MSA. Chains are numbered from 1 in the error messages.

  Args:
    fold_input: The folding input to check.

  Raises:
    ValueError: On the first chain found with a missing (None) field.
  """
  for position, protein in enumerate(fold_input.protein_chains, start=1):
    if protein.unpaired_msa is None:
      raise ValueError(f'Protein chain {position} is missing unpaired MSA.')
    if protein.paired_msa is None:
      raise ValueError(f'Protein chain {position} is missing paired MSA.')
    if protein.templates is None:
      raise ValueError(f'Protein chain {position} is missing Templates.')
  for position, rna in enumerate(fold_input.rna_chains, start=1):
    if rna.unpaired_msa is None:
      raise ValueError(f'RNA chain {position} is missing unpaired MSA.')


def featurise_input(
    fold_input: folding_input.Input,
    ccd: chemical_components.Ccd,
    buckets: Sequence[int] | None,
    max_template_date: datetime.date | None = None,
    verbose: bool = False,
) -> Sequence[features.BatchDict]:
  """Featurise the folding input, once per random seed.

  Args:
    fold_input: The input to featurise.
    ccd: The chemical components dictionary.
    buckets: Bucket sizes to pad the data to, to avoid excessive re-compilation
      of the model. If None, calculate the appropriate bucket size from the
      number of tokens. If not None, must be a sequence of at least one integer,
      in strictly increasing order. Will raise an error if the number of tokens
      is more than the largest bucket size.
    max_template_date: Optional max template date to prevent data leakage in
      validation.
    verbose: Whether to print progress messages.

  Returns:
    A featurised batch for each rng_seed in the input, in seed order.

  Raises:
    ValueError: If the input fails `validate_fold_input`.
  """
  validate_fold_input(fold_input)

  # The pipeline is built once and shared by every seed.
  pipeline_config = pipeline.WholePdbPipeline.Config(
      buckets=buckets, max_template_date=max_template_date
  )
  data_pipeline = pipeline.WholePdbPipeline(config=pipeline_config)

  def _featurise(seed):
    """Runs the pipeline for one seed, optionally reporting the timing."""
    started_at = time.time()
    if verbose:
      print(f'Featurising {fold_input.name} with rng_seed {seed}.')
    batch = data_pipeline.process_item(
        fold_input=fold_input,
        ccd=ccd,
        random_state=np.random.RandomState(seed),
        random_seed=seed,
    )
    if verbose:
      print(
          f'Featurising {fold_input.name} with rng_seed {seed} '
          f'took {time.time() - started_at:.2f} seconds.'
      )
    return batch

  return [_featurise(seed) for seed in fold_input.rng_seeds]
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Utilities for extracting identifiers from MSA sequence descriptions.""" 12 | 13 | import dataclasses 14 | import re 15 | 16 | 17 | # Sequences coming from UniProtKB database come in the 18 | # `db|UniqueIdentifier|EntryName` format, e.g. `tr|A0A146SKV9|A0A146SKV9_FUNHE` 19 | # or `sp|P0C2L1|A3X1_LOXLA` (for TREMBL/Swiss-Prot respectively). 20 | _UNIPROT_PATTERN = re.compile( 21 | r""" 22 | ^ 23 | # UniProtKB/TrEMBL or UniProtKB/Swiss-Prot 24 | (?:tr|sp) 25 | \| 26 | # A primary accession number of the UniProtKB entry. 27 | (?P[A-Za-z0-9]{6,10}) 28 | # Occasionally there is a _0 or _1 isoform suffix, which we ignore. 29 | (?:_\d)? 30 | \| 31 | # TREMBL repeats the accession ID here. Swiss-Prot has a mnemonic 32 | # protein ID code. 33 | (?:[A-Za-z0-9]+) 34 | _ 35 | # A mnemonic species identification code. 36 | (?P([A-Za-z0-9]){1,5}) 37 | # Small BFD uses a final value after an underscore, which we ignore. 38 | (?:_\d+)? 39 | $ 40 | """, 41 | re.VERBOSE, 42 | ) 43 | 44 | 45 | @dataclasses.dataclass(frozen=True) 46 | class Identifiers: 47 | species_id: str = '' 48 | 49 | 50 | def _parse_sequence_identifier(msa_sequence_identifier: str) -> Identifiers: 51 | """Gets species from an msa sequence identifier. 52 | 53 | The sequence identifier has the format specified by 54 | _UNIPROT_TREMBL_ENTRY_NAME_PATTERN or _UNIPROT_SWISSPROT_ENTRY_NAME_PATTERN. 55 | An example of a sequence identifier: `tr|A0A146SKV9|A0A146SKV9_FUNHE` 56 | 57 | Args: 58 | msa_sequence_identifier: a sequence identifier. 59 | 60 | Returns: 61 | An `Identifiers` instance with species_id. These 62 | can be empty in the case where no identifier was found. 
63 | """ 64 | matches = re.search(_UNIPROT_PATTERN, msa_sequence_identifier.strip()) 65 | if matches: 66 | return Identifiers(species_id=matches.group('SpeciesIdentifier')) 67 | return Identifiers() 68 | 69 | 70 | def _extract_sequence_identifier(description: str) -> str | None: 71 | """Extracts sequence identifier from description. Returns None if no match.""" 72 | split_description = description.split() 73 | if split_description: 74 | return split_description[0].partition('/')[0] 75 | else: 76 | return None 77 | 78 | 79 | def get_identifiers(description: str) -> Identifiers: 80 | """Computes extra MSA features from the description.""" 81 | sequence_identifier = _extract_sequence_identifier(description) 82 | if sequence_identifier is None: 83 | return Identifiers() 84 | else: 85 | return _parse_sequence_identifier(sequence_identifier) 86 | -------------------------------------------------------------------------------- /src/alphafold3/data/msa_store.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
class MsaProvider(Protocol):
  """Interface for providing Multiple Sequence Alignments.

  This is a `typing.Protocol`, so any callable with a matching `__call__`
  signature satisfies it; implementations do not have to inherit from it.
  """

  def __call__(
      self,
      query_sequence: str,
      chain_polymer_type: str,
  ) -> tuple[msa.Msa, MsaErrors]:
    """Retrieve MSA for the given polymer query_sequence.

    Args:
      query_sequence: The residue sequence of the polymer to search for.
      chain_polymer_type: The polymer type of the query_sequence. This must
        match the chain_polymer_type of the provider.

    Returns:
      A tuple containing the MSA and MsaErrors. MsaErrors is a Sequence
      containing a tuple for each msa_query that failed. Each tuple contains
      the failing query and the associated error message.
    """
59 | ) 60 | return ( 61 | msa.Msa.from_empty( 62 | query_sequence=query_sequence, 63 | chain_poly_type=self._chain_polymer_type, 64 | ), 65 | (), 66 | ) 67 | -------------------------------------------------------------------------------- /src/alphafold3/data/structure_stores.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Library for loading structure data from various sources.""" 12 | 13 | from collections.abc import Mapping, Sequence 14 | import functools 15 | import os 16 | import pathlib 17 | import tarfile 18 | 19 | 20 | class NotFoundError(KeyError): 21 | """Raised when the structure store doesn't contain the requested target.""" 22 | 23 | 24 | class StructureStore: 25 | """Handles the retrieval of mmCIF files from a filesystem.""" 26 | 27 | def __init__( 28 | self, 29 | structures: str | os.PathLike[str] | Mapping[str, str], 30 | ): 31 | """Initialises the instance. 32 | 33 | Args: 34 | structures: Path of the directory where the mmCIF files are or a Mapping 35 | from target name to mmCIF string. 
36 | """ 37 | if isinstance(structures, Mapping): 38 | self._structure_mapping = structures 39 | self._structure_path = None 40 | self._structure_tar = None 41 | else: 42 | self._structure_mapping = None 43 | path_str = os.fspath(structures) 44 | if path_str.endswith('.tar'): 45 | self._structure_tar = tarfile.open(path_str, 'r') 46 | self._structure_path = None 47 | else: 48 | self._structure_path = pathlib.Path(structures) 49 | self._structure_tar = None 50 | 51 | @functools.cached_property 52 | def _tar_members(self) -> Mapping[str, tarfile.TarInfo]: 53 | assert self._structure_tar is not None 54 | return { 55 | path.stem: tarinfo 56 | for tarinfo in self._structure_tar.getmembers() 57 | if tarinfo.isfile() 58 | and (path := pathlib.Path(tarinfo.path.lower())).suffix == '.cif' 59 | } 60 | 61 | def get_mmcif_str(self, target_name: str) -> str: 62 | """Returns an mmCIF for a given `target_name`. 63 | 64 | Args: 65 | target_name: Name specifying the target mmCIF. 66 | 67 | Raises: 68 | NotFoundError: If the target is not found. 
69 | """ 70 | if self._structure_mapping is not None: 71 | try: 72 | return self._structure_mapping[target_name] 73 | except KeyError as e: 74 | raise NotFoundError(f'{target_name=} not found') from e 75 | 76 | if self._structure_tar is not None: 77 | try: 78 | member = self._tar_members[target_name] 79 | if struct_file := self._structure_tar.extractfile(member): 80 | return struct_file.read().decode() 81 | else: 82 | raise NotFoundError(f'{target_name=} not found') 83 | except KeyError: 84 | raise NotFoundError(f'{target_name=} not found') from None 85 | 86 | filepath = self._structure_path / f'{target_name}.cif' 87 | try: 88 | return filepath.read_text() 89 | except FileNotFoundError as e: 90 | raise NotFoundError(f'{target_name=} not found at {filepath=}') from e 91 | 92 | def target_names(self) -> Sequence[str]: 93 | """Returns all targets in the store.""" 94 | if self._structure_mapping is not None: 95 | return [*self._structure_mapping.keys()] 96 | elif self._structure_tar is not None: 97 | return sorted(self._tar_members.keys()) 98 | elif self._structure_path is not None: 99 | return sorted([path.stem for path in self._structure_path.glob('*.cif')]) 100 | return () 101 | -------------------------------------------------------------------------------- /src/alphafold3/data/template_store.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
class TemplateFeatureProvider(Protocol):
  """Interface for providing Template Features.

  This is a `typing.Protocol`, so any callable with a matching `__call__`
  signature satisfies it; implementations do not have to inherit from it.
  """

  def __call__(
      self,
      sequence: str,
      release_date: datetime.date | None,
      include_ligand_features: bool = True,
  ) -> TemplateFeatures:
    """Retrieve template features for the given sequence and release_date.

    Args:
      sequence: The residue sequence of the query.
      release_date: The release_date of the template query, this is used to
        filter templates for training, ensuring that they do not leak structure
        information from the future.
      include_ligand_features: Whether to include ligand features.

    Returns:
      Template features: A mapping of template feature labels to features, which
        may be numpy arrays, bytes objects, or for the special case of label
        `ligand_features`, a nested feature map of labels to numpy arrays.

    Raises:
      TemplateRetrievalError: If the template features were not found.
        NOTE(review): this exception is neither defined nor imported in this
        module -- confirm where implementations take it from.
    """
-------------------------------------------------------------------------------- /src/alphafold3/data/tools/__pycache__/nhmmer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/nhmmer.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/tools/__pycache__/rdkit_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/rdkit_utils.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/tools/__pycache__/subprocess_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/subprocess_utils.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/data/tools/msa_tool.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
@dataclasses.dataclass(frozen=True, slots=True, kw_only=True)
class MsaToolResult:
  """The result of a MSA tool query.

  Instances are immutable (`frozen=True`) and must be built with keyword
  arguments (`kw_only=True`).
  """

  # Presumably the sequence that was passed to `MsaTool.query` -- confirm
  # against the concrete tools.
  target_sequence: str
  # Presumably the e-value setting the tool ran with -- TODO confirm.
  e_value: float
  # Presumably the resulting alignment in A3M text format -- TODO confirm.
  a3m: str


class MsaTool(Protocol):
  """Interface for MSA tools.

  This is a `typing.Protocol`, so any object with a matching `query` method
  satisfies it; implementations do not have to inherit from it.
  """

  def query(self, target_sequence: str) -> MsaToolResult:
    """Runs the MSA tool on the target sequence."""
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Helper functions for launching external tools.""" 12 | 13 | from collections.abc import Sequence 14 | import os 15 | import subprocess 16 | import time 17 | from typing import Any 18 | 19 | from absl import logging 20 | 21 | 22 | def create_query_fasta_file(sequence: str, path: str, linewidth: int = 80): 23 | """Creates a fasta file with the sequence with line width limit.""" 24 | with open(path, 'w') as f: 25 | f.write('>query\n') 26 | 27 | i = 0 28 | while i < len(sequence): 29 | f.write(f'{sequence[i:(i + linewidth)]}\n') 30 | i += linewidth 31 | 32 | 33 | def check_binary_exists(path: str, name: str) -> None: 34 | """Checks if a binary exists on the given path and raises otherwise.""" 35 | if not os.path.exists(path): 36 | raise RuntimeError(f'{name} binary not found at {path}') 37 | 38 | 39 | def run( 40 | cmd: Sequence[str], 41 | cmd_name: str, 42 | log_on_process_error: bool = False, 43 | log_stderr: bool = False, 44 | log_stdout: bool = False, 45 | max_out_streams_len: int | None = 500_000, 46 | **run_kwargs, 47 | ) -> subprocess.CompletedProcess[Any]: 48 | """Launches a subprocess, times it, and checks for errors. 49 | 50 | Args: 51 | cmd: Command to launch. 52 | cmd_name: Human-readable command name to be used in logs. 53 | log_on_process_error: Whether to use `logging.error` to log the process' 54 | stderr on failure. 55 | log_stderr: Whether to log the stderr of the command. 56 | log_stdout: Whether to log the stdout of the command. 57 | max_out_streams_len: Max length of prefix of stdout and stderr included in 58 | the exception message. Set to `None` to disable truncation. 59 | **run_kwargs: Any other kwargs for `subprocess.run`. 60 | 61 | Returns: 62 | The completed process object. 63 | 64 | Raises: 65 | RuntimeError: if the process completes with a non-zero return code. 
66 | """ 67 | 68 | logging.info('Launching subprocess "%s"', ' '.join(cmd)) 69 | 70 | start_time = time.time() 71 | try: 72 | completed_process = subprocess.run( 73 | cmd, 74 | check=True, 75 | stderr=subprocess.PIPE, 76 | stdout=subprocess.PIPE, 77 | text=True, 78 | **run_kwargs, 79 | ) 80 | except subprocess.CalledProcessError as e: 81 | if log_on_process_error: 82 | # Logs have a 15k character limit, so log the error line by line. 83 | logging.error('%s failed. %s stderr begin:', cmd_name, cmd_name) 84 | for error_line in e.stderr.splitlines(): 85 | if stripped_error_line := error_line.strip(): 86 | logging.error(stripped_error_line) 87 | logging.error('%s stderr end.', cmd_name) 88 | 89 | error_msg = ( 90 | f'{cmd_name} failed' 91 | f'\nstdout:\n{e.stdout[:max_out_streams_len]}\n' 92 | f'\nstderr:\n{e.stderr[:max_out_streams_len]}' 93 | ) 94 | raise RuntimeError(error_msg) from e 95 | end_time = time.time() 96 | 97 | logging.info('Finished %s in %.3f seconds', cmd_name, end_time - start_time) 98 | stdout, stderr = completed_process.stdout, completed_process.stderr 99 | 100 | if log_stdout and stdout: 101 | logging.info('%s stdout:\n%s', cmd_name, stdout) 102 | 103 | if log_stderr and stderr: 104 | logging.info('%s stderr:\n%s', cmd_name, stderr) 105 | 106 | return completed_process 107 | -------------------------------------------------------------------------------- /src/alphafold3/jax/attention/__pycache__/attention.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/attention/__pycache__/attention.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/attention/__pycache__/attention_base.cpython-311.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/attention/__pycache__/attention_base.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/attention/__pycache__/flash_attention.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/attention/__pycache__/flash_attention.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/attention/__pycache__/xla_attention.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/attention/__pycache__/xla_attention.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/attention/attention_call_arg_specs.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Attention call argument specifications. 12 | 13 | Attention argument specifications used by users of the library. 14 | They are the most important test cases, and also cases for optimize 15 | performance of via autotuning. 
ShapedArray = jax.ShapeDtypeStruct


def _make_argspec(
    *,
    q_shape,
    dtype,
    k_shape=None,
    v_shape=None,
    bias_shape=None,
    mask_shape=None,
    **kwargs,
) -> dict[str, Any]:
  """Builds an attention call argspec from shapes and extra kwargs.

  Args:
    q_shape: Shape of the query.
    dtype: Dtype shared by query, key, value and bias.
    k_shape: Shape of the key; defaults to `q_shape`.
    v_shape: Shape of the value; defaults to the (resolved) key shape.
    bias_shape: Shape of the bias, or None for no bias.
    mask_shape: Shape of the boolean mask, or None for no mask.
    **kwargs: Extra entries copied into the result unchanged.

  Returns:
    A dict with `query`, `key`, `value`, `bias` and `mask` entries (the last
    two are None when no shape was given), followed by `kwargs`.
  """
  k_shape = q_shape if k_shape is None else k_shape
  v_shape = k_shape if v_shape is None else v_shape

  query_spec = ShapedArray(q_shape, dtype)
  key_spec = ShapedArray(k_shape, dtype)
  value_spec = ShapedArray(v_shape, dtype)
  bias_spec = None if bias_shape is None else ShapedArray(bias_shape, dtype)
  mask_spec = None if mask_shape is None else ShapedArray(mask_shape, 'bool_')

  return dict(
      query=query_spec,
      key=key_spec,
      value=value_spec,
      bias=bias_spec,
      mask=mask_spec,
      **kwargs,
  )


# A subset of the full set of argument specifications. Useful for tap-tests and
# microbenchmarks.
CALL_ARG_SPECS = {
    'vanilla_f32': _make_argspec(q_shape=(8, 1024, 4, 128), dtype='float32'),
    'vanilla_bf16': _make_argspec(q_shape=(8, 1024, 4, 128), dtype='bfloat16'),
    'alphafold': _make_argspec(
        q_shape=(384, 384, 4, 32),
        bias_shape=(1, 4, 384, 384),
        mask_shape=(384, 1, 1, 384),
        dtype='bfloat16',
    ),
}
@enum.unique
class DotPrecision(enum.Enum):
  """Precision for `dot` operation.

  Naming scheme: {OPERAND_DTYPE}_{ACCUMULATOR_DTYPE}[_{NUM_PASSES}x]
  """

  BF16_F32 = "bf16_f32"

  # GPU only precisions.
  F32_F32 = "f32_f32"  # Full f32 precision (doesn't use TensorCores).
  TF32_F32 = "tf32_f32"  # Equivalent to `DEFAULT`/`HIGH` on GPU.
  TF32_F32_3X = "tf32_f32_3x"
  F16_F16 = "f16_f16"
  F16_F32 = "f16_f32"

  @property
  def operand_dtype(self) -> jnp.dtype:
    """Dtype of the operands: bf16, f16, or f32 for every other member."""
    if self is DotPrecision.BF16_F32:
      return jnp.bfloat16
    if self in (DotPrecision.F16_F16, DotPrecision.F16_F32):
      return jnp.float16
    return jnp.float32

  @property
  def accumulator_dtype(self) -> jnp.dtype:
    """Dtype of the accumulator: f16 for `F16_F16`, f32 otherwise."""
    if self is DotPrecision.F16_F16:
      return jnp.float16
    return jnp.float32


# (operand dtype, JAX precision) -> equivalent `DotPrecision` on GPU.
_JAX_GPU_PRECISION_MAP = {
    (jnp.float16, jax.lax.Precision.DEFAULT): DotPrecision.F16_F32,
    (jnp.bfloat16, jax.lax.Precision.DEFAULT): DotPrecision.BF16_F32,
    (jnp.float32, jax.lax.Precision.DEFAULT): DotPrecision.TF32_F32,
    (jnp.float32, jax.lax.Precision.HIGH): DotPrecision.TF32_F32,
    (jnp.float32, jax.lax.Precision.HIGHEST): DotPrecision.F32_F32,
}

# (operand dtype, JAX precision) -> equivalent `DotPrecision` on CPU.
_JAX_CPU_PRECISION_MAP = {
    (jnp.float16, jax.lax.Precision.DEFAULT): DotPrecision.F16_F32,
    (jnp.bfloat16, jax.lax.Precision.DEFAULT): DotPrecision.F32_F32,
    (jnp.float32, jax.lax.Precision.DEFAULT): DotPrecision.F32_F32,
    (jnp.float32, jax.lax.Precision.HIGH): DotPrecision.F32_F32,
    (jnp.float32, jax.lax.Precision.HIGHEST): DotPrecision.F32_F32,
}


def _create_jax_precision_map():
  """Merges the per-backend tables into one keyed by (backend, dtype, precision).

  Dtypes are normalised with `jnp.dtype` so lookups work with dtype instances.
  """
  per_backend = (
      ("gpu", _JAX_GPU_PRECISION_MAP),
      ("cpu", _JAX_CPU_PRECISION_MAP),
  )
  return {
      (backend, jnp.dtype(dtype), jax_precision): dot_precision
      for backend, table in per_backend
      for (dtype, jax_precision), dot_precision in table.items()
  }


_JAX_PRECISION_MAP = _create_jax_precision_map()
ValueError("Cannot infer precision if operand types differ.") 85 | 86 | backend = jax.default_backend().lower() 87 | if (jax_precision != jax.lax.Precision.DEFAULT) and (a_dtype != jnp.float32): 88 | raise ValueError( 89 | "`jax.lax.Precision` values other than `DEFAULT` only have an effect if" 90 | " the operand type is `float32`." 91 | ) 92 | return _JAX_PRECISION_MAP[(backend, a_dtype, jax_precision)] 93 | -------------------------------------------------------------------------------- /src/alphafold3/jax/common/triton_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
# Lookup table from `jnp` scalar types to the corresponding Triton dtypes.
_JNP_TO_TL_DTYPES: Mapping[jnp.dtype, tl.dtype] = {
    jnp.bool_: tl.int1,
    jnp.int8: tl.int8,
    jnp.int16: tl.int16,
    jnp.int32: tl.int32,
    jnp.int64: tl.int64,
    jnp.uint8: tl.uint8,
    jnp.uint16: tl.uint16,
    jnp.uint32: tl.uint32,
    jnp.uint64: tl.uint64,
    jnp.float16: tl.float16,
    jnp.bfloat16: tl.bfloat16,
    jnp.float32: tl.float32,
    jnp.float64: tl.float64,
}


def jnp_to_tl_dtype(jnp_dtype: jnp.dtype) -> tl.dtype:
  """Returns the Triton dtype registered for `jnp_dtype`.

  The table is keyed by `jnp` scalar types (e.g. `jnp.float32`); a key that is
  not present raises `KeyError`.
  """
  return _JNP_TO_TL_DTYPES[jnp_dtype]


def get_tl_dot_fn(
    precision: precision_lib.DotPrecision,
) -> Callable[..., tl.tensor]:
  """Returns a tl `dot` implementation with the specified precision.

  Args:
    precision: The `dot` precision.

  Returns:
    `_dot_tf32_f32_3x` for `TF32_F32_3X`; otherwise a function wrapping
    `tl.dot` that casts the operands to the precision's operand dtype and
    accumulates in its accumulator dtype.

  Raises:
    ValueError: If `is_precision_supported(precision)` is False.
  """
  if not is_precision_supported(precision):
    raise ValueError(f'Unsupported dot precision: {precision}')

  if precision == precision_lib.DotPrecision.TF32_F32_3X:
    return _dot_tf32_f32_3x

  in_dtype = jnp_to_tl_dtype(precision.operand_dtype)
  out_dtype = jnp_to_tl_dtype(precision.accumulator_dtype)
  # tf32 is only allowed when it was explicitly requested.
  allow_tf32 = precision == precision_lib.DotPrecision.TF32_F32

  @tl.core.extern
  def _dot_fn(
      a: tl.core.tensor,
      b: tl.core.tensor,
      *,
      trans_a: bool = False,
      trans_b: bool = False,
      _builder,
  ):
    if in_dtype == tl.float32:
      # float32 operands are required as-is; no implicit cast is performed.
      tl.static_assert(a.dtype == tl.float32, _builder=_builder)
      tl.static_assert(b.dtype == tl.float32, _builder=_builder)
    else:
      # Any standard floating type is accepted and cast to the operand dtype.
      tl.static_assert(a.dtype.is_standard_floating(), _builder=_builder)
      tl.static_assert(b.dtype.is_standard_floating(), _builder=_builder)
      a = a.to(in_dtype, _builder=_builder)
      b = b.to(in_dtype, _builder=_builder)
    a = tl.trans(a, _builder=_builder) if trans_a else a
    b = tl.trans(b, _builder=_builder) if trans_b else b
    return tl.dot(
        a, b, allow_tf32=allow_tf32, out_dtype=out_dtype, _builder=_builder
    )

  return _dot_fn


def is_precision_supported(precision: precision_lib.DotPrecision) -> bool:
  """Returns whether `get_tl_dot_fn` accepts `precision`.

  Every `DotPrecision` member listed below is supported; `F16_F16` is not in
  the set.
  """
  return precision in {
      precision_lib.DotPrecision.F32_F32,
      precision_lib.DotPrecision.TF32_F32,
      precision_lib.DotPrecision.F16_F32,
      precision_lib.DotPrecision.BF16_F32,
      precision_lib.DotPrecision.TF32_F32_3X,
  }


@triton.jit
def _dot_tf32_f32_3x(a, b, trans_a=False, trans_b=False):
  """Perform the 3-pass tf32 dot function."""
  tl.static_assert(a.dtype == tl.float32)
  tl.static_assert(b.dtype == tl.float32)
  # Clear the low 13 bits of each float32 bit pattern, giving a truncated
  # "head" value (presumably the part representable in tf32 - confirm), and
  # keep the dropped remainder as a separate error term.
  a_ = (a.to(tl.uint32, bitcast=True) & 0xFFFFE000).to(tl.float32, bitcast=True)
  b_ = (b.to(tl.uint32, bitcast=True) & 0xFFFFE000).to(tl.float32, bitcast=True)
  a_err = a - a_
  b_err = b - b_
  if trans_a:
    a_ = tl.trans(a_)
    a_err = tl.trans(a_err)
  if trans_b:
    b_ = tl.trans(b_)
    b_err = tl.trans(b_err)
  # Three dots: head*head plus the two cross terms; the err*err term is
  # omitted.
  # Add smallest terms first for better accuracy.
  return tl.dot(a_, b_, out_dtype=tl.float32) + (
      tl.dot(a_, b_err, out_dtype=tl.float32)
      + tl.dot(a_err, b_, out_dtype=tl.float32)
  )
def has_triton_support() -> bool:
  """Returns True if Triton is supported by the default JAX device.

  Support requires the default backend to be 'gpu' and the first device to
  report a numeric compute capability of at least 8.0. A device that reports
  no capability, or one that cannot be parsed as a number, is treated as
  unsupported rather than raising.
  """
  if jax.default_backend() != 'gpu':
    return False

  capability = getattr(jax.devices()[0], 'compute_capability', None)
  try:
    # Only currently supported for Ampere and above.
    return float(capability) >= 8.0
  except (TypeError, ValueError):
    # Missing (None) or non-numeric capability string: cannot establish the
    # required architecture level, so report no support.
    return False
-------------------------------------------------------------------------------- /src/alphafold3/jax/gated_linear_unit/__pycache__/matmul_config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/gated_linear_unit/__pycache__/matmul_config.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/gated_linear_unit/__pycache__/matmul_ext.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/gated_linear_unit/__pycache__/matmul_ext.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/gated_linear_unit/block.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
ArrayT: TypeAlias = Any
# A rank-0 integer: either an array, a NumPy scalar or a `jnp` scalar.
ScalarInt: TypeAlias = (
    Int[ArrayT, ""] | Int[np.generic, ""] | Int[jnp.generic, ""]
)


@jaxtyping.jaxtyped(typechecker=typeguard.typechecked)
def load_block(
    ref,
    idx: Sequence[int | ScalarInt],
    *,
    block_shape: Sequence[int | None],
    other=None,
    **kwargs,
) -> jax.Array:
  """Loads a block from the given `ref`, masking where necessary.

  Args:
    ref: The reference to load from. May be an `array_view.ArrayView`, in
      which case the load is issued against its `base` using the view's
      offsets.
    idx: Per-dimension block index (or plain index where the matching
      `block_shape` entry is None).
    block_shape: Per-dimension block size; None means the dimension is indexed
      directly by `idx` rather than sliced into blocks.
    other: Value passed to `pl.load` for masked-out elements. Ignored (set to
      None) when no mask is needed.
    **kwargs: Forwarded to `pl.load`.

  Returns:
    The loaded block.
  """
  idx, mask = _get_block_indexer_and_mask(ref, idx, block_shape=block_shape)
  if isinstance(ref, array_view.ArrayView):
    # Translate the view-relative indexer into offsets on the underlying ref.
    idx = ref[idx].offsets
    ref = ref.base
  # `other` is only meaningful together with a mask.
  other = None if mask is None else other
  # NOTE(review): x64 is enabled only around the load itself; the reason is
  # not visible here - confirm before changing.
  with jax.experimental.enable_x64():
    return pl.load(ref, idx, mask=mask, other=other, **kwargs)


@jaxtyping.jaxtyped(typechecker=typeguard.typechecked)
def store_block(
    ref,
    val: jax.Array,
    idx: Sequence[int | ScalarInt],
    *,
    block_shape: Sequence[int | None] | None = None,
    **kwargs,
):
  """Stores a block into the given `ref`, masking where necessary.

  Args:
    ref: The reference to store into. May be an `array_view.ArrayView`, in
      which case the store is issued against its `base` using the view's
      offsets.
    val: The block to store; cast to `ref.dtype` before storing.
    idx: Per-dimension block index (or plain index where the matching
      `block_shape` entry is None).
    block_shape: Per-dimension block size. Defaults to `val.shape`.
    **kwargs: Forwarded to `pl.store`.
  """
  if block_shape is None:
    block_shape = val.shape
  idx, mask = _get_block_indexer_and_mask(ref, idx, block_shape=block_shape)
  if isinstance(ref, array_view.ArrayView):
    # Translate the view-relative indexer into offsets on the underlying ref.
    idx = ref[idx].offsets
    ref = ref.base
  with jax.experimental.enable_x64():
    pl.store(ref, idx, val.astype(ref.dtype), mask=mask, **kwargs)


def _get_block_indexer_and_mask(
    ref, idx: Sequence[int | ScalarInt], *, block_shape: Sequence[int | None]
) -> tuple[tuple[int | slice | pl.Slice, ...], jax.Array | None]:
  """Return indices and mask for loading / storing a block.

  For each dimension, a None block size passes the index through unchanged and
  never needs a bounds check. Otherwise the block index is turned into a
  dynamic slice starting at `block_dim * block_idx` of length `block_dim`, and
  a bounds check is requested only when the dimension is not a multiple of the
  block size.

  Args:
    ref: Object exposing `.shape`.
    idx: Per-dimension block index.
    block_shape: Per-dimension block size, or None to index directly.

  Returns:
    A tuple `(indexer, mask)`, where `mask` is the result of `in_bounds_mask`
    (None when no dimension needs checking).
  """
  shape = ref.shape
  idxs = []
  check = []
  # `strict=True`: `shape`, `idx` and `block_shape` must have equal lengths.
  for dim, block_idx, block_dim in zip(shape, idx, block_shape, strict=True):
    if block_dim is None:
      idxs.append(block_idx)
      check.append(False)
    else:
      idxs.append(pl.dslice(block_dim * block_idx, block_dim))
      check.append(dim % block_dim != 0)

  return tuple(idxs), in_bounds_mask(idxs, shape, check=check)
mask=mask, **kwargs) 69 | 70 | 71 | def in_bounds_mask( 72 | idx: Sequence[int | slice | pl.Slice | jax.Array], 73 | shape: Sequence[int], 74 | *, 75 | check: Sequence[bool] | None = None, 76 | ) -> jax.Array | None: 77 | """Returns a boolean mask denoting which indices are within bounds. 78 | 79 | Args: 80 | idx: Indices for each dimension. 81 | shape: Shape designating the valid bounds. 82 | check: Whether or not to check bounds in each dimension. Useful for ignoring 83 | indices known to be in bounds. Defaults to all True. 84 | """ 85 | if check is None: 86 | check = [True] * len(shape) 87 | 88 | # Remove `int` indexed dims (mask shape must match slice result shape). 89 | shape = [dim for i, dim in enumerate(shape) if not isinstance(idx[i], int)] 90 | check = [chk for i, chk in enumerate(check) if not isinstance(idx[i], int)] 91 | idx = [idx for idx in idx if not isinstance(idx, int)] 92 | 93 | mask = None 94 | for i, (dim_idx, dim, chk) in enumerate(zip(idx, shape, check, strict=True)): 95 | if not chk: 96 | continue 97 | 98 | if isinstance(dim_idx, slice): 99 | dim_idx = pl.Slice.from_slice(dim_idx, dim) 100 | if isinstance(dim_idx, pl.Slice): 101 | dim_idx = dim_idx.start + dim_idx.stride * jnp.arange(dim_idx.size) 102 | if dim_idx.ndim != 1: 103 | raise NotImplementedError("Only one-dimensional indices are supported.") 104 | 105 | bcast_axes = [a for a in range(len(shape)) if a != i] 106 | dim_mask = jnp.expand_dims(dim_idx < dim, bcast_axes) 107 | mask = dim_mask if mask is None else (mask & dim_mask) 108 | return mask 109 | 110 | 111 | def _get_block_indexer_and_mask( 112 | ref, idx: Sequence[int | ScalarInt], *, block_shape: Sequence[int | None] 113 | ) -> tuple[tuple[int | slice | pl.Slice, ...], jax.Array | None]: 114 | """Return indices and mask for loading / storing a block.""" 115 | shape = ref.shape 116 | idxs = [] 117 | check = [] 118 | for dim, block_idx, block_dim in zip(shape, idx, block_shape, strict=True): 119 | if block_dim is None: 120 | 
class GatedLinearUnit(abc.ABC):
  """Abstract base for gated linear unit implementations.

  Subclasses implement `_fwd`; calling an instance forwards to it. The class
  also provides a default forward vmap rule and a helper that attaches it to a
  function via `jax.custom_batching.custom_vmap`.
  """

  def __call__(
      self,
      x: Float[Array, '*B M K'],
      weight: Float[Array, 'K 2 N'],
      *,
      activation: Callable[[jax.Array], jax.Array] | None = None,
      precision: jax.lax.Precision | None = None,
      **kwargs,
  ) -> Float[Array, '*B M N']:
    """Applies a gated linear unit (https://arxiv.org/abs/1612.08083).

    Computes `activation(x @ weight[:, 0]) * (x @ weight[:, 1])`.

    Args:
      x: the input array.
      weight: the combined weight array.
      activation: optional activation function.
      precision: specifies the matrix multiplication precision. Either `None`
        (default), which means the default precision for the backend, or a
        `jax.lax.Precision` enum.
      **kwargs: additional keyword arguments, forwarded to `_fwd`.

    Returns:
      The output array.
    """
    return self._fwd(
        x, weight, activation=activation, precision=precision, **kwargs
    )

  # Default vmap rule.
  @property
  def vmap_rule_forward(self) -> Callable[..., Any]:
    """Default forward vmap rule: delegates to `sequential_vmap` of `fn.fun`."""

    def _vmap_rule(
        axis_size, in_batched, *args, fn: jax.custom_batching.custom_vmap
    ):
      # Wrap the function underlying the custom_vmap and reuse its vmap rule.
      sequential_vmap = jax.custom_batching.sequential_vmap(fn.fun)
      return sequential_vmap.vmap_rule(axis_size, in_batched, *args)

    return _vmap_rule

  def apply_vmap_rule_forward(
      self, fn: Callable[..., Any], **kwargs
  ) -> jax.custom_batching.custom_vmap:
    """Wraps `fn` in a `custom_vmap` that uses `vmap_rule_forward`.

    Args:
      fn: The function to wrap.
      **kwargs: Keyword arguments bound to `fn` before wrapping.

    Returns:
      The `custom_vmap`-wrapped function with its vmap rule defined.
    """
    fn_closed = functools.partial(fn, **kwargs)
    fn_closed = jax.custom_batching.custom_vmap(fn_closed)
    # The rule receives the wrapped function itself via the `fn` keyword.
    vmap_rule = functools.partial(self.vmap_rule_forward, fn=fn_closed)
    fn_closed.def_vmap(vmap_rule)
    return fn_closed

  @abc.abstractmethod
  def _fwd(
      self,
      x: Float[Array, '*B M K'],
      weight: Float[Array, 'K 2 N'],
      *,
      activation: Callable[[jax.Array], jax.Array] | None,
      precision: jax.lax.Precision | None,
  ) -> Float[Array, '*B M N']:
    """Gated linear unit forward computation; implemented by subclasses."""
    ...


@jaxtyping.jaxtyped(typechecker=typeguard.typechecked)
def gated_linear_unit_xla(
    x: Float[Array, '*B M K'],
    weight: Float[Array, 'K 2 N'],
    *,
    activation: Callable[[jax.Array], jax.Array] | None = None,
    precision: jax.lax.Precision | None = None,
) -> Float[Array, '*B M N']:
  """Applies a gated linear unit (https://arxiv.org/abs/1612.08083).

  Computes `activation(x @ weight[:, 0]) * (x @ weight[:, 1])`. When
  `activation` is None the two projections are multiplied directly.

  This is SwiGLU when `activation=jax.nn.swish`, GEGLU when
  `activation=jax.nn.gelu`, REGLU when `activation=jax.nn.relu`, and GLU when
  `activation=jax.nn.sigmoid` (https://arxiv.org/abs/2002.05202).

  Args:
    x: the input array.
    weight: the combined weight array.
    activation: optional activation function.
    precision: specifies the matrix multiplication precision. Either `None`
      (default), which means the default precision for the backend, or a
      `jax.lax.Precision` enum.

  Returns:
    The output array, cast back to `x.dtype`.
  """

  # Collapse the trailing (2, N) axes so both projections are computed by a
  # single matmul.
  weight_reshaped = jax.lax.collapse(
      weight, start_dimension=-2, stop_dimension=None
  )
  assert weight_reshaped.ndim == 2

  y = jnp.dot(x, weight_reshaped, precision=precision)

  # Apply activation and compute product of FP8/FP16/BF16 in FP32.
  y = y.astype(jnp.promote_types(x.dtype, jnp.float32))
  # First half of the last axis is the gate projection, second half the value.
  a, b = jnp.split(y, 2, axis=-1)
  out = a * b if activation is None else activation(a) * b
  out = out.astype(x.dtype)
  return out
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Auto-tuned configs for matmul.""" 12 | 13 | import dataclasses 14 | import functools 15 | import math 16 | 17 | import jax 18 | from jax.experimental import pallas as pl 19 | 20 | 21 | @dataclasses.dataclass(frozen=True, kw_only=True) 22 | class Config: 23 | block_m: int 24 | block_n: int 25 | block_k: int 26 | num_warps: int 27 | num_stages: int 28 | 29 | 30 | @functools.cache 31 | def _get_best_block_size( 32 | m: int, n: int, k: int, core_count: int 33 | ) -> tuple[int, int, int]: 34 | """Returns the best block size for the given shape.""" 35 | min_block_dim = 32 36 | block_m = min(max(min_block_dim, pl.next_power_of_2(m)), 128) 37 | block_n = min(max(min_block_dim, pl.next_power_of_2(n)), 256) 38 | block_n = min(block_n, (128 * 128) // block_m) 39 | block_k = 32 40 | split_k = 1 41 | num_blocks = pl.cdiv(m, block_m) * pl.cdiv(n, block_n) 42 | while num_blocks < core_count: 43 | if block_m > min_block_dim: 44 | block_m //= 2 45 | num_blocks = pl.cdiv(m, block_m) * pl.cdiv(n, block_n) 46 | elif split_k * block_k < pl.next_power_of_2(k): 47 | split_k *= 2 48 | num_blocks *= 2 49 | else: 50 | break 51 | return block_m, block_n, block_k 52 | 53 | 54 | def _abstractify(x): 55 | return jax.api_util.shaped_abstractify(x) if isinstance(x, jax.Array) else x 56 | 57 | 58 | def get_config( 59 | x: jax.Array, w: jax.Array, core_count: int | None = None 60 | ) -> Config: 61 | """Returns a config for the given args.""" 62 | if core_count is None: 63 | core_count = jax.devices()[0].core_count 64 | x = _abstractify(x) 65 | w = _abstractify(w) 66 | m, k = math.prod(x.shape[:-1]), x.shape[-1] 67 | n = w.shape[1] 68 | if n >= m: # Prefer `block_n` > `block_m`. 
69 | block_m, block_n, block_k = _get_best_block_size(m, n, k, core_count) 70 | else: 71 | block_n, block_m, block_k = _get_best_block_size(n, m, k, core_count) 72 | return Config( 73 | block_m=block_m, 74 | block_n=block_n // 2, # Halve `block_n` as we read two `w` blocks. 75 | block_k=block_k, 76 | num_warps=4, 77 | num_stages=4, 78 | ) 79 | -------------------------------------------------------------------------------- /src/alphafold3/jax/geometry/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Geometry Module.""" 12 | 13 | from alphafold3.jax.geometry import rigid_matrix_vector 14 | from alphafold3.jax.geometry import rotation_matrix 15 | from alphafold3.jax.geometry import struct_of_array 16 | from alphafold3.jax.geometry import vector 17 | 18 | Rot3Array = rotation_matrix.Rot3Array 19 | Rigid3Array = rigid_matrix_vector.Rigid3Array 20 | 21 | StructOfArray = struct_of_array.StructOfArray 22 | 23 | Vec3Array = vector.Vec3Array 24 | square_euclidean_distance = vector.square_euclidean_distance 25 | euclidean_distance = vector.euclidean_distance 26 | dihedral_angle = vector.dihedral_angle 27 | dot = vector.dot 28 | cross = vector.cross 29 | -------------------------------------------------------------------------------- /src/alphafold3/jax/geometry/__pycache__/__init__.cpython-311.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/geometry/__pycache__/rigid_matrix_vector.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/rigid_matrix_vector.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/geometry/__pycache__/rotation_matrix.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/rotation_matrix.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/geometry/__pycache__/struct_of_array.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/struct_of_array.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/geometry/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/jax/geometry/__pycache__/vector.cpython-311.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/vector.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/confidence_types.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/confidence_types.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/confidences.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/confidences.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/data3.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/data3.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/data_constants.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/data_constants.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/feat_batch.cpython-311.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/feat_batch.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/features.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/features.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/merging_features.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/merging_features.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/mmcif_metadata.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/mmcif_metadata.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/model_config.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/model_config.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/msa_pairing.cpython-311.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/msa_pairing.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/params.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/params.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/post_processing.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/post_processing.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/__pycache__/protein_data_processing.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/protein_data_processing.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/atom_layout/__pycache__/atom_layout.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/atom_layout/__pycache__/atom_layout.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/components/__pycache__/base_model.cpython-311.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/components/__pycache__/base_model.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/components/__pycache__/haiku_modules.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/components/__pycache__/haiku_modules.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/components/__pycache__/mapping.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/components/__pycache__/mapping.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/components/__pycache__/utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/components/__pycache__/utils.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/components/base_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. 
To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Defines interface of a BaseModel.""" 12 | 13 | from collections.abc import Callable, Mapping 14 | import dataclasses 15 | from typing import Any, TypeAlias 16 | from alphafold3 import structure 17 | from alphafold3.model import features 18 | import haiku as hk 19 | import jax 20 | import numpy as np 21 | 22 | ModelResult: TypeAlias = Mapping[str, Any] 23 | ScalarNumberOrArray: TypeAlias = Mapping[str, float | int | np.ndarray] 24 | 25 | # Eval result will contain scalars (e.g. metrics or losses), selected from the 26 | # forward pass outputs or computed in the online evaluation; np.ndarrays or 27 | # jax.Arrays generated from the forward pass outputs (e.g. distogram expected 28 | # distances) or batch inputs; protein structures (predicted and ground-truth). 29 | EvalResultValue: TypeAlias = ( 30 | float | int | np.ndarray | jax.Array | structure.Structure 31 | ) 32 | # Eval result may be None for some metrics if they are not computable. 33 | EvalResults: TypeAlias = Mapping[str, EvalResultValue | None] 34 | # Interface metrics are all floats or None. 35 | InterfaceMetrics: TypeAlias = Mapping[str, float | None] 36 | # Interface results are a mapping from interface name to mappings from score 37 | # type to metric value. 38 | InterfaceResults: TypeAlias = Mapping[str, Mapping[str, InterfaceMetrics]] 39 | # Eval output consists of full eval results and a dict of interface metrics. 
EvalOutput: TypeAlias = tuple[EvalResults, InterfaceResults]

# Signature for `apply` method of hk.transform_with_state called on a BaseModel.
ForwardFn: TypeAlias = Callable[
    [hk.Params, hk.State, jax.Array, features.BatchDict],
    tuple[ModelResult, hk.State],
]


@dataclasses.dataclass(frozen=True)
class InferenceResult:
  """Postprocessed model result.

  Attributes:
    predicted_structure: Predicted protein structure. This is the only field
      without a default, so it must always be supplied.
    numerical_data: Useful numerical data (scalars or arrays) to be saved at
      inference time. Defaults to a fresh empty dict.
    metadata: Smaller numerical data (usually scalar) to be saved as inference
      metadata. Defaults to a fresh empty dict.
    debug_outputs: Additional dict for debugging, e.g. raw outputs of a model
      forward pass. Defaults to a fresh empty dict; may be None.
    model_id: Model identifier.
  """

  predicted_structure: structure.Structure
  numerical_data: ScalarNumberOrArray = dataclasses.field(default_factory=dict)
  metadata: ScalarNumberOrArray = dataclasses.field(default_factory=dict)
  debug_outputs: ModelResult | None = dataclasses.field(default_factory=dict)
  model_id: bytes = b''
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Utility functions for training AlphaFold and similar models.""" 12 | 13 | from collections import abc 14 | import contextlib 15 | import numbers 16 | 17 | from alphafold3.model import features 18 | import haiku as hk 19 | import jax.numpy as jnp 20 | import numpy as np 21 | 22 | 23 | VALID_DTYPES = [np.float32, np.float64, np.int8, np.int32, np.int64, bool] 24 | 25 | 26 | def remove_invalidly_typed_feats( 27 | batch: features.BatchDict, 28 | ) -> features.BatchDict: 29 | """Remove features of types we don't want to send to the TPU e.g. strings.""" 30 | return { 31 | k: v 32 | for k, v in batch.items() 33 | if hasattr(v, 'dtype') and v.dtype in VALID_DTYPES 34 | } 35 | 36 | 37 | def bfloat16_getter(next_getter, value, context): 38 | """Ensures that a bfloat16 parameter is provided by casting if necessary.""" 39 | if context.original_dtype == jnp.bfloat16: 40 | if value.dtype != jnp.bfloat16: 41 | value = value.astype(jnp.bfloat16) 42 | return next_getter(value) 43 | 44 | 45 | @contextlib.contextmanager 46 | def bfloat16_context(): 47 | with hk.custom_getter(bfloat16_getter): 48 | yield 49 | 50 | 51 | def mask_mean(mask, value, axis=None, keepdims=False, eps=1e-10): 52 | """Masked mean.""" 53 | 54 | mask_shape = mask.shape 55 | value_shape = value.shape 56 | 57 | assert len(mask_shape) == len( 58 | value_shape 59 | ), 'Shapes are not compatible, shapes: {}, {}'.format(mask_shape, value_shape) 60 | 61 | if isinstance(axis, numbers.Integral): 62 | axis = [axis] 63 | elif axis is None: 64 | axis = list(range(len(mask_shape))) 65 | assert isinstance( 66 | axis, abc.Iterable 67 | ), 'axis needs to be either an iterable, integer or "None"' 68 | 69 | broadcast_factor = 1.0 70 | for axis_ in axis: 71 | value_size = value_shape[axis_] 72 | mask_size = mask_shape[axis_] 73 | if mask_size == 1: 74 | broadcast_factor *= value_size 75 | 
else: 76 | error = f'Shapes are not compatible, shapes: {mask_shape}, {value_shape}' 77 | assert mask_size == value_size, error 78 | 79 | return jnp.sum(mask * value, keepdims=keepdims, axis=axis) / ( 80 | jnp.maximum( 81 | jnp.sum(mask, keepdims=keepdims, axis=axis) * broadcast_factor, eps 82 | ) 83 | ) 84 | -------------------------------------------------------------------------------- /src/alphafold3/model/data3.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Protein features that are computed from parsed mmCIF objects.""" 12 | 13 | from collections.abc import Mapping, MutableMapping 14 | import datetime 15 | from typing import TypeAlias 16 | 17 | from alphafold3.constants import residue_names 18 | from alphafold3.cpp import msa_profile 19 | from alphafold3.model import protein_data_processing 20 | import numpy as np 21 | 22 | 23 | FeatureDict: TypeAlias = Mapping[str, np.ndarray] 24 | MutableFeatureDict: TypeAlias = MutableMapping[str, np.ndarray] 25 | 26 | 27 | def fix_features(msa_features: MutableFeatureDict) -> MutableFeatureDict: 28 | """Renames the deletion_matrix feature.""" 29 | msa_features['deletion_matrix'] = msa_features.pop('deletion_matrix_int') 30 | return msa_features 31 | 32 | 33 | def get_profile_features( 34 | msa: np.ndarray, deletion_matrix: np.ndarray 35 | ) -> FeatureDict: 36 | """Returns the MSA profile and deletion_mean features.""" 37 | 
num_restypes = residue_names.POLYMER_TYPES_NUM_WITH_UNKNOWN_AND_GAP 38 | profile = msa_profile.compute_msa_profile( 39 | msa=msa, num_residue_types=num_restypes 40 | ) 41 | 42 | return { 43 | 'profile': profile.astype(np.float32), 44 | 'deletion_mean': np.mean(deletion_matrix, axis=0), 45 | } 46 | 47 | 48 | def fix_template_features( 49 | sequence: str, 50 | template_features: FeatureDict, 51 | ) -> FeatureDict: 52 | """Convert template features to AlphaFold 3 format. 53 | 54 | Args: 55 | sequence: amino acid sequence of the protein. 56 | template_features: Template features for the protein. 57 | 58 | Returns: 59 | Updated template_features for the chain. 60 | """ 61 | num_res = len(sequence) 62 | if not template_features['template_aatype'].shape[0]: 63 | template_features = empty_template_features(num_res) 64 | else: 65 | template_release_timestamp = [ 66 | _get_timestamp(x.decode('utf-8')) 67 | for x in template_features['template_release_date'] 68 | ] 69 | 70 | # Convert from atom37 to dense atom 71 | dense_atom_indices = np.take( 72 | protein_data_processing.PROTEIN_AATYPE_DENSE_ATOM_TO_ATOM37, 73 | template_features['template_aatype'], 74 | axis=0, 75 | ) 76 | 77 | atom_mask = np.take_along_axis( 78 | template_features['template_all_atom_masks'], dense_atom_indices, axis=2 79 | ) 80 | atom_positions = np.take_along_axis( 81 | template_features['template_all_atom_positions'], 82 | dense_atom_indices[..., None], 83 | axis=2, 84 | ) 85 | atom_positions *= atom_mask[..., None] 86 | 87 | template_features = { 88 | 'template_aatype': template_features['template_aatype'], 89 | 'template_atom_mask': atom_mask.astype(np.int32), 90 | 'template_atom_positions': atom_positions.astype(np.float32), 91 | 'template_domain_names': np.array( 92 | template_features['template_domain_names'], dtype=object 93 | ), 94 | 'template_release_timestamp': np.array( 95 | template_release_timestamp, dtype=np.float32 96 | ), 97 | } 98 | return template_features 99 | 100 | 101 | def 
empty_template_features(num_res: int) -> FeatureDict: 102 | """Creates a fully masked out template features to allow padding to work. 103 | 104 | Args: 105 | num_res: The length of the target chain. 106 | 107 | Returns: 108 | Empty template features for the chain. 109 | """ 110 | template_features = { 111 | 'template_aatype': np.zeros(num_res, dtype=np.int32)[None, ...], 112 | 'template_atom_mask': np.zeros( 113 | (num_res, protein_data_processing.NUM_DENSE), dtype=np.int32 114 | )[None, ...], 115 | 'template_atom_positions': np.zeros( 116 | (num_res, protein_data_processing.NUM_DENSE, 3), dtype=np.float32 117 | )[None, ...], 118 | 'template_domain_names': np.array([b''], dtype=object), 119 | 'template_release_timestamp': np.array([0.0], dtype=np.float32), 120 | } 121 | return template_features 122 | 123 | 124 | def _get_timestamp(date_str: str): 125 | dt = datetime.datetime.fromisoformat(date_str) 126 | dt = dt.replace(tzinfo=datetime.timezone.utc) 127 | return dt.timestamp() 128 | -------------------------------------------------------------------------------- /src/alphafold3/model/data_constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Constants shared across modules in the AlphaFold data pipeline.""" 12 | 13 | from alphafold3.constants import residue_names 14 | 15 | MSA_GAP_IDX = residue_names.PROTEIN_TYPES_ONE_LETTER_WITH_UNKNOWN_AND_GAP.index( 16 | '-' 17 | ) 18 | 19 | # Feature groups. 20 | NUM_SEQ_NUM_RES_MSA_FEATURES = ('msa', 'msa_mask', 'deletion_matrix') 21 | NUM_SEQ_MSA_FEATURES = ('msa_species_identifiers',) 22 | TEMPLATE_FEATURES = ( 23 | 'template_aatype', 24 | 'template_atom_positions', 25 | 'template_atom_mask', 26 | ) 27 | MSA_PAD_VALUES = {'msa': MSA_GAP_IDX, 'msa_mask': 1, 'deletion_matrix': 0} 28 | -------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/__pycache__/atom_cross_attention.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/atom_cross_attention.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/__pycache__/confidence_head.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/confidence_head.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/__pycache__/diffusion_head.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/diffusion_head.cpython-311.pyc 
-------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/__pycache__/diffusion_transformer.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/diffusion_transformer.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/__pycache__/distogram_head.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/distogram_head.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/__pycache__/featurization.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/featurization.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/__pycache__/model.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/model.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/__pycache__/modules.cpython-311.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/modules.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/__pycache__/template_modules.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/template_modules.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/model/diffusion/distogram_head.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
# ---- alphafold3/model/diffusion/distogram_head.py ----

_CONTACT_THRESHOLD: Final[float] = 8.0
_CONTACT_EPSILON: Final[float] = 1e-3


class DistogramHead(hk.Module):
  """Distogram head.

  Projects the pair embedding to per-bin logits, symmetrises them and reports
  the probability mass that falls into the bins counted as a contact.
  """

  class Config(base_config.BaseConfig):
    first_break: float = 2.3125
    last_break: float = 21.6875
    num_bins: int = 64

  def __init__(
      self,
      config: Config,
      global_config: model_config.GlobalConfig,
      name='distogram_head',
  ):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(
      self,
      batch: feat_batch.Batch,
      embeddings: dict[str, jnp.ndarray],
  ) -> dict[str, jnp.ndarray]:
    """Computes contact probabilities from the pair embedding.

    Args:
      batch: Input batch; only `batch.token_features.mask` is read.
      embeddings: Embeddings dict; only the 'pair' entry is read.

    Returns:
      A dict with 'bin_edges' (the `num_bins - 1` break points) and
      'contact_probs' (summed probability of the contact bins, zeroed for
      pairs that involve a masked-out token).
    """
    cfg = self.config
    pair_act = embeddings['pair']
    token_mask = batch.token_features.mask.astype(bool)
    pair_mask = token_mask[:, None] * token_mask[None, :]

    half_logits = hm.Linear(
        cfg.num_bins,
        initializer=self.global_config.final_init,
        name='half_logits',
    )(pair_act)

    # Adding the projection to its own transpose over the two token axes makes
    # the logits symmetric in (i, j).
    logits = half_logits + jnp.swapaxes(half_logits, -2, -3)
    probs = jax.nn.softmax(logits, axis=-1)

    breaks = jnp.linspace(cfg.first_break, cfg.last_break, cfg.num_bins - 1)

    # Upper edge of every bin; the last bin is given the same width as the
    # one before it.
    bin_tops = jnp.append(breaks, breaks[-1] + (breaks[-1] - breaks[-2]))
    threshold = _CONTACT_THRESHOLD + _CONTACT_EPSILON
    is_contact_bin = 1.0 * (bin_tops <= threshold)
    contact_probs = pair_mask * jnp.einsum(
        'ijk,k->ij', probs, is_contact_bin, precision=jax.lax.Precision.HIGHEST
    )

    return {
        'bin_edges': breaks,
        'contact_probs': contact_probs,
    }


# ---- alphafold3/model/feat_batch.py ----


@chex.dataclass(mappable_dataclass=False, frozen=True)
class Batch:
  """Dataclass containing batch.

  Each field holds one feature group; every group knows how to build itself
  from, and flatten itself back into, a flat feature dictionary.
  """

  msa: features.MSA
  templates: features.Templates
  token_features: features.TokenFeatures
  ref_structure: features.RefStructure
  predicted_structure_info: features.PredictedStructureInfo
  polymer_ligand_bond_info: features.PolymerLigandBondInfo
  ligand_ligand_bond_info: features.LigandLigandBondInfo
  pseudo_beta_info: features.PseudoBetaInfo
  atom_cross_att: features.AtomCrossAtt
  convert_model_output: features.ConvertModelOutput
  frames: features.Frames

  @property
  def num_res(self) -> int:
    """Size of the last axis of `token_features.aatype`."""
    return self.token_features.aatype.shape[-1]

  @classmethod
  def from_data_dict(cls, batch: features.BatchDict) -> Self:
    """Construct batch object from dictionary."""
    group_types = {
        'msa': features.MSA,
        'templates': features.Templates,
        'token_features': features.TokenFeatures,
        'ref_structure': features.RefStructure,
        'predicted_structure_info': features.PredictedStructureInfo,
        'polymer_ligand_bond_info': features.PolymerLigandBondInfo,
        'ligand_ligand_bond_info': features.LigandLigandBondInfo,
        'pseudo_beta_info': features.PseudoBetaInfo,
        'atom_cross_att': features.AtomCrossAtt,
        'convert_model_output': features.ConvertModelOutput,
        'frames': features.Frames,
    }
    return cls(**{
        field_name: group_type.from_data_dict(batch)
        for field_name, group_type in group_types.items()
    })

  def as_data_dict(self) -> features.BatchDict:
    """Converts batch object to dictionary."""
    groups = (
        self.msa,
        self.templates,
        self.token_features,
        self.ref_structure,
        self.predicted_structure_info,
        self.polymer_ligand_bond_info,
        self.ligand_ligand_bond_info,
        self.pseudo_beta_info,
        self.atom_cross_att,
        self.convert_model_output,
        self.frames,
    )
    output = {}
    # Later groups win on duplicate keys, matching dict-unpacking order.
    for group in groups:
      output.update(group.as_data_dict())
    return output
NUM_SEQ_NUM_RES_MSA_FEATURES = data_constants.NUM_SEQ_NUM_RES_MSA_FEATURES
NUM_SEQ_MSA_FEATURES = data_constants.NUM_SEQ_MSA_FEATURES
MSA_PAD_VALUES = data_constants.MSA_PAD_VALUES


xnp_ndarray: TypeAlias = np.ndarray | jnp.ndarray  # pylint: disable=invalid-name
BatchDict: TypeAlias = dict[str, xnp_ndarray]


def _pad_features_to_max(feat_name: str, chains: list[BatchDict], axis: int):
  """Pad a set of features to the maximum size amongst all chains.

  Args:
    feat_name: The feature name to pad. Must be a key of `MSA_PAD_VALUES`,
      which supplies the fill value.
    chains: A list of chains with associated features.
    axis: Which axis to pad to the max.

  Returns:
    A list of features, all with the same size on the given axis. Padding is
    appended at the end of that axis; all other axes are left unchanged.
  """
  max_size = max(chain[feat_name].shape[axis] for chain in chains)

  padded_feats = []
  for chain in chains:
    feat = chain[feat_name]

    # One (before, after) pair per dimension; only `axis` grows.
    pad_width = [(0, 0)] * len(feat.shape)  # pytype: disable=attribute-error
    pad_width[axis] = (0, max_size - feat.shape[axis])  # pytype: disable=attribute-error
    padded_feats.append(
        np.pad(
            feat,
            pad_width,
            mode='constant',
            constant_values=MSA_PAD_VALUES[feat_name],
        )
    )
  return padded_feats


def merge_msa_features(feat_name: str, chains: list[BatchDict]) -> np.ndarray:
  """Merges MSA features with shape (NUM_SEQ, NUM_RES) across chains.

  Args:
    feat_name: Name of the feature to merge. Names containing '_all_seq' are
      concatenated directly; all others are first padded along axis 0 to the
      largest size found in any chain.
    chains: Per-chain feature dicts. The first chain must contain
      `feat_name`.

  Returns:
    The per-chain features concatenated along axis 1.

  Raises:
    IndexError: If `chains` is empty.
    KeyError: If the first chain does not contain `feat_name`.
  """
  reference = chains[0][feat_name]
  if '_all_seq' in feat_name:
    # A chain without this feature contributes zero columns. The placeholder
    # must have the same rank and the same size on every axis other than 1,
    # otherwise np.concatenate rejects it. A flat empty array never qualifies.
    placeholder = np.zeros(
        tuple(reference.shape[:1]) + (0,) + tuple(reference.shape[2:]),
        dtype=reference.dtype,
    )
    return np.concatenate(
        [c.get(feat_name, placeholder) for c in chains], axis=1
    )
  else:
    # Since each MSA can be of different lengths, we first need to pad them
    # all to the size of the largest MSA before concatenating.
    padded_feats = _pad_features_to_max(feat_name, chains, axis=0)
    return np.concatenate(padded_feats, axis=1)


def merge_paired_and_unpaired_msa(example: BatchDict) -> BatchDict:
  """Concatenates the paired (all_seq) MSA features with the unpaired ones.

  Args:
    example: Feature dict. For every MSA feature name that is present both
      plainly and with an '_all_seq' suffix, the two are merged. Must contain
      'msa'.

  Returns:
    A shallow copy of `example` in which each merged feature has the paired
    rows first, followed by the unpaired rows, and 'num_alignments' is set to
    the number of rows of the resulting 'msa'.
  """
  new_example = dict(example)

  for feature_name in NUM_SEQ_NUM_RES_MSA_FEATURES + NUM_SEQ_MSA_FEATURES:
    if feature_name in example and feature_name + '_all_seq' in example:
      feat = example[feature_name]
      feat_all_seq = example[feature_name + '_all_seq']
      merged_feat = np.concatenate([feat_all_seq, feat], axis=0)
      new_example[feature_name] = merged_feat

  new_example['num_alignments'] = np.array(
      new_example['msa'].shape[0], dtype=np.int32
  )
  return new_example
Use is subject to terms of use available at 9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | #include "alphafold3/model/mkdssp_pybind.h" 12 | 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "absl/strings/string_view.h" 21 | #include "pybind11/pybind11.h" 22 | #include "pybind11/pytypes.h" 23 | 24 | namespace alphafold3 { 25 | namespace py = pybind11; 26 | 27 | void RegisterModuleMkdssp(pybind11::module m) { 28 | py::module site = py::module::import("site"); 29 | py::list paths = py::cast(site.attr("getsitepackages")()); 30 | // Find the first path that contains the libcifpp components.cif file. 31 | bool found = false; 32 | for (const auto& py_path : paths) { 33 | auto path_str = 34 | std::filesystem::path(py::cast(py_path)) / 35 | "share/libcifpp/components.cif"; 36 | if (std::filesystem::exists(path_str)) { 37 | setenv("LIBCIFPP_DATA_DIR", path_str.parent_path().c_str(), 0); 38 | found = true; 39 | break; 40 | } 41 | } 42 | if (!found) { 43 | throw py::type_error("Could not find the libcifpp components.cif file."); 44 | } 45 | m.def( 46 | "get_dssp", 47 | [](absl::string_view mmcif, int model_no, 48 | int min_poly_proline_stretch_length, 49 | bool calculate_surface_accessibility) { 50 | cif::file cif_file(mmcif.data(), mmcif.size()); 51 | dssp result(cif_file.front(), model_no, min_poly_proline_stretch_length, 52 | calculate_surface_accessibility); 53 | std::stringstream sstream; 54 | result.write_legacy_output(sstream); 55 | return sstream.str(); 56 | }, 57 | py::arg("mmcif"), py::arg("model_no") = 1, 58 | py::arg("min_poly_proline_stretch_length") = 3, 59 | py::arg("calculate_surface_accessibility") = false, 60 | py::doc("Gets secondary structure from an mmCIF file.")); 61 | } 62 | 63 | } // namespace alphafold3 64 | -------------------------------------------------------------------------------- /src/alphafold3/model/mkdssp_pybind.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_MODEL_MKDSSP_PYBIND_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_MODEL_MKDSSP_PYBIND_H_ 15 | 16 | 17 | #include "pybind11/pybind11.h" 18 | 19 | namespace alphafold3 { 20 | 21 | void RegisterModuleMkdssp(pybind11::module m); 22 | 23 | } 24 | 25 | 26 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_MODEL_MKDSSP_PYBIND_H_ 27 | -------------------------------------------------------------------------------- /src/alphafold3/model/model_config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
# A pair of optional integers; used below for the entries of the chunk-size and
# shard-spec sequences.
_Shape2DType: TypeAlias = tuple[int | None, int | None]


class GlobalConfig(base_config.BaseConfig):
  """Model-wide settings shared between modules."""

  # One of 'all', 'none' or 'intermediate'.
  # NOTE(review): presumably selects which parts of the computation run in
  # bfloat16 - confirm against the modules that read it.
  bfloat16: Literal['all', 'none', 'intermediate'] = 'all'
  # Initializer name, either 'zeros' or 'linear'.
  # NOTE(review): presumably applied to final/output projections - confirm.
  final_init: Literal['zeros', 'linear'] = 'zeros'
  # NOTE(review): the meaning of each (int | None, int | None) pair in the two
  # sequences below is not visible here - confirm with the attention and
  # transition code that consumes them.
  pair_attention_chunk_size: Sequence[_Shape2DType] = ((1536, 128), (None, 32))
  pair_transition_shard_spec: Sequence[_Shape2DType] = (
      (2048, None),
      (None, 1024),
  )
  # Note: flash_attention_implementation = 'xla' means no flash attention.
  flash_attention_implementation: attention.Implementation = 'triton'
@dataclasses.dataclass(frozen=True, slots=True, kw_only=True)
class ProcessedInferenceResult:
  """Stores attributes of a processed inference result.

  Attributes:
    cif: UTF-8 encoded mmCIF text of the predicted structure, with the added
      metadata fields and legal comment.
    mean_confidence_1d: Mean of the per-atom confidence values.
    ranking_score: Ranking score read from the inference result metadata.
    structure_confidence_summary_json: UTF-8 encoded JSON with the structure
      confidence summary.
    structure_full_data_json: UTF-8 encoded JSON with the full structure
      confidences.
    model_id: Identifier of the model that produced the inference result.
  """

  cif: bytes
  mean_confidence_1d: float
  ranking_score: float
  structure_confidence_summary_json: bytes
  structure_full_data_json: bytes
  model_id: bytes


def post_process_inference_result(
    inference_result: base_model.InferenceResult,
) -> ProcessedInferenceResult:
  """Converts a raw inference result into its serialised output artefacts.

  Args:
    inference_result: The inference result to process.

  Returns:
    A ProcessedInferenceResult holding the encoded mmCIF, the two confidence
    JSON documents, the mean atom confidence, the ranking score and the model
    identifier.
  """
  # Add mmCIF metadata fields. The timestamp is local wall-clock time and is
  # embedded in the version string, so two runs never produce identical CIFs.
  timestamp = datetime.datetime.now().isoformat(sep=' ', timespec='seconds')
  cif_with_metadata = mmcif_metadata.add_metadata_to_mmcif(
      old_cif=inference_result.predicted_structure.to_mmcif_dict(),
      version=f'{version.__version__} @ {timestamp}',
      model_id=inference_result.model_id,
  )
  cif = mmcif_metadata.add_legal_comment(cif_with_metadata.to_string())
  cif = cif.encode('utf-8')

  confidence_1d = confidence_types.AtomConfidence.from_inference_result(
      inference_result
  )
  # np.mean returns a NumPy scalar; convert so the field really holds the
  # builtin float that ProcessedInferenceResult declares.
  mean_confidence_1d = float(np.mean(confidence_1d.confidence))

  structure_confidence_summary_json = (
      confidence_types.StructureConfidenceSummary.from_inference_result(
          inference_result
      )
      .to_json()
      .encode('utf-8')
  )
  structure_full_data_json = (
      confidence_types.StructureConfidenceFull.from_inference_result(
          inference_result
      )
      .to_json()
      .encode('utf-8')
  )
  return ProcessedInferenceResult(
      cif=cif,
      mean_confidence_1d=mean_confidence_1d,
      ranking_score=float(inference_result.metadata['ranking_score']),
      structure_confidence_summary_json=structure_confidence_summary_json,
      structure_full_data_json=structure_full_data_json,
      model_id=inference_result.model_id,
  )


def write_output(
    inference_result: base_model.InferenceResult,
    output_dir: os.PathLike[str] | str,
    terms_of_use: str | None = None,
    name: str | None = None,
) -> None:
  """Writes processed inference result to a directory.

  Args:
    inference_result: The inference result to process and write.
    output_dir: Destination directory. It is created if it does not exist.
    terms_of_use: Optional text written to `TERMS_OF_USE.md`; nothing is
      written when None.
    name: Optional prefix; when given, files are named `<name>_model.cif`,
      `<name>_summary_confidences.json` and `<name>_confidences.json`.
  """
  processed_result = post_process_inference_result(inference_result)

  # Avoid failing on a missing directory only after the (expensive)
  # post-processing above has already been done.
  os.makedirs(output_dir, exist_ok=True)

  prefix = f'{name}_' if name is not None else ''

  with open(os.path.join(output_dir, f'{prefix}model.cif'), 'wb') as f:
    f.write(processed_result.cif)

  with open(
      os.path.join(output_dir, f'{prefix}summary_confidences.json'), 'wb'
  ) as f:
    f.write(processed_result.structure_confidence_summary_json)

  with open(os.path.join(output_dir, f'{prefix}confidences.json'), 'wb') as f:
    f.write(processed_result.structure_full_data_json)

  if terms_of_use is not None:
    # Explicit encoding: the other outputs are UTF-8, and the platform default
    # encoding may not be able to represent the text.
    with open(
        os.path.join(output_dir, 'TERMS_OF_USE.md'), 'wt', encoding='utf-8'
    ) as f:
      f.write(terms_of_use)
def pseudo_beta_fn(
    aatype: Array,
    dense_atom_positions: Array,
    dense_atom_masks: Array,
    is_ligand: Array | None = None,
    use_jax: bool | None = True,
) -> tuple[Array, Array] | Array:
  """Create pseudo beta atom positions and optionally mask.

  Args:
    aatype: [num_res] amino acid types.
    dense_atom_positions: [num_res, NUM_DENSE, 3] vector of all atom positions.
    dense_atom_masks: [num_res, NUM_DENSE] mask.
    is_ligand: [num_res] flag if something is a ligand. Treated as all-false
      when None.
    use_jax: whether to use jax for the computations. When falsy, every
      operation is done with NumPy.

  Returns:
    Pseudo beta dense atom positions and the corresponding mask.
  """
  xnp = jnp if use_jax else np

  if is_ligand is None:
    is_ligand = xnp.zeros_like(aatype)

  pseudobeta_index_polymer = xnp.take(
      protein_data_processing.RESTYPE_PSEUDOBETA_INDEX, aatype, axis=0
  ).astype(xnp.int32)

  # Ligand entries always use dense atom index 0; polymer entries use the
  # per-residue-type index looked up above. This must go through `xnp`, not
  # `jnp`, so that the NumPy path does not round-trip through JAX.
  pseudobeta_index = xnp.where(
      is_ligand,
      xnp.zeros_like(pseudobeta_index_polymer),
      pseudobeta_index_polymer,
  )

  # Gather the selected atom along the dense-atom axis, then drop that axis.
  pseudo_beta = xnp.take_along_axis(
      dense_atom_positions, pseudobeta_index[..., None, None], axis=-2
  )
  pseudo_beta = xnp.squeeze(pseudo_beta, axis=-2)

  pseudo_beta_mask = xnp.take_along_axis(
      dense_atom_masks, pseudobeta_index[..., None], axis=-1
  ).astype(xnp.float32)
  pseudo_beta_mask = xnp.squeeze(pseudo_beta_mask, axis=-1)

  return pseudo_beta, pseudo_beta_mask
# Lazy FASTA parser for memory efficient FASTA parsing from a path.
class FastaFileIterator:
  def __init__(self, fasta_path: str) -> None: ...
  def __iter__(self) -> FastaFileIterator: ...
  # Returns the next (sequence, description) pair. The binding raises
  # ValueError when the parser reports an invalid FASTA file and StopIteration
  # once the iterator is exhausted.
  def __next__(self) -> tuple[str,str]: ...

# Lazy FASTA parser for memory efficient FASTA parsing from a string.
class FastaStringIterator:
  def __init__(self, fasta_string: str | bytes) -> None: ...
  def __iter__(self) -> FastaStringIterator: ...
  # Returns the next (sequence, description) pair. The binding raises
  # ValueError when the parser reports an invalid FASTA string and
  # StopIteration once the iterator is exhausted.
  def __next__(self) -> tuple[str,str]: ...

# Parses a FASTA string and returns the list of sequences it contains.
# Descriptions are ignored.
def parse_fasta(fasta_string: str | bytes) -> list[str]: ...
# Parses a FASTA string and returns a (sequences, descriptions) tuple of two
# lists in matching order.
def parse_fasta_include_descriptions(fasta_string: str | bytes) -> tuple[list[str],list[str]]: ...
23 | -------------------------------------------------------------------------------- /src/alphafold3/parsers/cpp/fasta_iterator_lib.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 DeepMind Technologies Limited 2 | // 3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | // 6 | // To request access to the AlphaFold 3 model parameters, follow the process set 7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | // if received directly from Google. Use is subject to terms of use available at 9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | #include "alphafold3/parsers/cpp/fasta_iterator_lib.h" 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "absl/status/status.h" 20 | #include "absl/status/statusor.h" 21 | #include "absl/strings/ascii.h" 22 | #include "absl/strings/str_cat.h" 23 | #include "absl/strings/str_split.h" 24 | #include "absl/strings/string_view.h" 25 | #include "absl/strings/strip.h" 26 | 27 | namespace alphafold3 { 28 | 29 | // Parse FASTA string and return list of strings with amino acid sequences. 30 | // Returns a list of amino acid sequences only. 31 | std::vector ParseFasta(absl::string_view fasta_string) { 32 | std::vector sequences; 33 | std::string* sequence = nullptr; 34 | for (absl::string_view line_raw : absl::StrSplit(fasta_string, '\n')) { 35 | absl::string_view line = absl::StripAsciiWhitespace(line_raw); 36 | if (absl::ConsumePrefix(&line, ">")) { 37 | sequence = &sequences.emplace_back(); 38 | } else if (!line.empty() && sequence != nullptr) { 39 | absl::StrAppend(sequence, line); 40 | } 41 | } 42 | return sequences; 43 | } 44 | 45 | // Parse FASTA string and return list of strings with amino acid sequences. 
46 | // Returns two lists: The first one with amino acid sequences, the second with 47 | // the descriptions associated with each sequence. 48 | std::pair, std::vector> 49 | ParseFastaIncludeDescriptions(absl::string_view fasta_string) { 50 | std::pair, std::vector> result; 51 | auto& [sequences, descriptions] = result; 52 | std::string* sequence = nullptr; 53 | for (absl::string_view line_raw : absl::StrSplit(fasta_string, '\n')) { 54 | absl::string_view line = absl::StripAsciiWhitespace(line_raw); 55 | if (absl::ConsumePrefix(&line, ">")) { 56 | descriptions.emplace_back(line); 57 | sequence = &sequences.emplace_back(); 58 | } else if (!line.empty() && sequence != nullptr) { 59 | absl::StrAppend(sequence, line); 60 | } 61 | } 62 | return result; 63 | } 64 | 65 | absl::StatusOr> FastaFileIterator::Next() { 66 | std::string line_str; 67 | while (std::getline(reader_, line_str)) { 68 | absl::string_view line = line_str; 69 | line = absl::StripAsciiWhitespace(line); 70 | if (absl::ConsumePrefix(&line, ">")) { 71 | if (!description_.has_value()) { 72 | description_ = line; 73 | } else { 74 | std::pair output(sequence_, *description_); 75 | description_ = line; 76 | sequence_ = ""; 77 | return output; 78 | } 79 | } else if (description_.has_value()) { 80 | absl::StrAppend(&sequence_, line); 81 | } 82 | } 83 | has_next_ = false; 84 | reader_.close(); 85 | if (description_.has_value()) { 86 | return std::pair(sequence_, *description_); 87 | } else { 88 | return absl::InvalidArgumentError( 89 | absl::StrCat("Invalid FASTA file: ", filename_)); 90 | } 91 | } 92 | 93 | absl::StatusOr> 94 | FastaStringIterator::Next() { 95 | size_t consumed = 0; 96 | for (absl::string_view line_raw : absl::StrSplit(fasta_string_, '\n')) { 97 | consumed += line_raw.size() + 1; // +1 for the newline character. 
98 | absl::string_view line = absl::StripAsciiWhitespace(line_raw); 99 | if (absl::ConsumePrefix(&line, ">")) { 100 | if (!description_.has_value()) { 101 | description_ = line; 102 | } else { 103 | std::pair output(sequence_, *description_); 104 | description_ = line; 105 | sequence_ = ""; 106 | fasta_string_.remove_prefix(consumed); 107 | return output; 108 | } 109 | } else if (description_.has_value()) { 110 | absl::StrAppend(&sequence_, line); 111 | } 112 | } 113 | has_next_ = false; 114 | if (description_.has_value()) { 115 | return std::pair(sequence_, *description_); 116 | } else { 117 | return absl::InvalidArgumentError("Invalid FASTA string"); 118 | } 119 | } 120 | 121 | } // namespace alphafold3 122 | -------------------------------------------------------------------------------- /src/alphafold3/parsers/cpp/fasta_iterator_lib.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | // A C++ implementation of a FASTA parser. 
14 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_LIB_H_ 15 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_LIB_H_ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "absl/status/statusor.h" 25 | #include "absl/strings/string_view.h" 26 | 27 | namespace alphafold3 { 28 | 29 | // Parse FASTA string and return list of strings with amino acid sequences. 30 | // Returns a list of amino acid sequences only. 31 | std::vector ParseFasta(absl::string_view fasta_string); 32 | 33 | // Parse FASTA string and return list of strings with amino acid sequences. 34 | // Returns two lists: The first one with amino acid sequences, the second with 35 | // the descriptions associated with each sequence. 36 | std::pair, std::vector> 37 | ParseFastaIncludeDescriptions(absl::string_view fasta_string); 38 | 39 | // Lazy FASTA parser for memory efficient FASTA parsing from a path. 40 | class FastaFileIterator { 41 | public: 42 | // Initialise FastaFileIterator with filename of fasta. If you initialize 43 | // reader_ with an invalid path or empty file, it won't fail, only 44 | // riegeli::ReadLine within the Next method will then return false. That will 45 | // then trigger the "Invalid FASTA file" error. 46 | explicit FastaFileIterator(absl::string_view fasta_path) 47 | : filename_(fasta_path), 48 | reader_(filename_, std::ios::in), 49 | has_next_(true) {} 50 | 51 | // Returns whether there are more sequences. Returns true before first call to 52 | // next even if the file is empty. 53 | bool HasNext() const { return has_next_; } 54 | 55 | // Fetches the next (sequence, description) from the file. 56 | absl::StatusOr> Next(); 57 | 58 | private: 59 | // Use riegeli::FileReader instead of FileLineIterator for about 2x speedup. 
60 | std::string filename_; 61 | std::fstream reader_; 62 | std::optional description_; 63 | std::string sequence_; 64 | bool has_next_; 65 | }; 66 | 67 | // Lazy FASTA parser for memory efficient FASTA parsing from a string. 68 | class FastaStringIterator { 69 | public: 70 | // Initialise FastaStringIterator with a string_view of a FASTA. If you 71 | // initialize it with an invalid FASTA string, it won't fail, the Next method 72 | // will then return false. That will then trigger the "Invalid FASTA" error. 73 | // WARNING: The object backing the fasta_string string_view must not be 74 | // deleted while this Iterator is alive. 75 | explicit FastaStringIterator(absl::string_view fasta_string) 76 | : fasta_string_(fasta_string), has_next_(true) {} 77 | 78 | // Returns whether there are more sequences. Returns true before first call to 79 | // next even if the string is empty. 80 | bool HasNext() const { return has_next_; } 81 | 82 | // Fetches the next (sequence, description) from the string. 83 | absl::StatusOr> Next(); 84 | 85 | private: 86 | absl::string_view fasta_string_; 87 | bool has_next_; 88 | std::optional description_; 89 | std::string sequence_; 90 | }; 91 | 92 | } // namespace alphafold3 93 | 94 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_LIB_H_ 95 | -------------------------------------------------------------------------------- /src/alphafold3/parsers/cpp/fasta_iterator_pybind.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 DeepMind Technologies Limited 2 | // 3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | // 6 | // To request access to the AlphaFold 3 model parameters, follow the process set 7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | // if received directly from Google. 
Use is subject to terms of use available at 9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | #include 12 | 13 | #include "absl/status/statusor.h" 14 | #include "absl/strings/string_view.h" 15 | #include "alphafold3/parsers/cpp/fasta_iterator_lib.h" 16 | #include "pybind11/attr.h" 17 | #include "pybind11/pybind11.h" 18 | #include "pybind11/pytypes.h" 19 | #include "pybind11/stl.h" 20 | 21 | namespace alphafold3 { 22 | namespace { 23 | 24 | namespace py = pybind11; 25 | 26 | template 27 | T ValueOrThrowValueError(absl::StatusOr value) { 28 | if (!value.ok()) throw py::value_error(value.status().ToString()); 29 | return *std::move(value); 30 | } 31 | 32 | constexpr char kFastaFileIteratorDoc[] = R"( 33 | Lazy FASTA parser for memory efficient FASTA parsing from a path.)"; 34 | 35 | constexpr char kFastaStringIteratorDoc[] = R"( 36 | Lazy FASTA parser for memory efficient FASTA parsing from a string. 37 | 38 | WARNING: The object backing the fasta_string string_view must not be 39 | deleted while the FastaStringIterator is alive. E.g. this will break: 40 | 41 | ``` 42 | # Make sure the fasta_string is not interned. 43 | fasta_string = '\n'.join(['>d\nS' for _ in range(10)]) 44 | iterator = fasta_iterator.FastaStringIterator(fasta_string) 45 | del fasta_string 46 | iterator.next() # Heap use-after-free. 47 | ``` 48 | )"; 49 | 50 | constexpr char kParseFastaDoc[] = R"( 51 | Parses a FASTA string and returns a list of amino-acid sequences. 52 | 53 | Args: 54 | fasta_string: The contents of a FASTA file. 55 | 56 | Returns: 57 | List of sequences in the FASTA file. Descriptions are ignored. 58 | )"; 59 | 60 | constexpr char kParseFastaIncludeDescriptionsDoc[] = R"( 61 | Parses a FASTA string, returns amino-acid sequences with descriptions. 62 | 63 | Args: 64 | fasta_string: The contents of a FASTA file. 65 | 66 | Returns: 67 | A tuple with two lists (sequences, descriptions): 68 | * A list of sequences. 
69 | * A list of sequence descriptions taken from the comment lines. In the 70 | same order as the sequences. 71 | )"; 72 | 73 | class PythonFastaStringIterator : public FastaStringIterator { 74 | public: 75 | explicit PythonFastaStringIterator(py::object fasta_string) 76 | : FastaStringIterator(py::cast(fasta_string)), 77 | fasta_string_(std::move(fasta_string)) {} 78 | 79 | private: 80 | py::object fasta_string_; 81 | }; 82 | 83 | } // namespace 84 | 85 | void RegisterModuleFastaIterator(pybind11::module m) { 86 | py::class_(m, "FastaFileIterator", kFastaFileIteratorDoc) 87 | .def(py::init(), py::arg("fasta_path")) 88 | .def("__iter__", 89 | [](FastaFileIterator& iterator) -> FastaFileIterator& { 90 | return iterator; 91 | }) 92 | .def( 93 | "__next__", 94 | [](FastaFileIterator& iterator) { 95 | if (iterator.HasNext()) { 96 | return ValueOrThrowValueError(iterator.Next()); 97 | } else { 98 | throw py::stop_iteration(); 99 | } 100 | }, 101 | py::call_guard()); 102 | 103 | py::class_(m, "FastaStringIterator", 104 | kFastaStringIteratorDoc) 105 | .def(py::init(), py::arg("fasta_string")) 106 | .def("__iter__", 107 | [](PythonFastaStringIterator& iterator) 108 | -> PythonFastaStringIterator& { return iterator; }) 109 | .def( 110 | "__next__", 111 | [](PythonFastaStringIterator& iterator) { 112 | if (iterator.HasNext()) { 113 | return ValueOrThrowValueError(iterator.Next()); 114 | } else { 115 | throw py::stop_iteration(); 116 | } 117 | }, 118 | py::call_guard()); 119 | 120 | m.def("parse_fasta", &ParseFasta, py::arg("fasta_string"), 121 | py::call_guard(), py::doc(kParseFastaDoc + 1)); 122 | m.def("parse_fasta_include_descriptions", &ParseFastaIncludeDescriptions, 123 | py::arg("fasta_string"), py::call_guard(), 124 | py::doc(kParseFastaIncludeDescriptionsDoc + 1)); 125 | } 126 | 127 | } // namespace alphafold3 128 | -------------------------------------------------------------------------------- /src/alphafold3/parsers/cpp/fasta_iterator_pybind.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_PYBIND_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_PYBIND_H_ 15 | 16 | #include "pybind11/pybind11.h" 17 | 18 | namespace alphafold3 { 19 | 20 | void RegisterModuleFastaIterator(pybind11::module m); 21 | 22 | } 23 | 24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_PYBIND_H_ 25 | -------------------------------------------------------------------------------- /src/alphafold3/parsers/cpp/msa_conversion.pyi: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Type annotations for Python bindings for `msa_conversion`. 
# Takes a sequence and a query sequence (each str or bytes) and returns a str.
# NOTE(review): presumably returns `sequence` re-aligned against a query that
# contains no gap characters -- confirm against the C++ implementation.
def align_sequence_to_gapless_query(
    sequence: str | bytes,
    query_sequence: str | bytes,
) -> str: ...


# Takes an iterable of strings and returns a list of strings.
# NOTE(review): presumably converts A3M-formatted aligned sequences to their
# Stockholm-formatted equivalents -- confirm against the C++ implementation.
def convert_a3m_to_stockholm(a3m_sequences: Iterable[str]) -> list[str]: ...
To view a copy of 5 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | # 7 | # To request access to the AlphaFold 3 model parameters, follow the process set 8 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | # if received directly from Google. Use is subject to terms of use available at 10 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | 12 | set -euo pipefail 13 | 14 | readonly SOURCE_DIR=${1:-$HOME/public_databases} 15 | readonly TARGET_DIR=${2:-/mnt/disks/ssd/public_databases} 16 | 17 | mkdir -p "${TARGET_DIR}" 18 | 19 | FILES=(pdb_seqres_2022_09_28.fasta \ 20 | uniprot_all_2021_04.fa \ 21 | mgy_clusters_2022_05.fa \ 22 | uniref90_2022_05.fa \ 23 | bfd-first_non_consensus_sequences.fasta \ 24 | rfam_14_9_clust_seq_id_90_cov_80_rep_seq.fasta \ 25 | nt_rna_2023_02_23_clust_seq_id_90_cov_80_rep_seq.fasta \ 26 | rnacentral_active_seq_id_90_cov_80_linclust.fasta) 27 | 28 | NOT_COPIED_FILES=() 29 | 30 | while (( ${#FILES[@]} )); do 31 | # Get total size of files to copy in bytes 32 | SOURCE_FILES=( "${FILES[@]/#/${SOURCE_DIR}/}" ) 33 | TOTAL_SIZE=$(du -sbc "${SOURCE_FILES[@]}" | awk 'END{print $1}') 34 | 35 | # Get available space on target drive in bytes 36 | AVAILABLE_SPACE=$(df --portability --block-size=1 "$TARGET_DIR" | awk 'END{print $4}') 37 | 38 | # Compare sizes and copy if enough space 39 | if (( TOTAL_SIZE <= AVAILABLE_SPACE )); then 40 | printf 'Copying files... 
#!/bin/bash
# Formats (if necessary) and mounts the first usable Google local SSD.
#
# Usage: gcp_mount_ssd.sh [MOUNT_DIR]    (default: /mnt/disks/ssd)
#
# Exits 0 without doing anything if MOUNT_DIR already exists, and exits 1 if
# no disk could be mounted.

set -euo pipefail

readonly MOUNT_DIR="${1:-/mnt/disks/ssd}"

if [[ -d "${MOUNT_DIR}" ]]; then
  echo "Mount directory ${MOUNT_DIR} already exists, skipping"
  exit 0
fi

# Candidate devices are read one path per line from fd 3 (see the end of the
# loop), so any number of SSDs is handled and commands inside the loop keep
# their normal stdin.
while IFS= read -r -u 3 SSD_DISK; do
  # Check if the disk is already formatted
  if ! blkid -o value -s TYPE "${SSD_DISK}" > /dev/null 2>&1; then
    echo "Disk ${SSD_DISK} is not formatted, format it."
    mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard "${SSD_DISK}" || continue
  fi

  # Check if *this* disk is already mounted (not a fixed device name).
  if grep -qs "^${SSD_DISK} " /proc/mounts; then
    grep -s "^${SSD_DISK} " /proc/mounts
    echo "Disk ${SSD_DISK} is already mounted, skip it."
    continue
  fi

  # Disk is not mounted, mount it
  echo "Mounting ${SSD_DISK} to ${MOUNT_DIR}"
  mkdir -p "${MOUNT_DIR}"
  # NOTE(review): this runs before the mount, so it changes the mount point
  # directory rather than the root of the mounted filesystem -- confirm that
  # is the intent.
  chmod -R 777 "${MOUNT_DIR}"
  mount "${SSD_DISK}" "${MOUNT_DIR}"
  break
# Resolve every google-local entry to its device node. Several by-id symlinks
# may point at the same device, hence the de-duplication.
done 3< <(find /dev/disk/by-id/ -name '*google-local*' -exec realpath {} + | sort -u)

# MOUNT_DIR is only created right before a mount attempt, so its absence means
# no candidate disk was available.
if [[ ! -d "${MOUNT_DIR}" ]]; then
  echo "No unmounted SSD disks found"
  exit 1
fi
from_res_arrays 26 | from alphafold3.structure.parsing import from_sequences_and_bonds 27 | from alphafold3.structure.parsing import ModelID 28 | from alphafold3.structure.parsing import SequenceFormat 29 | from alphafold3.structure.structure import ARRAY_FIELDS 30 | from alphafold3.structure.structure import AuthorNamingScheme 31 | from alphafold3.structure.structure import Bond 32 | from alphafold3.structure.structure import CascadeDelete 33 | from alphafold3.structure.structure import concat 34 | from alphafold3.structure.structure import enumerate_residues 35 | from alphafold3.structure.structure import fix_non_standard_polymer_residues 36 | from alphafold3.structure.structure import GLOBAL_FIELDS 37 | from alphafold3.structure.structure import make_empty_structure 38 | from alphafold3.structure.structure import MissingAtomError 39 | from alphafold3.structure.structure import MissingAuthorResidueIdError 40 | from alphafold3.structure.structure import multichain_residue_index 41 | from alphafold3.structure.structure import stack 42 | from alphafold3.structure.structure import Structure 43 | from alphafold3.structure.structure_tables import Atoms 44 | from alphafold3.structure.structure_tables import Chains 45 | from alphafold3.structure.structure_tables import Residues 46 | -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/bioassemblies.cpython-311.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/bioassemblies.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/bonds.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/bonds.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/chemical_components.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/chemical_components.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/mmcif.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/mmcif.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/parsing.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/parsing.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/sterics.cpython-311.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/sterics.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/structure.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/structure.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/structure_tables.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/structure_tables.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/table.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/table.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/__pycache__/test_utils.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/test_utils.cpython-311.pyc -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/aggregation.pyi: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 
source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | from collections.abc import Sequence 12 | 13 | def indices_grouped_by_value(values: Sequence[int]) -> dict[int, list[int]]: ... 14 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/aggregation_pybind.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 DeepMind Technologies Limited 2 | // 3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | // 6 | // To request access to the AlphaFold 3 model parameters, follow the process set 7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | // if received directly from Google. 
Use is subject to terms of use available at 9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | #include 12 | #include 13 | 14 | #include "absl/container/flat_hash_map.h" 15 | #include "absl/types/span.h" 16 | #include "pybind11/cast.h" 17 | #include "pybind11/numpy.h" 18 | #include "pybind11/pybind11.h" 19 | #include "pybind11_abseil/absl_casters.h" 20 | 21 | namespace { 22 | 23 | namespace py = pybind11; 24 | 25 | absl::flat_hash_map> IndicesGroupedByValue( 26 | absl::Span values) { 27 | absl::flat_hash_map> group_indices; 28 | for (int64_t i = 0, e = values.size(); i < e; ++i) { 29 | group_indices[values[i]].push_back(i); 30 | } 31 | return group_indices; 32 | } 33 | 34 | constexpr char kIndicesGroupedByValue[] = R"( 35 | Returns a map from value to a list of indices this value occupies. 36 | 37 | E.g. indices_grouped_by_value([1, 1, 2, 3, 3, 1, 1]) returns: 38 | {1: [0, 1, 5, 6], 2: [2], 3: [3, 4]} 39 | 40 | Args: 41 | values: a list of values to group. 42 | )"; 43 | 44 | } // namespace 45 | 46 | namespace alphafold3 { 47 | 48 | void RegisterModuleAggregation(py::module m) { 49 | m.def("indices_grouped_by_value", &IndicesGroupedByValue, py::arg("values"), 50 | py::doc(kIndicesGroupedByValue + 1), 51 | py::call_guard()); 52 | } 53 | 54 | } // namespace alphafold3 55 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/aggregation_pybind.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. 
Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_AGGREGATION_PYBIND_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_AGGREGATION_PYBIND_H_ 15 | 16 | #include "pybind11/pybind11.h" 17 | 18 | namespace alphafold3 { 19 | 20 | void RegisterModuleAggregation(pybind11::module m); 21 | 22 | } 23 | 24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_AGGREGATION_PYBIND_H_ 25 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/membership.pyi: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | import numpy 12 | 13 | 14 | def isin( 15 | array: numpy.ndarray[numpy.int64], 16 | test_elements: set[int], 17 | invert: bool = ..., 18 | ) -> numpy.ndarray[bool]: ... 19 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/membership_pybind.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 DeepMind Technologies Limited 2 | // 3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. 
To view a copy of 4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | // 6 | // To request access to the AlphaFold 3 model parameters, follow the process set 7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | // if received directly from Google. Use is subject to terms of use available at 9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "absl/container/flat_hash_set.h" 17 | #include "pybind11/cast.h" 18 | #include "pybind11/numpy.h" 19 | #include "pybind11/pybind11.h" 20 | #include "pybind11_abseil/absl_casters.h" 21 | 22 | namespace { 23 | 24 | namespace py = pybind11; 25 | // Element-wise membership test; the result is pre-filled with `invert` and flipped for every match. 26 | py::array_t IsIn(const py::array_t& array, 27 | const absl::flat_hash_set& test_elements, 28 | bool invert) { 29 | const size_t num_elements = array.size(); 30 | 31 | py::array_t output(num_elements); 32 | std::fill(output.mutable_data(), output.mutable_data() + output.size(), 33 | invert); 34 | 35 | // Shortcut: with no test elements nothing can match, so skip the scan and 36 | // keep the `invert` fill. Do not return early: the reshape below must still 37 | // run so that multi-dimensional inputs keep their shape. 38 | if (!test_elements.empty()) { 39 | for (size_t i = 0; i < num_elements; ++i) { 40 | if (test_elements.contains(array.data()[i])) { 41 | output.mutable_data()[i] = !invert; 42 | } 43 | } 44 | } 45 | if (array.ndim() > 1) { 46 | auto shape = 47 | std::vector(array.shape(), array.shape() + array.ndim()); 48 | return output.reshape(shape); 49 | } 50 | return output; 51 | } 52 | 53 | constexpr char kIsInDoc[] = R"( 54 | Computes whether each element is in test_elements. 55 | 56 | Same use as np.isin, but much faster. If len(array) = n, len(test_elements) = m: 57 | * This function has complexity O(n). 58 | * np.isin with kind='sort' has complexity O(m*log(m) + n * log(m)). 59 | 60 | Args: 61 | array: Input NumPy array with dtype=np.int64. 62 | test_elements: The values against which to test each value of array.
63 | invert: If True, the values in the returned array are inverted, as if 64 | calculating `element not in test_elements`. Default is False. 65 | `isin(a, b, invert=True)` is equivalent to but faster than `~isin(a, b)`. 66 | 67 | Returns: 68 | A boolean array of the same shape as the input array. Each value `val` is: 69 | * `val in test_elements` if `invert=False`, 70 | * `val not in test_elements` if `invert=True`. 71 | )"; 72 | 73 | } // namespace 74 | 75 | namespace alphafold3 { 76 | 77 | void RegisterModuleMembership(pybind11::module m) { 78 | m.def("isin", &IsIn, py::arg("array"), py::arg("test_elements"), 79 | py::kw_only(), py::arg("invert") = false, py::doc(kIsInDoc + 1)); 80 | } 81 | 82 | } // namespace alphafold3 83 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/membership_pybind.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google.
Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MEMBERSHIP_PYBIND_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MEMBERSHIP_PYBIND_H_ 15 | 16 | #include "pybind11/pybind11.h" 17 | 18 | namespace alphafold3 { 19 | 20 | void RegisterModuleMembership(pybind11::module m); 21 | 22 | } 23 | 24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MEMBERSHIP_PYBIND_H_ 25 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_altlocs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ALTLOCS_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ALTLOCS_H_ 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "absl/types/span.h" 22 | #include "alphafold3/structure/cpp/mmcif_layout.h" 23 | 24 | namespace alphafold3 { 25 | 26 | // Returns the list of indices that should be kept after resolving alt-locs. 27 | // 1) Partial Residue. Each cycle of alt-locs are resolved separately with the 28 | // highest occupancy alt-loc. Tie-breaks are resolved alphabetically. See 29 | // tests for examples. 30 | // 2) Whole Residue. These are resolved in two passes. 
31 | // a) The residue with the highest occupancy is chosen. 32 | // b) The locations for a given residue are resolved. 33 | // All tie-breaks are resolved alphabetically. See tests for examples. 34 | // 35 | // Preconditions: layout and comp_ids, alt_ids, occupancies are all from same 36 | // mmCIF file and chain_indices are monotonically increasing and less than 37 | // layout.num_chains(). 38 | // 39 | // comp_ids from '_atom_site.label_comp_id'. 40 | // alt_ids from '_atom_site.label_alt_id'. 41 | // occupancies from '_atom_site.occupancy'. 42 | std::vector ResolveMmcifAltLocs( 43 | const MmcifLayout& layout, absl::Span comp_ids, 44 | absl::Span atom_ids, 45 | absl::Span alt_ids, 46 | absl::Span occupancies, 47 | absl::Span chain_indices); 48 | 49 | } // namespace alphafold3 50 | 51 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ALTLOCS_H_ 52 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_atom_site.pyi: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | from collections.abc import Callable 12 | from alphafold3.cpp import cif_dict 13 | 14 | 15 | def get_internal_to_author_chain_id_map( 16 | mmcif: cif_dict.CifDict 17 | ) -> dict[str,str]: ... 
18 | 19 | 20 | def get_or_infer_type_symbol( 21 | mmcif: cif_dict.CifDict, 22 | atom_id_to_type_symbol: Callable[[str, str], str], 23 | ) -> list[str]: ... 24 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_atom_site_pybind.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 DeepMind Technologies Limited 2 | // 3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | // 6 | // To request access to the AlphaFold 3 model parameters, follow the process set 7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | // if received directly from Google. Use is subject to terms of use available at 9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | #include 12 | 13 | #include "absl/container/flat_hash_map.h" 14 | #include "absl/log/check.h" 15 | #include "absl/strings/string_view.h" 16 | #include "absl/types/span.h" 17 | #include "alphafold3/parsers/cpp/cif_dict_lib.h" 18 | #include "pybind11/gil.h" 19 | #include "pybind11/pybind11.h" 20 | #include "pybind11/pytypes.h" 21 | #include "pybind11/stl.h" 22 | #include "pybind11_abseil/absl_casters.h" 23 | 24 | namespace alphafold3 { 25 | namespace { 26 | namespace py = pybind11; 27 | 28 | // If present, returns the _atom_site.type_symbol. If not, infers it using 29 | // _atom_site.label_comp_id (residue name), _atom_site.label_atom_id (atom name) 30 | // and the CCD. 
31 | py::list GetOrInferTypeSymbol(const CifDict& mmcif, 32 | const py::object& atom_id_to_type_symbol) { 33 | const auto& type_symbol = mmcif["_atom_site.type_symbol"]; 34 | const int num_atom = mmcif["_atom_site.id"].size(); 35 | py::list patched_type_symbol(num_atom); 36 | if (type_symbol.empty()) { 37 | const auto& label_comp_id = mmcif["_atom_site.label_comp_id"]; 38 | const auto& label_atom_id = mmcif["_atom_site.label_atom_id"]; 39 | CHECK_EQ(label_comp_id.size(), num_atom); 40 | CHECK_EQ(label_atom_id.size(), num_atom); 41 | for (int i = 0; i < num_atom; i++) { 42 | patched_type_symbol[i] = 43 | atom_id_to_type_symbol(label_comp_id[i], label_atom_id[i]); 44 | } 45 | } else { 46 | for (int i = 0; i < num_atom; i++) { 47 | patched_type_symbol[i] = type_symbol[i]; 48 | } 49 | } 50 | return patched_type_symbol; 51 | } 52 | 53 | absl::flat_hash_map 54 | GetInternalToAuthorChainIdMap(const CifDict& mmcif) { 55 | const auto& label_asym_ids = mmcif["_atom_site.label_asym_id"]; 56 | const auto& auth_asym_ids = mmcif["_atom_site.auth_asym_id"]; 57 | CHECK_EQ(label_asym_ids.size(), auth_asym_ids.size()); 58 | 59 | absl::flat_hash_map mapping; 60 | for (size_t i = 0, num_rows = label_asym_ids.size(); i < num_rows; ++i) { 61 | // Use only the first internal_chain_id occurrence to generate the mapping. 62 | // It should not matter as there should not be a case where a single 63 | // internal chain ID would map to more than one author chain ID (i.e. the 64 | // mapping should be single-valued). Since we need this method to be fast, we 65 | // choose not to check it.
66 | mapping.emplace(label_asym_ids[i], auth_asym_ids[i]); 67 | } 68 | return mapping; 69 | } 70 | 71 | } // namespace 72 | 73 | namespace py = pybind11; 74 | 75 | void RegisterModuleMmcifAtomSite(pybind11::module m) { 76 | m.def("get_or_infer_type_symbol", &GetOrInferTypeSymbol, py::arg("mmcif"), 77 | py::arg("atom_id_to_type_symbol")); 78 | 79 | m.def("get_internal_to_author_chain_id_map", &GetInternalToAuthorChainIdMap, 80 | py::arg("mmcif"), py::call_guard()); 81 | } 82 | 83 | } // namespace alphafold3 84 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_atom_site_pybind.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. 
Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ATOM_SITE_PYBIND_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ATOM_SITE_PYBIND_H_ 15 | 16 | #include "pybind11/pybind11.h" 17 | 18 | namespace alphafold3 { 19 | 20 | void RegisterModuleMmcifAtomSite(pybind11::module m); 21 | 22 | } 23 | 24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ATOM_SITE_PYBIND_H_ 25 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_layout.pyi: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | from alphafold3.cpp import cif_dict 12 | 13 | class MmcifLayout: 14 | def atom_range(self, residue_index: int) -> tuple[int, int]: ... 15 | def chain_starts(self) -> list[int]: ... 16 | def chains(self) -> list[int]: ... 17 | def model_offset(self) -> int: ... 18 | def num_atoms(self) -> int: ... 19 | def num_chains(self) -> int: ... 20 | def num_models(self) -> int: ... 21 | def num_residues(self) -> int: ... 22 | def residue_range(self, chain_index: int) -> tuple[int, int]: ... 23 | def residue_starts(self) -> list[int]: ... 24 | def residues(self) -> list[int]: ... 25 | 26 | def from_mmcif(mmcif: cif_dict.CifDict, model_id: str = ...) -> MmcifLayout: ... 
27 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_layout_pybind.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 DeepMind Technologies Limited 2 | // 3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | // 6 | // To request access to the AlphaFold 3 model parameters, follow the process set 7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | // if received directly from Google. Use is subject to terms of use available at 9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | #include "alphafold3/structure/cpp/mmcif_layout.h" 12 | #include "pybind11/pybind11.h" 13 | #include "pybind11/pytypes.h" 14 | #include "pybind11/stl.h" 15 | 16 | namespace alphafold3 { 17 | 18 | namespace py = pybind11; 19 | 20 | void RegisterModuleMmcifLayout(pybind11::module m) { 21 | py::class_(m, "MmcifLayout") 22 | .def("__str__", &MmcifLayout::ToDebugString) 23 | .def("num_models", &MmcifLayout::num_models) 24 | .def("num_chains", &MmcifLayout::num_chains) 25 | .def("num_residues", &MmcifLayout::num_residues) 26 | .def("num_atoms", &MmcifLayout::num_atoms) 27 | .def("residue_range", &MmcifLayout::residue_range, py::arg("chain_index")) 28 | .def("atom_range", &MmcifLayout::atom_range, py::arg("residue_index")) 29 | .def("chains", &MmcifLayout::chains, 30 | py::doc("Returns a list of indices one past the last residue of " 31 | "each chain.")) 32 | .def( 33 | "chain_starts", &MmcifLayout::chain_starts, 34 | py::doc("Returns a list of indices of the first atom of each chain.")) 35 | .def("residues", &MmcifLayout::residues, 36 | py::doc("Returns a list of indices one past the last atom of each " 37 | "residue.")) 38 | .def("residue_starts", &MmcifLayout::residue_starts, 39 | 
py::doc( 40 | "Returns a list of indices of the first atom of each residue.")) 41 | .def("model_offset", &MmcifLayout::model_offset, 42 | py::doc("Returns the first atom index that is part of the specified " 43 | "model.")); 44 | 45 | m.def("from_mmcif", &MmcifLayout::Create, py::arg("mmcif"), 46 | py::arg("model_id") = ""); 47 | } 48 | 49 | } // namespace alphafold3 50 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_layout_pybind.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_LAYOUT_PYBIND_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_LAYOUT_PYBIND_H_ 15 | 16 | #include "pybind11/pybind11.h" 17 | 18 | namespace alphafold3 { 19 | 20 | void RegisterModuleMmcifLayout(pybind11::module m); 21 | 22 | } 23 | 24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_LAYOUT_PYBIND_H_ 25 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_struct_conn.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. 
To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_H_ 15 | 16 | #include 17 | #include 18 | 19 | #include "absl/status/statusor.h" 20 | #include "absl/strings/string_view.h" 21 | #include "alphafold3/parsers/cpp/cif_dict_lib.h" 22 | 23 | namespace alphafold3 { 24 | 25 | // Returns a pair of atom indices for each row in the bonds table (aka 26 | // _struct_conn). The indices are simple 0-based indexes into the columns of 27 | // the _atom_site table in the input mmCIF, and do not necessarily correspond 28 | // to the values in _atom_site.id, or any other column. 29 | absl::StatusOr, std::vector>> 30 | GetBondAtomIndices(const CifDict& mmcif, absl::string_view model_id); 31 | 32 | } // namespace alphafold3 33 | 34 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_H_ 35 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_struct_conn.pyi: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | from alphafold3.cpp import cif_dict 12 | 13 | def get_bond_atom_indices(mmcif_dict: cif_dict.CifDict, model_id: str) -> tuple[list[int],list[int]]: ... 14 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_struct_conn_pybind.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 DeepMind Technologies Limited 2 | // 3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | // 6 | // To request access to the AlphaFold 3 model parameters, follow the process set 7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | // if received directly from Google. Use is subject to terms of use available at 9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | #include 12 | 13 | #include "absl/strings/string_view.h" 14 | #include "alphafold3/parsers/cpp/cif_dict_lib.h" 15 | #include "alphafold3/structure/cpp/mmcif_struct_conn.h" 16 | #include "pybind11/gil.h" 17 | #include "pybind11/pybind11.h" 18 | #include "pybind11/pytypes.h" 19 | #include "pybind11/stl.h" 20 | 21 | namespace alphafold3 { 22 | 23 | namespace py = pybind11; 24 | 25 | constexpr char kGetBondAtomIndices[] = R"( 26 | Extracts the indices of the atoms that participate in bonds. 27 | 28 | This function has a workaround for a known PDB issue: some mmCIFs have 29 | (2evw, 2g0v, 2g0x, 2g0z, 2g10, 2g11, 2g12, 2g14, 2grz, 2ntw as of 2024) 30 | multiple models and they set different whole-chain altloc in each model. 31 | The bond table however doesn't distinguish between models, so there are 32 | bonds that are valid only for some models. E.g. 
2grz has model 1 with 33 | chain A with altloc A, and model 2 with chain A with altloc B. The bonds 34 | table lists a bond for each of these. This case is rather rare (10 cases 35 | in PDB as of 2024). For the offending bonds, the returned atom index is 36 | set to the size of the atom_site table, i.e. it is an invalid index. 37 | 38 | Args: 39 | mmcif_dict: The mmCIF object to process. 40 | model_id: The ID of the model that the returned atoms will belong to. This 41 | should be a value in the mmCIF's _atom_site.pdbx_PDB_model_num column. 42 | 43 | Returns: 44 | Two lists of atom indices, `from_atoms` and `to_atoms`, each one having 45 | length num_bonds (as defined by _struct_conn, the bonds table). The bond 46 | i, defined by the i'th row in _struct_conn, is a bond from atom at index 47 | from_atoms[i], to the atom at index to_atoms[i]. The indices are simple 48 | 0-based indexes into the columns of the _atom_site table in the input 49 | mmCIF, and do not necessarily correspond to the values in _atom_site.id, 50 | or any other column. 51 | )"; 52 | 53 | void RegisterModuleMmcifStructConn(pybind11::module m) { 54 | m.def( 55 | "get_bond_atom_indices", 56 | [](const CifDict& mmcif, absl::string_view model_id) { 57 | auto result = GetBondAtomIndices(mmcif, model_id); 58 | if (result.ok()) { 59 | return *result; 60 | } 61 | throw py::value_error(std::string(result.status().message())); 62 | }, 63 | py::arg("mmcif_dict"), py::arg("model_id"), 64 | py::doc(kGetBondAtomIndices + 1), 65 | py::call_guard()); 66 | } 67 | 68 | } // namespace alphafold3 69 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_struct_conn_pybind.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0.
To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_PYBIND_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_PYBIND_H_ 15 | 16 | #include "pybind11/pybind11.h" 17 | 18 | namespace alphafold3 { 19 | 20 | void RegisterModuleMmcifStructConn(pybind11::module m); 21 | 22 | } 23 | 24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_PYBIND_H_ 25 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_utils.pyi: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
# Use is subject to terms of use available at
# https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md

# Type stubs for the `mmcif_utils` functions. Every body is `...`, and a
# default written as `= ...` only records that the parameter is optional; the
# actual default value is chosen by the implementation and is not visible here.
# NOTE(review): presumably these describe the bindings registered by
# RegisterModuleMmcifUtils (mmcif_utils_pybind.h) -- confirm.

from collections.abc import Sequence

import numpy as np

from alphafold3.cpp import cif_dict
from alphafold3.structure.python import mmcif_layout


# Takes a parsed mmCIF plus per-category include flags and an optional
# model_id; returns an integer NumPy array together with an MmcifLayout.
# Only `mmcif` and `include_nucleotides` are required.
# NOTE(review): the array presumably holds the indices of the atoms that were
# kept -- confirm against the implementation.
# The name shadows the builtin `filter` inside this module; it is part of the
# public interface, so it is kept as is.
def filter(
    mmcif: cif_dict.CifDict,
    include_nucleotides: bool,
    include_ligands: bool = ...,
    include_water: bool = ...,
    include_other: bool = ...,
    model_id: str = ...,
) -> tuple[np.ndarray[int], mmcif_layout.MmcifLayout]: ...


# Takes a layout, per-atom component/atom names and x/y/z coordinate
# sequences, and an optional `fix_arg` flag. Declared to return None.
# NOTE(review): a None return suggests the inputs are modified in place --
# confirm which arguments are mutated before relying on this.
def fix_residues(
    layout: mmcif_layout.MmcifLayout,
    comp_id: Sequence[str],
    atom_id: Sequence[str],
    atom_x: Sequence[float],
    atom_y: Sequence[float],
    atom_z: Sequence[float],
    fix_arg: bool = ...,
) -> None: ...


# Builds an MmcifLayout from a parsed mmCIF; `model_id` is optional.
def read_layout(
    mmcif: cif_dict.CifDict, model_id: str = ...
) -> mmcif_layout.MmcifLayout: ...


# Takes a layout, five _atom_site-style columns, and four columns each for the
# "nonpoly" and "branch" groups; returns a pair of boolean lists.
# NOTE(review): the meaning and ordering of the two returned masks is not
# visible in this stub -- confirm against the implementation.
def selected_ligand_residue_mask(
    layout: mmcif_layout.MmcifLayout,
    atom_site_label_asym_ids: list[str],
    atom_site_label_seq_ids: list[str],
    atom_site_auth_seq_ids: list[str],
    atom_site_label_comp_ids: list[str],
    atom_site_pdbx_pdb_ins_codes: list[str],
    nonpoly_asym_ids: list[str],
    nonpoly_auth_seq_ids: list[str],
    nonpoly_pdb_ins_codes: list[str],
    nonpoly_mon_ids: list[str],
    branch_asym_ids: list[str],
    branch_auth_seq_ids: list[str],
    branch_pdb_ins_codes: list[str],
    branch_mon_ids: list[str],
) -> tuple[list[bool], list[bool]]: ...


# Takes a layout, three _atom_site-style columns and three "poly_seq" columns;
# returns a single boolean list.
# NOTE(review): presumably one entry per residue in `layout` -- confirm.
def selected_polymer_residue_mask(
    layout: mmcif_layout.MmcifLayout,
    atom_site_label_asym_ids: list[str],
    atom_site_label_seq_ids: list[str],
    atom_site_label_comp_ids: list[str],
    poly_seq_asym_ids: list[str],
    poly_seq_seq_ids: list[str],
    poly_seq_mon_ids: list[str],
) -> list[bool]: ...
72 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/mmcif_utils_pybind.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_UTILS_PYBIND_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_UTILS_PYBIND_H_ 15 | 16 | #include "pybind11/pybind11.h" 17 | 18 | namespace alphafold3 { 19 | 20 | void RegisterModuleMmcifUtils(pybind11::module m); 21 | 22 | } 23 | 24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_UTILS_PYBIND_H_ 25 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/string_array.pyi: -------------------------------------------------------------------------------- 1 | # Copyright 2024 DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. 
Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | from collections.abc import Sequence 12 | from typing import overload 13 | 14 | import numpy as np 15 | 16 | def format_float_array( 17 | values: Sequence[float], num_decimal_places: int 18 | ) -> list[str]: ... 19 | def isin( 20 | array: np.ndarray[object], 21 | test_elements: set[str | bytes], 22 | invert: bool = ..., 23 | ) -> np.ndarray[bool]: ... 24 | @overload 25 | def remap( 26 | array: np.ndarray[object], 27 | mapping: dict[str, str], 28 | default_value: str, 29 | inplace: bool = ..., 30 | ) -> np.ndarray[object]: ... 31 | @overload 32 | def remap( 33 | array: np.ndarray[object], 34 | mapping: dict[str, str], 35 | inplace: bool = ..., 36 | ) -> np.ndarray[object]: ... 37 | -------------------------------------------------------------------------------- /src/alphafold3/structure/cpp/string_array_pybind.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2024 DeepMind Technologies Limited 3 | * 4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 6 | * 7 | * To request access to the AlphaFold 3 model parameters, follow the process set 8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these 9 | * if received directly from Google. 
Use is subject to terms of use available at 10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 11 | */ 12 | 13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_STRING_ARRAY_PYBIND_H_ 14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_STRING_ARRAY_PYBIND_H_ 15 | 16 | #include "pybind11/pybind11.h" 17 | 18 | namespace alphafold3 { 19 | 20 | void RegisterModuleStringArray(pybind11::module m); 21 | 22 | } 23 | 24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_STRING_ARRAY_PYBIND_H_ 25 | -------------------------------------------------------------------------------- /src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_1024.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_1024.pkl -------------------------------------------------------------------------------- /src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_default.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_default.pkl -------------------------------------------------------------------------------- /src/alphafold3/test_data/featurised_example.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/test_data/featurised_example.pkl -------------------------------------------------------------------------------- /src/alphafold3/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 
DeepMind Technologies Limited 2 | # 3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of 4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ 5 | # 6 | # To request access to the AlphaFold 3 model parameters, follow the process set 7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these 8 | # if received directly from Google. Use is subject to terms of use available at 9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md 10 | 11 | """Single source of truth for the AlphaFold version.""" 12 | 13 | __version__ = '3.0.0' 14 | --------------------------------------------------------------------------------