├── .DS_Store
├── .gitattributes
├── CMakeLists.txt
├── LICENSE
├── OUTPUT_TERMS_OF_USE.md
├── README.md
├── WEIGHTS_PROHIBITED_USE_POLICY.md
├── WEIGHTS_TERMS_OF_USE.md
├── dev-requirements.txt
├── docker
├── Dockerfile
└── dockerignore
├── docs
├── contributing.md
├── header.jpg
├── input.md
├── installation.md
├── known_issues.md
├── output.md
└── performance.md
├── environment.yml
├── example
├── 1_extract_chains.py
├── 2_pdb2jax.py
├── 3_generate_json.py
├── 4_extract_iptm-ipae-pae-interaction.py
├── pdb
│ └── bcov_v4_r4_ems_p1-15H-GBL-16H-GBL-16H_0129_0001_0001_0002_000000002_0001_0001_19_34_H_.._ems_p1-16H-GBL-16H-GABBL-16H_0045_0001_0001_0002_0001_0001_0001_0001.pdb
├── subset_data.csv
└── subset_data_with_metrics.csv
├── fetch_databases.sh
├── pyproject.toml
├── requirements.txt
├── run_af3score.py
└── src
└── alphafold3
├── __init__.py
├── __pycache__
├── __init__.cpython-311.pyc
├── build_data.cpython-311.pyc
└── version.cpython-311.pyc
├── build_data.py
├── common
├── __pycache__
│ ├── base_config.cpython-311.pyc
│ ├── folding_input.cpython-311.pyc
│ └── resources.cpython-311.pyc
├── base_config.py
├── folding_input.py
├── resources.py
└── testing
│ ├── __pycache__
│ └── data.cpython-311.pyc
│ └── data.py
├── constants
├── __pycache__
│ ├── atom_types.cpython-311.pyc
│ ├── chemical_component_sets.cpython-311.pyc
│ ├── chemical_components.cpython-311.pyc
│ ├── mmcif_names.cpython-311.pyc
│ ├── periodic_table.cpython-311.pyc
│ ├── residue_names.cpython-311.pyc
│ └── side_chains.cpython-311.pyc
├── atom_types.py
├── chemical_component_sets.py
├── chemical_components.py
├── converters
│ ├── __pycache__
│ │ ├── ccd_pickle_gen.cpython-311.pyc
│ │ └── chemical_component_sets_gen.cpython-311.pyc
│ ├── ccd_pickle_gen.py
│ ├── chemical_component_sets.pickle
│ └── chemical_component_sets_gen.py
├── mmcif_names.py
├── periodic_table.py
├── residue_names.py
└── side_chains.py
├── cpp.cc
├── data
├── __pycache__
│ ├── featurisation.cpython-311.pyc
│ ├── msa.cpython-311.pyc
│ ├── msa_config.cpython-311.pyc
│ ├── msa_features.cpython-311.pyc
│ ├── parsers.cpython-311.pyc
│ ├── pipeline.cpython-311.pyc
│ ├── structure_stores.cpython-311.pyc
│ ├── template_realign.cpython-311.pyc
│ └── templates.cpython-311.pyc
├── cpp
│ ├── msa_profile_pybind.cc
│ └── msa_profile_pybind.h
├── featurisation.py
├── msa.py
├── msa_config.py
├── msa_features.py
├── msa_identifiers.py
├── msa_store.py
├── parsers.py
├── pipeline.py
├── structure_stores.py
├── template_realign.py
├── template_store.py
├── templates.py
└── tools
│ ├── __pycache__
│ ├── hmmalign.cpython-311.pyc
│ ├── hmmbuild.cpython-311.pyc
│ ├── hmmsearch.cpython-311.pyc
│ ├── jackhmmer.cpython-311.pyc
│ ├── msa_tool.cpython-311.pyc
│ ├── nhmmer.cpython-311.pyc
│ ├── rdkit_utils.cpython-311.pyc
│ └── subprocess_utils.cpython-311.pyc
│ ├── hmmalign.py
│ ├── hmmbuild.py
│ ├── hmmsearch.py
│ ├── jackhmmer.py
│ ├── msa_tool.py
│ ├── nhmmer.py
│ ├── rdkit_utils.py
│ └── subprocess_utils.py
├── jax
├── attention
│ ├── __pycache__
│ │ ├── attention.cpython-311.pyc
│ │ ├── attention_base.cpython-311.pyc
│ │ ├── flash_attention.cpython-311.pyc
│ │ └── xla_attention.cpython-311.pyc
│ ├── attention.py
│ ├── attention_base.py
│ ├── attention_call_arg_specs.py
│ ├── flash_attention.py
│ └── xla_attention.py
├── common
│ ├── __pycache__
│ │ ├── array_view.cpython-311.pyc
│ │ ├── precision.cpython-311.pyc
│ │ └── triton_utils.cpython-311.pyc
│ ├── array_view.py
│ ├── precision.py
│ └── triton_utils.py
├── gated_linear_unit
│ ├── __pycache__
│ │ ├── block.cpython-311.pyc
│ │ ├── gated_linear_unit.cpython-311.pyc
│ │ ├── gated_linear_unit_base.cpython-311.pyc
│ │ ├── matmul_config.cpython-311.pyc
│ │ └── matmul_ext.cpython-311.pyc
│ ├── block.py
│ ├── gated_linear_unit.py
│ ├── gated_linear_unit_base.py
│ ├── matmul_config.py
│ └── matmul_ext.py
└── geometry
│ ├── __init__.py
│ ├── __pycache__
│ ├── __init__.cpython-311.pyc
│ ├── rigid_matrix_vector.cpython-311.pyc
│ ├── rotation_matrix.cpython-311.pyc
│ ├── struct_of_array.cpython-311.pyc
│ ├── utils.cpython-311.pyc
│ └── vector.cpython-311.pyc
│ ├── rigid_matrix_vector.py
│ ├── rotation_matrix.py
│ ├── struct_of_array.py
│ ├── utils.py
│ └── vector.py
├── model
├── __pycache__
│ ├── confidence_types.cpython-311.pyc
│ ├── confidences.cpython-311.pyc
│ ├── data3.cpython-311.pyc
│ ├── data_constants.cpython-311.pyc
│ ├── feat_batch.cpython-311.pyc
│ ├── features.cpython-311.pyc
│ ├── merging_features.cpython-311.pyc
│ ├── mmcif_metadata.cpython-311.pyc
│ ├── model_config.cpython-311.pyc
│ ├── msa_pairing.cpython-311.pyc
│ ├── params.cpython-311.pyc
│ ├── post_processing.cpython-311.pyc
│ └── protein_data_processing.cpython-311.pyc
├── atom_layout
│ ├── __pycache__
│ │ └── atom_layout.cpython-311.pyc
│ └── atom_layout.py
├── components
│ ├── __pycache__
│ │ ├── base_model.cpython-311.pyc
│ │ ├── haiku_modules.cpython-311.pyc
│ │ ├── mapping.cpython-311.pyc
│ │ └── utils.cpython-311.pyc
│ ├── base_model.py
│ ├── haiku_modules.py
│ ├── mapping.py
│ └── utils.py
├── confidence_types.py
├── confidences.py
├── data3.py
├── data_constants.py
├── diffusion
│ ├── __pycache__
│ │ ├── atom_cross_attention.cpython-311.pyc
│ │ ├── confidence_head.cpython-311.pyc
│ │ ├── diffusion_head.cpython-311.pyc
│ │ ├── diffusion_transformer.cpython-311.pyc
│ │ ├── distogram_head.cpython-311.pyc
│ │ ├── featurization.cpython-311.pyc
│ │ ├── model.cpython-311.pyc
│ │ ├── modules.cpython-311.pyc
│ │ └── template_modules.cpython-311.pyc
│ ├── atom_cross_attention.py
│ ├── confidence_head.py
│ ├── diffusion_head.py
│ ├── diffusion_transformer.py
│ ├── distogram_head.py
│ ├── featurization.py
│ ├── model.py
│ ├── modules.py
│ └── template_modules.py
├── feat_batch.py
├── features.py
├── merging_features.py
├── mkdssp_pybind.cc
├── mkdssp_pybind.h
├── mmcif_metadata.py
├── model_config.py
├── msa_pairing.py
├── params.py
├── pipeline
│ ├── __pycache__
│ │ ├── inter_chain_bonds.cpython-311.pyc
│ │ ├── pipeline.cpython-311.pyc
│ │ └── structure_cleaning.cpython-311.pyc
│ ├── inter_chain_bonds.py
│ ├── pipeline.py
│ └── structure_cleaning.py
├── post_processing.py
├── protein_data_processing.py
└── scoring
│ ├── __pycache__
│ ├── alignment.cpython-311.pyc
│ ├── covalent_bond_cleaning.cpython-311.pyc
│ └── scoring.cpython-311.pyc
│ ├── alignment.py
│ ├── covalent_bond_cleaning.py
│ └── scoring.py
├── parsers
└── cpp
│ ├── cif_dict.pyi
│ ├── cif_dict_lib.cc
│ ├── cif_dict_lib.h
│ ├── cif_dict_pybind.cc
│ ├── cif_dict_pybind.h
│ ├── fasta_iterator.pyi
│ ├── fasta_iterator_lib.cc
│ ├── fasta_iterator_lib.h
│ ├── fasta_iterator_pybind.cc
│ ├── fasta_iterator_pybind.h
│ ├── msa_conversion.pyi
│ ├── msa_conversion_pybind.cc
│ └── msa_conversion_pybind.h
├── scripts
├── copy_to_ssd.sh
└── gcp_mount_ssd.sh
├── structure
├── __init__.py
├── __pycache__
│ ├── __init__.cpython-311.pyc
│ ├── bioassemblies.cpython-311.pyc
│ ├── bonds.cpython-311.pyc
│ ├── chemical_components.cpython-311.pyc
│ ├── mmcif.cpython-311.pyc
│ ├── parsing.cpython-311.pyc
│ ├── sterics.cpython-311.pyc
│ ├── structure.cpython-311.pyc
│ ├── structure_tables.cpython-311.pyc
│ ├── table.cpython-311.pyc
│ └── test_utils.cpython-311.pyc
├── bioassemblies.py
├── bonds.py
├── chemical_components.py
├── cpp
│ ├── aggregation.pyi
│ ├── aggregation_pybind.cc
│ ├── aggregation_pybind.h
│ ├── membership.pyi
│ ├── membership_pybind.cc
│ ├── membership_pybind.h
│ ├── mmcif_altlocs.cc
│ ├── mmcif_altlocs.h
│ ├── mmcif_atom_site.pyi
│ ├── mmcif_atom_site_pybind.cc
│ ├── mmcif_atom_site_pybind.h
│ ├── mmcif_layout.h
│ ├── mmcif_layout.pyi
│ ├── mmcif_layout_lib.cc
│ ├── mmcif_layout_pybind.cc
│ ├── mmcif_layout_pybind.h
│ ├── mmcif_struct_conn.h
│ ├── mmcif_struct_conn.pyi
│ ├── mmcif_struct_conn_lib.cc
│ ├── mmcif_struct_conn_pybind.cc
│ ├── mmcif_struct_conn_pybind.h
│ ├── mmcif_utils.pyi
│ ├── mmcif_utils_pybind.cc
│ ├── mmcif_utils_pybind.h
│ ├── string_array.pyi
│ ├── string_array_pybind.cc
│ └── string_array_pybind.h
├── mmcif.py
├── parsing.py
├── sterics.py
├── structure.py
├── structure_tables.py
├── table.py
└── test_utils.py
├── test_data
├── alphafold_run_outputs
│ ├── run_alphafold_test_output_bucket_1024.pkl
│ └── run_alphafold_test_output_bucket_default.pkl
├── featurised_example.json
├── featurised_example.pkl
├── miniature_databases
│ ├── bfd-first_non_consensus_sequences__subsampled_1000.fasta
│ ├── mgy_clusters__subsampled_1000.fa
│ ├── nt_rna_2023_02_23_clust_seq_id_90_cov_80_rep_seq__subsampled_1000.fasta
│ ├── pdb_seqres_2022_09_28__subsampled_1000.fasta
│ ├── rfam_14_4_clustered_rep_seq__subsampled_1000.fasta
│ ├── rnacentral_active_seq_id_90_cov_80_linclust__subsampled_1000.fasta
│ ├── uniprot_all__subsampled_1000.fasta
│ └── uniref90__subsampled_1000.fasta
└── model_config.json
└── version.py
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/.DS_Store
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | src/alphafold3/constants/converters/ccd.pickle filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | cmake_minimum_required(VERSION 3.28)
12 | project(
13 | "${SKBUILD_PROJECT_NAME}"
14 | LANGUAGES CXX
15 | VERSION "${SKBUILD_PROJECT_VERSION}")
16 |
17 | include(FetchContent)
18 | set(CMAKE_CXX_STANDARD 20)
19 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
20 | set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
21 | set(ABSL_PROPAGATE_CXX_STD ON)
22 |
23 | # Remove support for scan deps, which is only useful when using C++ modules.
24 | unset(CMAKE_CXX_SCANDEP_SOURCE)
25 |
26 | FetchContent_Declare(
27 | abseil-cpp
28 | GIT_REPOSITORY https://github.com/abseil/abseil-cpp
29 | GIT_TAG d7aaad83b488fd62bd51c81ecf16cd938532cc0a # 20240116.2
30 | EXCLUDE_FROM_ALL)
31 |
32 | FetchContent_Declare(
33 | pybind11
34 | GIT_REPOSITORY https://github.com/pybind/pybind11
35 | GIT_TAG 2e0815278cb899b20870a67ca8205996ef47e70f # v2.12.0
36 | EXCLUDE_FROM_ALL)
37 |
38 | FetchContent_Declare(
39 | pybind11_abseil
40 | GIT_REPOSITORY https://github.com/pybind/pybind11_abseil
41 | GIT_TAG bddf30141f9fec8e577f515313caec45f559d319 # HEAD @ 2024-08-07
42 | EXCLUDE_FROM_ALL)
43 |
44 |
45 | FetchContent_Declare(
46 | cifpp
47 | GIT_REPOSITORY https://github.com/pdb-redo/libcifpp
48 | GIT_TAG ac98531a2fc8daf21131faa0c3d73766efa46180 # v7.0.3
49 | # Do not use `EXCLUDE_FROM_ALL` here, as cifpp is necessary for build_data.
50 | )
51 |
52 | FetchContent_Declare(
53 | dssp
54 | GIT_REPOSITORY https://github.com/PDB-REDO/dssp
55 | GIT_TAG 57560472b4260dc41f457706bc45fc6ef0bc0f10 # v4.4.7
56 | EXCLUDE_FROM_ALL)
57 |
58 | FetchContent_MakeAvailable(pybind11 abseil-cpp pybind11_abseil cifpp dssp)
59 |
60 | find_package(
61 | Python3
62 | COMPONENTS Interpreter Development NumPy
63 | REQUIRED)
64 |
65 | include_directories(${PYTHON_INCLUDE_DIRS})
66 | include_directories(src/)
67 |
68 | file(GLOB_RECURSE cpp_srcs src/alphafold3/*.cc)
69 | list(FILTER cpp_srcs EXCLUDE REGEX ".*\(_test\|_main\|_benchmark\).cc$")
70 |
71 | add_compile_definitions(NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION)
72 |
73 | pybind11_add_module(cpp ${cpp_srcs})
74 |
75 | target_link_libraries(
76 | cpp
77 | PRIVATE absl::check
78 | absl::flat_hash_map
79 | absl::node_hash_map
80 | absl::strings
81 | absl::status
82 | absl::statusor
83 | absl::log
84 | pybind11_abseil::absl_casters
85 | Python3::NumPy
86 | dssp::dssp
87 | cifpp::cifpp)
88 |
89 | target_compile_definitions(cpp PRIVATE VERSION_INFO=${PROJECT_VERSION})
90 | install(TARGETS cpp LIBRARY DESTINATION alphafold3)
91 | install(
92 | FILES LICENSE
93 | OUTPUT_TERMS_OF_USE.md
94 | WEIGHTS_PROHIBITED_USE_POLICY.md
95 | WEIGHTS_TERMS_OF_USE.md
96 | DESTINATION alphafold3)
97 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
# Copyright 2024 DeepMind Technologies Limited
#
# AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
# this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
#
# To request access to the AlphaFold 3 model parameters, follow the process set
# out at https://github.com/google-deepmind/alphafold3. You may only use these
# if received directly from Google. Use is subject to terms of use available at
# https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md

FROM nvidia/cuda:12.6.0-base-ubuntu22.04

# Some RUN statements are combined together to make Docker build run faster.
# Get latest package listing, install software-properties-common, git, wget,
# compilers and libraries.
# git is required for pyproject.toml toolchain's use of CMakeLists.txt.
# gcc, g++, make are required for compiling hmmer and AlphaFold 3 libraries.
# zlib is a required dependency of AlphaFold 3.
RUN apt update --quiet \
    && apt install --yes --quiet software-properties-common \
    && apt install --yes --quiet git wget gcc g++ make zlib1g-dev zstd

# Get apt repository of specific Python versions. Then install Python. Tell APT
# this isn't an interactive TTY to avoid timezone prompt when installing.
RUN add-apt-repository ppa:deadsnakes/ppa \
    && DEBIAN_FRONTEND=noninteractive apt install --yes --quiet python3.11 python3-pip python3.11-venv python3.11-dev
RUN python3.11 -m venv /alphafold3_venv
ENV PATH="/hmmer/bin:/alphafold3_venv/bin:$PATH"
# Update pip to the latest version. Not necessary in Docker, but good to do when
# this is used as a recipe for local installation since we rely on new pip
# features for secure installs.
RUN pip3 install --upgrade pip

# Install HMMER. Do so before copying the source code, so that docker can cache
# the image layer containing HMMER.
RUN mkdir /hmmer_build /hmmer ; \
    wget http://eddylab.org/software/hmmer/hmmer-3.4.tar.gz --directory-prefix /hmmer_build ; \
    (cd /hmmer_build && tar zxf hmmer-3.4.tar.gz && rm hmmer-3.4.tar.gz) ; \
    (cd /hmmer_build/hmmer-3.4 && ./configure --prefix /hmmer) ; \
    (cd /hmmer_build/hmmer-3.4 && make -j8) ; \
    (cd /hmmer_build/hmmer-3.4 && make install) ; \
    (cd /hmmer_build/hmmer-3.4/easel && make install) ; \
    rm -R /hmmer_build

# Copy the AlphaFold 3 source code from the local machine to the container and
# set the working directory to there.
COPY . /app/alphafold
WORKDIR /app/alphafold

# Install the Python dependencies AlphaFold 3 needs.
RUN pip3 install -r dev-requirements.txt
RUN pip3 install --no-deps .
# Build chemical components database (this binary was installed by pip).
RUN build_data

# To work around a known XLA issue causing the compilation time to greatly
# increase, the following environment variable setting XLA flags must be enabled
# when running AlphaFold 3:
ENV XLA_FLAGS="--xla_gpu_enable_triton_gemm=false"
# Memory settings used for folding up to 5,120 tokens on A100 80 GB.
ENV XLA_PYTHON_CLIENT_PREALLOCATE=true
ENV XLA_CLIENT_MEM_FRACTION=0.95

# The entry-point script shipped at the root of this repository is
# run_af3score.py; there is no run_alphafold.py to launch.
CMD ["python3", "run_af3score.py"]
65 |
--------------------------------------------------------------------------------
/docker/dockerignore:
--------------------------------------------------------------------------------
1 | dockerignore
2 | Dockerfile
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We welcome small patches related to bug fixes and documentation, but we do not
4 | plan to make any major changes to this repository.
5 |
6 | ## Before You Begin
7 |
8 | ### Sign Our Contributor License Agreement
9 |
10 | Contributions to this project must be accompanied by a
11 | [Contributor License Agreement](https://cla.developers.google.com/about) (CLA).
12 | You (or your employer) retain the copyright to your contribution; this simply
13 | gives us permission to use and redistribute your contributions as part of the
14 | project.
15 |
16 | If you or your current employer have already signed the Google CLA (even if it
17 | was for a different project), you probably don't need to do it again.
18 |
19 | Visit <https://cla.developers.google.com/> to see your current agreements or to
20 | sign a new one.
21 |
22 | ### Review Our Community Guidelines
23 |
24 | This project follows
25 | [Google's Open Source Community Guidelines](https://opensource.google/conduct/).
26 |
27 | ## Contribution Process
28 |
29 | We won't accept pull requests directly, but if you send one, we will review it.
30 | If we send a fix based on your pull request, we will make sure to credit you in
31 | the release notes.
32 |
--------------------------------------------------------------------------------
/docs/header.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/docs/header.jpg
--------------------------------------------------------------------------------
/docs/known_issues.md:
--------------------------------------------------------------------------------
1 | # Known Issues
2 |
3 | ### Devices other than NVIDIA A100 or H100
4 |
5 | There are currently known unresolved numerical issues with using devices other
6 | than NVIDIA A100 and H100. For now, accuracy has only been validated for A100
7 | and H100 GPU device types. See
8 | [this Issue](https://github.com/google-deepmind/alphafold3/issues/59) for
9 | tracking.
10 |
--------------------------------------------------------------------------------
/example/1_extract_chains.py:
--------------------------------------------------------------------------------
1 | from Bio import PDB
2 | from Bio.PDB import Structure, Model, Chain
3 | from Bio.PDB import PDBParser, MMCIFIO
4 | import os
5 | import pandas as pd
6 | from tqdm import tqdm
7 | import multiprocessing as mp
8 | from pathlib import Path
9 |
# Three-letter to one-letter amino acid codes used when reading chains.
protein_letters_3to1 = {
    'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E',
    'PHE': 'F', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I',
    'LYS': 'K', 'LEU': 'L', 'MET': 'M', 'ASN': 'N',
    'PRO': 'P', 'GLN': 'Q', 'ARG': 'R', 'SER': 'S',
    'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
    'MSE': 'M',  # Selenomethionine is typically treated as methionine
}


def get_sequence(chain):
    """Get the one-letter amino acid sequence of a chain.

    Only residues whose ``id[0]`` is a single space are included; residues
    flagged otherwise (hetero atoms, waters) are skipped. Residue names that
    are not in ``protein_letters_3to1`` are written as ``'X'``.

    Args:
        chain: Iterable of residue objects exposing ``id`` and
            ``get_resname()``.

    Returns:
        The sequence as a string; empty if the chain has no standard residues.
    """
    letters = []
    for residue in chain:
        if residue.id[0] != ' ':
            continue
        try:
            resname = residue.get_resname().upper()
        except (AttributeError, TypeError):
            # Best effort: a residue without a usable name becomes 'X'. The
            # catch is deliberately narrow so KeyboardInterrupt/SystemExit
            # are no longer swallowed as they were by a bare `except:`.
            letters.append('X')
            continue
        letters.append(protein_letters_3to1.get(resname, 'X'))
    return ''.join(letters)
31 |
def process_single_pdb(args):
    """Split one PDB file into per-chain mmCIF files and collect sequences.

    Args:
        args: Tuple ``(input_pdb, output_dir_cif)``: the PDB file path and the
            directory that receives ``<base>_chain_<id>.cif`` files.

    Returns:
        ``(base_name, chain_sequences)`` where ``chain_sequences`` maps chain
        ID to sequence for the first model, or ``(None, None)`` if anything
        fails (the error is printed, not raised).
    """
    input_pdb, output_dir_cif = args
    try:
        structure = PDB.PDBParser(QUIET=True).get_structure("structure", input_pdb)
        base_name, _ = os.path.splitext(os.path.basename(input_pdb))

        chain_sequences = {}
        for chain in structure[0]:
            chain_sequences[chain.id] = get_sequence(chain)

            # Wrap a copy of the chain in its own structure/model so that it
            # can be written out as a standalone file.
            single_model = Model.Model(0)
            single_chain_structure = Structure.Structure("new_structure")
            single_chain_structure.add(single_model)
            single_model.add(chain.copy())

            writer = MMCIFIO()
            writer.set_structure(single_chain_structure)
            writer.save(
                os.path.join(output_dir_cif, f"{base_name}_chain_{chain.id}.cif")
            )
    except Exception as e:
        print(f"\nError processing {input_pdb}: {str(e)}")
        return None, None
    return base_name, chain_sequences
59 |
def main():
    """Extract per-chain CIFs and a sequence table from every PDB in ./pdb.

    Each ``*.pdb`` file in ``./pdb`` is processed in a worker pool by
    ``process_single_pdb``. The collected sequences are written to
    ``complex_chain_sequences.csv`` with one row per complex and one
    ``chain_<id>_seq`` column per chain ID seen in any complex.
    """
    input_dir = "./pdb"  # Input directory
    output_dir_cif = "./complex_chain_cifs"  # CIF output directory

    # Create output directory
    os.makedirs(output_dir_cif, exist_ok=True)

    # Get all PDB files
    pdb_files = list(Path(input_dir).glob("*.pdb"))

    # Prepare parameters for process pool
    args = [(str(f), output_dir_cif) for f in pdb_files]

    # Process files using process pool
    with mp.Pool(processes=mp.cpu_count()) as pool:
        results = list(tqdm(
            pool.imap(process_single_pdb, args),
            total=len(pdb_files),
            desc="Processing PDB files",
        ))

    # Collect results; failed files come back as (None, None) and are dropped.
    sequences_dict = {
        base_name: chain_sequences
        for base_name, chain_sequences in results
        if base_name is not None
    }

    # Find all possible chain IDs and sort them by custom order
    all_chain_ids = set()
    for complex_data in sequences_dict.values():
        all_chain_ids.update(complex_data)

    def chain_sort_key(chain_id):
        # IDs starting with 'B' sort first, IDs starting with 'A' sort last,
        # everything else in between (presumably binder before target --
        # confirm against the downstream scripts).
        if chain_id.startswith('B'):
            return ('0', chain_id)
        elif chain_id.startswith('A'):
            return ('2', chain_id)
        else:
            return ('1', chain_id)

    all_chain_ids = sorted(all_chain_ids, key=chain_sort_key)

    # Create DataFrame. Columns are passed explicitly so that an empty result
    # (no PDB files, or every file failed) still yields a frame with a
    # 'complex' column instead of raising KeyError on column selection.
    columns = ['complex'] + [f'chain_{chain_id}_seq' for chain_id in all_chain_ids]
    rows = []
    for complex_name, chain_data in sequences_dict.items():
        row = {'complex': complex_name}
        for chain_id in all_chain_ids:
            row[f'chain_{chain_id}_seq'] = chain_data.get(chain_id, '')
        rows.append(row)

    df = pd.DataFrame(rows, columns=columns)

    # Save CSV file
    df.to_csv('complex_chain_sequences.csv', index=False)
    print("\nSequence information has been saved to complex_chain_sequences.csv")


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/example/3_generate_json.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pandas as pd
3 | import os
4 |
def format_msa_sequence(sequence):
    """Return the sequence as a single FASTA-style record headed by ``>query``."""
    header = ">query"
    return f"{header}\n{sequence}\n"
8 |
def get_chain_sequences(row):
    """Collect ``(chain_id, sequence)`` pairs for the non-empty chains of a row.

    Columns named ``chain_<id>_seq`` are visited in the order they appear in
    ``row.index``; missing (NaN) and empty-string values are skipped.
    """
    pairs = []
    for column in row.index:
        if not (column.startswith('chain_') and column.endswith('_seq')):
            continue
        value = row[column]
        if pd.isna(value) or value == '':
            continue
        # The chain ID is the second underscore-separated field,
        # e.g. 'A' from 'chain_A_seq'.
        pairs.append((column.split('_')[1], value))
    return pairs
20 |
def generate_json_files(csv_path, output_dir, cif_dir):
    """Generate one AlphaFold 3 input JSON per complex listed in a CSV file.

    Every chain becomes a ``protein`` entry whose MSAs contain only the query
    sequence and whose single template is ``<complex>_chain_<id>.cif`` from
    ``cif_dir``, aligned position-by-position to the query.

    Args:
        csv_path: CSV with a ``complex`` column and ``chain_<id>_seq`` columns.
        output_dir: Directory receiving ``<complex>.json`` (created if needed).
        cif_dir: Directory holding the per-chain CIF files; the joined path is
            stored verbatim as ``mmcifPath`` in the JSON.

    Chains whose CIF file is missing are skipped with a warning, as are
    complexes left with no usable chain.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Read everything as text: type inference would turn a name like "0001"
    # into the integer 1 (wrong CIF filename, and not JSON serializable) and
    # would read a sequence such as "NA" as a missing value.
    df = pd.read_csv(csv_path, dtype=str, keep_default_na=False)
    json_count = 0

    # Process each row
    for _, row in df.iterrows():
        complex_name = row['complex']  # Get name from complex column

        # Get sequences for all chains
        chain_sequences = get_chain_sequences(row)

        if not chain_sequences:  # Skip if no valid chain sequences
            print(f"Warning: {complex_name} has no valid chain sequences")
            continue

        # Create a list of all chain sequences
        sequences = []
        included_chain_ids = []
        for chain_id, sequence in chain_sequences:
            # Build cif file path
            cif_filename = f"{complex_name}_chain_{chain_id}.cif"
            cif_path = os.path.join(cif_dir, cif_filename)

            # Check if cif file exists
            if not os.path.exists(cif_path):
                print(f"Warning: {cif_filename} does not exist")
                continue

            sequences.append({
                "protein": {
                    "id": chain_id,
                    "sequence": sequence,
                    "modifications": [],
                    "unpairedMsa": format_msa_sequence(sequence),
                    "pairedMsa": format_msa_sequence(sequence),
                    "templates": [{
                        "mmcifPath": cif_path,
                        # Identity mapping between query and template residues.
                        "queryIndices": list(range(len(sequence))),
                        "templateIndices": list(range(len(sequence)))
                    }]
                }
            })
            included_chain_ids.append(chain_id)

        if not sequences:  # Skip if no valid sequence data
            print(f"Warning: {complex_name} has no valid sequence data")
            continue

        # Create complete JSON data
        json_data = {
            "dialect": "alphafold3",
            "version": 1,
            "name": complex_name,
            "sequences": sequences,
            "modelSeeds": [10],
            "bondedAtomPairs": None,
            "userCCD": None
        }

        # Output file name has no _data suffix so that it matches the H5 filename
        output_filename = f"{complex_name}.json"
        output_path = os.path.join(output_dir, output_filename)

        # Write JSON file
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=2)

        # Report only the chains actually written, not those skipped above.
        print(f"Generated JSON file: {output_filename} (chains: {', '.join(included_chain_ids)})")
        json_count += 1

    print(f"\nComplete, generated {json_count} JSON files")
95 |
if __name__ == "__main__":
    csv_path = "./complex_chain_sequences.csv"  # Path to the CSV file just generated
    output_dir = "./complex_json_files"  # Output directory for JSON files
    # Directory where the per-chain CIF files were written. Resolved to an
    # absolute path (instead of a hard-coded machine-specific one) because the
    # value is stored verbatim as the template path inside each JSON file.
    cif_dir = os.path.abspath("./complex_chain_cifs")

    generate_json_files(csv_path, output_dir, cif_dir)
--------------------------------------------------------------------------------
/example/subset_data.csv:
--------------------------------------------------------------------------------
1 | topo,scaff_class,description,kd_lb,kd_ub,low_conf,avid_doesnt_agree,avid_lb,avid_ub,lowest_conc,highest_conc,binder_4000_nm,binder_400_nm,target,binder_800_nm,one,SCORE:_x,af2_complex_rmsd,af2_monomer_rmsd,af2_target_rmsd,sasa,SCORE:_y,pae_binder,pae_interaction,pae_interaction1,pae_interaction2,pae_target,plddt_binder,plddt_target,plddt_total,time_x,ddg,contact_molecular_surface,ddg_norepack,ss_sc,score_per_res,mon_all_rmsd,mon_ca_rmsd,mon_plddt,global_lddt,interface_lddt,binder_lddt,time_y,rf2_pae_interaction,rf2_binder_lddt
2 | 3h,,bcov_v4_r4_ems_p1-15H-GBL-16H-GBL-16H_0129_0001_0001_0002_000000002_0001_0001_19_34_H_.._ems_p1-16H-GBL-16H-GABBL-16H_0045_0001_0001_0002_0001_0001_0001_0001,0.0,19.85538460557901,False,False,0.0,3991.816142372666,5.0,1000.0,True,True,IL7Ra,,1,SCORE:,1.885,0.562,0.357,1738.284,SCORE:,2.102,5.468999999999999,5.303999999999999,5.634,4.106,93.961,95.215,94.933,14.054,-64.37899999999999,504.437,-68.735,0.818,-3.851,1.719,0.591,94.812,0.980531,0.976447,0.981153,22.087951,21.828000000000003,0.989
3 |
--------------------------------------------------------------------------------
/example/subset_data_with_metrics.csv:
--------------------------------------------------------------------------------
1 | topo,scaff_class,description,kd_lb,kd_ub,low_conf,avid_doesnt_agree,avid_lb,avid_ub,lowest_conc,highest_conc,binder_4000_nm,binder_400_nm,target,binder_800_nm,one,SCORE:_x,af2_complex_rmsd,af2_monomer_rmsd,af2_target_rmsd,sasa,SCORE:_y,pae_binder,pae_interaction,pae_interaction1,pae_interaction2,pae_target,plddt_binder,plddt_target,plddt_total,time_x,ddg,contact_molecular_surface,ddg_norepack,ss_sc,score_per_res,mon_all_rmsd,mon_ca_rmsd,mon_plddt,global_lddt,interface_lddt,binder_lddt,time_y,rf2_pae_interaction,rf2_binder_lddt,AF3Score_monomer_ca_plddt,AF3Score_monomer_pae,AF3Score_monomer_ptm,AF3Score_complex_ca_plddt,AF3Score_complex_pae,AF3Score_complex_ptm,AF3Score_complex_iptm,AF3Score_pae_interaction,AF3Score_ipae
2 | 3h,,bcov_v4_r4_ems_p1-15H-GBL-16H-GBL-16H_0129_0001_0001_0002_000000002_0001_0001_19_34_H_.._ems_p1-16H-GBL-16H-GABBL-16H_0045_0001_0001_0002_0001_0001_0001_0001,0.0,19.85538460557901,False,False,0.0,3991.816142372666,5.0,1000.0,True,True,IL7Ra,,1,SCORE:,1.885,0.562,0.357,1738.284,SCORE:,2.102,5.468999999999999,5.303999999999999,5.634,4.106,93.961,95.215,94.933,14.054,-64.37899999999999,504.437,-68.735,0.818,-3.851,1.719,0.591,94.812,0.980531,0.976447,0.981153,22.087951,21.828000000000003,0.989,93.40089285714285,1.4386444708680144,0.8,95.07144578313253,4.306372477863261,0.9,0.89,5.620281273131014,2.4683658170914544
3 |
--------------------------------------------------------------------------------
/fetch_databases.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Copyright 2024 DeepMind Technologies Limited
#
# AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
# this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
#
# To request access to the AlphaFold 3 model parameters, follow the process set
# out at https://github.com/google-deepmind/alphafold3. You may only use these
# if received directly from Google. Use is subject to terms of use available at
# https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md

# Downloads and decompresses the databases into the directory given as the
# first argument (default: $HOME/public_databases). All downloads run in
# parallel as background jobs.

set -euo pipefail

readonly db_dir=${1:-$HOME/public_databases}

# Abort up front if a required tool is missing instead of failing later inside
# a background pipeline.
for cmd in wget tar zstd ; do
  if ! command -v "${cmd}" > /dev/null 2>&1; then
    echo "${cmd} is not installed. Please install it." >&2
    exit 1
  fi
done

echo "Fetching databases to ${db_dir}"
mkdir -p "${db_dir}"

readonly SOURCE=https://storage.googleapis.com/alphafold-databases/v3.0

# PIDs of the background download jobs, so each one's exit status can be checked.
pids=()

echo "Start Fetching and Untarring 'pdb_2022_09_28_mmcif_files.tar'"
wget --quiet --output-document=- \
    "${SOURCE}/pdb_2022_09_28_mmcif_files.tar.zst" | \
    tar --use-compress-program=zstd -xf - --directory="${db_dir}" &
pids+=("$!")

for NAME in mgy_clusters_2022_05.fa \
            bfd-first_non_consensus_sequences.fasta \
            uniref90_2022_05.fa uniprot_all_2021_04.fa \
            pdb_seqres_2022_09_28.fasta \
            rnacentral_active_seq_id_90_cov_80_linclust.fasta \
            nt_rna_2023_02_23_clust_seq_id_90_cov_80_rep_seq.fasta \
            rfam_14_9_clust_seq_id_90_cov_80_rep_seq.fasta ; do
  echo "Start Fetching '${NAME}'"
  wget --quiet --output-document=- "${SOURCE}/${NAME}.zst" | \
      zstd --decompress > "${db_dir}/${NAME}" &
  pids+=("$!")
done

# A bare `wait` returns 0 regardless of how the jobs ended, so wait on each
# job individually and remember whether any of them failed.
failed=0
for pid in "${pids[@]}"; do
  wait "${pid}" || failed=1
done

if [[ "${failed}" -ne 0 ]]; then
  echo "One or more downloads failed." >&2
  exit 1
fi

echo "Complete"
46 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = [
3 | "scikit_build_core",
4 | "pybind11",
5 | "cmake>=3.28",
6 | "ninja",
7 | "numpy",
8 | ]
9 | build-backend = "scikit_build_core.build"
10 |
11 | [project]
12 | name = "alphafold3"
13 | version = "3.0.0"
14 | requires-python = ">=3.11"
15 | readme = "README.md"
16 | license = {file = "LICENSE"}
17 | dependencies = [
18 | "absl-py",
19 | "chex",
20 | "dm-haiku==0.0.13",
21 | "dm-tree",
22 | "jax==0.4.34",
23 | "jax[cuda12]==0.4.34",
24 | "jax-triton==0.2.0",
25 | "jaxtyping==0.2.34",
26 | "numpy",
27 | "rdkit==2024.3.5",
28 | "triton==3.1.0",
29 | "tqdm",
30 | "typeguard==2.13.3",
31 | "zstandard",
32 | ]
33 |
34 | [project.optional-dependencies]
35 | test = ["pytest>=6.0"]
36 |
37 | [tool.scikit-build]
38 | wheel.exclude = [
39 | "**.pyx",
40 | "**/CMakeLists.txt",
41 | "**.cc",
42 | "**.h"
43 | ]
44 | sdist.include = [
45 | "LICENSE",
46 | "OUTPUT_TERMS_OF_USE.md",
47 | "WEIGHTS_PROHIBITED_USE_POLICY.md",
48 | "WEIGHTS_TERMS_OF_USE.md",
49 | ]
50 |
51 | [tool.cibuildwheel]
52 | build = "cp3*-manylinux_x86_64"
53 | manylinux-x86_64-image = "manylinux_2_28"
54 |
55 | [project.scripts]
56 | build_data = "alphafold3.build_data:build_data"
57 |
--------------------------------------------------------------------------------
/src/alphafold3/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """An implementation of the inference pipeline of AlphaFold 3."""
12 |
--------------------------------------------------------------------------------
/src/alphafold3/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/__pycache__/__init__.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/__pycache__/build_data.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/__pycache__/build_data.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/__pycache__/version.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/__pycache__/version.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/build_data.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Script for building intermediate data."""
12 |
13 | from importlib import resources
14 | import pathlib
15 | import site
16 |
17 | import alphafold3.constants.converters
18 | from alphafold3.constants.converters import ccd_pickle_gen
19 | from alphafold3.constants.converters import chemical_component_sets_gen
20 |
21 |
def build_data():
  """Generates the pickled chemical-component data inside the package.

  Probes every site-packages directory for `share/libcifpp/components.cif`,
  printing each candidate path, and uses the first one that exists. It then
  runs the two converter scripts so that `ccd.pickle` and
  `chemical_component_sets.pickle` are written next to the converters.

  Raises:
    ValueError: If no site-packages directory contains components.cif.
  """
  cif_path = None
  for site_dir in site.getsitepackages():
    candidate = pathlib.Path(site_dir) / 'share/libcifpp/components.cif'
    print(candidate)
    if candidate.exists():
      cif_path = candidate
      break
  if cif_path is None:
    raise ValueError('Could not find components.cif')

  converters_dir = resources.files(alphafold3.constants.converters)
  ccd_pickle = converters_dir.joinpath('ccd.pickle')
  component_sets_pickle = converters_dir.joinpath(
      'chemical_component_sets.pickle'
  )

  # Both converters take an argv-style list whose first item (the program
  # name) is ignored, hence the empty string.
  ccd_pickle_gen.main(['', str(cif_path), str(ccd_pickle)])
  chemical_component_sets_gen.main(['', str(component_sets_pickle)])
42 |
--------------------------------------------------------------------------------
/src/alphafold3/common/__pycache__/base_config.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/common/__pycache__/base_config.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/common/__pycache__/folding_input.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/common/__pycache__/folding_input.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/common/__pycache__/resources.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/common/__pycache__/resources.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/common/resources.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Load external resources, such as external tools or data resources."""
12 |
13 | from collections.abc import Iterator
14 | import os
15 | import pathlib
16 | import typing
17 | from typing import BinaryIO, Final, Literal, TextIO
18 |
19 | from importlib import resources
20 | import alphafold3.common
21 |
22 |
# Resolved parent directory of the `alphafold3.common` package. All resource
# names handled by this module are joined onto this directory.
_DATA_ROOT: Final[pathlib.Path] = (
    resources.files(alphafold3.common) / '..'
).resolve()

# Public alias of the resource root.
ROOT = _DATA_ROOT
27 |
28 |
def filename(name: str | os.PathLike[str]) -> str:
  """Returns the path of a resource as a POSIX-style string.

  The path is built by joining `name` onto the resource root; nothing is
  checked on disk, so the returned path is not guaranteed to exist.

  Args:
    name: The path of the resource relative to the resource root (`ROOT`).
  """
  resource_path = _DATA_ROOT / name
  return resource_path.as_posix()
41 |
42 |
# The overloads mirror the runtime signature: omitting `mode` opens the
# resource in binary mode, so the defaulted overload is the one returning
# BinaryIO. Text mode has to be requested explicitly.
@typing.overload
def open_resource(
    name: str | os.PathLike[str], mode: Literal['rb'] = 'rb'
) -> BinaryIO:
  ...


@typing.overload
def open_resource(
    name: str | os.PathLike[str], mode: Literal['r', 'rt']
) -> TextIO:
  ...


def open_resource(
    name: str | os.PathLike[str], mode: str = 'rb'
) -> TextIO | BinaryIO:
  """Returns an open file object for the named resource.

  The caller owns the returned file object and is responsible for closing it,
  e.g. by using it in a `with` statement.

  Args:
    name: The path of the resource relative to the resource root (`ROOT`).
    mode: The mode to use when opening the file. Defaults to binary read
      ('rb').
  """
  return (_DATA_ROOT / name).open(mode)
68 |
69 |
def get_resource_dir(path: str | os.PathLike[str]) -> os.PathLike[str]:
  """Returns `path` joined onto the resource root.

  Args:
    path: The directory path relative to the resource root (`ROOT`).
  """
  resource_dir = _DATA_ROOT / path
  return resource_dir
72 |
73 |
def walk(path: str) -> Iterator[tuple[str, list[str], list[str]]]:
  """Returns an `os.walk` iterator over a directory below the resource root.

  Args:
    path: The directory to walk, relative to the resource root (`ROOT`).
  """
  top_dir = (_DATA_ROOT / path).as_posix()
  return os.walk(top_dir)
77 |
--------------------------------------------------------------------------------
/src/alphafold3/common/testing/__pycache__/data.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/common/testing/__pycache__/data.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/common/testing/data.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Module that provides an abstraction for accessing test data."""
12 |
13 | import os
14 | import pathlib
15 | from typing import Literal, overload
16 |
17 | from absl.testing import absltest
18 |
19 |
20 | class Data:
21 | """Provides an abstraction for accessing test data."""
22 |
23 | def __init__(self, data_dir: os.PathLike[str] | str):
24 | """Initiailizes data wrapper, providing users with high level data access.
25 |
26 | Args:
27 | data_dir: Directory containing test data.
28 | """
29 | self._data_dir = pathlib.Path(data_dir)
30 |
31 | def path(self, data_name: str | os.PathLike[str] | None = None) -> str:
32 | """Returns the path to a given test data.
33 |
34 | Args:
35 | data_name: the name of the test data file relative to data_dir. If not
36 | set, this will return the absolute path to the data directory.
37 | """
38 | data_dir_path = (
39 | pathlib.Path(absltest.get_default_test_srcdir()) / self._data_dir
40 | )
41 |
42 | if data_name:
43 | return str(data_dir_path / data_name)
44 |
45 | return str(data_dir_path)
46 |
47 | @overload
48 | def load(
49 | self, data_name: str | os.PathLike[str], mode: Literal['rt'] = 'rt'
50 | ) -> str:
51 | ...
52 |
53 | @overload
54 | def load(
55 | self, data_name: str | os.PathLike[str], mode: Literal['rb'] = 'rb'
56 | ) -> bytes:
57 | ...
58 |
59 | def load(
60 | self, data_name: str | os.PathLike[str], mode: str = 'rt'
61 | ) -> str | bytes:
62 | """Returns the contents of a given test data.
63 |
64 | Args:
65 | data_name: the name of the test data file relative to data_dir.
66 | mode: the mode in which to read the data file. Defaults to text ('rt').
67 | """
68 | with open(self.path(data_name), mode=mode) as f:
69 | return f.read()
70 |
--------------------------------------------------------------------------------
/src/alphafold3/constants/__pycache__/atom_types.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/atom_types.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/constants/__pycache__/chemical_component_sets.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/chemical_component_sets.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/constants/__pycache__/chemical_components.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/chemical_components.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/constants/__pycache__/mmcif_names.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/mmcif_names.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/constants/__pycache__/periodic_table.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/periodic_table.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/constants/__pycache__/residue_names.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/residue_names.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/constants/__pycache__/side_chains.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/__pycache__/side_chains.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/constants/chemical_component_sets.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Sets of chemical components."""
12 |
13 | import pickle
14 | from typing import Final
15 |
16 | from alphafold3.common import resources
17 |
18 |
# Location of the pickled component sets that is bundled with the package.
_CCD_SETS_CCD_PICKLE_FILE = resources.filename(
    resources.ROOT / 'constants/converters/chemical_component_sets.pickle'
)

# Load inside a `with` block so the file handle is closed as soon as the
# pickle has been read instead of being left open.
with open(_CCD_SETS_CCD_PICKLE_FILE, 'rb') as _ccd_sets_file:
  _CCD_SET = pickle.load(_ccd_sets_file)

# Glycan (or 'Saccharide') ligands.
# _chem_comp.type containing 'saccharide' and 'linking' (when lower-case).
GLYCAN_LINKING_LIGANDS: Final[frozenset[str]] = _CCD_SET['glycans_linking']

# _chem_comp.type containing 'saccharide' and not 'linking' (when lower-case).
GLYCAN_OTHER_LIGANDS: Final[frozenset[str]] = _CCD_SET['glycans_other']

# Each of these molecules appears in over 1k PDB structures, are used to
# facilitate crystallization conditions, but do not have biological relevance.
COMMON_CRYSTALLIZATION_AIDS: Final[frozenset[str]] = frozenset({
    'SO4', 'GOL', 'EDO', 'PO4', 'ACT', 'PEG', 'DMS', 'TRS', 'PGE', 'PG4', 'FMT',
    'EPE', 'MPD', 'MES', 'CD', 'IOD',
})  # pyformat: disable
38 |
--------------------------------------------------------------------------------
/src/alphafold3/constants/converters/__pycache__/ccd_pickle_gen.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/converters/__pycache__/ccd_pickle_gen.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/constants/converters/__pycache__/chemical_component_sets_gen.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/converters/__pycache__/chemical_component_sets_gen.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/constants/converters/ccd_pickle_gen.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Reads Chemical Components gz file and generates a CCD pickle file."""
12 |
13 | from collections.abc import Sequence
14 | import gzip
15 | import pickle
16 | import sys
17 |
18 | from alphafold3.cpp import cif_dict
19 | import tqdm
20 |
21 |
def main(argv: Sequence[str]) -> None:
  """Parses a components.cif file and writes its contents as a pickle.

  Args:
    argv: Exactly three items: the program name (ignored), the path of the
      input components.cif (read through gzip if it ends in '.gz') and the
      path of the output pickle file.

  Raises:
    ValueError: If argv does not contain exactly three items.
  """
  if len(argv) != 3:
    raise ValueError('Must specify input_file components.cif and output_file')

  input_file = argv[1]
  output_file = argv[2]

  print(f'Parsing {input_file}', flush=True)
  opener = gzip.open if input_file.endswith('.gz') else open

  with opener(input_file, 'rb') as f:
    whole_file = f.read()
    parsed = cif_dict.parse_multi_data_cif(whole_file)
    result = {}
    for key, value in tqdm.tqdm(parsed.items()):
      # Store every value as a tuple.
      result[key] = {k: tuple(v) for k, v in value.items()}
    # Sanity check: one parsed entry per `data_` occurrence in the raw bytes.
    assert len(result) == whole_file.count(b'data_')

  print(f'Writing {output_file}', flush=True)
  with open(output_file, 'wb') as f:
    pickle.dump(result, f, protocol=pickle.HIGHEST_PROTOCOL)
  print('Done', flush=True)


if __name__ == '__main__':
  main(sys.argv)
51 |
--------------------------------------------------------------------------------
/src/alphafold3/constants/converters/chemical_component_sets.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/constants/converters/chemical_component_sets.pickle
--------------------------------------------------------------------------------
/src/alphafold3/constants/converters/chemical_component_sets_gen.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Script for updating chemical_component_sets.py."""
12 |
13 | from collections.abc import Mapping, Sequence
14 | import pathlib
15 | import pickle
16 | import re
17 | import sys
18 |
19 | from alphafold3.common import resources
20 | import tqdm
21 |
22 |
# Path of the pickled CCD that `main` loads as its input.
_CCD_PICKLE_FILE = resources.filename('constants/converters/ccd.pickle')
26 |
27 |
def find_ions_and_glycans_in_ccd(
    ccd: Mapping[str, Mapping[str, Sequence[str]]],
) -> dict[str, frozenset[str]]:
  """Sorts CCD components into linking glycans, other glycans and ions.

  A component is a glycan if its lower-cased `_chem_comp.type` contains the
  whole word 'saccharide'; glycans are split on whether that type also
  contains 'linking'. A component is an ion if its lower-cased
  `_chem_comp.name` contains the whole word 'ion'. The component 'UNX' is
  always skipped.

  Args:
    ccd: Mapping from component name to its CCD fields; the first value of
      `_chem_comp.type` and of `_chem_comp.name` is inspected.

  Returns:
    A dict with the keys 'glycans_linking', 'glycans_other' and 'ions', each
    holding a frozenset of component names.
  """
  saccharide_word = re.compile(r'\bsaccharide\b')
  ion_word = re.compile(r'\bion\b')

  linking_glycans: set[str] = set()
  other_glycans: set[str] = set()
  ion_names: set[str] = set()

  for component_id, component in tqdm.tqdm(ccd.items()):
    if component_id == 'UNX':
      continue  # Skip "unknown atom or ion".

    chem_type = component['_chem_comp.type'][0].lower()
    if saccharide_word.search(chem_type) is not None:
      if 'linking' in chem_type:
        linking_glycans.add(component_id)
      else:
        other_glycans.add(component_id)

    chem_name = component['_chem_comp.name'][0].lower()
    if ion_word.search(chem_name) is not None:
      ion_names.add(component_id)

  return {
      'glycans_linking': frozenset(linking_glycans),
      'glycans_other': frozenset(other_glycans),
      'ions': frozenset(ion_names),
  }
58 |
59 |
def main(argv: Sequence[str]) -> None:
  """Derives the ion/glycan component sets from the CCD pickle and saves them.

  Args:
    argv: Exactly two items: the program name (ignored) and the path of the
      pickle file to write.

  Raises:
    ValueError: If argv does not contain exactly two items.
  """
  if len(argv) != 2:
    raise ValueError(
        'Directory to write to must be specified as a command-line arguments.'
    )

  print(f'Loading {_CCD_PICKLE_FILE}', flush=True)
  with open(_CCD_PICKLE_FILE, 'rb') as f:
    ccd: Mapping[str, Mapping[str, Sequence[str]]] = pickle.load(f)
  output_path = pathlib.Path(argv[1])
  # parents=True so that an output path below several missing directory
  # levels works, rather than failing when only the last level may be absent.
  output_path.parent.mkdir(parents=True, exist_ok=True)
  print('Finding ions and glycans', flush=True)
  result = find_ions_and_glycans_in_ccd(ccd)
  print(f'writing to {output_path}', flush=True)
  with output_path.open('wb') as f:
    pickle.dump(result, f)
  print('Done', flush=True)


if __name__ == '__main__':
  main(sys.argv)
81 |
--------------------------------------------------------------------------------
/src/alphafold3/constants/side_chains.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Constants associated with side chains."""
12 |
13 | from collections.abc import Mapping, Sequence
14 | import itertools
15 |
# Atoms that define each side-chain chi angle, keyed by three-letter residue
# name. Every entry lists chi1, chi2, chi3, chi4 in that order (or the leading
# subset that the residue has), each as a tuple of four atom names. ALA and GLY
# have no chi angles, so their lists are empty.
CHI_ANGLES_ATOMS: Mapping[str, Sequence[tuple[str, ...]]] = {
    'ALA': [],
    # Chi5 in arginine is always 0 +- 5 degrees, so it is left out.
    'ARG': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD'),
        ('CB', 'CG', 'CD', 'NE'),
        ('CG', 'CD', 'NE', 'CZ'),
    ],
    'ASN': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'OD1'),
    ],
    'ASP': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'OD1'),
    ],
    'CYS': [
        ('N', 'CA', 'CB', 'SG'),
    ],
    'GLN': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD'),
        ('CB', 'CG', 'CD', 'OE1'),
    ],
    'GLU': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD'),
        ('CB', 'CG', 'CD', 'OE1'),
    ],
    'GLY': [],
    'HIS': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'ND1'),
    ],
    'ILE': [
        ('N', 'CA', 'CB', 'CG1'),
        ('CA', 'CB', 'CG1', 'CD1'),
    ],
    'LEU': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD1'),
    ],
    'LYS': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD'),
        ('CB', 'CG', 'CD', 'CE'),
        ('CG', 'CD', 'CE', 'NZ'),
    ],
    'MET': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'SD'),
        ('CB', 'CG', 'SD', 'CE'),
    ],
    'PHE': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD1'),
    ],
    'PRO': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD'),
    ],
    'SER': [
        ('N', 'CA', 'CB', 'OG'),
    ],
    'THR': [
        ('N', 'CA', 'CB', 'OG1'),
    ],
    'TRP': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD1'),
    ],
    'TYR': [
        ('N', 'CA', 'CB', 'CG'),
        ('CA', 'CB', 'CG', 'CD1'),
    ],
    'VAL': [
        ('N', 'CA', 'CB', 'CG1'),
    ],
}
64 |
# The table is filled in through a plain dict first and only afterwards
# re-declared with its read-only Mapping annotation.
CHI_GROUPS_FOR_ATOM = {}
for res_name, chi_angle_atoms_for_res in CHI_ANGLES_ATOMS.items():
  for chi_group_i, chi_group in enumerate(chi_angle_atoms_for_res):
    for atom_i, atom in enumerate(chi_group):
      if (res_name, atom) not in CHI_GROUPS_FOR_ATOM:
        CHI_GROUPS_FOR_ATOM[(res_name, atom)] = []
      CHI_GROUPS_FOR_ATOM[(res_name, atom)].append((chi_group_i, atom_i))

# Maps each (residue_name, atom_name) pair to every (chi group index, atom
# index within that group) position at which the atom occurs.
CHI_GROUPS_FOR_ATOM: Mapping[tuple[str, str], Sequence[tuple[int, int]]] = (
    CHI_GROUPS_FOR_ATOM
)

MAX_NUM_CHI_ANGLES: int = 4
ATOMS_PER_CHI_ANGLE: int = 4

# For each residue name, the set of all atom names that take part in any of
# its chi angles (empty for residues without chi angles).
CHI_ATOM_SETS: Mapping[str, set[str]] = {
    residue_name: {
        atom_name for chi_atoms in atoms for atom_name in chi_atoms
    }
    for residue_name, atoms in CHI_ANGLES_ATOMS.items()
}
87 |
# Mask for chi angles stored in a fixed-length array of four per residue: row
# i has 1.0 for each chi angle the residue type has and 0.0 for the padding.
# Rows are listed in the residue order spelled out below (ALA ... VAL).
# NOTE(review): presumably this matches the residue-type order used by the
# callers that index this table - confirm.
CHI_ANGLES_MASK: Sequence[Sequence[float]] = tuple(
    (1.0,) * num_chi_angles + (0.0,) * (4 - num_chi_angles)
    for _residue_name, num_chi_angles in (
        ('ALA', 0),
        ('ARG', 4),
        ('ASN', 2),
        ('ASP', 2),
        ('CYS', 1),
        ('GLN', 3),
        ('GLU', 3),
        ('GLY', 0),
        ('HIS', 2),
        ('ILE', 2),
        ('LEU', 2),
        ('LYS', 4),
        ('MET', 3),
        ('PHE', 2),
        ('PRO', 2),
        ('SER', 1),
        ('THR', 1),
        ('TRP', 2),
        ('TYR', 2),
        ('VAL', 1),
    )
)
112 |
--------------------------------------------------------------------------------
/src/alphafold3/cpp.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | #include "alphafold3/data/cpp/msa_profile_pybind.h"
12 | #include "alphafold3/model/mkdssp_pybind.h"
13 | #include "alphafold3/parsers/cpp/cif_dict_pybind.h"
14 | #include "alphafold3/parsers/cpp/fasta_iterator_pybind.h"
15 | #include "alphafold3/parsers/cpp/msa_conversion_pybind.h"
16 | #include "alphafold3/structure/cpp/aggregation_pybind.h"
17 | #include "alphafold3/structure/cpp/membership_pybind.h"
18 | #include "alphafold3/structure/cpp/mmcif_atom_site_pybind.h"
19 | #include "alphafold3/structure/cpp/mmcif_layout_pybind.h"
20 | #include "alphafold3/structure/cpp/mmcif_struct_conn_pybind.h"
21 | #include "alphafold3/structure/cpp/mmcif_utils_pybind.h"
22 | #include "alphafold3/structure/cpp/string_array_pybind.h"
23 | #include "pybind11/pybind11.h"
24 |
namespace alphafold3 {
namespace {

// Defines the Python extension module `cpp`.
// Include all modules as submodules to simplify building: every
// RegisterModule* function declared in the *_pybind.h headers above is handed
// a newly created submodule of `cpp` (e.g. `cpp.cif_dict`) to populate.
PYBIND11_MODULE(cpp, m) {
  RegisterModuleCifDict(m.def_submodule("cif_dict"));
  RegisterModuleFastaIterator(m.def_submodule("fasta_iterator"));
  RegisterModuleMsaConversion(m.def_submodule("msa_conversion"));
  RegisterModuleMmcifLayout(m.def_submodule("mmcif_layout"));
  RegisterModuleMmcifStructConn(m.def_submodule("mmcif_struct_conn"));
  RegisterModuleMembership(m.def_submodule("membership"));
  RegisterModuleMmcifUtils(m.def_submodule("mmcif_utils"));
  RegisterModuleAggregation(m.def_submodule("aggregation"));
  RegisterModuleStringArray(m.def_submodule("string_array"));
  RegisterModuleMmcifAtomSite(m.def_submodule("mmcif_atom_site"));
  RegisterModuleMkdssp(m.def_submodule("mkdssp"));
  RegisterModuleMsaProfile(m.def_submodule("msa_profile"));
}

}  // namespace
}  // namespace alphafold3
46 |
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/featurisation.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/featurisation.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/msa.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/msa.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/msa_config.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/msa_config.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/msa_features.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/msa_features.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/parsers.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/parsers.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/pipeline.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/pipeline.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/structure_stores.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/structure_stores.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/template_realign.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/template_realign.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/__pycache__/templates.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/__pycache__/templates.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/cpp/msa_profile_pybind.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
#include <algorithm>

#include "absl/strings/str_cat.h"
#include "pybind11/cast.h"
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
17 |
18 | namespace {
19 |
20 | namespace py = pybind11;
21 |
22 | py::array_t ComputeMsaProfile(
23 | const py::array_t& msa, int num_residue_types) {
24 | if (msa.size() == 0) {
25 | throw py::value_error("The MSA must be non-empty.");
26 | }
27 | if (msa.ndim() != 2) {
28 | throw py::value_error(absl::StrCat("The MSA must be rectangular, got ",
29 | msa.ndim(), "-dimensional MSA array."));
30 | }
31 | const int msa_depth = msa.shape()[0];
32 | const int sequence_length = msa.shape()[1];
33 |
34 | py::array_t profile({sequence_length, num_residue_types});
35 | std::fill(profile.mutable_data(), profile.mutable_data() + profile.size(),
36 | 0.0f);
37 | auto profile_unchecked = profile.mutable_unchecked<2>();
38 |
39 | const double normalized_count = 1.0 / msa_depth;
40 | const int* msa_it = msa.data();
41 | for (int row_index = 0; row_index < msa_depth; ++row_index) {
42 | for (int column_index = 0; column_index < sequence_length; ++column_index) {
43 | const int residue_code = *(msa_it++);
44 | if (residue_code < 0 || residue_code >= num_residue_types) {
45 | throw py::value_error(
46 | absl::StrCat("All residue codes must be positive and smaller than "
47 | "num_residue_types ",
48 | num_residue_types, ", got ", residue_code));
49 | }
50 | profile_unchecked(column_index, residue_code) += normalized_count;
51 | }
52 | }
53 | return profile;
54 | }
55 |
// Python docstring attached to `compute_msa_profile`. The raw string starts
// with a newline purely for source readability; the registration code passes
// `kComputeMsaProfileDoc + 1` to skip that first character.
constexpr char kComputeMsaProfileDoc[] = R"(
Computes MSA profile for the given encoded MSA.

Args:
msa: A Numpy array of shape (num_msa, num_res) with the integer coded MSA.
num_residue_types: Integer that determines the number of unique residue types.
This will determine the shape of the output profile.

Returns:
A float Numpy array of shape (num_res, num_residue_types) with residue
frequency (residue type count normalized by MSA depth) for every column of the
MSA.
)";
69 |
70 | } // namespace
71 |
namespace alphafold3 {

// Registers `compute_msa_profile` on the given pybind11 module, exposing
// ComputeMsaProfile with keyword arguments `msa` and `num_residue_types`.
void RegisterModuleMsaProfile(pybind11::module m) {
  // `+ 1` skips the leading newline of the raw-string docstring.
  m.def("compute_msa_profile", &ComputeMsaProfile, py::arg("msa"),
        py::arg("num_residue_types"), py::doc(kComputeMsaProfileDoc + 1));
}

}  // namespace alphafold3
80 |
--------------------------------------------------------------------------------
/src/alphafold3/data/cpp/msa_profile_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_DATA_PYTHON_MSA_PROFILE_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_DATA_PYTHON_MSA_PROFILE_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
namespace alphafold3 {

// Adds the MSA profile bindings to the pybind11 module `m`.
void RegisterModuleMsaProfile(pybind11::module m);

}  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_DATA_PYTHON_MSA_PROFILE_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/data/featurisation.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """AlphaFold 3 featurisation pipeline."""
12 |
13 | from collections.abc import Sequence
14 | import datetime
15 | import time
16 |
17 | from alphafold3.common import folding_input
18 | from alphafold3.constants import chemical_components
19 | from alphafold3.model import features
20 | from alphafold3.model.pipeline import pipeline
21 | import numpy as np
22 |
23 |
def validate_fold_input(fold_input: folding_input.Input):
  """Checks that every chain carries the data featurisation relies on.

  Protein chains must have an unpaired MSA, a paired MSA and templates; RNA
  chains must have an unpaired MSA. Only `None` counts as missing.

  Args:
    fold_input: The folding input to check.

  Raises:
    ValueError: On the first missing field, naming the 1-based position of the
      chain within its chain list.
  """
  # (attribute name, label used in the error message), checked in this order.
  protein_requirements = (
      ('unpaired_msa', 'unpaired MSA'),
      ('paired_msa', 'paired MSA'),
      ('templates', 'Templates'),
  )
  for position, chain in enumerate(fold_input.protein_chains, start=1):
    for attribute, label in protein_requirements:
      if getattr(chain, attribute) is None:
        raise ValueError(f'Protein chain {position} is missing {label}.')

  for position, chain in enumerate(fold_input.rna_chains, start=1):
    if chain.unpaired_msa is None:
      raise ValueError(f'RNA chain {position} is missing unpaired MSA.')
36 |
37 |
def featurise_input(
    fold_input: folding_input.Input,
    ccd: chemical_components.Ccd,
    buckets: Sequence[int] | None,
    max_template_date: datetime.date | None = None,
    verbose: bool = False,
) -> Sequence[features.BatchDict]:
  """Turns a validated folding input into one feature batch per RNG seed.

  Args:
    fold_input: The input to featurise. It is validated first, so a missing MSA
      or missing templates raise before any featurisation work starts.
    ccd: The chemical components dictionary.
    buckets: Bucket sizes to pad the data to, to avoid excessive re-compilation
      of the model. If None, calculate the appropriate bucket size from the
      number of tokens. If not None, must be a sequence of at least one integer,
      in strictly increasing order. Will raise an error if the number of tokens
      is more than the largest bucket size.
    max_template_date: Optional max template date to prevent data leakage in
      validation.
    verbose: Whether to print progress and timing messages.

  Returns:
    A featurised batch for each rng_seed in the input, in the same order as
    `fold_input.rng_seeds`.
  """
  validate_fold_input(fold_input)

  # A single pipeline instance is built once and reused for every seed.
  pipeline_config = pipeline.WholePdbPipeline.Config(
      buckets=buckets, max_template_date=max_template_date
  )
  data_pipeline = pipeline.WholePdbPipeline(config=pipeline_config)

  def _featurise_with_seed(rng_seed):
    """Runs the pipeline for one seed, optionally reporting the wall time."""
    started_at = time.time()
    if verbose:
      print(f'Featurising {fold_input.name} with rng_seed {rng_seed}.')
    # The seed is used both to build the RandomState and as `random_seed`.
    batch = data_pipeline.process_item(
        fold_input=fold_input,
        ccd=ccd,
        random_state=np.random.RandomState(rng_seed),
        random_seed=rng_seed,
    )
    if verbose:
      print(
          f'Featurising {fold_input.name} with rng_seed {rng_seed} '
          f'took {time.time() - started_at:.2f} seconds.'
      )
    return batch

  return [_featurise_with_seed(rng_seed) for rng_seed in fold_input.rng_seeds]
91 |
--------------------------------------------------------------------------------
/src/alphafold3/data/msa_identifiers.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Utilities for extracting identifiers from MSA sequence descriptions."""
12 |
13 | import dataclasses
14 | import re
15 |
16 |
17 | # Sequences coming from UniProtKB database come in the
18 | # `db|UniqueIdentifier|EntryName` format, e.g. `tr|A0A146SKV9|A0A146SKV9_FUNHE`
19 | # or `sp|P0C2L1|A3X1_LOXLA` (for TREMBL/Swiss-Prot respectively).
20 | _UNIPROT_PATTERN = re.compile(
21 | r"""
22 | ^
23 | # UniProtKB/TrEMBL or UniProtKB/Swiss-Prot
24 | (?:tr|sp)
25 | \|
26 | # A primary accession number of the UniProtKB entry.
27 | (?P[A-Za-z0-9]{6,10})
28 | # Occasionally there is a _0 or _1 isoform suffix, which we ignore.
29 | (?:_\d)?
30 | \|
31 | # TREMBL repeats the accession ID here. Swiss-Prot has a mnemonic
32 | # protein ID code.
33 | (?:[A-Za-z0-9]+)
34 | _
35 | # A mnemonic species identification code.
36 | (?P([A-Za-z0-9]){1,5})
37 | # Small BFD uses a final value after an underscore, which we ignore.
38 | (?:_\d+)?
39 | $
40 | """,
41 | re.VERBOSE,
42 | )
43 |
44 |
@dataclasses.dataclass(frozen=True)
class Identifiers:
  """Immutable record of identifiers taken from an MSA sequence description."""

  # Species identifier; defaults to the empty string.
  species_id: str = ''
48 |
49 |
def _parse_sequence_identifier(msa_sequence_identifier: str) -> Identifiers:
  """Gets species from an msa sequence identifier.

  The sequence identifier has the format specified by `_UNIPROT_PATTERN`.
  An example of a sequence identifier: `tr|A0A146SKV9|A0A146SKV9_FUNHE`

  Args:
    msa_sequence_identifier: a sequence identifier. Surrounding whitespace is
      stripped before matching.

  Returns:
    An `Identifiers` instance with species_id taken from the pattern's
    `SpeciesIdentifier` group, or a default (empty) `Identifiers` when the
    identifier does not match the pattern.
  """
  match = _UNIPROT_PATTERN.search(msa_sequence_identifier.strip())
  if match is None:
    return Identifiers()
  return Identifiers(species_id=match.group('SpeciesIdentifier'))
68 |
69 |
70 | def _extract_sequence_identifier(description: str) -> str | None:
71 | """Extracts sequence identifier from description. Returns None if no match."""
72 | split_description = description.split()
73 | if split_description:
74 | return split_description[0].partition('/')[0]
75 | else:
76 | return None
77 |
78 |
def get_identifiers(description: str) -> Identifiers:
  """Derives `Identifiers` from an MSA sequence description.

  The sequence identifier is extracted from the description and then parsed.
  A description that yields no identifier gives a default `Identifiers`.
  """
  sequence_identifier = _extract_sequence_identifier(description)
  if sequence_identifier is not None:
    return _parse_sequence_identifier(sequence_identifier)
  return Identifiers()
86 |
--------------------------------------------------------------------------------
/src/alphafold3/data/msa_store.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Interface and implementations for fetching MSA data."""
12 |
13 | from collections.abc import Sequence
14 | from typing import Protocol, TypeAlias
15 |
16 | from alphafold3.data import msa
17 | from alphafold3.data import msa_config
18 |
19 |
# Failed MSA queries: one (run config, error message) pair per failure.
MsaErrors: TypeAlias = Sequence[tuple[msa_config.RunConfig, str]]
21 |
22 |
23 | class MsaProvider(Protocol):
24 | """Interface for providing Multiple Sequence Alignments."""
25 |
26 | def __call__(
27 | self,
28 | query_sequence: str,
29 | chain_polymer_type: str,
30 | ) -> tuple[msa.Msa, MsaErrors]:
31 | """Retrieve MSA for the given polymer query_sequence.
32 |
33 | Args:
34 | query_sequence: The residue sequence of the polymer to search for.
35 | chain_polymer_type: The polymer type of the query_sequence. This must
36 | match the chain_polymer_type of the provider.
37 |
38 | Returns:
39 | A tuple containing the MSA and MsaErrors. MsaErrors is a Sequence
40 | containing a tuple for each msa_query that failed. Each tuple contains
41 | the failing query and the associated error message.
42 | """
43 |
44 |
class EmptyMsaProvider:
  """MSA provider whose MSA is only the query sequence; handy in tests."""

  def __init__(self, chain_polymer_type: str):
    """Stores the single polymer type this provider accepts."""
    self._chain_polymer_type = chain_polymer_type

  def __call__(
      self, query_sequence: str, chain_polymer_type: str
  ) -> tuple[msa.Msa, MsaErrors]:
    """Builds a query-only MSA and reports no errors.

    Args:
      query_sequence: The sequence to wrap in an MSA.
      chain_polymer_type: Must equal the type given at construction.

    Returns:
      The MSA from `msa.Msa.from_empty` paired with an empty error tuple.

    Raises:
      ValueError: If `chain_polymer_type` differs from the provider's type.
    """
    if chain_polymer_type == self._chain_polymer_type:
      query_only_msa = msa.Msa.from_empty(
          query_sequence=query_sequence,
          chain_poly_type=self._chain_polymer_type,
      )
      no_errors = ()
      return query_only_msa, no_errors

    raise ValueError(
        f'EmptyMsaProvider of type {self._chain_polymer_type} called with '
        f'sequence of {chain_polymer_type=}, {query_sequence=}.'
    )
67 |
--------------------------------------------------------------------------------
/src/alphafold3/data/structure_stores.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Library for loading structure data from various sources."""
12 |
13 | from collections.abc import Mapping, Sequence
14 | import functools
15 | import os
16 | import pathlib
17 | import tarfile
18 |
19 |
class NotFoundError(KeyError):
  """Signals that a structure store has no entry for the requested target."""
22 |
23 |
24 | class StructureStore:
25 | """Handles the retrieval of mmCIF files from a filesystem."""
26 |
27 | def __init__(
28 | self,
29 | structures: str | os.PathLike[str] | Mapping[str, str],
30 | ):
31 | """Initialises the instance.
32 |
33 | Args:
34 | structures: Path of the directory where the mmCIF files are or a Mapping
35 | from target name to mmCIF string.
36 | """
37 | if isinstance(structures, Mapping):
38 | self._structure_mapping = structures
39 | self._structure_path = None
40 | self._structure_tar = None
41 | else:
42 | self._structure_mapping = None
43 | path_str = os.fspath(structures)
44 | if path_str.endswith('.tar'):
45 | self._structure_tar = tarfile.open(path_str, 'r')
46 | self._structure_path = None
47 | else:
48 | self._structure_path = pathlib.Path(structures)
49 | self._structure_tar = None
50 |
51 | @functools.cached_property
52 | def _tar_members(self) -> Mapping[str, tarfile.TarInfo]:
53 | assert self._structure_tar is not None
54 | return {
55 | path.stem: tarinfo
56 | for tarinfo in self._structure_tar.getmembers()
57 | if tarinfo.isfile()
58 | and (path := pathlib.Path(tarinfo.path.lower())).suffix == '.cif'
59 | }
60 |
61 | def get_mmcif_str(self, target_name: str) -> str:
62 | """Returns an mmCIF for a given `target_name`.
63 |
64 | Args:
65 | target_name: Name specifying the target mmCIF.
66 |
67 | Raises:
68 | NotFoundError: If the target is not found.
69 | """
70 | if self._structure_mapping is not None:
71 | try:
72 | return self._structure_mapping[target_name]
73 | except KeyError as e:
74 | raise NotFoundError(f'{target_name=} not found') from e
75 |
76 | if self._structure_tar is not None:
77 | try:
78 | member = self._tar_members[target_name]
79 | if struct_file := self._structure_tar.extractfile(member):
80 | return struct_file.read().decode()
81 | else:
82 | raise NotFoundError(f'{target_name=} not found')
83 | except KeyError:
84 | raise NotFoundError(f'{target_name=} not found') from None
85 |
86 | filepath = self._structure_path / f'{target_name}.cif'
87 | try:
88 | return filepath.read_text()
89 | except FileNotFoundError as e:
90 | raise NotFoundError(f'{target_name=} not found at {filepath=}') from e
91 |
92 | def target_names(self) -> Sequence[str]:
93 | """Returns all targets in the store."""
94 | if self._structure_mapping is not None:
95 | return [*self._structure_mapping.keys()]
96 | elif self._structure_tar is not None:
97 | return sorted(self._tar_members.keys())
98 | elif self._structure_path is not None:
99 | return sorted([path.stem for path in self._structure_path.glob('*.cif')])
100 | return ()
101 |
--------------------------------------------------------------------------------
/src/alphafold3/data/template_store.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Interface and implementations for fetching templates data."""
12 |
13 | from collections.abc import Mapping
14 | import datetime
15 | from typing import Any, Protocol, TypeAlias
16 |
17 |
# Template features keyed by feature label.
TemplateFeatures: TypeAlias = Mapping[str, Any]
19 |
20 |
21 | class TemplateFeatureProvider(Protocol):
22 | """Interface for providing Template Features."""
23 |
24 | def __call__(
25 | self,
26 | sequence: str,
27 | release_date: datetime.date | None,
28 | include_ligand_features: bool = True,
29 | ) -> TemplateFeatures:
30 | """Retrieve template features for the given sequence and release_date.
31 |
32 | Args:
33 | sequence: The residue sequence of the query.
34 | release_date: The release_date of the template query, this is used to
35 | filter templates for training, ensuring that they do not leak structure
36 | information from the future.
37 | include_ligand_features: Whether to include ligand features.
38 |
39 | Returns:
40 | Template features: A mapping of template feature labels to features, which
41 | may be numpy arrays, bytes objects, or for the special case of label
42 | `ligand_features`, a nested feature map of labels to numpy arrays.
43 |
44 | Raises:
45 | TemplateRetrievalError if the template features were not found.
46 | """
47 |
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/__pycache__/hmmalign.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/hmmalign.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/__pycache__/hmmbuild.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/hmmbuild.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/__pycache__/hmmsearch.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/hmmsearch.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/__pycache__/jackhmmer.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/jackhmmer.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/__pycache__/msa_tool.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/msa_tool.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/__pycache__/nhmmer.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/nhmmer.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/__pycache__/rdkit_utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/rdkit_utils.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/__pycache__/subprocess_utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/data/tools/__pycache__/subprocess_utils.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/msa_tool.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Defines protocol for MSA tools."""
12 |
13 | import dataclasses
14 | from typing import Protocol
15 |
16 |
17 | @dataclasses.dataclass(frozen=True, slots=True, kw_only=True)
18 | class MsaToolResult:
19 | """The result of a MSA tool query."""
20 |
21 | target_sequence: str
22 | e_value: float
23 | a3m: str
24 |
25 |
class MsaTool(Protocol):
  """Structural interface that every MSA tool wrapper satisfies."""

  def query(self, target_sequence: str) -> MsaToolResult:
    """Runs the MSA tool on `target_sequence` and returns its result."""
    ...
31 |
--------------------------------------------------------------------------------
/src/alphafold3/data/tools/subprocess_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Helper functions for launching external tools."""
12 |
13 | from collections.abc import Sequence
14 | import os
15 | import subprocess
16 | import time
17 | from typing import Any
18 |
19 | from absl import logging
20 |
21 |
def create_query_fasta_file(sequence: str, path: str, linewidth: int = 80):
  """Creates a fasta file with the sequence with line width limit.

  The file holds a `>query` header line followed by the sequence split into
  lines of at most `linewidth` characters. An empty sequence produces a file
  containing only the header.

  Args:
    sequence: The sequence to write.
    path: Destination file path; an existing file is overwritten.
    linewidth: Maximum number of sequence characters per line. Must be
      positive.

  Raises:
    ValueError: If `linewidth` is not positive. This is checked before the
      file is opened, so a bad value never truncates an existing file.
  """
  # A non-positive width would never advance through the sequence.
  if linewidth <= 0:
    raise ValueError(f'linewidth must be positive, got {linewidth}.')

  with open(path, 'w') as f:
    f.write('>query\n')
    for start in range(0, len(sequence), linewidth):
      f.write(f'{sequence[start:start + linewidth]}\n')
31 |
32 |
def check_binary_exists(path: str, name: str) -> None:
  """Raises unless something exists at `path`.

  Args:
    path: Filesystem location where the binary is expected.
    name: Binary name used in the error message.

  Raises:
    RuntimeError: If nothing exists at `path`.
  """
  if os.path.exists(path):
    return
  raise RuntimeError(f'{name} binary not found at {path}')
37 |
38 |
def run(
    cmd: Sequence[str],
    cmd_name: str,
    log_on_process_error: bool = False,
    log_stderr: bool = False,
    log_stdout: bool = False,
    max_out_streams_len: int | None = 500_000,
    **run_kwargs,
) -> subprocess.CompletedProcess[Any]:
  """Runs a command as a subprocess, logs its duration and checks the exit code.

  stdout and stderr are always captured as text.

  Args:
    cmd: Command to launch.
    cmd_name: Human-readable command name to be used in logs.
    log_on_process_error: Whether to use `logging.error` to log the process'
      stderr on failure.
    log_stderr: Whether to log the stderr of the command.
    log_stdout: Whether to log the stdout of the command.
    max_out_streams_len: Max length of prefix of stdout and stderr included in
      the exception message. Set to `None` to disable truncation.
    **run_kwargs: Any other kwargs for `subprocess.run`.

  Returns:
    The completed process object.

  Raises:
    RuntimeError: if the process completes with a non-zero return code.
  """
  logging.info('Launching subprocess "%s"', ' '.join(cmd))

  launched_at = time.time()
  try:
    result = subprocess.run(
        cmd,
        check=True,
        stderr=subprocess.PIPE,
        stdout=subprocess.PIPE,
        text=True,
        **run_kwargs,
    )
  except subprocess.CalledProcessError as process_error:
    if log_on_process_error:
      # Logs have a 15k character limit, so log the error line by line.
      logging.error('%s failed. %s stderr begin:', cmd_name, cmd_name)
      for raw_line in process_error.stderr.splitlines():
        line = raw_line.strip()
        if line:
          logging.error(line)
      logging.error('%s stderr end.', cmd_name)

    # Slicing with None keeps the whole stream, which disables truncation.
    stdout_prefix = process_error.stdout[:max_out_streams_len]
    stderr_prefix = process_error.stderr[:max_out_streams_len]
    raise RuntimeError(
        f'{cmd_name} failed'
        f'\nstdout:\n{stdout_prefix}\n'
        f'\nstderr:\n{stderr_prefix}'
    ) from process_error
  elapsed_seconds = time.time() - launched_at

  logging.info('Finished %s in %.3f seconds', cmd_name, elapsed_seconds)

  captured_stdout = result.stdout
  if log_stdout and captured_stdout:
    logging.info('%s stdout:\n%s', cmd_name, captured_stdout)

  captured_stderr = result.stderr
  if log_stderr and captured_stderr:
    logging.info('%s stderr:\n%s', cmd_name, captured_stderr)

  return result
107 |
--------------------------------------------------------------------------------
/src/alphafold3/jax/attention/__pycache__/attention.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/attention/__pycache__/attention.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/attention/__pycache__/attention_base.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/attention/__pycache__/attention_base.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/attention/__pycache__/flash_attention.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/attention/__pycache__/flash_attention.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/attention/__pycache__/xla_attention.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/attention/__pycache__/xla_attention.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/attention/attention_call_arg_specs.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Attention call argument specifications.
12 |
13 | Attention argument specifications used by users of the library.
14 | They are the most important test cases, and also the cases whose performance
15 | is optimized via autotuning.
16 | """
17 |
18 | from typing import Any
19 |
20 | import jax
21 |
22 | ShapedArray = jax.ShapeDtypeStruct
23 |
24 |
25 | def _make_argspec(
26 | *,
27 | q_shape,
28 | dtype,
29 | k_shape=None,
30 | v_shape=None,
31 | bias_shape=None,
32 | mask_shape=None,
33 | **kwargs,
34 | ) -> dict[str, Any]:
35 | """Make argspec from shapes and kwargs."""
36 | if k_shape is None:
37 | k_shape = q_shape
38 | if v_shape is None:
39 | v_shape = k_shape
40 |
41 | return dict(
42 | query=ShapedArray(q_shape, dtype),
43 | key=ShapedArray(k_shape, dtype),
44 | value=ShapedArray(v_shape, dtype),
45 | bias=ShapedArray(bias_shape, dtype) if bias_shape is not None else None,
46 | mask=ShapedArray(mask_shape, 'bool_') if mask_shape is not None else None,
47 | **kwargs,
48 | )
49 |
50 |
# A subset of the full set of argument specifications. Useful for tap-tests and
# microbenchmarks. Keys name the scenario; values are `_make_argspec` results.
CALL_ARG_SPECS = {
    'vanilla_f32': _make_argspec(q_shape=(8, 1024, 4, 128), dtype='float32'),
    'vanilla_bf16': _make_argspec(q_shape=(8, 1024, 4, 128), dtype='bfloat16'),
    'alphafold': _make_argspec(
        q_shape=(384, 384, 4, 32),
        bias_shape=(1, 4, 384, 384),
        mask_shape=(384, 1, 1, 384),
        dtype='bfloat16',
    ),
}
63 |
--------------------------------------------------------------------------------
/src/alphafold3/jax/common/__pycache__/array_view.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/common/__pycache__/array_view.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/common/__pycache__/precision.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/common/__pycache__/precision.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/common/__pycache__/triton_utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/common/__pycache__/triton_utils.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/common/precision.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Precision classes and utilities."""
12 |
13 | import enum
14 |
15 | import jax
16 | import jax.numpy as jnp
17 |
18 |
@enum.unique
class DotPrecision(enum.Enum):
  """Precision for `dot` operation.

  Naming scheme: {OPERAND_DTYPE}_{ACCUMULATOR_DTYPE}[_{NUM_PASSES}x]
  """

  BF16_F32 = "bf16_f32"

  # GPU only precisions.
  F32_F32 = "f32_f32"  # Full f32 precision (doesn't use TensorCores).
  TF32_F32 = "tf32_f32"  # Equivalent to `DEFAULT`/`HIGH` on GPU.
  TF32_F32_3X = "tf32_f32_3x"
  F16_F16 = "f16_f16"
  F16_F32 = "f16_f32"

  @property
  def operand_dtype(self) -> jnp.dtype:
    """Dtype of the `dot` operands for this precision."""
    if self is DotPrecision.BF16_F32:
      return jnp.bfloat16
    if self in (DotPrecision.F16_F16, DotPrecision.F16_F32):
      return jnp.float16
    # Every remaining member (F32 and TF32 variants) takes float32 operands.
    return jnp.float32

  @property
  def accumulator_dtype(self) -> jnp.dtype:
    """Dtype used to accumulate the `dot` result for this precision."""
    if self is DotPrecision.F16_F16:
      return jnp.float16
    return jnp.float32
48 |
49 |
# `DotPrecision` chosen for each (operand dtype, `jax.lax.Precision`) pair;
# registered under the "gpu" backend by `_create_jax_precision_map`.
_JAX_GPU_PRECISION_MAP = {
    (jnp.float16, jax.lax.Precision.DEFAULT): DotPrecision.F16_F32,
    (jnp.bfloat16, jax.lax.Precision.DEFAULT): DotPrecision.BF16_F32,
    (jnp.float32, jax.lax.Precision.DEFAULT): DotPrecision.TF32_F32,
    (jnp.float32, jax.lax.Precision.HIGH): DotPrecision.TF32_F32,
    (jnp.float32, jax.lax.Precision.HIGHEST): DotPrecision.F32_F32,
}

# Same lookup, registered under the "cpu" backend. Apart from the float16
# entry, every combination maps to `F32_F32`.
_JAX_CPU_PRECISION_MAP = {
    (jnp.float16, jax.lax.Precision.DEFAULT): DotPrecision.F16_F32,
    (jnp.bfloat16, jax.lax.Precision.DEFAULT): DotPrecision.F32_F32,
    (jnp.float32, jax.lax.Precision.DEFAULT): DotPrecision.F32_F32,
    (jnp.float32, jax.lax.Precision.HIGH): DotPrecision.F32_F32,
    (jnp.float32, jax.lax.Precision.HIGHEST): DotPrecision.F32_F32,
}
65 |
66 |
def _create_jax_precision_map():
  """Merges the per-backend tables into one flat lookup table.

  Returns:
    A dict keyed by `(backend, jnp.dtype(dtype), jax_precision)` whose values
    are the matching `DotPrecision` members. GPU entries are inserted before
    CPU entries.
  """
  backend_tables = (
      ("gpu", _JAX_GPU_PRECISION_MAP),
      ("cpu", _JAX_CPU_PRECISION_MAP),
  )
  return {
      (backend, jnp.dtype(dtype), jax_precision): dot_precision
      for backend, table in backend_tables
      for (dtype, jax_precision), dot_precision in table.items()
  }
74 |
75 |
# Flat (backend, dtype, jax precision) -> `DotPrecision` table, built once when
# the module is imported.
_JAX_PRECISION_MAP = _create_jax_precision_map()
77 |
78 |
def get_equivalent_dot_precision(
    a_dtype: jnp.dtype, b_dtype: jnp.dtype, jax_precision: jax.lax.Precision
) -> DotPrecision:
  """Returns `DotPrecision` replicating default XLA behaviour.

  Args:
    a_dtype: Dtype of the first `dot` operand. Anything accepted by
      `jnp.dtype` works (dtype object, scalar type such as `jnp.float32`, or
      a dtype name).
    b_dtype: Dtype of the second `dot` operand; must equal `a_dtype`.
    jax_precision: The requested `jax.lax.Precision`.

  Returns:
    The `DotPrecision` registered for the default JAX backend, the operand
    dtype and `jax_precision`.

  Raises:
    ValueError: If the operand dtypes differ, or if a non-`DEFAULT` precision
      is requested for operands that are not `float32`.
    KeyError: If no entry exists for the (backend, dtype, precision)
      combination, e.g. when the default backend is neither "gpu" nor "cpu".
  """
  # The lookup table is keyed by `jnp.dtype(...)` objects. Normalise the inputs
  # the same way so that scalar types or dtype names hit the same entries
  # instead of failing the dictionary lookup.
  a_dtype = jnp.dtype(a_dtype)
  b_dtype = jnp.dtype(b_dtype)
  if a_dtype != b_dtype:
    raise ValueError("Cannot infer precision if operand types differ.")

  backend = jax.default_backend().lower()
  if (jax_precision != jax.lax.Precision.DEFAULT) and (a_dtype != jnp.float32):
    raise ValueError(
        "`jax.lax.Precision` values other than `DEFAULT` only have an effect if"
        " the operand type is `float32`."
    )
  return _JAX_PRECISION_MAP[(backend, a_dtype, jax_precision)]
93 |
--------------------------------------------------------------------------------
/src/alphafold3/jax/common/triton_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Triton utils."""
12 |
13 | from collections.abc import Callable, Mapping
14 |
15 | from alphafold3.jax.common import precision as precision_lib
16 | import jax
17 | import jax.numpy as jnp
18 | import triton
19 | import triton.language as tl
20 |
21 |
# Conversion table from `jax.numpy` scalar types to the corresponding Triton
# language dtypes; consulted by `jnp_to_tl_dtype`.
_JNP_TO_TL_DTYPES: Mapping[jnp.dtype, tl.dtype] = {
    jnp.bool_: tl.int1,
    jnp.int8: tl.int8,
    jnp.int16: tl.int16,
    jnp.int32: tl.int32,
    jnp.int64: tl.int64,
    jnp.uint8: tl.uint8,
    jnp.uint16: tl.uint16,
    jnp.uint32: tl.uint32,
    jnp.uint64: tl.uint64,
    jnp.float16: tl.float16,
    jnp.bfloat16: tl.bfloat16,
    jnp.float32: tl.float32,
    jnp.float64: tl.float64,
}
37 |
38 |
def jnp_to_tl_dtype(jnp_dtype: jnp.dtype) -> tl.dtype:
  """Looks up the Triton dtype registered for `jnp_dtype`.

  Args:
    jnp_dtype: A key of the module's jnp-to-Triton conversion table.

  Returns:
    The matching `tl.dtype`.

  Raises:
    KeyError: If `jnp_dtype` has no entry in the conversion table.
  """
  tl_dtype = _JNP_TO_TL_DTYPES[jnp_dtype]
  return tl_dtype
41 |
42 |
def get_tl_dot_fn(
    precision: precision_lib.DotPrecision,
) -> Callable[..., tl.tensor]:
  """Returns a tl `dot` implementation with the specified precision.

  Args:
    precision: The `dot` precision.

  Returns:
    For `TF32_F32_3X`, the jitted `_dot_tf32_f32_3x` kernel. Otherwise a
    `tl.core.extern` function `_dot_fn(a, b, *, trans_a=False, trans_b=False)`
    that optionally transposes its operands and calls `tl.dot` with the
    accumulator dtype of `precision`.

  Raises:
    ValueError: If `is_precision_supported(precision)` is False.
  """
  if not is_precision_supported(precision):
    raise ValueError(f'Unsupported dot precision: {precision}')

  # The 3-pass variant has its own kernel; no closure is needed for it.
  if precision == precision_lib.DotPrecision.TF32_F32_3X:
    return _dot_tf32_f32_3x

  # Resolved once here and captured by the closure below.
  in_dtype = jnp_to_tl_dtype(precision.operand_dtype)
  out_dtype = jnp_to_tl_dtype(precision.accumulator_dtype)
  # `allow_tf32` is only switched on for the explicit TF32 precision.
  allow_tf32 = precision == precision_lib.DotPrecision.TF32_F32

  @tl.core.extern
  def _dot_fn(
      a: tl.core.tensor,
      b: tl.core.tensor,
      *,
      trans_a: bool = False,
      trans_b: bool = False,
      _builder,
  ):
    if in_dtype == tl.float32:
      # float32 precisions require operands that already are float32.
      tl.static_assert(a.dtype == tl.float32, _builder=_builder)
      tl.static_assert(b.dtype == tl.float32, _builder=_builder)
    else:
      # Other precisions accept any standard floating dtype and convert the
      # operands to the requested operand dtype.
      tl.static_assert(a.dtype.is_standard_floating(), _builder=_builder)
      tl.static_assert(b.dtype.is_standard_floating(), _builder=_builder)
      a = a.to(in_dtype, _builder=_builder)
      b = b.to(in_dtype, _builder=_builder)
    a = tl.trans(a, _builder=_builder) if trans_a else a
    b = tl.trans(b, _builder=_builder) if trans_b else b
    return tl.dot(
        a, b, allow_tf32=allow_tf32, out_dtype=out_dtype, _builder=_builder
    )

  return _dot_fn
85 |
86 |
def is_precision_supported(precision: precision_lib.DotPrecision) -> bool:
  """Returns True if `precision` is one of the supported dot precisions.

  `F16_F16` is the only `DotPrecision` member that is not in the supported
  set.
  """
  dot_precision = precision_lib.DotPrecision
  supported = frozenset((
      dot_precision.F32_F32,
      dot_precision.TF32_F32,
      dot_precision.F16_F32,
      dot_precision.BF16_F32,
      dot_precision.TF32_F32_3X,
  ))
  return precision in supported
95 |
96 |
@triton.jit
def _dot_tf32_f32_3x(a, b, trans_a=False, trans_b=False):
  """Perform the 3-pass tf32 dot function.

  Each float32 operand is split into a truncated "high" part and the residual
  "low" part. The result is the sum of three `tl.dot` calls: high*high,
  high*low and low*high. The low*low product is not computed.

  Args:
    a: First operand; must be float32.
    b: Second operand; must be float32.
    trans_a: Whether to transpose `a` (both of its parts) before the dots.
    trans_b: Whether to transpose `b` (both of its parts) before the dots.

  Returns:
    The float32 sum of the three partial products.
  """
  tl.static_assert(a.dtype == tl.float32)
  tl.static_assert(b.dtype == tl.float32)
  # Masking the bit pattern with 0xFFFFE000 zeroes the 13 lowest bits, keeping
  # the sign, the exponent and the top 10 mantissa bits of each float32 value.
  a_ = (a.to(tl.uint32, bitcast=True) & 0xFFFFE000).to(tl.float32, bitcast=True)
  b_ = (b.to(tl.uint32, bitcast=True) & 0xFFFFE000).to(tl.float32, bitcast=True)
  # Residuals: what the truncation above discarded.
  a_err = a - a_
  b_err = b - b_
  if trans_a:
    a_ = tl.trans(a_)
    a_err = tl.trans(a_err)
  if trans_b:
    b_ = tl.trans(b_)
    b_err = tl.trans(b_err)
  # Add smallest terms first for better accuracy.
  return tl.dot(a_, b_, out_dtype=tl.float32) + (
      tl.dot(a_, b_err, out_dtype=tl.float32)
      + tl.dot(a_err, b_, out_dtype=tl.float32)
  )
117 |
118 |
def has_triton_support() -> bool:
  """Returns True if Triton is supported by the default JAX device.

  Support requires the default backend to be 'gpu' and the first device to
  report a compute capability of at least 8.0 (Ampere and above).
  """
  on_gpu = jax.default_backend() == 'gpu'
  # `and` short-circuits, so devices are only queried on the GPU backend.
  return on_gpu and float(jax.devices()[0].compute_capability) >= 8.0
126 |
--------------------------------------------------------------------------------
/src/alphafold3/jax/gated_linear_unit/__pycache__/block.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/gated_linear_unit/__pycache__/block.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/gated_linear_unit/__pycache__/gated_linear_unit.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/gated_linear_unit/__pycache__/gated_linear_unit.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/gated_linear_unit/__pycache__/gated_linear_unit_base.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/gated_linear_unit/__pycache__/gated_linear_unit_base.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/gated_linear_unit/__pycache__/matmul_config.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/gated_linear_unit/__pycache__/matmul_config.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/gated_linear_unit/__pycache__/matmul_ext.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/gated_linear_unit/__pycache__/matmul_ext.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/gated_linear_unit/block.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Pallas block load / store utilities."""
12 |
13 | from collections.abc import Sequence
14 | from typing import Any, TypeAlias
15 |
16 | from alphafold3.jax.common import array_view
17 | import jax
18 | import jax.experimental
19 | from jax.experimental import pallas as pl
20 | import jax.numpy as jnp
21 | import jaxtyping
22 | from jaxtyping import Int # pylint: disable=g-importing-member
23 | import numpy as np
24 | import typeguard
25 |
# Placeholder for "any array-like container"; it is `Any`, so it places no
# constraint on the container type inside the jaxtyping annotations below.
ArrayT: TypeAlias = Any
# Integer annotation with an empty shape string, accepted as a block index by
# `load_block` / `store_block` alongside plain Python ints.
ScalarInt: TypeAlias = (
    Int[ArrayT, ""] | Int[np.generic, ""] | Int[jnp.generic, ""]
)
30 |
31 |
@jaxtyping.jaxtyped(typechecker=typeguard.typechecked)
def load_block(
    ref,
    idx: Sequence[int | ScalarInt],
    *,
    block_shape: Sequence[int | None],
    other=None,
    **kwargs,
) -> jax.Array:
  """Loads one block from `ref`, masking out-of-bounds elements if needed.

  Args:
    ref: The reference (or `array_view.ArrayView`) to read from.
    idx: One block index per dimension of `ref`.
    block_shape: Block size per dimension; `None` means the matching entry of
      `idx` is used as a plain index for that dimension.
    other: Forwarded to `pl.load` as `other`; dropped when no mask is needed.
    **kwargs: Extra keyword arguments forwarded to `pl.load`.

  Returns:
    The loaded block.
  """
  indexer, mask = _get_block_indexer_and_mask(
      ref, idx, block_shape=block_shape
  )
  source = ref
  if isinstance(ref, array_view.ArrayView):
    # Views are resolved to offsets into their underlying base reference.
    indexer = ref[indexer].offsets
    source = ref.base
  if mask is None:
    other = None
  with jax.experimental.enable_x64():
    return pl.load(source, indexer, mask=mask, other=other, **kwargs)
49 |
50 |
@jaxtyping.jaxtyped(typechecker=typeguard.typechecked)
def store_block(
    ref,
    val: jax.Array,
    idx: Sequence[int | ScalarInt],
    *,
    block_shape: Sequence[int | None] | None = None,
    **kwargs,
):
  """Stores one block into `ref`, masking out-of-bounds elements if needed.

  Args:
    ref: The reference (or `array_view.ArrayView`) to write to.
    val: The block to store; it is cast to the dtype of the target reference.
    idx: One block index per dimension of `ref`.
    block_shape: Block size per dimension; defaults to `val.shape`.
    **kwargs: Extra keyword arguments forwarded to `pl.store`.
  """
  resolved_block_shape = val.shape if block_shape is None else block_shape
  indexer, mask = _get_block_indexer_and_mask(
      ref, idx, block_shape=resolved_block_shape
  )
  target = ref
  if isinstance(ref, array_view.ArrayView):
    # Views are resolved to offsets into their underlying base reference.
    indexer = ref[indexer].offsets
    target = ref.base
  with jax.experimental.enable_x64():
    pl.store(target, indexer, val.astype(target.dtype), mask=mask, **kwargs)
69 |
70 |
71 | def in_bounds_mask(
72 | idx: Sequence[int | slice | pl.Slice | jax.Array],
73 | shape: Sequence[int],
74 | *,
75 | check: Sequence[bool] | None = None,
76 | ) -> jax.Array | None:
77 | """Returns a boolean mask denoting which indices are within bounds.
78 |
79 | Args:
80 | idx: Indices for each dimension.
81 | shape: Shape designating the valid bounds.
82 | check: Whether or not to check bounds in each dimension. Useful for ignoring
83 | indices known to be in bounds. Defaults to all True.
84 | """
85 | if check is None:
86 | check = [True] * len(shape)
87 |
88 | # Remove `int` indexed dims (mask shape must match slice result shape).
89 | shape = [dim for i, dim in enumerate(shape) if not isinstance(idx[i], int)]
90 | check = [chk for i, chk in enumerate(check) if not isinstance(idx[i], int)]
91 | idx = [idx for idx in idx if not isinstance(idx, int)]
92 |
93 | mask = None
94 | for i, (dim_idx, dim, chk) in enumerate(zip(idx, shape, check, strict=True)):
95 | if not chk:
96 | continue
97 |
98 | if isinstance(dim_idx, slice):
99 | dim_idx = pl.Slice.from_slice(dim_idx, dim)
100 | if isinstance(dim_idx, pl.Slice):
101 | dim_idx = dim_idx.start + dim_idx.stride * jnp.arange(dim_idx.size)
102 | if dim_idx.ndim != 1:
103 | raise NotImplementedError("Only one-dimensional indices are supported.")
104 |
105 | bcast_axes = [a for a in range(len(shape)) if a != i]
106 | dim_mask = jnp.expand_dims(dim_idx < dim, bcast_axes)
107 | mask = dim_mask if mask is None else (mask & dim_mask)
108 | return mask
109 |
110 |
def _get_block_indexer_and_mask(
    ref, idx: Sequence[int | ScalarInt], *, block_shape: Sequence[int | None]
) -> tuple[tuple[int | slice | pl.Slice, ...], jax.Array | None]:
  """Computes the indexer and bounds mask used to load / store one block.

  For a dimension with a block size, the indexer is a dynamic slice starting
  at `block_dim * block_idx` of length `block_dim`, and bounds are checked
  only when the dimension is not a multiple of the block size. For a
  dimension whose block size is None, the block index is used as is and no
  bounds check is requested.

  Returns:
    A `(indexer, mask)` pair; `mask` is None when no check is needed.
  """
  shape = ref.shape
  indexer = []
  needs_check = []
  for dim, block_idx, block_dim in zip(shape, idx, block_shape, strict=True):
    if block_dim is not None:
      indexer.append(pl.dslice(block_dim * block_idx, block_dim))
      # Only a ragged final block can reach past the end of the dimension.
      needs_check.append(dim % block_dim != 0)
    else:
      indexer.append(block_idx)
      needs_check.append(False)

  mask = in_bounds_mask(indexer, shape, check=needs_check)
  return tuple(indexer), mask
127 |
--------------------------------------------------------------------------------
/src/alphafold3/jax/gated_linear_unit/gated_linear_unit_base.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Common types for gated linear unit kernels."""
12 |
13 | import abc
14 | from collections.abc import Callable
15 | import functools
16 | from typing import Any
17 |
18 | import jax
19 | import jax.numpy as jnp
20 | import jaxtyping
21 | from jaxtyping import Array, Float # pylint: disable=g-importing-member,g-multiple-import
22 | import typeguard
23 |
24 |
class GatedLinearUnit(abc.ABC):
  """Abstract base class for gated linear unit implementations.

  Subclasses implement `_fwd`; calling an instance forwards to it.
  """

  def __call__(
      self,
      x: Float[Array, '*B M K'],
      weight: Float[Array, 'K 2 N'],
      *,
      activation: Callable[[jax.Array], jax.Array] | None = None,
      precision: jax.lax.Precision | None = None,
      **kwargs,
  ) -> Float[Array, '*B M N']:
    """Applies a gated linear unit (https://arxiv.org/abs/1612.08083).

    Computes `activation(x @ weight[:, 0]) * x @ weight[:, 1]`.

    Args:
      x: the input array.
      weight: the combined weight array.
      activation: optional activation function.
      precision: specifies the matrix multiplication precision. Either `None`
        (default), which means the default precision for the backend, or a
        `jax.lax.Precision` enum.
      **kwargs: additional keyword arguments, forwarded to `_fwd`.

    Returns:
      The output array.
    """
    forward_kwargs = dict(activation=activation, precision=precision, **kwargs)
    return self._fwd(x, weight, **forward_kwargs)

  @property
  def vmap_rule_forward(self) -> Callable[..., Any]:
    """Default vmap rule: delegates to a sequential vmap of the wrapped `fn.fun`."""

    def _vmap_rule(
        axis_size, in_batched, *args, fn: jax.custom_batching.custom_vmap
    ):
      fallback = jax.custom_batching.sequential_vmap(fn.fun)
      return fallback.vmap_rule(axis_size, in_batched, *args)

    return _vmap_rule

  def apply_vmap_rule_forward(
      self, fn: Callable[..., Any], **kwargs
  ) -> jax.custom_batching.custom_vmap:
    """Wraps `fn` (with `kwargs` bound) in a `custom_vmap` using this class's rule.

    Args:
      fn: the function to wrap.
      **kwargs: keyword arguments bound to `fn` before wrapping.

    Returns:
      The `custom_vmap` object with `vmap_rule_forward` registered on it.
    """
    wrapped = jax.custom_batching.custom_vmap(functools.partial(fn, **kwargs))
    # The rule receives the wrapped function itself through the `fn` keyword.
    wrapped.def_vmap(functools.partial(self.vmap_rule_forward, fn=wrapped))
    return wrapped

  @abc.abstractmethod
  def _fwd(
      self,
      x: Float[Array, '*B M K'],
      weight: Float[Array, 'K 2 N'],
      *,
      activation: Callable[[jax.Array], jax.Array] | None,
      precision: jax.lax.Precision | None,
  ) -> Float[Array, '*B M N']:
    """Gated linear unit forward pass; implemented by subclasses."""
    ...
88 |
89 |
@jaxtyping.jaxtyped(typechecker=typeguard.typechecked)
def gated_linear_unit_xla(
    x: Float[Array, '*B M K'],
    weight: Float[Array, 'K 2 N'],
    *,
    activation: Callable[[jax.Array], jax.Array] | None = None,
    precision: jax.lax.Precision | None = None,
) -> Float[Array, '*B M N']:
  """Applies a gated linear unit (https://arxiv.org/abs/1612.08083).

  Computes `activation(x @ weight[:, 0]) * x @ weight[:, 1]`.

  This is SwiGLU when `activation=jax.nn.swish`, GEGLU when
  `activation=jax.nn.gelu`, REGLU when `activation=jax.nn.relu`, and GLU when
  `activation=jax.nn.sigmoid` (https://arxiv.org/abs/2002.05202).

  Args:
    x: the input array.
    weight: the combined weight array.
    activation: optional activation function.
    precision: specifies the matrix multiplication precision. Either `None`
      (default), which means the default precision for the backend, or a
      `jax.lax.Precision` enum.

  Returns:
    The output array, cast back to the dtype of `x`.
  """
  # Merge the trailing two weight axes so that both projections are computed
  # by a single matmul.
  flat_weight = jax.lax.collapse(
      weight, start_dimension=-2, stop_dimension=None
  )
  assert flat_weight.ndim == 2

  # Apply activation and compute product of FP8/FP16/BF16 in FP32.
  compute_dtype = jnp.promote_types(x.dtype, jnp.float32)
  projected = jnp.dot(x, flat_weight, precision=precision)
  projected = projected.astype(compute_dtype)

  gate, value = jnp.split(projected, 2, axis=-1)
  if activation is not None:
    gate = activation(gate)
  return (gate * value).astype(x.dtype)
131 |
--------------------------------------------------------------------------------
/src/alphafold3/jax/gated_linear_unit/matmul_config.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Auto-tuned configs for matmul."""
12 |
13 | import dataclasses
14 | import functools
15 | import math
16 |
17 | import jax
18 | from jax.experimental import pallas as pl
19 |
20 |
@dataclasses.dataclass(frozen=True, kw_only=True)
class Config:
  """Immutable, keyword-only bundle of matmul kernel settings.

  NOTE(review): the per-field notes below are inferred from the field names
  and from how `get_config` fills them in; confirm against the kernel that
  consumes this config.
  """

  block_m: int  # Block size chosen for the M dimension.
  block_n: int  # Block size chosen for the N dimension.
  block_k: int  # Block size chosen for the K dimension.
  num_warps: int  # Presumably the number of warps per kernel instance.
  num_stages: int  # Presumably the number of pipelining stages.
28 |
29 |
30 | @functools.cache
31 | def _get_best_block_size(
32 | m: int, n: int, k: int, core_count: int
33 | ) -> tuple[int, int, int]:
34 | """Returns the best block size for the given shape."""
35 | min_block_dim = 32
36 | block_m = min(max(min_block_dim, pl.next_power_of_2(m)), 128)
37 | block_n = min(max(min_block_dim, pl.next_power_of_2(n)), 256)
38 | block_n = min(block_n, (128 * 128) // block_m)
39 | block_k = 32
40 | split_k = 1
41 | num_blocks = pl.cdiv(m, block_m) * pl.cdiv(n, block_n)
42 | while num_blocks < core_count:
43 | if block_m > min_block_dim:
44 | block_m //= 2
45 | num_blocks = pl.cdiv(m, block_m) * pl.cdiv(n, block_n)
46 | elif split_k * block_k < pl.next_power_of_2(k):
47 | split_k *= 2
48 | num_blocks *= 2
49 | else:
50 | break
51 | return block_m, block_n, block_k
52 |
53 |
54 | def _abstractify(x):
55 | return jax.api_util.shaped_abstractify(x) if isinstance(x, jax.Array) else x
56 |
57 |
def get_config(
    x: jax.Array, w: jax.Array, core_count: int | None = None
) -> Config:
  """Returns a config for the given args.

  Args:
    x: The input operand; all leading dimensions are flattened into `m` and
      the last dimension is `k`.
    w: The weight operand; `n` is read from `w.shape[1]`.
    core_count: Number of cores to target. Defaults to the `core_count` of
      the first JAX device.

  Returns:
    A `Config` with the selected block sizes, `num_warps=4` and
    `num_stages=4`.
  """
  if core_count is None:
    core_count = jax.devices()[0].core_count
  x, w = _abstractify(x), _abstractify(w)

  m = math.prod(x.shape[:-1])
  k = x.shape[-1]
  n = w.shape[1]

  # Prefer `block_n` > `block_m`: always pass the smaller extent first and
  # swap the resulting block sizes back when `m` was the larger one.
  swapped = n < m
  first, second = (n, m) if swapped else (m, n)
  block_first, block_second, block_k = _get_best_block_size(
      first, second, k, core_count
  )
  if swapped:
    block_m, block_n = block_second, block_first
  else:
    block_m, block_n = block_first, block_second

  return Config(
      block_m=block_m,
      block_n=block_n // 2,  # Halve `block_n` as we read two `w` blocks.
      block_k=block_k,
      num_warps=4,
      num_stages=4,
  )
79 |
--------------------------------------------------------------------------------
/src/alphafold3/jax/geometry/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Geometry Module."""
12 |
13 | from alphafold3.jax.geometry import rigid_matrix_vector
14 | from alphafold3.jax.geometry import rotation_matrix
15 | from alphafold3.jax.geometry import struct_of_array
16 | from alphafold3.jax.geometry import vector
17 |
# Package-level aliases so callers can refer to these names directly on the
# geometry package instead of reaching into its submodules.
Rot3Array = rotation_matrix.Rot3Array
Rigid3Array = rigid_matrix_vector.Rigid3Array

StructOfArray = struct_of_array.StructOfArray

# Vector type and the free functions defined in the `vector` submodule.
Vec3Array = vector.Vec3Array
square_euclidean_distance = vector.square_euclidean_distance
euclidean_distance = vector.euclidean_distance
dihedral_angle = vector.dihedral_angle
dot = vector.dot
cross = vector.cross
29 |
--------------------------------------------------------------------------------
/src/alphafold3/jax/geometry/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/__init__.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/geometry/__pycache__/rigid_matrix_vector.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/rigid_matrix_vector.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/geometry/__pycache__/rotation_matrix.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/rotation_matrix.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/geometry/__pycache__/struct_of_array.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/struct_of_array.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/geometry/__pycache__/utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/utils.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/jax/geometry/__pycache__/vector.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/jax/geometry/__pycache__/vector.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/confidence_types.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/confidence_types.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/confidences.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/confidences.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/data3.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/data3.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/data_constants.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/data_constants.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/feat_batch.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/feat_batch.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/features.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/features.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/merging_features.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/merging_features.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/mmcif_metadata.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/mmcif_metadata.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/model_config.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/model_config.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/msa_pairing.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/msa_pairing.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/params.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/params.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/post_processing.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/post_processing.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/__pycache__/protein_data_processing.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/__pycache__/protein_data_processing.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/atom_layout/__pycache__/atom_layout.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/atom_layout/__pycache__/atom_layout.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/components/__pycache__/base_model.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/components/__pycache__/base_model.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/components/__pycache__/haiku_modules.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/components/__pycache__/haiku_modules.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/components/__pycache__/mapping.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/components/__pycache__/mapping.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/components/__pycache__/utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/components/__pycache__/utils.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/components/base_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Defines interface of a BaseModel."""
12 |
13 | from collections.abc import Callable, Mapping
14 | import dataclasses
15 | from typing import Any, TypeAlias
16 | from alphafold3 import structure
17 | from alphafold3.model import features
18 | import haiku as hk
19 | import jax
20 | import numpy as np
21 |
# Mapping from output name to an arbitrary value; this is the type of the raw
# result returned by the forward function (see `ForwardFn` below).
ModelResult: TypeAlias = Mapping[str, Any]
# Mapping from name to a Python scalar (float or int) or a NumPy array.
ScalarNumberOrArray: TypeAlias = Mapping[str, float | int | np.ndarray]

# Eval result will contain scalars (e.g. metrics or losses), selected from the
# forward pass outputs or computed in the online evaluation; np.ndarrays or
# jax.Arrays generated from the forward pass outputs (e.g. distogram expected
# distances) or batch inputs; protein structures (predicted and ground-truth).
EvalResultValue: TypeAlias = (
    float | int | np.ndarray | jax.Array | structure.Structure
)
# Eval result may be None for some metrics if they are not computable.
EvalResults: TypeAlias = Mapping[str, EvalResultValue | None]
# Interface metrics are all floats or None.
InterfaceMetrics: TypeAlias = Mapping[str, float | None]
# Interface results are a mapping from interface name to mappings from score
# type to metric value.
InterfaceResults: TypeAlias = Mapping[str, Mapping[str, InterfaceMetrics]]
# Eval output consists of full eval results and a dict of interface metrics.
EvalOutput: TypeAlias = tuple[EvalResults, InterfaceResults]

# Signature for `apply` method of hk.transform_with_state called on a BaseModel.
# Arguments are (params, state, jax.Array, batch dict); the result is the pair
# (model result, new state).
ForwardFn: TypeAlias = Callable[
    [hk.Params, hk.State, jax.Array, features.BatchDict],
    tuple[ModelResult, hk.State],
]
47 |
48 |
@dataclasses.dataclass(frozen=True)
class InferenceResult:
  """Postprocessed model result.

  Attributes:
    predicted_structure: Predicted protein structure. Required; it is the only
      field without a default.
    numerical_data: Useful numerical data (scalars or arrays) to be saved at
      inference time. Defaults to an empty dict.
    metadata: Smaller numerical data (usually scalar) to be saved as inference
      metadata. Defaults to an empty dict.
    debug_outputs: Additional dict for debugging, e.g. raw outputs of a model
      forward pass. Defaults to an empty dict; may also be None.
    model_id: Model identifier. Defaults to empty bytes.
  """

  predicted_structure: structure.Structure
  numerical_data: ScalarNumberOrArray = dataclasses.field(default_factory=dict)
  metadata: ScalarNumberOrArray = dataclasses.field(default_factory=dict)
  debug_outputs: ModelResult | None = dataclasses.field(default_factory=dict)
  model_id: bytes = b''
63 |
--------------------------------------------------------------------------------
/src/alphafold3/model/components/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Utility functions for training AlphaFold and similar models."""
12 |
13 | from collections import abc
14 | import contextlib
15 | import numbers
16 |
17 | from alphafold3.model import features
18 | import haiku as hk
19 | import jax.numpy as jnp
20 | import numpy as np
21 |
22 |
# Dtypes a feature must have to be kept by `remove_invalidly_typed_feats`.
VALID_DTYPES = [np.float32, np.float64, np.int8, np.int32, np.int64, bool]
24 |
25 |
def remove_invalidly_typed_feats(
    batch: features.BatchDict,
) -> features.BatchDict:
  """Drops features whose type we don't want to send to the TPU, e.g. strings.

  Args:
    batch: Mapping from feature name to feature value.

  Returns:
    A new dict holding only the entries whose value has a `dtype` attribute
    that is a member of `VALID_DTYPES`. The input mapping is not modified.
  """
  kept = {}
  for name, feat in batch.items():
    # Values without a dtype (e.g. plain Python objects) are never kept.
    if not hasattr(feat, 'dtype'):
      continue
    if feat.dtype in VALID_DTYPES:
      kept[name] = feat
  return kept
35 |
36 |
def bfloat16_getter(next_getter, value, context):
  """Ensures that a bfloat16 parameter is provided by casting if necessary.

  Args:
    next_getter: Callable invoked with the (possibly cast) value; its result is
      returned.
    value: The parameter value; must expose `dtype` and `astype`.
    context: Object exposing `original_dtype`, the dtype that was requested.

  Returns:
    The result of `next_getter(value)`, where `value` has been cast to bfloat16
    when bfloat16 was requested and the value was in another dtype.
  """
  needs_cast = (
      context.original_dtype == jnp.bfloat16 and value.dtype != jnp.bfloat16
  )
  if needs_cast:
    value = value.astype(jnp.bfloat16)
  return next_getter(value)
43 |
44 |
@contextlib.contextmanager
def bfloat16_context():
  """Context manager that installs `bfloat16_getter` as a Haiku custom getter.

  The getter is active for the duration of the `with` body.
  """
  getter_scope = hk.custom_getter(bfloat16_getter)
  with getter_scope:
    yield
49 |
50 |
def mask_mean(mask, value, axis=None, keepdims=False, eps=1e-10):
  """Computes the mean of `value` weighted by `mask` over the given axes.

  Args:
    mask: Array with the same number of dimensions as `value`. Along every
      reduced axis its size must be either 1 (broadcast) or equal to the size
      of `value`.
    value: Array to average.
    axis: Integer, iterable of integers, or None to reduce over all axes.
    keepdims: Forwarded to `jnp.sum`.
    eps: Lower bound applied to the denominator, which avoids division by zero
      when the mask is empty.

  Returns:
    sum(mask * value) / max(sum(mask) * broadcast_factor, eps), where
    broadcast_factor is the product of the `value` sizes along reduced axes on
    which `mask` has size 1.
  """
  mask_dims = mask.shape
  value_dims = value.shape
  mismatch_msg = (
      f'Shapes are not compatible, shapes: {mask_dims}, {value_dims}'
  )
  assert len(mask_dims) == len(value_dims), mismatch_msg

  if axis is None:
    axis = list(range(len(mask_dims)))
  elif isinstance(axis, numbers.Integral):
    axis = [axis]
  assert isinstance(
      axis, abc.Iterable
  ), 'axis needs to be either an iterable, integer or "None"'

  # A mask of size 1 along a reduced axis is implicitly repeated along that
  # axis of `value`, so the mask sum has to be scaled up accordingly.
  broadcast_factor = 1.0
  for dim in axis:
    if mask_dims[dim] == 1:
      broadcast_factor *= value_dims[dim]
    else:
      assert mask_dims[dim] == value_dims[dim], mismatch_msg

  numerator = jnp.sum(mask * value, keepdims=keepdims, axis=axis)
  mask_total = jnp.sum(mask, keepdims=keepdims, axis=axis) * broadcast_factor
  return numerator / jnp.maximum(mask_total, eps)
84 |
--------------------------------------------------------------------------------
/src/alphafold3/model/data3.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Protein features that are computed from parsed mmCIF objects."""
12 |
13 | from collections.abc import Mapping, MutableMapping
14 | import datetime
15 | from typing import TypeAlias
16 |
17 | from alphafold3.constants import residue_names
18 | from alphafold3.cpp import msa_profile
19 | from alphafold3.model import protein_data_processing
20 | import numpy as np
21 |
22 |
# Read-only mapping from feature name to NumPy array.
FeatureDict: TypeAlias = Mapping[str, np.ndarray]
# Mutable counterpart, used where features are renamed or updated in place.
MutableFeatureDict: TypeAlias = MutableMapping[str, np.ndarray]
25 |
26 |
def fix_features(msa_features: MutableFeatureDict) -> MutableFeatureDict:
  """Renames the `deletion_matrix_int` feature to `deletion_matrix` in place.

  Args:
    msa_features: MSA features; must contain the key `deletion_matrix_int`.

  Returns:
    The same mapping object, mutated.

  Raises:
    KeyError: If `deletion_matrix_int` is missing.
  """
  deletion_matrix = msa_features.pop('deletion_matrix_int')
  msa_features['deletion_matrix'] = deletion_matrix
  return msa_features
31 |
32 |
def get_profile_features(
    msa: np.ndarray, deletion_matrix: np.ndarray
) -> FeatureDict:
  """Returns the MSA profile and deletion_mean features.

  Args:
    msa: The MSA array passed to `msa_profile.compute_msa_profile`.
    deletion_matrix: Deletion counts; averaged over axis 0.

  Returns:
    A dict with `profile` (cast to float32) and `deletion_mean`.
  """
  raw_profile = msa_profile.compute_msa_profile(
      msa=msa,
      num_residue_types=residue_names.POLYMER_TYPES_NUM_WITH_UNKNOWN_AND_GAP,
  )
  deletion_mean = np.mean(deletion_matrix, axis=0)
  return {
      'profile': raw_profile.astype(np.float32),
      'deletion_mean': deletion_mean,
  }
46 |
47 |
def fix_template_features(
    sequence: str,
    template_features: FeatureDict,
) -> FeatureDict:
  """Convert template features to AlphaFold 3 format.

  When there are no templates (leading dimension of `template_aatype` is 0),
  fully masked placeholder features are returned instead.

  Args:
    sequence: amino acid sequence of the protein.
    template_features: Template features for the protein.

  Returns:
    Updated template_features for the chain.
  """
  num_res = len(sequence)
  aatype = template_features['template_aatype']
  if not aatype.shape[0]:
    return empty_template_features(num_res)

  # Release dates arrive as UTF-8 encoded bytes; store them as timestamps.
  release_timestamps = [
      _get_timestamp(raw_date.decode('utf-8'))
      for raw_date in template_features['template_release_date']
  ]

  # Convert from atom37 to dense atom: look up, per residue type, which atom37
  # slot feeds each dense atom slot, then gather masks and positions.
  dense_to_atom37 = np.take(
      protein_data_processing.PROTEIN_AATYPE_DENSE_ATOM_TO_ATOM37,
      aatype,
      axis=0,
  )
  dense_mask = np.take_along_axis(
      template_features['template_all_atom_masks'], dense_to_atom37, axis=2
  )
  dense_positions = np.take_along_axis(
      template_features['template_all_atom_positions'],
      dense_to_atom37[..., None],
      axis=2,
  )
  # Zero out the coordinates of masked atoms.
  dense_positions *= dense_mask[..., None]

  return {
      'template_aatype': aatype,
      'template_atom_mask': dense_mask.astype(np.int32),
      'template_atom_positions': dense_positions.astype(np.float32),
      'template_domain_names': np.array(
          template_features['template_domain_names'], dtype=object
      ),
      'template_release_timestamp': np.array(
          release_timestamps, dtype=np.float32
      ),
  }
99 |
100 |
def empty_template_features(num_res: int) -> FeatureDict:
  """Creates a fully masked out template features to allow padding to work.

  Every array has a leading dimension of 1, i.e. a single all-zero template.

  Args:
    num_res: The length of the target chain.

  Returns:
    Empty template features for the chain.
  """
  num_dense = protein_data_processing.NUM_DENSE
  return {
      'template_aatype': np.zeros((1, num_res), dtype=np.int32),
      'template_atom_mask': np.zeros((1, num_res, num_dense), dtype=np.int32),
      'template_atom_positions': np.zeros(
          (1, num_res, num_dense, 3), dtype=np.float32
      ),
      'template_domain_names': np.array([b''], dtype=object),
      'template_release_timestamp': np.array([0.0], dtype=np.float32),
  }
122 |
123 |
124 | def _get_timestamp(date_str: str):
125 | dt = datetime.datetime.fromisoformat(date_str)
126 | dt = dt.replace(tzinfo=datetime.timezone.utc)
127 | return dt.timestamp()
128 |
--------------------------------------------------------------------------------
/src/alphafold3/model/data_constants.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Constants shared across modules in the AlphaFold data pipeline."""
12 |
13 | from alphafold3.constants import residue_names
14 |
# Index of the gap character '-' in the one-letter protein alphabet that
# includes the unknown and gap symbols.
MSA_GAP_IDX = residue_names.PROTEIN_TYPES_ONE_LETTER_WITH_UNKNOWN_AND_GAP.index(
    '-'
)

# Feature groups.
# NOTE(review): the constant names suggest these are MSA features shaped
# (num_seq, num_res) and (num_seq,) respectively — confirm against callers.
NUM_SEQ_NUM_RES_MSA_FEATURES = ('msa', 'msa_mask', 'deletion_matrix')
NUM_SEQ_MSA_FEATURES = ('msa_species_identifiers',)
TEMPLATE_FEATURES = (
    'template_aatype',
    'template_atom_positions',
    'template_atom_mask',
)
# Constant used to fill each MSA feature when it is padded.
MSA_PAD_VALUES = {'msa': MSA_GAP_IDX, 'msa_mask': 1, 'deletion_matrix': 0}
28 |
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/__pycache__/atom_cross_attention.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/atom_cross_attention.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/__pycache__/confidence_head.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/confidence_head.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/__pycache__/diffusion_head.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/diffusion_head.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/__pycache__/diffusion_transformer.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/diffusion_transformer.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/__pycache__/distogram_head.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/distogram_head.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/__pycache__/featurization.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/featurization.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/__pycache__/model.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/model.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/__pycache__/modules.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/modules.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/__pycache__/template_modules.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/diffusion/__pycache__/template_modules.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/diffusion/distogram_head.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Distogram head."""
12 |
13 | from typing import Final
14 |
15 | from alphafold3.common import base_config
16 | from alphafold3.model import feat_batch
17 | from alphafold3.model import model_config
18 | from alphafold3.model.components import haiku_modules as hm
19 | import haiku as hk
20 | import jax
21 | import jax.numpy as jnp
22 |
23 |
# A distogram bin counts as a contact bin when its upper edge is at most
# _CONTACT_THRESHOLD + _CONTACT_EPSILON (see DistogramHead.__call__).
# NOTE(review): the epsilon presumably absorbs floating-point error in the
# computed bin edges — confirm.
_CONTACT_THRESHOLD: Final[float] = 8.0
_CONTACT_EPSILON: Final[float] = 1e-3
26 |
27 |
class DistogramHead(hk.Module):
  """Distogram head.

  Projects the pair embedding to per-bin logits, symmetrises them, and sums
  the softmax probabilities of the bins that count as contacts.
  """

  class Config(base_config.BaseConfig):
    first_break: float = 2.3125
    last_break: float = 21.6875
    num_bins: int = 64

  def __init__(
      self,
      config: Config,
      global_config: model_config.GlobalConfig,
      name='distogram_head',
  ):
    super().__init__(name=name)
    self.config = config
    self.global_config = global_config

  def __call__(
      self,
      batch: feat_batch.Batch,
      embeddings: dict[str, jnp.ndarray],
  ) -> dict[str, jnp.ndarray]:
    """Computes contact probabilities from the pair embedding.

    Args:
      batch: Input batch; only `token_features.mask` is read.
      embeddings: Must contain the key `pair`.

    Returns:
      A dict with `bin_edges` (the `num_bins - 1` break points) and
      `contact_probs` (zeroed wherever either token is masked out).
    """
    cfg = self.config
    token_mask = batch.token_features.mask.astype(bool)
    pair_mask = token_mask[:, None] * token_mask[None, :]

    half_logits = hm.Linear(
        cfg.num_bins,
        initializer=self.global_config.final_init,
        name='half_logits',
    )(embeddings['pair'])

    # Adding the projection with its two pair axes swapped makes the logits
    # symmetric in those axes.
    logits = half_logits + jnp.swapaxes(half_logits, -2, -3)
    probs = jax.nn.softmax(logits, axis=-1)

    breaks = jnp.linspace(cfg.first_break, cfg.last_break, cfg.num_bins - 1)
    # The last bin has no break above it; give it a top edge one bin width
    # beyond the final break.
    bin_width = breaks[-1] - breaks[-2]
    bin_tops = jnp.append(breaks, breaks[-1] + bin_width)
    is_contact_bin = 1.0 * (
        bin_tops <= _CONTACT_THRESHOLD + _CONTACT_EPSILON
    )

    contact_probs = pair_mask * jnp.einsum(
        'ijk,k->ij', probs, is_contact_bin, precision=jax.lax.Precision.HIGHEST
    )

    return {
        'bin_edges': breaks,
        'contact_probs': contact_probs,
    }
82 |
--------------------------------------------------------------------------------
/src/alphafold3/model/feat_batch.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Batch dataclass."""
12 | from typing import Self
13 |
14 | from alphafold3.model import features
15 | import chex
16 |
17 |
@chex.dataclass(mappable_dataclass=False, frozen=True)
class Batch:
  """Dataclass containing batch.

  Each field holds one group of features; every group is built from, and can
  be flattened back into, a single flat feature dictionary.
  """

  msa: features.MSA
  templates: features.Templates
  token_features: features.TokenFeatures
  ref_structure: features.RefStructure
  predicted_structure_info: features.PredictedStructureInfo
  polymer_ligand_bond_info: features.PolymerLigandBondInfo
  ligand_ligand_bond_info: features.LigandLigandBondInfo
  pseudo_beta_info: features.PseudoBetaInfo
  atom_cross_att: features.AtomCrossAtt
  convert_model_output: features.ConvertModelOutput
  frames: features.Frames

  @property
  def num_res(self) -> int:
    """Size of the last axis of `token_features.aatype`."""
    aatype_shape = self.token_features.aatype.shape
    return aatype_shape[-1]

  @classmethod
  def from_data_dict(cls, batch: features.BatchDict) -> Self:
    """Construct batch object from dictionary."""
    # Field name -> feature group class, in field declaration order.
    group_classes = {
        'msa': features.MSA,
        'templates': features.Templates,
        'token_features': features.TokenFeatures,
        'ref_structure': features.RefStructure,
        'predicted_structure_info': features.PredictedStructureInfo,
        'polymer_ligand_bond_info': features.PolymerLigandBondInfo,
        'ligand_ligand_bond_info': features.LigandLigandBondInfo,
        'pseudo_beta_info': features.PseudoBetaInfo,
        'atom_cross_att': features.AtomCrossAtt,
        'convert_model_output': features.ConvertModelOutput,
        'frames': features.Frames,
    }
    groups = {
        field_name: group_cls.from_data_dict(batch)
        for field_name, group_cls in group_classes.items()
    }
    return cls(**groups)

  def as_data_dict(self) -> features.BatchDict:
    """Converts batch object to dictionary."""
    flat = {}
    # Later groups overwrite earlier ones on key collisions.
    for group in (
        self.msa,
        self.templates,
        self.token_features,
        self.ref_structure,
        self.predicted_structure_info,
        self.polymer_ligand_bond_info,
        self.ligand_ligand_bond_info,
        self.pseudo_beta_info,
        self.atom_cross_att,
        self.convert_model_output,
        self.frames,
    ):
      flat.update(group.as_data_dict())
    return flat
77 |
--------------------------------------------------------------------------------
/src/alphafold3/model/merging_features.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Methods for merging existing features to create a new example.
12 |
13 | Covers:
14 | - Merging features across chains.
15 | - Merging the paired and unpaired parts of the MSA.
16 | """
17 |
18 | from typing import TypeAlias
19 |
20 | from alphafold3.model import data_constants
21 | import jax.numpy as jnp
22 | import numpy as np
23 |
# Local aliases of the shared data-pipeline constants.
NUM_SEQ_NUM_RES_MSA_FEATURES = data_constants.NUM_SEQ_NUM_RES_MSA_FEATURES
NUM_SEQ_MSA_FEATURES = data_constants.NUM_SEQ_MSA_FEATURES
MSA_PAD_VALUES = data_constants.MSA_PAD_VALUES


# An array from either NumPy or JAX.
xnp_ndarray: TypeAlias = np.ndarray | jnp.ndarray  # pylint: disable=invalid-name
# Mapping from feature name to array.
BatchDict: TypeAlias = dict[str, xnp_ndarray]
31 |
32 |
def _pad_features_to_max(feat_name: str, chains: list[BatchDict], axis: int):
  """Pad a set of features to the maximum size amongst all chains.

  Padding is appended at the end of `axis` only, using the constant registered
  for `feat_name` in `MSA_PAD_VALUES`.

  Args:
    feat_name: The feature name to pad.
    chains: A list of chains with associated features.
    axis: Which axis to pad to the max.

  Returns:
    A list of features, all with the same size on the given axis.
  """
  target_size = np.max([chain[feat_name].shape[axis] for chain in chains])

  padded_feats = []
  for chain in chains:
    feat = chain[feat_name]
    # One (before, after) pair per dimension; only `axis` gets trailing pad.
    pad_width = [(0, 0)] * len(feat.shape)  # pytype: disable=attribute-error
    pad_width[axis] = (0, target_size - feat.shape[axis])  # pytype: disable=attribute-error
    padded_feats.append(
        np.pad(
            feat,
            pad_width,
            mode='constant',
            constant_values=MSA_PAD_VALUES[feat_name],
        )
    )
  return padded_feats
62 |
63 |
def merge_msa_features(feat_name: str, chains: list[BatchDict]) -> np.ndarray:
  """Merges MSA features with shape (NUM_SEQ, NUM_RES) across chains.

  Args:
    feat_name: Name of the feature to merge. It must be present in the first
      chain, whose dtype is used for the fallback below.
    chains: Per-chain feature dicts.

  Returns:
    The per-chain features concatenated along axis 1. Features whose name does
    not contain `_all_seq` are first padded along axis 0 to the largest MSA.
  """
  expected_dtype = chains[0][feat_name].dtype
  if '_all_seq' not in feat_name:
    # Since each MSA can be of different lengths, we first need to pad them
    # all to the size of the largest MSA before concatenating.
    return np.concatenate(
        _pad_features_to_max(feat_name, chains, axis=0), axis=1
    )

  # NOTE(review): the fallback for a chain missing the feature is an empty 1-D
  # array, which cannot be concatenated along axis 1 with 2-D arrays — confirm
  # that every chain always carries the feature.
  per_chain = [
      chain.get(feat_name, np.array([], expected_dtype)) for chain in chains
  ]
  return np.concatenate(per_chain, axis=1)
76 |
77 |
def merge_paired_and_unpaired_msa(example: BatchDict) -> BatchDict:
  """Concatenates the paired (all_seq) MSA features with the unpaired ones.

  Args:
    example: Feature dict. For every MSA feature that has both an unpaired
      entry and an `<name>_all_seq` entry, the two are merged.

  Returns:
    A shallow copy of `example` in which each merged feature holds the paired
    rows followed by the unpaired rows (axis 0), plus `num_alignments` set to
    the number of rows of the resulting `msa`.
  """
  merged = dict(example)

  for name in NUM_SEQ_NUM_RES_MSA_FEATURES + NUM_SEQ_MSA_FEATURES:
    paired_name = name + '_all_seq'
    if name not in example or paired_name not in example:
      continue
    # Paired rows go first.
    merged[name] = np.concatenate(
        [example[paired_name], example[name]], axis=0
    )

  merged['num_alignments'] = np.array(merged['msa'].shape[0], dtype=np.int32)
  return merged
93 |
--------------------------------------------------------------------------------
/src/alphafold3/model/mkdssp_pybind.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | #include "alphafold3/model/mkdssp_pybind.h"
12 |
13 | #include
14 |
15 | #include
16 | #include
17 | #include
18 | #include
19 |
20 | #include "absl/strings/string_view.h"
21 | #include "pybind11/pybind11.h"
22 | #include "pybind11/pytypes.h"
23 |
24 | namespace alphafold3 {
25 | namespace py = pybind11;
26 |
27 | void RegisterModuleMkdssp(pybind11::module m) {
28 | py::module site = py::module::import("site");
29 | py::list paths = py::cast(site.attr("getsitepackages")());
30 | // Find the first path that contains the libcifpp components.cif file.
31 | bool found = false;
32 | for (const auto& py_path : paths) {
33 | auto path_str =
34 | std::filesystem::path(py::cast(py_path)) /
35 | "share/libcifpp/components.cif";
36 | if (std::filesystem::exists(path_str)) {
37 | setenv("LIBCIFPP_DATA_DIR", path_str.parent_path().c_str(), 0);
38 | found = true;
39 | break;
40 | }
41 | }
42 | if (!found) {
43 | throw py::type_error("Could not find the libcifpp components.cif file.");
44 | }
45 | m.def(
46 | "get_dssp",
47 | [](absl::string_view mmcif, int model_no,
48 | int min_poly_proline_stretch_length,
49 | bool calculate_surface_accessibility) {
50 | cif::file cif_file(mmcif.data(), mmcif.size());
51 | dssp result(cif_file.front(), model_no, min_poly_proline_stretch_length,
52 | calculate_surface_accessibility);
53 | std::stringstream sstream;
54 | result.write_legacy_output(sstream);
55 | return sstream.str();
56 | },
57 | py::arg("mmcif"), py::arg("model_no") = 1,
58 | py::arg("min_poly_proline_stretch_length") = 3,
59 | py::arg("calculate_surface_accessibility") = false,
60 | py::doc("Gets secondary structure from an mmCIF file."));
61 | }
62 |
63 | } // namespace alphafold3
64 |
--------------------------------------------------------------------------------
/src/alphafold3/model/mkdssp_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_MODEL_MKDSSP_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_MODEL_MKDSSP_PYBIND_H_
15 |
16 |
17 | #include "pybind11/pybind11.h"
18 |
namespace alphafold3 {

// Registers the DSSP bindings (the `get_dssp` function) on the given pybind11
// module.
void RegisterModuleMkdssp(pybind11::module m);

}  // namespace alphafold3
24 |
25 |
26 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_MODEL_MKDSSP_PYBIND_H_
27 |
--------------------------------------------------------------------------------
/src/alphafold3/model/model_config.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Config for the protein folding model and experiment."""
12 |
13 | from collections.abc import Sequence
14 | from typing import Literal, TypeAlias
15 |
16 | from alphafold3.common import base_config
17 | from alphafold3.jax.attention import attention
18 |
19 |
20 | _Shape2DType: TypeAlias = tuple[int | None, int | None]
21 |
22 |
class GlobalConfig(base_config.BaseConfig):
  """Global configuration values for the folding model.

  NOTE(review): the per-field notes below are inferred from field names, types
  and defaults only; confirm the exact semantics against the modules that
  consume this config.
  """

  # Which parts of the computation use bfloat16; must be one of the literals.
  bfloat16: Literal['all', 'none', 'intermediate'] = 'all'
  # Initialisation choice for final layers - presumably 'zeros' zero-inits the
  # last projection; verify against the layer code.
  final_init: Literal['zeros', 'linear'] = 'zeros'
  # Sequence of 2-tuples of optional ints; entries may be None.
  # NOTE(review): looks like (size threshold, chunk size) pairs - confirm.
  pair_attention_chunk_size: Sequence[_Shape2DType] = ((1536, 128), (None, 32))
  # Sequence of 2-tuples of optional ints; entries may be None.
  # NOTE(review): looks like (size threshold, shard size) pairs - confirm.
  pair_transition_shard_spec: Sequence[_Shape2DType] = (
      (2048, None),
      (None, 1024),
  )
  # Note: flash_attention_implementation = 'xla' means no flash attention.
  flash_attention_implementation: attention.Implementation = 'triton'
33 |
--------------------------------------------------------------------------------
/src/alphafold3/model/pipeline/__pycache__/inter_chain_bonds.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/pipeline/__pycache__/inter_chain_bonds.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/pipeline/__pycache__/pipeline.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/pipeline/__pycache__/pipeline.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/pipeline/__pycache__/structure_cleaning.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/pipeline/__pycache__/structure_cleaning.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/post_processing.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Post-processing utilities for AlphaFold inference results."""
12 |
13 | import dataclasses
14 | import datetime
15 | import os
16 |
17 | from alphafold3 import version
18 | from alphafold3.model import confidence_types
19 | from alphafold3.model import mmcif_metadata
20 | from alphafold3.model.components import base_model
21 | import numpy as np
22 |
23 |
@dataclasses.dataclass(frozen=True, slots=True, kw_only=True)
class ProcessedInferenceResult:
  """Stores attributes of a processed inference result.

  Instances are immutable (frozen), use slots, and must be constructed with
  keyword arguments only.

  Attributes:
    cif: UTF-8 encoded mmCIF file content containing an inference result.
    mean_confidence_1d: Mean 1D confidence calculated from confidence_1d.
    ranking_score: Ranking score of the inference result.
    structure_confidence_summary_json: UTF-8 encoded content of the JSON file
      with the structure confidences summary.
    structure_full_data_json: UTF-8 encoded content of the JSON file with the
      full structure confidences.
    model_id: Identifier of the model that produced the inference result.
  """

  cif: bytes
  mean_confidence_1d: float
  ranking_score: float
  structure_confidence_summary_json: bytes
  structure_full_data_json: bytes
  model_id: bytes
45 |
46 |
def post_process_inference_result(
    inference_result: base_model.InferenceResult,
) -> ProcessedInferenceResult:
  """Converts an inference result into serialised, ready-to-write outputs.

  Args:
    inference_result: Inference result to process. Its predicted structure,
      model id and `metadata['ranking_score']` entry are read.

  Returns:
    A ProcessedInferenceResult holding the UTF-8 encoded mmCIF (with metadata
    and the legal comment added), the mean 1D confidence, the ranking score,
    the UTF-8 encoded summary and full confidence JSON documents, and the
    model id.
  """

  # Add mmCIF metadata fields. The version string embeds a local timestamp.
  timestamp = datetime.datetime.now().isoformat(sep=' ', timespec='seconds')
  cif_with_metadata = mmcif_metadata.add_metadata_to_mmcif(
      old_cif=inference_result.predicted_structure.to_mmcif_dict(),
      version=f'{version.__version__} @ {timestamp}',
      model_id=inference_result.model_id,
  )
  cif = mmcif_metadata.add_legal_comment(cif_with_metadata.to_string())
  cif = cif.encode('utf-8')

  confidence_1d = confidence_types.AtomConfidence.from_inference_result(
      inference_result
  )
  # np.mean returns a NumPy scalar; cast so the stored value really is the
  # plain Python float that ProcessedInferenceResult declares (a NumPy float32
  # scalar, for instance, is not a `float` and is not JSON serialisable).
  mean_confidence_1d = float(np.mean(confidence_1d.confidence))

  structure_confidence_summary_json = (
      confidence_types.StructureConfidenceSummary.from_inference_result(
          inference_result
      )
      .to_json()
      .encode('utf-8')
  )
  structure_full_data_json = (
      confidence_types.StructureConfidenceFull.from_inference_result(
          inference_result
      )
      .to_json()
      .encode('utf-8')
  )
  return ProcessedInferenceResult(
      cif=cif,
      mean_confidence_1d=mean_confidence_1d,
      ranking_score=float(inference_result.metadata['ranking_score']),
      structure_confidence_summary_json=structure_confidence_summary_json,
      structure_full_data_json=structure_full_data_json,
      model_id=inference_result.model_id,
  )
87 |
88 |
def write_output(
    inference_result: base_model.InferenceResult,
    output_dir: os.PathLike[str] | str,
    terms_of_use: str | None = None,
    name: str | None = None,
) -> None:
  """Writes processed inference result to a directory.

  Args:
    inference_result: Inference result to post-process and write.
    output_dir: Directory to write into. It is created (including parents) if
      it does not exist yet.
    terms_of_use: Optional text written verbatim to `TERMS_OF_USE.md`. Nothing
      is written when None.
    name: Optional prefix; when given, the model and confidence files are named
      `{name}_model.cif`, `{name}_summary_confidences.json` and
      `{name}_confidences.json`.
  """
  processed_result = post_process_inference_result(inference_result)

  # Create the target lazily, only once post-processing has succeeded, so a
  # missing directory no longer fails the first open() call.
  os.makedirs(output_dir, exist_ok=True)

  prefix = f'{name}_' if name is not None else ''

  binary_outputs = (
      (f'{prefix}model.cif', processed_result.cif),
      (
          f'{prefix}summary_confidences.json',
          processed_result.structure_confidence_summary_json,
      ),
      (f'{prefix}confidences.json', processed_result.structure_full_data_json),
  )
  for filename, payload in binary_outputs:
    with open(os.path.join(output_dir, filename), 'wb') as f:
      f.write(payload)

  if terms_of_use is not None:
    # Explicit UTF-8 keeps this file consistent with the other outputs instead
    # of depending on the platform's default locale encoding.
    with open(
        os.path.join(output_dir, 'TERMS_OF_USE.md'), 'wt', encoding='utf-8'
    ) as f:
      f.write(terms_of_use)
114 |
--------------------------------------------------------------------------------
/src/alphafold3/model/scoring/__pycache__/alignment.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/scoring/__pycache__/alignment.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/scoring/__pycache__/covalent_bond_cleaning.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/scoring/__pycache__/covalent_bond_cleaning.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/scoring/__pycache__/scoring.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/model/scoring/__pycache__/scoring.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/model/scoring/scoring.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Library of scoring methods of the model outputs."""
12 |
13 | from alphafold3.model import protein_data_processing
14 | import jax.numpy as jnp
15 | import numpy as np
16 |
17 |
18 | Array = jnp.ndarray | np.ndarray
19 |
20 |
def pseudo_beta_fn(
    aatype: Array,
    dense_atom_positions: Array,
    dense_atom_masks: Array,
    is_ligand: Array | None = None,
    use_jax: bool | None = True,
) -> tuple[Array, Array] | Array:
  """Create pseudo beta atom positions and the corresponding mask.

  Args:
    aatype: [num_res] amino acid types.
    dense_atom_positions: [num_res, NUM_DENSE, 3] vector of all atom positions.
    dense_atom_masks: [num_res, NUM_DENSE] mask.
    is_ligand: [num_res] flag if something is a ligand. Defaults to all zeros
      (no ligands) when None.
    use_jax: whether to use jax for the computations; numpy is used otherwise.

  Returns:
    A (pseudo_beta, pseudo_beta_mask) tuple: the position of the pseudo beta
    atom for every residue and its float32 mask.
  """
  # Select the array backend once; every operation below goes through it.
  xnp = jnp if use_jax else np

  if is_ligand is None:
    is_ligand = xnp.zeros_like(aatype)

  # Dense-atom index of the pseudo beta atom for each polymer residue type.
  pseudobeta_index_polymer = xnp.take(
      protein_data_processing.RESTYPE_PSEUDOBETA_INDEX, aatype, axis=0
  ).astype(xnp.int32)

  # Ligands always use dense atom 0. This used to call jnp.where/jnp.zeros_like
  # unconditionally, which pulled JAX into the use_jax=False path.
  pseudobeta_index = xnp.where(
      is_ligand,
      xnp.zeros_like(pseudobeta_index_polymer),
      pseudobeta_index_polymer,
  )

  # Gather the selected atom along the dense-atom axis, then drop that axis.
  pseudo_beta = xnp.take_along_axis(
      dense_atom_positions, pseudobeta_index[..., None, None], axis=-2
  )
  pseudo_beta = xnp.squeeze(pseudo_beta, axis=-2)

  pseudo_beta_mask = xnp.take_along_axis(
      dense_atom_masks, pseudobeta_index[..., None], axis=-1
  ).astype(xnp.float32)
  pseudo_beta_mask = xnp.squeeze(pseudo_beta_mask, axis=-1)

  return pseudo_beta, pseudo_beta_mask
69 |
--------------------------------------------------------------------------------
/src/alphafold3/parsers/cpp/cif_dict_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_CIF_DICT_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_CIF_DICT_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
namespace alphafold3 {

// Registers the cif_dict Python bindings on the pybind11 module `m`.
// NOTE(review): the definition is not in this header; confirm what it binds
// against the matching .cc file.
void RegisterModuleCifDict(pybind11::module m);

}  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_CIF_DICT_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/parsers/cpp/fasta_iterator.pyi:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
# Lazy FASTA parser that reads records from a file path.
class FastaFileIterator:
  # Takes the path of the FASTA file to read.
  def __init__(self, fasta_path: str) -> None: ...
  # Returns the iterator itself.
  def __iter__(self) -> FastaFileIterator: ...
  # Returns the next record as a (sequence, description) tuple.
  def __next__(self) -> tuple[str,str]: ...
15 |
# Lazy FASTA parser that reads records from an in-memory FASTA string.
class FastaStringIterator:
  # Takes the FASTA content as str or bytes.
  def __init__(self, fasta_string: str | bytes) -> None: ...
  # Returns the iterator itself.
  def __iter__(self) -> FastaStringIterator: ...
  # Returns the next record as a (sequence, description) tuple.
  def __next__(self) -> tuple[str,str]: ...
20 |
# Parses a FASTA string and returns the list of sequences; descriptions are
# ignored.
def parse_fasta(fasta_string: str | bytes) -> list[str]: ...
# Parses a FASTA string and returns a (sequences, descriptions) tuple of two
# lists in matching order.
def parse_fasta_include_descriptions(fasta_string: str | bytes) -> tuple[list[str],list[str]]: ...
23 |
--------------------------------------------------------------------------------
/src/alphafold3/parsers/cpp/fasta_iterator_lib.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | #include "alphafold3/parsers/cpp/fasta_iterator_lib.h"
12 |
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 |
19 | #include "absl/status/status.h"
20 | #include "absl/status/statusor.h"
21 | #include "absl/strings/ascii.h"
22 | #include "absl/strings/str_cat.h"
23 | #include "absl/strings/str_split.h"
24 | #include "absl/strings/string_view.h"
25 | #include "absl/strings/strip.h"
26 |
27 | namespace alphafold3 {
28 |
29 | // Parse FASTA string and return list of strings with amino acid sequences.
30 | // Returns a list of amino acid sequences only.
31 | std::vector ParseFasta(absl::string_view fasta_string) {
32 | std::vector sequences;
33 | std::string* sequence = nullptr;
34 | for (absl::string_view line_raw : absl::StrSplit(fasta_string, '\n')) {
35 | absl::string_view line = absl::StripAsciiWhitespace(line_raw);
36 | if (absl::ConsumePrefix(&line, ">")) {
37 | sequence = &sequences.emplace_back();
38 | } else if (!line.empty() && sequence != nullptr) {
39 | absl::StrAppend(sequence, line);
40 | }
41 | }
42 | return sequences;
43 | }
44 |
45 | // Parse FASTA string and return list of strings with amino acid sequences.
46 | // Returns two lists: The first one with amino acid sequences, the second with
47 | // the descriptions associated with each sequence.
48 | std::pair, std::vector>
49 | ParseFastaIncludeDescriptions(absl::string_view fasta_string) {
50 | std::pair, std::vector> result;
51 | auto& [sequences, descriptions] = result;
52 | std::string* sequence = nullptr;
53 | for (absl::string_view line_raw : absl::StrSplit(fasta_string, '\n')) {
54 | absl::string_view line = absl::StripAsciiWhitespace(line_raw);
55 | if (absl::ConsumePrefix(&line, ">")) {
56 | descriptions.emplace_back(line);
57 | sequence = &sequences.emplace_back();
58 | } else if (!line.empty() && sequence != nullptr) {
59 | absl::StrAppend(sequence, line);
60 | }
61 | }
62 | return result;
63 | }
64 |
65 | absl::StatusOr> FastaFileIterator::Next() {
66 | std::string line_str;
67 | while (std::getline(reader_, line_str)) {
68 | absl::string_view line = line_str;
69 | line = absl::StripAsciiWhitespace(line);
70 | if (absl::ConsumePrefix(&line, ">")) {
71 | if (!description_.has_value()) {
72 | description_ = line;
73 | } else {
74 | std::pair output(sequence_, *description_);
75 | description_ = line;
76 | sequence_ = "";
77 | return output;
78 | }
79 | } else if (description_.has_value()) {
80 | absl::StrAppend(&sequence_, line);
81 | }
82 | }
83 | has_next_ = false;
84 | reader_.close();
85 | if (description_.has_value()) {
86 | return std::pair(sequence_, *description_);
87 | } else {
88 | return absl::InvalidArgumentError(
89 | absl::StrCat("Invalid FASTA file: ", filename_));
90 | }
91 | }
92 |
93 | absl::StatusOr>
94 | FastaStringIterator::Next() {
95 | size_t consumed = 0;
96 | for (absl::string_view line_raw : absl::StrSplit(fasta_string_, '\n')) {
97 | consumed += line_raw.size() + 1; // +1 for the newline character.
98 | absl::string_view line = absl::StripAsciiWhitespace(line_raw);
99 | if (absl::ConsumePrefix(&line, ">")) {
100 | if (!description_.has_value()) {
101 | description_ = line;
102 | } else {
103 | std::pair output(sequence_, *description_);
104 | description_ = line;
105 | sequence_ = "";
106 | fasta_string_.remove_prefix(consumed);
107 | return output;
108 | }
109 | } else if (description_.has_value()) {
110 | absl::StrAppend(&sequence_, line);
111 | }
112 | }
113 | has_next_ = false;
114 | if (description_.has_value()) {
115 | return std::pair(sequence_, *description_);
116 | } else {
117 | return absl::InvalidArgumentError("Invalid FASTA string");
118 | }
119 | }
120 |
121 | } // namespace alphafold3
122 |
--------------------------------------------------------------------------------
/src/alphafold3/parsers/cpp/fasta_iterator_lib.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | // A C++ implementation of a FASTA parser.
14 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_LIB_H_
15 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_LIB_H_
16 |
17 | #include
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 |
24 | #include "absl/status/statusor.h"
25 | #include "absl/strings/string_view.h"
26 |
27 | namespace alphafold3 {
28 |
29 | // Parse FASTA string and return list of strings with amino acid sequences.
30 | // Returns a list of amino acid sequences only.
31 | std::vector ParseFasta(absl::string_view fasta_string);
32 |
33 | // Parse FASTA string and return list of strings with amino acid sequences.
34 | // Returns two lists: The first one with amino acid sequences, the second with
35 | // the descriptions associated with each sequence.
36 | std::pair, std::vector>
37 | ParseFastaIncludeDescriptions(absl::string_view fasta_string);
38 |
39 | // Lazy FASTA parser for memory efficient FASTA parsing from a path.
40 | class FastaFileIterator {
41 | public:
42 | // Initialise FastaFileIterator with filename of fasta. If you initialize
43 | // reader_ with an invalid path or empty file, it won't fail, only
44 | // riegeli::ReadLine within the Next method will then return false. That will
45 | // then trigger the "Invalid FASTA file" error.
46 | explicit FastaFileIterator(absl::string_view fasta_path)
47 | : filename_(fasta_path),
48 | reader_(filename_, std::ios::in),
49 | has_next_(true) {}
50 |
51 | // Returns whether there are more sequences. Returns true before first call to
52 | // next even if the file is empty.
53 | bool HasNext() const { return has_next_; }
54 |
55 | // Fetches the next (sequence, description) from the file.
56 | absl::StatusOr> Next();
57 |
58 | private:
59 | // Use riegeli::FileReader instead of FileLineIterator for about 2x speedup.
60 | std::string filename_;
61 | std::fstream reader_;
62 | std::optional description_;
63 | std::string sequence_;
64 | bool has_next_;
65 | };
66 |
67 | // Lazy FASTA parser for memory efficient FASTA parsing from a string.
68 | class FastaStringIterator {
69 | public:
70 | // Initialise FastaStringIterator with a string_view of a FASTA. If you
71 | // initialize it with an invalid FASTA string, it won't fail, the Next method
72 | // will then return false. That will then trigger the "Invalid FASTA" error.
73 | // WARNING: The object backing the fasta_string string_view must not be
74 | // deleted while this Iterator is alive.
75 | explicit FastaStringIterator(absl::string_view fasta_string)
76 | : fasta_string_(fasta_string), has_next_(true) {}
77 |
78 | // Returns whether there are more sequences. Returns true before first call to
79 | // next even if the string is empty.
80 | bool HasNext() const { return has_next_; }
81 |
82 | // Fetches the next (sequence, description) from the string.
83 | absl::StatusOr> Next();
84 |
85 | private:
86 | absl::string_view fasta_string_;
87 | bool has_next_;
88 | std::optional description_;
89 | std::string sequence_;
90 | };
91 |
92 | } // namespace alphafold3
93 |
94 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_LIB_H_
95 |
--------------------------------------------------------------------------------
/src/alphafold3/parsers/cpp/fasta_iterator_pybind.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | #include
12 |
13 | #include "absl/status/statusor.h"
14 | #include "absl/strings/string_view.h"
15 | #include "alphafold3/parsers/cpp/fasta_iterator_lib.h"
16 | #include "pybind11/attr.h"
17 | #include "pybind11/pybind11.h"
18 | #include "pybind11/pytypes.h"
19 | #include "pybind11/stl.h"
20 |
21 | namespace alphafold3 {
22 | namespace {
23 |
24 | namespace py = pybind11;
25 |
26 | template
27 | T ValueOrThrowValueError(absl::StatusOr value) {
28 | if (!value.ok()) throw py::value_error(value.status().ToString());
29 | return *std::move(value);
30 | }
31 |
// Python docstrings for the bindings registered in this file. Each raw string
// starts with a newline. In this file, kParseFastaDoc and
// kParseFastaIncludeDescriptionsDoc are passed to py::doc as `ptr + 1`, which
// skips that newline; the two iterator docstrings are passed unchanged.

// Docstring of the FastaFileIterator Python class.
constexpr char kFastaFileIteratorDoc[] = R"(
Lazy FASTA parser for memory efficient FASTA parsing from a path.)";

// Docstring of the FastaStringIterator Python class.
// NOTE(review): the example calls `iterator.next()`, but the bindings in this
// file only define `__iter__` and `__next__`. Also, the Python-facing wrapper
// class in this file stores the Python string object as a member, so confirm
// whether the use-after-free warning still applies to Python callers.
constexpr char kFastaStringIteratorDoc[] = R"(
Lazy FASTA parser for memory efficient FASTA parsing from a string.

WARNING: The object backing the fasta_string string_view must not be
deleted while the FastaStringIterator is alive. E.g. this will break:

```
# Make sure the fasta_string is not interned.
fasta_string = '\n'.join(['>d\nS' for _ in range(10)])
iterator = fasta_iterator.FastaStringIterator(fasta_string)
del fasta_string
iterator.next() # Heap use-after-free.
```
)";

// Docstring of the parse_fasta Python function.
constexpr char kParseFastaDoc[] = R"(
Parses a FASTA string and returns a list of amino-acid sequences.

Args:
fasta_string: The contents of a FASTA file.

Returns:
List of sequences in the FASTA file. Descriptions are ignored.
)";

// Docstring of the parse_fasta_include_descriptions Python function.
constexpr char kParseFastaIncludeDescriptionsDoc[] = R"(
Parses a FASTA string, returns amino-acid sequences with descriptions.

Args:
fasta_string: The contents of a FASTA file.

Returns:
A tuple with two lists (sequences, descriptions):
* A list of sequences.
* A list of sequence descriptions taken from the comment lines. In the
same order as the sequences.
)";
72 |
73 | class PythonFastaStringIterator : public FastaStringIterator {
74 | public:
75 | explicit PythonFastaStringIterator(py::object fasta_string)
76 | : FastaStringIterator(py::cast(fasta_string)),
77 | fasta_string_(std::move(fasta_string)) {}
78 |
79 | private:
80 | py::object fasta_string_;
81 | };
82 |
83 | } // namespace
84 |
// Registers the FASTA parsing bindings on module `m`: the FastaFileIterator
// and FastaStringIterator classes (Python iterator protocol) and the
// parse_fasta / parse_fasta_include_descriptions functions.
//
// NOTE(review): the template arguments of py::class_, py::init and
// py::call_guard appear to have been stripped from this copy of the file
// (e.g. `py::class_(m, ...)`); restore them from the upstream source before
// compiling. They are left untouched here because the intended arguments of
// py::init and py::call_guard cannot be determined from this file alone.
void RegisterModuleFastaIterator(pybind11::module m) {
  py::class_(m, "FastaFileIterator", kFastaFileIteratorDoc)
      .def(py::init(), py::arg("fasta_path"))
      // __iter__ returns the iterator itself.
      .def("__iter__",
           [](FastaFileIterator& iterator) -> FastaFileIterator& {
             return iterator;
           })
      // __next__ yields the next record, raises ValueError on a parse error
      // and StopIteration once the iterator is exhausted.
      .def(
          "__next__",
          [](FastaFileIterator& iterator) {
            if (iterator.HasNext()) {
              return ValueOrThrowValueError(iterator.Next());
            } else {
              throw py::stop_iteration();
            }
          },
          py::call_guard());

  // The Python-facing string iterator is the wrapper that keeps the Python
  // string object alive.
  py::class_(m, "FastaStringIterator",
             kFastaStringIteratorDoc)
      .def(py::init(), py::arg("fasta_string"))
      .def("__iter__",
           [](PythonFastaStringIterator& iterator)
               -> PythonFastaStringIterator& { return iterator; })
      .def(
          "__next__",
          [](PythonFastaStringIterator& iterator) {
            if (iterator.HasNext()) {
              return ValueOrThrowValueError(iterator.Next());
            } else {
              throw py::stop_iteration();
            }
          },
          py::call_guard());

  // `+ 1` skips the leading newline of the raw-string docstrings.
  m.def("parse_fasta", &ParseFasta, py::arg("fasta_string"),
        py::call_guard(), py::doc(kParseFastaDoc + 1));
  m.def("parse_fasta_include_descriptions", &ParseFastaIncludeDescriptions,
        py::arg("fasta_string"), py::call_guard(),
        py::doc(kParseFastaIncludeDescriptionsDoc + 1));
}
126 |
127 | } // namespace alphafold3
128 |
--------------------------------------------------------------------------------
/src/alphafold3/parsers/cpp/fasta_iterator_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
namespace alphafold3 {

// Registers the FASTA iterator and FASTA parsing Python bindings on the
// pybind11 module `m`.
void RegisterModuleFastaIterator(pybind11::module m);

}  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_FASTA_ITERATOR_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/parsers/cpp/msa_conversion.pyi:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Type annotations for Python bindings for `msa_conversion`.
12 |
13 | The type annotations in this file were modified from the automatically generated
14 | stubgen output.
15 | """
16 |
17 | from collections.abc import Iterable
18 |
19 |
# Takes a sequence and a query sequence (str or bytes each) and returns a str.
# NOTE(review): presumably re-aligns `sequence` so that it matches the query
# with the query's gaps removed - confirm against the C++ implementation.
def align_sequence_to_gapless_query(
    sequence: str | bytes,
    query_sequence: str | bytes,
) -> str: ...
24 |
25 |
# Takes an iterable of A3M sequence strings and returns a list of strings.
# NOTE(review): presumably one Stockholm-formatted sequence per input, in the
# same order - confirm against the C++ implementation.
def convert_a3m_to_stockholm(a3m_sequences: Iterable[str]) -> list[str]: ...
27 |
--------------------------------------------------------------------------------
/src/alphafold3/parsers/cpp/msa_conversion_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_MSA_CONVERSION_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_MSA_CONVERSION_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
namespace alphafold3 {

// Registers the msa_conversion Python bindings on the pybind11 module `m`.
// NOTE(review): the definition is not in this header; confirm what it binds
// against the matching .cc file.
void RegisterModuleMsaConversion(pybind11::module m);

}  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_PARSERS_PYTHON_MSA_CONVERSION_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/scripts/copy_to_ssd.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2024 DeepMind Technologies Limited
3 | #
4 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | #
7 | # To request access to the AlphaFold 3 model parameters, follow the process set
8 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | # if received directly from Google. Use is subject to terms of use available at
10 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 |
set -euo pipefail

# Copies the database files from SOURCE_DIR to TARGET_DIR, dropping files from
# the end of the list until the remaining ones fit on the target drive.
#
# Usage: copy_to_ssd.sh [SOURCE_DIR] [TARGET_DIR]
# Exits non-zero if any of the copies fails.

readonly SOURCE_DIR=${1:-$HOME/public_databases}
readonly TARGET_DIR=${2:-/mnt/disks/ssd/public_databases}

mkdir -p "${TARGET_DIR}"

# Files are dropped from the end of this list first when space is short.
FILES=(pdb_seqres_2022_09_28.fasta \
       uniprot_all_2021_04.fa \
       mgy_clusters_2022_05.fa \
       uniref90_2022_05.fa \
       bfd-first_non_consensus_sequences.fasta \
       rfam_14_9_clust_seq_id_90_cov_80_rep_seq.fasta \
       nt_rna_2023_02_23_clust_seq_id_90_cov_80_rep_seq.fasta \
       rnacentral_active_seq_id_90_cov_80_linclust.fasta)

NOT_COPIED_FILES=()
COPY_PIDS=()

while (( ${#FILES[@]} )); do
  # Get total size of files to copy in bytes
  SOURCE_FILES=( "${FILES[@]/#/${SOURCE_DIR}/}" )
  TOTAL_SIZE=$(du -sbc "${SOURCE_FILES[@]}" | awk 'END{print $1}')

  # Get available space on target drive in bytes
  AVAILABLE_SPACE=$(df --portability --block-size=1 "$TARGET_DIR" | awk 'END{print $4}')

  # Compare sizes and copy if enough space
  if (( TOTAL_SIZE <= AVAILABLE_SPACE )); then
    printf 'Copying files... %s\n' "${FILES[@]}"
    echo "From ${SOURCE_DIR} -> ${TARGET_DIR}"

    # Copy in parallel; remember each PID so failures can be detected below.
    for file in "${FILES[@]}"; do
      cp -r "${SOURCE_DIR}/${file}" "${TARGET_DIR}/" &
      COPY_PIDS+=("$!")
    done
    break
  else
    # Not enough room: drop the last file and try again with the rest.
    NOT_COPIED_FILES+=("${FILES[-1]}")
    unset 'FILES[-1]'
  fi
done

# Only report skipped files when there are any; an unconditional printf would
# print a bogus "No room left" line even when everything fits.
if (( ${#NOT_COPIED_FILES[@]} )); then
  printf 'No room left on ssd for: %s\n' "${NOT_COPIED_FILES[@]}"
fi

# A bare `wait` always succeeds, hiding failed copies. Wait for each copy
# individually and propagate any failure through the exit status.
COPY_STATUS=0
for pid in ${COPY_PIDS[@]+"${COPY_PIDS[@]}"}; do
  if ! wait "${pid}"; then
    echo "A copy to ${TARGET_DIR} failed (pid ${pid})" >&2
    COPY_STATUS=1
  fi
done
exit "${COPY_STATUS}"
55 |
--------------------------------------------------------------------------------
/src/alphafold3/scripts/gcp_mount_ssd.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2024 DeepMind Technologies Limited
3 | #
4 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | #
7 | # To request access to the AlphaFold 3 model parameters, follow the process set
8 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | # if received directly from Google. Use is subject to terms of use available at
10 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 |
set -euo pipefail

# Formats (if needed) and mounts the first not-yet-mounted GCP local SSD.
#
# Usage: <script> [MOUNT_DIR]
#
# Does nothing if MOUNT_DIR already exists. Exits 1 if no disk could be mounted.

readonly MOUNT_DIR="${1:-/mnt/disks/ssd}"

if [[ -d "${MOUNT_DIR}" ]]; then
  echo "Mount directory ${MOUNT_DIR} already exists, skipping"
  exit 0
fi

# Read candidate links one per line so that several matching entries are
# handled individually. The list is fed on fd 3 so commands inside the loop
# cannot consume it from stdin.
while IFS= read -r -u 3 SSD_LINK; do
  SSD_DISK=$(realpath "${SSD_LINK}") || continue

  # Check if the disk is already formatted
  if ! blkid -o value -s TYPE "${SSD_DISK}" > /dev/null 2>&1; then
    echo "Disk ${SSD_DISK} is not formatted, format it."
    mkfs.ext4 -m 0 -E lazy_itable_init=0,lazy_journal_init=0,discard "${SSD_DISK}" || continue
  fi

  # Check if THIS disk (not a fixed device name) is already mounted
  if grep -qs "^${SSD_DISK} " /proc/mounts; then
    grep -s "^${SSD_DISK} " /proc/mounts
    echo "Disk ${SSD_DISK} is already mounted, skip it."
    continue
  fi

  # Disk is not mounted, mount it
  echo "Mounting ${SSD_DISK} to ${MOUNT_DIR}"
  mkdir -p "${MOUNT_DIR}"
  # NOTE(review): chmod runs before mount, so it applies to the mount-point
  # directory rather than to the root of the mounted filesystem; confirm this
  # is the intended behaviour.
  chmod -R 777 "${MOUNT_DIR}"
  mount "${SSD_DISK}" "${MOUNT_DIR}"
  break
done 3< <(find /dev/disk/by-id/ | grep google-local)

# MOUNT_DIR is only created right before a mount attempt, so its absence means
# no candidate disk reached that step.
if [[ ! -d "${MOUNT_DIR}" ]]; then
  echo "No unmounted SSD disks found"
  exit 1
fi
48 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Structure module initialization."""
12 |
13 | # pylint: disable=g-importing-member
14 | from alphafold3.structure.bioassemblies import BioassemblyData
15 | from alphafold3.structure.bonds import Bonds
16 | from alphafold3.structure.chemical_components import ChemCompEntry
17 | from alphafold3.structure.chemical_components import ChemicalComponentsData
18 | from alphafold3.structure.chemical_components import get_data_for_ccd_components
19 | from alphafold3.structure.chemical_components import populate_missing_ccd_data
20 | from alphafold3.structure.mmcif import BondParsingError
21 | from alphafold3.structure.parsing import BondAtomId
22 | from alphafold3.structure.parsing import from_atom_arrays
23 | from alphafold3.structure.parsing import from_mmcif
24 | from alphafold3.structure.parsing import from_parsed_mmcif
25 | from alphafold3.structure.parsing import from_res_arrays
26 | from alphafold3.structure.parsing import from_sequences_and_bonds
27 | from alphafold3.structure.parsing import ModelID
28 | from alphafold3.structure.parsing import SequenceFormat
29 | from alphafold3.structure.structure import ARRAY_FIELDS
30 | from alphafold3.structure.structure import AuthorNamingScheme
31 | from alphafold3.structure.structure import Bond
32 | from alphafold3.structure.structure import CascadeDelete
33 | from alphafold3.structure.structure import concat
34 | from alphafold3.structure.structure import enumerate_residues
35 | from alphafold3.structure.structure import fix_non_standard_polymer_residues
36 | from alphafold3.structure.structure import GLOBAL_FIELDS
37 | from alphafold3.structure.structure import make_empty_structure
38 | from alphafold3.structure.structure import MissingAtomError
39 | from alphafold3.structure.structure import MissingAuthorResidueIdError
40 | from alphafold3.structure.structure import multichain_residue_index
41 | from alphafold3.structure.structure import stack
42 | from alphafold3.structure.structure import Structure
43 | from alphafold3.structure.structure_tables import Atoms
44 | from alphafold3.structure.structure_tables import Chains
45 | from alphafold3.structure.structure_tables import Residues
46 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/__init__.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/__init__.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/bioassemblies.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/bioassemblies.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/bonds.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/bonds.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/chemical_components.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/chemical_components.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/mmcif.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/mmcif.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/parsing.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/parsing.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/sterics.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/sterics.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/structure.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/structure.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/structure_tables.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/structure_tables.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/table.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/table.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/__pycache__/test_utils.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/structure/__pycache__/test_utils.cpython-311.pyc
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/aggregation.pyi:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | from collections.abc import Sequence
12 |
# Returns a map from each value in `values` to the list of indices that value
# occupies, e.g. [1, 1, 2, 3, 3, 1, 1] -> {1: [0, 1, 5, 6], 2: [2], 3: [3, 4]}.
def indices_grouped_by_value(values: Sequence[int]) -> dict[int, list[int]]: ...
14 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/aggregation_pybind.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | #include
12 | #include
13 |
14 | #include "absl/container/flat_hash_map.h"
15 | #include "absl/types/span.h"
16 | #include "pybind11/cast.h"
17 | #include "pybind11/numpy.h"
18 | #include "pybind11/pybind11.h"
19 | #include "pybind11_abseil/absl_casters.h"
20 |
21 | namespace {
22 |
23 | namespace py = pybind11;
24 |
25 | absl::flat_hash_map> IndicesGroupedByValue(
26 | absl::Span values) {
27 | absl::flat_hash_map> group_indices;
28 | for (int64_t i = 0, e = values.size(); i < e; ++i) {
29 | group_indices[values[i]].push_back(i);
30 | }
31 | return group_indices;
32 | }
33 |
34 | constexpr char kIndicesGroupedByValue[] = R"(
35 | Returns a map from value to a list of indices this value occupies.
36 |
37 | E.g. indices_grouped_by_value([1, 1, 2, 3, 3, 1, 1]) returns:
38 | {1: [0, 1, 5, 6], 2: [2], 3: [3, 4]}
39 |
40 | Args:
41 | values: a list of values to group.
42 | )";
43 |
44 | } // namespace
45 |
46 | namespace alphafold3 {
47 |
48 | void RegisterModuleAggregation(py::module m) {
49 | m.def("indices_grouped_by_value", &IndicesGroupedByValue, py::arg("values"),
50 | py::doc(kIndicesGroupedByValue + 1),
51 | py::call_guard());
52 | }
53 |
54 | } // namespace alphafold3
55 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/aggregation_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_AGGREGATION_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_AGGREGATION_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
namespace alphafold3 {

// Registers the aggregation bindings on the given Python module.
void RegisterModuleAggregation(pybind11::module m);

}  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_AGGREGATION_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/membership.pyi:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | import numpy
12 |
13 |
# Computes, element-wise, whether each value of `array` is in `test_elements`
# (or is NOT in it when `invert` is True). The result has the same shape as
# `array`. `invert` is keyword-only, matching the `py::kw_only()` marker in
# the C++ binding.
def isin(
    array: numpy.ndarray[numpy.int64],
    test_elements: set[int],
    *,
    invert: bool = ...,
) -> numpy.ndarray[bool]: ...
19 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/membership_pybind.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 | #include "absl/container/flat_hash_set.h"
17 | #include "pybind11/cast.h"
18 | #include "pybind11/numpy.h"
19 | #include "pybind11/pybind11.h"
20 | #include "pybind11_abseil/absl_casters.h"
21 |
22 | namespace {
23 |
24 | namespace py = pybind11;
25 |
26 | py::array_t IsIn(const py::array_t& array,
27 | const absl::flat_hash_set& test_elements,
28 | bool invert) {
29 | const size_t num_elements = array.size();
30 |
31 | py::array_t output(num_elements);
32 | std::fill(output.mutable_data(), output.mutable_data() + output.size(),
33 | invert);
34 |
35 | // Shortcut: The output will be trivially always false if test_elements empty.
36 | if (test_elements.empty()) {
37 | return output;
38 | }
39 |
40 | for (size_t i = 0; i < num_elements; ++i) {
41 | if (test_elements.contains(array.data()[i])) {
42 | output.mutable_data()[i] = !invert;
43 | }
44 | }
45 | if (array.ndim() > 1) {
46 | auto shape =
47 | std::vector(array.shape(), array.shape() + array.ndim());
48 | return output.reshape(shape);
49 | }
50 | return output;
51 | }
52 |
53 | constexpr char kIsInDoc[] = R"(
54 | Computes whether each element is in test_elements.
55 |
56 | Same use as np.isin, but much faster. If len(array) = n, len(test_elements) = m:
57 | * This function has complexity O(n).
58 | * np.isin with kind='sort' has complexity O(m*log(m) + n * log(m)).
59 |
60 | Args:
61 | array: Input NumPy array with dtype=np.int64.
62 | test_elements: The values against which to test each value of array.
63 | invert: If True, the values in the returned array are inverted, as if
64 | calculating `element not in test_elements`. Default is False.
65 | `isin(a, b, invert=True)` is equivalent to but faster than `~isin(a, b)`.
66 |
67 | Returns
68 | A boolean array of the same shape as the input array. Each value `val` is:
69 | * `val in test_elements` if `invert=False`,
70 | * `val not in test_elements` if `invert=True`.
71 | )";
72 |
73 | } // namespace
74 |
75 | namespace alphafold3 {
76 |
77 | void RegisterModuleMembership(pybind11::module m) {
78 | m.def("isin", &IsIn, py::arg("array"), py::arg("test_elements"),
79 | py::kw_only(), py::arg("invert") = false, py::doc(kIsInDoc + 1));
80 | }
81 |
82 | } // namespace alphafold3
83 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/membership_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MEMBERSHIP_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MEMBERSHIP_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
namespace alphafold3 {

// Registers the membership (`isin`) bindings on the given Python module.
void RegisterModuleMembership(pybind11::module m);

}  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MEMBERSHIP_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_altlocs.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ALTLOCS_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ALTLOCS_H_
15 |
16 | #include
17 | #include
18 | #include
19 | #include
20 |
21 | #include "absl/types/span.h"
22 | #include "alphafold3/structure/cpp/mmcif_layout.h"
23 |
namespace alphafold3 {

// Returns the list of indices that should be kept after resolving alt-locs.
// 1) Partial Residue. Each cycle of alt-locs is resolved separately with the
//    highest occupancy alt-loc. Tie-breaks are resolved alphabetically. See
//    tests for examples.
// 2) Whole Residue. These are resolved in two passes.
//    a) The residue with the highest occupancy is chosen.
//    b) The locations for a given residue are resolved.
//    All tie-breaks are resolved alphabetically. See tests for examples.
//
// Preconditions: layout and comp_ids, atom_ids, alt_ids, occupancies are all
// from the same mmCIF file, and chain_indices are monotonically increasing and
// less than layout.num_chains().
//
// comp_ids from '_atom_site.label_comp_id'.
// atom_ids presumably from '_atom_site.label_atom_id' (not stated in the
//   original comment) - TODO confirm against the implementation.
// alt_ids from '_atom_site.label_alt_id'.
// occupancies from '_atom_site.occupancy'.
//
// NOTE(review): the template arguments of std::vector / absl::Span in this
// declaration are missing in this copy of the file; restore them from the
// implementation before compiling.
std::vector ResolveMmcifAltLocs(
const MmcifLayout& layout, absl::Span comp_ids,
absl::Span atom_ids,
absl::Span alt_ids,
absl::Span occupancies,
absl::Span chain_indices);

} // namespace alphafold3
50 |
51 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ALTLOCS_H_
52 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_atom_site.pyi:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | from collections.abc import Callable
12 | from alphafold3.cpp import cif_dict
13 |
14 |
# Maps each internal chain ID (`_atom_site.label_asym_id`) to the author chain
# ID (`_atom_site.auth_asym_id`) found on the first row where it occurs.
def get_internal_to_author_chain_id_map(
    mmcif: cif_dict.CifDict
) -> dict[str,str]: ...
18 |
19 |
# Returns `_atom_site.type_symbol` for every atom when that column is present.
# Otherwise calls `atom_id_to_type_symbol(label_comp_id, label_atom_id)` once
# per atom and returns the collected results.
def get_or_infer_type_symbol(
    mmcif: cif_dict.CifDict,
    atom_id_to_type_symbol: Callable[[str, str], str],
) -> list[str]: ...
24 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_atom_site_pybind.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | #include
12 |
13 | #include "absl/container/flat_hash_map.h"
14 | #include "absl/log/check.h"
15 | #include "absl/strings/string_view.h"
16 | #include "absl/types/span.h"
17 | #include "alphafold3/parsers/cpp/cif_dict_lib.h"
18 | #include "pybind11/gil.h"
19 | #include "pybind11/pybind11.h"
20 | #include "pybind11/pytypes.h"
21 | #include "pybind11/stl.h"
22 | #include "pybind11_abseil/absl_casters.h"
23 |
24 | namespace alphafold3 {
25 | namespace {
26 | namespace py = pybind11;
27 |
28 | // If present, returns the _atom_site.type_symbol. If not, infers it using
29 | // _atom_site.label_comp_id (residue name), _atom_site.label_atom_id (atom name)
30 | // and the CCD.
31 | py::list GetOrInferTypeSymbol(const CifDict& mmcif,
32 | const py::object& atom_id_to_type_symbol) {
33 | const auto& type_symbol = mmcif["_atom_site.type_symbol"];
34 | const int num_atom = mmcif["_atom_site.id"].size();
35 | py::list patched_type_symbol(num_atom);
36 | if (type_symbol.empty()) {
37 | const auto& label_comp_id = mmcif["_atom_site.label_comp_id"];
38 | const auto& label_atom_id = mmcif["_atom_site.label_atom_id"];
39 | CHECK_EQ(label_comp_id.size(), num_atom);
40 | CHECK_EQ(label_atom_id.size(), num_atom);
41 | for (int i = 0; i < num_atom; i++) {
42 | patched_type_symbol[i] =
43 | atom_id_to_type_symbol(label_comp_id[i], label_atom_id[i]);
44 | }
45 | } else {
46 | for (int i = 0; i < num_atom; i++) {
47 | patched_type_symbol[i] = type_symbol[i];
48 | }
49 | }
50 | return patched_type_symbol;
51 | }
52 |
53 | absl::flat_hash_map
54 | GetInternalToAuthorChainIdMap(const CifDict& mmcif) {
55 | const auto& label_asym_ids = mmcif["_atom_site.label_asym_id"];
56 | const auto& auth_asym_ids = mmcif["_atom_site.auth_asym_id"];
57 | CHECK_EQ(label_asym_ids.size(), auth_asym_ids.size());
58 |
59 | absl::flat_hash_map mapping;
60 | for (size_t i = 0, num_rows = label_asym_ids.size(); i < num_rows; ++i) {
61 | // Use only the first internal_chain_id occurrence to generate the mapping.
62 | // It should not matter as there should not be a case where a single
63 | // internal chain ID would map to more than one author chain IDs (i.e. the
64 | // mapping should be injective). Since we need this method to be fast, we
65 | // choose not to check it.
66 | mapping.emplace(label_asym_ids[i], auth_asym_ids[i]);
67 | }
68 | return mapping;
69 | }
70 |
71 | } // namespace
72 |
73 | namespace py = pybind11;
74 |
75 | void RegisterModuleMmcifAtomSite(pybind11::module m) {
76 | m.def("get_or_infer_type_symbol", &GetOrInferTypeSymbol, py::arg("mmcif"),
77 | py::arg("atom_id_to_type_symbol"));
78 |
79 | m.def("get_internal_to_author_chain_id_map", &GetInternalToAuthorChainIdMap,
80 | py::arg("mmcif"), py::call_guard());
81 | }
82 |
83 | } // namespace alphafold3
84 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_atom_site_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ATOM_SITE_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ATOM_SITE_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
namespace alphafold3 {

// Registers the mmCIF _atom_site helper bindings on the given Python module.
void RegisterModuleMmcifAtomSite(pybind11::module m);

}  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_ATOM_SITE_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_layout.pyi:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | from alphafold3.cpp import cif_dict
12 |
# Type stub for the C++ `MmcifLayout` class exposed through pybind11.
# The per-method notes below repeat the docstrings attached in the binding.
class MmcifLayout:
    def atom_range(self, residue_index: int) -> tuple[int, int]: ...
    # Indices of the first atom of each chain.
    def chain_starts(self) -> list[int]: ...
    # Indices one past the last residue of each chain.
    def chains(self) -> list[int]: ...
    # First atom index that is part of the specified model.
    def model_offset(self) -> int: ...
    def num_atoms(self) -> int: ...
    def num_chains(self) -> int: ...
    def num_models(self) -> int: ...
    def num_residues(self) -> int: ...
    def residue_range(self, chain_index: int) -> tuple[int, int]: ...
    # Indices of the first atom of each residue.
    def residue_starts(self) -> list[int]: ...
    # Indices one past the last atom of each residue.
    def residues(self) -> list[int]: ...
25 |
# Creates an MmcifLayout from a parsed mmCIF (bound to `MmcifLayout::Create`).
# The binding's default for `model_id` is the empty string.
def from_mmcif(mmcif: cif_dict.CifDict, model_id: str = ...) -> MmcifLayout: ...
27 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_layout_pybind.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | #include "alphafold3/structure/cpp/mmcif_layout.h"
12 | #include "pybind11/pybind11.h"
13 | #include "pybind11/pytypes.h"
14 | #include "pybind11/stl.h"
15 |
16 | namespace alphafold3 {
17 |
18 | namespace py = pybind11;
19 |
20 | void RegisterModuleMmcifLayout(pybind11::module m) {
21 | py::class_(m, "MmcifLayout")
22 | .def("__str__", &MmcifLayout::ToDebugString)
23 | .def("num_models", &MmcifLayout::num_models)
24 | .def("num_chains", &MmcifLayout::num_chains)
25 | .def("num_residues", &MmcifLayout::num_residues)
26 | .def("num_atoms", &MmcifLayout::num_atoms)
27 | .def("residue_range", &MmcifLayout::residue_range, py::arg("chain_index"))
28 | .def("atom_range", &MmcifLayout::atom_range, py::arg("residue_index"))
29 | .def("chains", &MmcifLayout::chains,
30 | py::doc("Returns a list of indices one past the last residue of "
31 | "each chain."))
32 | .def(
33 | "chain_starts", &MmcifLayout::chain_starts,
34 | py::doc("Returns a list of indices of the first atom of each chain."))
35 | .def("residues", &MmcifLayout::residues,
36 | py::doc("Returns a list of indices one past the last atom of each "
37 | "residue."))
38 | .def("residue_starts", &MmcifLayout::residue_starts,
39 | py::doc(
40 | "Returns a list of indices of the first atom of each residue."))
41 | .def("model_offset", &MmcifLayout::model_offset,
42 | py::doc("Returns the first atom index that is part of the specified "
43 | "model."));
44 |
45 | m.def("from_mmcif", &MmcifLayout::Create, py::arg("mmcif"),
46 | py::arg("model_id") = "");
47 | }
48 |
49 | } // namespace alphafold3
50 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_layout_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_LAYOUT_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_LAYOUT_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
namespace alphafold3 {

// Registers the MmcifLayout bindings on the given Python module.
void RegisterModuleMmcifLayout(pybind11::module m);

}  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_LAYOUT_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_struct_conn.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_H_
15 |
16 | #include <utility>
17 | #include <vector>
18 |
19 | #include "absl/status/statusor.h"
20 | #include "absl/strings/string_view.h"
21 | #include "alphafold3/parsers/cpp/cif_dict_lib.h"
22 |
23 | namespace alphafold3 {
24 |
25 | // Returns the (from, to) atom index vectors, one entry per row of the bonds
26 | // table (aka _struct_conn), or a non-OK status on failure. Indices are 0-based
27 | // positions into the columns of the _atom_site table in the input mmCIF, and
28 | // do not necessarily correspond to _atom_site.id or any other column.
29 | absl::StatusOr<std::pair<std::vector<std::size_t>, std::vector<std::size_t>>>
30 | GetBondAtomIndices(const CifDict& mmcif, absl::string_view model_id);
31 |
32 | } // namespace alphafold3
33 |
34 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_H_
35 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_struct_conn.pyi:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | from alphafold3.cpp import cif_dict
12 |
13 | def get_bond_atom_indices(mmcif_dict: cif_dict.CifDict, model_id: str) -> tuple[list[int],list[int]]: ...  # Stub only: returns two lists of int atom indices.
14 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_struct_conn_pybind.cc:
--------------------------------------------------------------------------------
1 | // Copyright 2024 DeepMind Technologies Limited
2 | //
3 | // AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | // this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | //
6 | // To request access to the AlphaFold 3 model parameters, follow the process set
7 | // out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | // if received directly from Google. Use is subject to terms of use available at
9 | // https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | #include <string>
12 |
13 | #include "absl/strings/string_view.h"
14 | #include "alphafold3/parsers/cpp/cif_dict_lib.h"
15 | #include "alphafold3/structure/cpp/mmcif_struct_conn.h"
16 | #include "pybind11/gil.h"
17 | #include "pybind11/pybind11.h"
18 | #include "pybind11/pytypes.h"
19 | #include "pybind11/stl.h"
20 |
21 | namespace alphafold3 {
22 |
23 | namespace py = pybind11;
24 |
25 | constexpr char kGetBondAtomIndices[] = R"(
26 | Extracts the indices of the atoms that participate in bonds.
27 |
28 | This function has a workaround for a known PDB issue: some mmCIFs have
29 | (2evw, 2g0v, 2g0x, 2g0z, 2g10, 2g11, 2g12, 2g14, 2grz, 2ntw as of 2024)
30 | multiple models and they set different whole-chain altloc in each model.
31 | The bond table however doesn't distinguish between models, so there are
32 | bonds that are valid only for some models. E.g. 2grz has model 1 with
33 | chain A with altloc A, and model 2 with chain A with altloc B. The bonds
34 | table lists a bond for each of these. This case is rather rare (10 cases
35 | in PDB as of 2024). For the offending bonds, the returned atom index is
36 | set to the size of the atom_site table, i.e. it is an invalid index.
37 |
38 | Args:
39 | mmcif_dict: The mmCIF object to process.
40 | model_id: The ID of the model that the returned atoms will belong to. This
41 | should be a value in the mmCIF's _atom_site.pdbx_PDB_model_num column.
42 |
43 | Returns:
44 | Two lists of atom indices, `from_atoms` and `to_atoms`, each one having
45 | length num_bonds (as defined by _struct_conn, the bonds table). The bond
46 | i, defined by the i'th row in _struct_conn, is a bond from atom at index
47 | from_atoms[i], to the atom at index to_atoms[i]. The indices are simple
48 | 0-based indexes into the columns of the _atom_site table in the input
49 | mmCIF, and do not necessarily correspond to the values in _atom_site.id,
50 | or any other column.
51 | )";
52 |
53 | void RegisterModuleMmcifStructConn(pybind11::module m) {
54 |   m.def(  // Exposes GetBondAtomIndices to Python as `get_bond_atom_indices`.
55 |       "get_bond_atom_indices",
56 |       [](const CifDict& mmcif, absl::string_view model_id) {
57 |         auto result = GetBondAtomIndices(mmcif, model_id);
58 |         if (result.ok()) {
59 |           return *result;
60 |         }  // A non-OK status is surfaced to Python as a ValueError.
61 |         throw py::value_error(std::string(result.status().message()));
62 |       },
63 |       py::arg("mmcif_dict"), py::arg("model_id"),
64 |       py::doc(kGetBondAtomIndices + 1),  // +1 skips the leading newline.
65 |       py::call_guard<py::gil_scoped_release>());  // Run without the GIL.
66 | }
67 |
68 | } // namespace alphafold3
69 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_struct_conn_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
18 | namespace alphafold3 {
19 | // Adds the `get_bond_atom_indices` function to the pybind11 module `m`.
20 | void RegisterModuleMmcifStructConn(pybind11::module m);
21 |
22 | }  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_STRUCT_CONN_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_utils.pyi:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | from collections.abc import Sequence
12 |
13 | import numpy as np
14 |
15 | from alphafold3.cpp import cif_dict
16 | from alphafold3.structure.python import mmcif_layout
17 |
18 |
19 | def filter(  # Stub only. NOTE: this module-level name shadows the builtin `filter`.
20 | mmcif: cif_dict.CifDict,
21 | include_nucleotides: bool,  # Required: the only include_* flag without a default.
22 | include_ligands: bool = ...,  # `...` means the default is set by the implementation.
23 | include_water: bool = ...,
24 | include_other: bool = ...,
25 | model_id: str = ...,
26 | ) -> tuple[np.ndarray[int], mmcif_layout.MmcifLayout]: ...  # (int array, layout) pair.
27 |
28 |
29 | def fix_residues(  # Stub only; returns None, so any effect is not visible in this signature.
30 | layout: mmcif_layout.MmcifLayout,
31 | comp_id: Sequence[str],
32 | atom_id: Sequence[str],
33 | atom_x: Sequence[float],  # atom_x / atom_y / atom_z are three separate float sequences.
34 | atom_y: Sequence[float],
35 | atom_z: Sequence[float],
36 | fix_arg: bool = ...,  # `...` means the default is set by the implementation.
37 | ) -> None: ...
38 |
39 |
40 | def read_layout(  # Stub only: CifDict (plus optional model_id) -> MmcifLayout.
41 | mmcif: cif_dict.CifDict, model_id: str = ...  # `...`: default set by the implementation.
42 | ) -> mmcif_layout.MmcifLayout: ...
43 |
44 |
45 | def selected_ligand_residue_mask(  # Stub only; every argument is required.
46 | layout: mmcif_layout.MmcifLayout,
47 | atom_site_label_asym_ids: list[str],  # All remaining arguments are lists of str.
48 | atom_site_label_seq_ids: list[str],
49 | atom_site_auth_seq_ids: list[str],
50 | atom_site_label_comp_ids: list[str],
51 | atom_site_pdbx_pdb_ins_codes: list[str],
52 | nonpoly_asym_ids: list[str],
53 | nonpoly_auth_seq_ids: list[str],
54 | nonpoly_pdb_ins_codes: list[str],
55 | nonpoly_mon_ids: list[str],
56 | branch_asym_ids: list[str],
57 | branch_auth_seq_ids: list[str],
58 | branch_pdb_ins_codes: list[str],
59 | branch_mon_ids: list[str],
60 | ) -> tuple[list[bool], list[bool]]: ...  # Two boolean lists; their meaning is not stated here.
61 |
62 |
63 | def selected_polymer_residue_mask(  # Stub only; every argument is required.
64 | layout: mmcif_layout.MmcifLayout,
65 | atom_site_label_asym_ids: list[str],  # All remaining arguments are lists of str.
66 | atom_site_label_seq_ids: list[str],
67 | atom_site_label_comp_ids: list[str],
68 | poly_seq_asym_ids: list[str],
69 | poly_seq_seq_ids: list[str],
70 | poly_seq_mon_ids: list[str],
71 | ) -> list[bool]: ...  # A single boolean list (contrast with the ligand variant's pair).
72 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/mmcif_utils_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_UTILS_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_UTILS_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
18 | namespace alphafold3 {
19 | // Binding-registration entry point; receives the target pybind11 module `m`.
20 | void RegisterModuleMmcifUtils(pybind11::module m);
21 |
22 | }  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_MMCIF_UTILS_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/string_array.pyi:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | from collections.abc import Sequence
12 | from typing import overload
13 |
14 | import numpy as np
15 |
16 | def format_float_array(  # Stub only: Sequence[float] -> list[str].
17 | values: Sequence[float], num_decimal_places: int  # Both arguments are required.
18 | ) -> list[str]: ...
19 | def isin(  # Stub only: object array + set of str/bytes -> boolean array.
20 | array: np.ndarray[object],
21 | test_elements: set[str | bytes],
22 | invert: bool = ...,  # `...` means the default is set by the implementation.
23 | ) -> np.ndarray[bool]: ...
24 | @overload
25 | def remap(  # Overload 1: an explicit `default_value` is supplied.
26 | array: np.ndarray[object],
27 | mapping: dict[str, str],
28 | default_value: str,
29 | inplace: bool = ...,  # `...` means the default is set by the implementation.
30 | ) -> np.ndarray[object]: ...
31 | @overload
32 | def remap(  # Overload 2: no `default_value` argument.
33 | array: np.ndarray[object],
34 | mapping: dict[str, str],
35 | inplace: bool = ...,  # `...` means the default is set by the implementation.
36 | ) -> np.ndarray[object]: ...
37 |
--------------------------------------------------------------------------------
/src/alphafold3/structure/cpp/string_array_pybind.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2024 DeepMind Technologies Limited
3 | *
4 | * AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
5 | * this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
6 | *
7 | * To request access to the AlphaFold 3 model parameters, follow the process set
8 | * out at https://github.com/google-deepmind/alphafold3. You may only use these
9 | * if received directly from Google. Use is subject to terms of use available at
10 | * https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
11 | */
12 |
13 | #ifndef ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_STRING_ARRAY_PYBIND_H_
14 | #define ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_STRING_ARRAY_PYBIND_H_
15 |
16 | #include "pybind11/pybind11.h"
17 |
18 | namespace alphafold3 {
19 | // Binding-registration entry point; receives the target pybind11 module `m`.
20 | void RegisterModuleStringArray(pybind11::module m);
21 |
22 | }  // namespace alphafold3
23 |
24 | #endif // ALPHAFOLD3_SRC_ALPHAFOLD3_STRUCTURE_PYTHON_STRING_ARRAY_PYBIND_H_
25 |
--------------------------------------------------------------------------------
/src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_1024.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_1024.pkl
--------------------------------------------------------------------------------
/src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_default.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/test_data/alphafold_run_outputs/run_alphafold_test_output_bucket_default.pkl
--------------------------------------------------------------------------------
/src/alphafold3/test_data/featurised_example.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingchenchen/AF3Score/168ffc98ffe94f91822f55a9519dfd5fe040b728/src/alphafold3/test_data/featurised_example.pkl
--------------------------------------------------------------------------------
/src/alphafold3/version.py:
--------------------------------------------------------------------------------
1 | # Copyright 2024 DeepMind Technologies Limited
2 | #
3 | # AlphaFold 3 source code is licensed under CC BY-NC-SA 4.0. To view a copy of
4 | # this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/
5 | #
6 | # To request access to the AlphaFold 3 model parameters, follow the process set
7 | # out at https://github.com/google-deepmind/alphafold3. You may only use these
8 | # if received directly from Google. Use is subject to terms of use available at
9 | # https://github.com/google-deepmind/alphafold3/blob/main/WEIGHTS_TERMS_OF_USE.md
10 |
11 | """Single source of truth for the AlphaFold version."""
12 |
13 | __version__ = '3.0.0'  # Importable as alphafold3.version.__version__.
14 |
--------------------------------------------------------------------------------