├── src
│   ├── __init__.py
│   └── dataloader.py
├── LICENSE
├── README.md
├── 0_Objectives.ipynb
├── 1_Proteins_as_Graphs.ipynb
├── 2_Graph_Datasets.ipynb
├── 3_Geometric_Deep_Learning_Models.ipynb
├── 4_Training_and_Tracking.ipynb
└── dataset.txt


/src/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 PickyBinders
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Geometric Deep Learning for Protein Structure Data with PyTorch Lightning
2 | 
3 | - `10:00 - 10:15`: [Introduction](0_Objectives.ipynb)
4 | - `10:15 - 11:00`: [Notebook 1 - Proteins as Graphs](1_Proteins_as_Graphs.ipynb)
5 | - `11:00 - 11:30`: _Break_
6 | - `11:30 - 12:30`: [Notebook 2 - Graph Datasets and DataLoaders](2_Graph_Datasets.ipynb)
7 | - `12:30 - 13:30`: _Lunch_
8 | - `13:30 - 13:45`: Introduction to geometric deep learning
9 | - `13:45 - 15:00`: [Notebook 3 - Geometric Deep Learning](3_Geometric_Deep_Learning_Models.ipynb)
10 | - `15:00 - 15:30`: _Break_
11 | - `15:30 - 16:30`: [Notebook 4 - Training and Tracking](4_Training_and_Tracking.ipynb)
12 | - `16:30 - 17:00`: Wrap-up
13 | 
14 | To open the notebooks in Google Colab, use the GitHub browser at https://colab.research.google.com/github/
15 | with the repository: https://github.com/PickyBinders/geometric-learning-protein-structures-course
16 | 
17 | ## Objectives
18 | Develop a codebase for exploring, training, and evaluating graph deep learning models that use protein structures as input for a residue-level prediction task.
19 | - Learn how to featurize protein structures as graphs using [Graphein](https://graphein.ai/) 20 | - Understand the data loading and processing pipeline for graph datasets using [PyTorch Geometric](https://pytorch-geometric.readthedocs.io) 21 | - Learn how to implement graph neural networks using [PyTorch Geometric](https://pytorch-geometric.readthedocs.io) 22 | - Understand the typical deep learning training and evaluation loops using [PyTorch Lightning](https://lightning.ai/docs/pytorch/stable/) 23 | 24 | ## Task and Dataset 25 | 26 | - **Given an input protein chain, predict for each residue whether or not it belongs to a protein-protein interface.** 27 | - The dataset (in `dataset.txt`) is a subset of the [MaSIF-site dataset](https://www.nature.com/articles/s41592-019-0666-6). 28 | - Each line is a PDB ID and a chain. We'll use these to extract residues at the interface with other chains and label them as positive examples. All other residues are negative examples. 29 | 30 | 31 | -------------------------------------------------------------------------------- /src/dataloader.py: -------------------------------------------------------------------------------- 1 | import graphein 2 | graphein.verbose(enabled=False) 3 | import warnings 4 | warnings.filterwarnings("ignore") 5 | from graphein.protein.config import ProteinGraphConfig 6 | from graphein.protein.graphs import construct_graph 7 | from graphein.protein.features.nodes import amino_acid as graphein_nodes 8 | from graphein.protein import edges as graphein_edges 9 | from graphein.protein.subgraphs import extract_subgraph 10 | from functools import partial 11 | from graphein.ml import GraphFormatConvertor 12 | import torch 13 | import lightning 14 | from torch.utils.data import random_split 15 | from torch_geometric.loader import DataLoader 16 | from torch_geometric.data import Dataset 17 | from pathlib import Path 18 | import pickle 19 | 20 | def load_graph(pdb_id, chain): 21 | graph_config = ProteinGraphConfig( 22 | node_metadata_functions = [graphein_nodes.amino_acid_one_hot, graphein_nodes.meiler_embedding], 23 | edge_construction_functions = [graphein_edges.add_peptide_bonds, 24 | partial(graphein_edges.add_distance_threshold, 25 | threshold=8., 26 | long_interaction_threshold=2)], 27 | ) 28 | graph = construct_graph(pdb_code=pdb_id, config=graph_config, verbose=False) 29 | interface_residues = set() 30 | for source, target, kind in graph.edges(data=True): 31 | c1, c2 = source.split(":")[0], target.split(":")[0] 32 | if 'distance_threshold' in kind['kind']: 33 | if c1 == chain and c2 != chain: 34 | interface_residues.add(source) 35 | elif c2 == chain and c1 != chain: 36 | interface_residues.add(target) 37 | graph = extract_subgraph(graph, chains=chain) 38 | for node, data in graph.nodes(data=True): 39 | if node in interface_residues: 40 | data['interface_label'] = 1 41 | else: 42 | data['interface_label'] = 0 43 | return graph 44 | 45 | class ProteinDataset(Dataset): 46 | """ 47 | torch-geometric Dataset class for loading protein files as graphs. 
48 |     """
49 |     def __init__(self, root,
50 |                  protein_names: list):
51 |         columns = [
52 |             "chain_id",
53 |             "coords",
54 |             "edge_index",
55 |             "kind",
56 |             "node_id",
57 |             "residue_number",
58 |             "amino_acid_one_hot",
59 |             "meiler",
60 |             "interface_label",
61 |         ]
62 |         self.convertor = GraphFormatConvertor(src_format="nx", dst_format="pyg", columns=columns, verbose=None)
63 |         self.protein_names = protein_names
64 |         super(ProteinDataset, self).__init__(root)
65 | 
66 |     def download(self):
67 |         for protein_name in self.protein_names:
68 |             output = Path(self.raw_dir) / f'{protein_name}.pkl'
69 |             if not output.exists():
70 |                 pdb_id, chain = protein_name.split("_")
71 |                 graphein_graph = load_graph(pdb_id, chain)
72 |                 with open(output, "wb") as f:
73 |                     pickle.dump(graphein_graph, f)
74 | 
75 |     @property
76 |     def raw_file_names(self):
77 |         return [Path(self.raw_dir) / f"{protein_name}.pkl" for protein_name in self.protein_names if (Path(self.raw_dir) / f"{protein_name}.pkl").exists()]  # check for the raw .pkl files, not .pt
78 | 
79 |     @property
80 |     def processed_file_names(self):
81 |         return [Path(self.processed_dir) / f"{protein_name}.pt" for protein_name in self.protein_names if (Path(self.processed_dir) / f"{protein_name}.pt").exists()]
82 | 
83 |     def process(self):
84 |         for protein_name in self.protein_names:
85 |             output = Path(self.processed_dir) / f'{protein_name}.pt'
86 |             if not output.exists():
87 |                 with open(Path(self.raw_dir) / f"{protein_name}.pkl", "rb") as f:
88 |                     graphein_graph = pickle.load(f)
89 |                 torch_data = self.convertor(graphein_graph)
90 |                 torch.save(torch_data, output)
91 | 
92 |     def len(self):
93 |         return len(self.processed_file_names)
94 | 
95 |     def get(self, idx):
96 |         data = torch.load(self.processed_file_names[idx])
97 |         return data
98 | 
99 | 
100 | class ProteinGraphDataModule(lightning.LightningDataModule):
101 |     def __init__(self, root, dataset_file, batch_size=8):
102 |         super().__init__()
103 |         self.root = root
104 |         self.dataset_file = dataset_file
105 |         with open(dataset_file) as f:
106 |             self.protein_names = [line.strip() for line in f]
107 |         self.protein_names = self.protein_names[:20] # SMALL DATASET FOR TESTING
108 |         self.batch_size = batch_size
109 | 
110 |     def prepare_data(self):
111 |         ProteinDataset(root=self.root, protein_names=self.protein_names)
112 | 
113 |     def setup(self, stage):
114 |         dataset = ProteinDataset(root=self.root, protein_names=self.protein_names)
115 |         # Here we just do a random split of 80/10/10 for train/val/test
116 |         train_idx, val_idx, test_idx = random_split(range(len(dataset)), [0.8, 0.1, 0.1])
117 |         self.train, self.val, self.test = dataset[list(train_idx)], dataset[list(val_idx)], dataset[list(test_idx)]
118 | 
119 |     def train_dataloader(self):
120 |         return DataLoader(self.train, batch_size=self.batch_size, shuffle=True)  # shuffle the training set each epoch
121 | 
122 |     def val_dataloader(self):
123 |         return DataLoader(self.val, batch_size=self.batch_size)
124 | 
125 |     def test_dataloader(self):
126 |         return DataLoader(self.test, batch_size=self.batch_size)
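127 | 
128 | # Minimal usage sketch (assuming a dataset file like "dataset.txt" with one
129 | # <PDB ID>_<chain> entry per line and a writable root such as "./test_data"):
130 | #
131 | # datamodule = ProteinGraphDataModule(root="./test_data", dataset_file="dataset.txt", batch_size=8)
132 | # datamodule.prepare_data()
133 | # datamodule.setup(stage="fit")
134 | # batch = next(iter(datamodule.train_dataloader()))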
--------------------------------------------------------------------------------
/0_Objectives.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "editable": true,
7 | "slideshow": {
8 | "slide_type": "slide"
9 | },
10 | "tags": []
11 | },
12 | "source": [
13 | "## Geometric Deep Learning for Protein Structure Data with PyTorch Lightning\n",
14 | "\n",
15 | "- `10:00 - 10:15`: Introduction\n",
16 | "- `10:15 - 11:00`: [Notebook 1 - Proteins as Graphs](1_Proteins_as_Graphs.ipynb)\n",
17 | "- `11:00 - 11:30`: _Break_\n",
18 | "- `11:30 - 12:30`: [Notebook 2 - Graph Datasets and DataLoaders](2_Graph_Datasets.ipynb)\n",
19 | "- `12:30 - 13:30`: _Lunch_\n",
20 | "- `13:30 - 13:45`: Introduction to geometric deep learning\n",
21 | "- `13:45 - 15:00`: [Notebook 3 - Geometric Deep Learning](3_Geometric_Deep_Learning_Models.ipynb)\n",
22 | "- `15:00 - 15:30`: _Break_\n",
23 | "- `15:30 - 16:30`: [Notebook 4 - Training and Tracking](4_Training_and_Tracking.ipynb)\n",
24 | "- `16:30 - 17:00`: Wrap-up\n",
25 | "\n",
26 | "**Repository: https://github.com/PickyBinders/geometric-learning-protein-structures-course**"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "editable": true,
33 | "slideshow": {
34 | "slide_type": "slide"
35 | },
36 | "tags": []
37 | },
38 | "source": [
39 | "## Background knowledge\n",
40 | "\n",
41 | "- Python programming\n",
42 | "- Protein structures\n",
43 | "- Deep learning\n",
44 | "- Graph neural networks"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {
50 | "editable": true,
51 | "slideshow": {
52 | "slide_type": "slide"
53 | },
54 | "tags": []
55 | },
56 | "source": [
57 | "### Why graphs?\n",
58 | "- **Graphs** are a natural way to represent **interactions** between entities where the task at hand is affected both by local neighboring connections and global graph topology.\n",
59 | "\n",
60 | "- **Proteins** are made up of amino acids that are connected by chemical bonds, and **contributions from \"neighboring\" atoms can affect the properties of a given atom** to drive protein-protein binding, protein folding, and other biological processes that make up the protein's function.\n",
61 | "\n",
62 | "- When analyzing protein-protein binding, aspects such as residue-residue interactions, residue-solvent interactions, and conformational changes contribute to the entropic and enthalpic factors that drive the binding process.\n",
63 | "\n",
64 | "- These interactions can be represented as a graph, where nodes represent atoms or amino acids and edges represent interactions between them.\n",
65 | "\n",
66 | " \n",
67 | " \n",
68 | ""
69 | ]
70 | },
71 | {
72 | "cell_type": "markdown",
73 | "metadata": {
74 | "editable": true,
75 | "slideshow": {
76 | "slide_type": "slide"
77 | },
78 | "tags": []
79 | },
80 | "source": [
81 | "### Why deep learning?\n",
82 | "- Deep learning methods can produce data-driven features from the input representation (**feature extraction / feature learning**), useful in learning from complex, high-dimensional data for tasks where the exact features and their relationships are not known.\n",
83 | "\n",
84 | "![https://www.youtube.com/watch?v=LeeUzusWz5g](https://i0.wp.com/semiengineering.com/wp-content/uploads/2018/01/MLvsDL.png?resize=733%2C405&ssl=1)"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {
90 | "editable": true,
91 | "slideshow": {
92 | "slide_type": "slide"
93 | },
94 | "tags": []
95 | },
96 | "source": [
97 | "- Different deep learning architectures can cope with different **unstructured data representations** (i.e. not arranged as vectors of features) such as text sequences, speech signals, images and graphs.\n",
98 | "\n",
99 | "![](https://sebastianraschka.com/images/blog/2022/deep-learning-for-tabular-data/unstructured-structured.jpeg)"
100 | ]
101 | },
102 | {
103 | "cell_type": "markdown",
104 | "metadata": {
105 | "editable": true,
106 | "slideshow": {
107 | "slide_type": "slide"
108 | },
109 | "tags": []
110 | },
111 | "source": [
112 | "### Why geometric deep learning?\n",
113 | "- **Geometric deep learning** is a subfield of deep learning that focuses on learning from data that is represented as graphs or manifolds.\n",
114 | "\n",
115 | "![](https://hyperparameter.space/img/equivariance/geometric_domains.png)"
116 | ]
117 | },
118 | {
119 | "cell_type": "markdown",
120 | "metadata": {
121 | "editable": true,
122 | "slideshow": {
123 | "slide_type": "slide"
124 | },
125 | "tags": []
126 | },
127 | "source": [
128 | "- It is particularly useful for learning from data that has a **non-Euclidean structure** such as social networks, 3D shapes, and molecule/protein structures.\n",
129 | "- These models can preserve both **local geometric relations** (e.g., the immediate connections between nodes in a graph or neighboring residues) and **global topological features** (e.g., the overall shape or structure of a protein), which are crucial for understanding the underlying properties of the data.\n",
130 | "- Many geometric data types, like graphs representing protein interactions, are **sparse** in nature. Geometric deep learning models can handle such sparsity efficiently, extracting meaningful signal from a limited number of interactions, which is often challenging for traditional models.\n"
131 | ]
132 | },
133 | {
134 | "cell_type": "markdown",
135 | "metadata": {
136 | "editable": true,
137 | "slideshow": {
138 | "slide_type": "slide"
139 | },
140 | "tags": []
141 | },
142 | "source": [
143 | "## Objectives\n",
144 | "Develop a codebase for exploring, training, and evaluating graph deep learning models that use protein structures as input for a residue-level prediction task.\n",
145 | "- Learn how to featurize protein structures as graphs using [Graphein](https://graphein.ai/)\n",
146 | "- Understand the data loading and processing pipeline for graph datasets using [PyTorch Geometric](https://pytorch-geometric.readthedocs.io)\n",
147 | "- Learn how to implement graph neural networks using [PyTorch Geometric](https://pytorch-geometric.readthedocs.io)\n",
148 | "- Understand the typical deep learning training and evaluation loops using [PyTorch Lightning](https://lightning.ai/docs/pytorch/stable/)"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {
154 | "editable": true,
155 | "slideshow": {
156 | "slide_type": "slide"
157 | },
158 | "tags": []
159 | },
160 | "source": [
161 | "## Task and Dataset\n",
162 | "\n",
163 | "- **Given an input protein chain, predict for each residue whether or not it belongs to a protein-protein interface.**\n",
164 | "- The dataset (in `dataset.txt`) is a subset of the [MaSIF-site dataset](https://www.nature.com/articles/s41592-019-0666-6). \n",
165 | "- Each line is a PDB ID and a chain (e.g. `1A0G_A`). We'll use these to extract residues at the interface with other chains and label them as positive examples. All other residues are negative examples."
166 | ]
167 | },
168 | {
169 | "cell_type": "markdown",
170 | "metadata": {
171 | "editable": true,
172 | "slideshow": {
173 | "slide_type": "slide"
174 | },
175 | "tags": []
176 | },
177 | "source": [
178 | "## Tips\n",
179 | "\n",
180 | "- Use the `??` operator to get the documentation of a function or class in Jupyter (see the example after this list).\n",
181 | "- Play around with different parameters for the functions and classes to understand their behavior.\n",
182 | "- Many of the classes involved in deep learning are \"abstract classes\" that provide a blueprint for other classes to inherit from. These are of the form `class MyClass(ABC):`. Abstract classes often have methods that need to be implemented by the inheriting class. In practice, this just means that there is a set of functions (with fixed names and fixed input arguments) that you need to implement in your class, and you can find out what these are by looking at the documentation or source code of the abstract class. Apart from this, you can add any other methods or attributes to your class as you see fit.\n",
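"\n",
"For example, after importing a class you can run `Dataset??` in a cell to print its signature, docstring, and full source, which is a quick way to see exactly which methods an abstract class expects you to implement."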
183 | ] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "geometric-learning", 189 | "language": "python", 190 | "name": "geometric-learning" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.8.18" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 4 207 | } 208 | -------------------------------------------------------------------------------- /1_Proteins_as_Graphs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "editable": true, 8 | "slideshow": { 9 | "slide_type": "" 10 | }, 11 | "tags": [] 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "# !pip install --use-pep517 \"graphein[extras]\" lightning torch torch-geometric tensorboard nbformat \"jsonargparse[signatures]\" ipywidgets tabulate" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": null, 21 | "metadata": { 22 | "editable": true, 23 | "slideshow": { 24 | "slide_type": "" 25 | }, 26 | "tags": [] 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "import graphein\n", 31 | "graphein.verbose(enabled=False)\n", 32 | "from graphein.protein.config import ProteinGraphConfig\n", 33 | "from graphein.protein.graphs import construct_graph\n", 34 | "from graphein.protein.features.nodes import amino_acid as graphein_nodes\n", 35 | "from graphein.protein import edges as graphein_edges\n", 36 | "from graphein.protein.subgraphs import extract_subgraph\n", 37 | "from graphein.protein.visualisation import plotly_protein_structure_graph\n", 38 | "from functools import partial\n", 39 | "from matplotlib import colormaps" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## Converting proteins to featurized graphs\n", 47 | "\n", 48 | "The [graphein](https://graphein.ai/) library provides functionality for producing a number of types of graph-based representations of proteins. 
We'll use it to construct [NetworkX](https://github.com/networkx/networkx) graphs from protein structures, extract interface residues, and featurise the nodes and edges of the graph."
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "Here we use the node features implemented in `graphein.protein.features.nodes.amino_acid`, but there are many more kinds of node features available in the library (see the full [API](https://graphein.ai/modules/graphein.protein.html#features))."
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": null,
61 | "metadata": {
62 | "editable": true,
63 | "slideshow": {
64 | "slide_type": ""
65 | },
66 | "tags": []
67 | },
68 | "outputs": [],
69 | "source": [
70 | "graph_config = ProteinGraphConfig(\n",
71 | "    node_metadata_functions = [graphein_nodes.amino_acid_one_hot, graphein_nodes.meiler_embedding],\n",
72 | "    edge_construction_functions = [graphein_edges.add_peptide_bonds, partial(graphein_edges.add_distance_threshold, \n",
73 | "                                                                             threshold=8., \n",
74 | "                                                                             long_interaction_threshold=2)]\n",
75 | ")"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "metadata": {
82 | "editable": true,
83 | "slideshow": {
84 | "slide_type": ""
85 | },
86 | "tags": []
87 | },
88 | "outputs": [],
89 | "source": [
90 | "graph = construct_graph(pdb_code='1A0G', config=graph_config)"
91 | ]
92 | },
93 | {
94 | "cell_type": "markdown",
95 | "metadata": {},
96 | "source": [
97 | "Now we have a graph object consisting of nodes and edges, each associated with the attributes we've specified."
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {
104 | "editable": true,
105 | "slideshow": {
106 | "slide_type": ""
107 | },
108 | "tags": []
109 | },
110 | "outputs": [],
111 | "source": [
112 | "i = 0\n",
113 | "for (node, node_data) in graph.nodes(data=True):\n",
114 | "    print(\"Node:\", node)\n",
115 | "    print(\"Node attributes:\", node_data)\n",
116 | "    if i > 5:\n",
117 | "        break\n",
118 | "    i += 1"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "i = 0\n",
128 | "for (start_node, end_node, edge_data) in graph.edges(data=True):\n",
129 | "    print(f\"Edge between {start_node} and {end_node}\")\n",
130 | "    print(\"Edge attributes:\", edge_data)\n",
131 | "    if i > 5:\n",
132 | "        break\n",
133 | "    i += 1"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": null,
139 | "metadata": {
140 | "editable": true,
141 | "slideshow": {
142 | "slide_type": ""
143 | },
144 | "tags": []
145 | },
146 | "outputs": [],
147 | "source": [
148 | "p = plotly_protein_structure_graph(\n",
149 | "    graph,\n",
150 | "    colour_edges_by=\"kind\",\n",
151 | "    colour_nodes_by='chain_id',\n",
152 | "    label_node_ids=False,\n",
153 | "    plot_title=\"Peptide backbone graph with distance connections. Nodes coloured by chain.\",\n",
154 | "    node_size_multiplier=1\n",
155 | "    )\n",
156 | "p.show()"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {},
162 | "source": [
163 | "We can extract interface residues from this graph by checking for edges between chains:"
164 | ]
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": null,
169 | "metadata": {
170 | "editable": true,
171 | "slideshow": {
172 | "slide_type": ""
173 | },
174 | "tags": []
175 | },
176 | "outputs": [],
177 | "source": [
178 | "interface_residues = set()\n",
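"# node IDs look like 'A:SER:1' (chain:residue:number), so an edge of kind\n",
"# 'distance_threshold' that crosses from chain A into another chain marks\n",
"# its chain-A endpoint as an interface residue\n",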
179 | "for source, target, kind in graph.edges(data=True):\n", 180 | " if 'distance_threshold' in kind['kind']:\n", 181 | " if source.split(\":\")[0] == \"A\" and target.split(\":\")[0] != \"A\":\n", 182 | " interface_residues.add(source)\n", 183 | " elif target.split(\":\")[0] == \"A\" and source.split(\":\")[0] != \"A\":\n", 184 | " interface_residues.add(target)\n", 185 | "interface_residues" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": { 191 | "editable": true, 192 | "slideshow": { 193 | "slide_type": "" 194 | }, 195 | "tags": [] 196 | }, 197 | "source": [ 198 | "This information can be added to the graph as an `interface_label` node feature:" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "editable": true, 206 | "slideshow": { 207 | "slide_type": "" 208 | }, 209 | "tags": [] 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "for node, data in graph.nodes(data=True):\n", 214 | " if node in interface_residues:\n", 215 | " data['interface_label'] = 1\n", 216 | " else:\n", 217 | " data['interface_label'] = 0" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "Let's see where the interface is for this example:" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "editable": true, 232 | "slideshow": { 233 | "slide_type": "" 234 | }, 235 | "tags": [] 236 | }, 237 | "outputs": [], 238 | "source": [ 239 | "p = plotly_protein_structure_graph(\n", 240 | " graph,\n", 241 | " colour_edges_by='kind',\n", 242 | " colour_nodes_by='interface_label',\n", 243 | " label_node_ids=False,\n", 244 | " edge_color_map=colormaps['Pastel2'],\n", 245 | " plot_title=\"Peptide backbone graph with distance connections. Nodes coloured by interface labels.\",\n", 246 | " node_size_multiplier=1\n", 247 | " )\n", 248 | "p.show()" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "Since our task is to predict interface residues given just one input chain, we'll extract the subgraph for the chain of interest:" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": { 262 | "editable": true, 263 | "slideshow": { 264 | "slide_type": "" 265 | }, 266 | "tags": [] 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "chain_subgraph = extract_subgraph(graph, chains=\"A\")\n", 271 | "\n", 272 | "p = plotly_protein_structure_graph(\n", 273 | " chain_subgraph,\n", 274 | " colour_edges_by=\"kind\",\n", 275 | " colour_nodes_by=\"interface_label\",\n", 276 | " label_node_ids=False,\n", 277 | " edge_color_map=colormaps['Pastel2'],\n", 278 | " plot_title=\"Peptide backbone graph. Nodes coloured by interface_label.\",\n", 279 | " node_size_multiplier=1\n", 280 | " )\n", 281 | "p.show()" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "We put all this together in a function to use in the later notebooks. Feel free to add other node features, edge types, and edge features to your function." 
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": null,
294 | "metadata": {
295 | "editable": true,
296 | "slideshow": {
297 | "slide_type": ""
298 | },
299 | "tags": []
300 | },
301 | "outputs": [],
302 | "source": [
303 | "def load_graph(pdb_id, chain):\n",
304 | "    graph_config = ProteinGraphConfig(\n",
305 | "        node_metadata_functions = [graphein_nodes.amino_acid_one_hot, graphein_nodes.meiler_embedding],\n",
306 | "        edge_construction_functions = [graphein_edges.add_peptide_bonds, \n",
307 | "                                       partial(graphein_edges.add_distance_threshold, \n",
308 | "                                               threshold=8.,\n",
309 | "                                               long_interaction_threshold=2)],\n",
310 | "    )\n",
311 | "    graph = construct_graph(pdb_code=pdb_id, config=graph_config, verbose=False)\n",
312 | "    interface_residues = set()\n",
313 | "    for source, target, kind in graph.edges(data=True):\n",
314 | "        c1, c2 = source.split(\":\")[0], target.split(\":\")[0]\n",
315 | "        if 'distance_threshold' in kind['kind']:\n",
316 | "            if c1 == chain and c2 != chain:\n",
317 | "                interface_residues.add(source)\n",
318 | "            elif c2 == chain and c1 != chain:\n",
319 | "                interface_residues.add(target)\n",
320 | "    graph = extract_subgraph(graph, chains=chain)\n",
321 | "    for node, data in graph.nodes(data=True):\n",
322 | "        if node in interface_residues:\n",
323 | "            data['interface_label'] = 1\n",
324 | "        else:\n",
325 | "            data['interface_label'] = 0\n",
326 | "    return graph"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "metadata": {},
332 | "source": [
333 | "## Bonus\n",
334 | "\n",
335 | "\n",
336 | "We can also add our own edge functions or node features that are not implemented in the graphein API. For example, we can calculate the solvent accessible surface area (SASA) for each residue and include it as a node feature."
337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "from Bio.PDB.mmtf import MMTFParser\n", 346 | "from Bio.PDB.SASA import ShrakeRupley\n", 347 | "import warnings\n", 348 | "warnings.filterwarnings(\"ignore\") # to ignore warnings when parsing pdb structures\n", 349 | "\n", 350 | "def add_sasa(pdb_id, graph):\n", 351 | " struct = MMTFParser.get_structure_from_url(pdb_id)\n", 352 | " sr = ShrakeRupley()\n", 353 | " sr.compute(struct, level=\"R\") # residue level\n", 354 | " for _, data in graph.nodes(data=True):\n", 355 | " # add SASA to node features\n", 356 | " data['sasa'] = struct[0][data['chain_id']][data['residue_number']].sasa\n", 357 | " return graph\n", 358 | "\n", 359 | "graph = load_graph(\"1A0G\", \"A\")\n", 360 | "graph = add_sasa(\"1A0G\", graph)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "i = 0\n", 370 | "for (node, node_data) in graph.nodes(data=True):\n", 371 | " print(\"Node:\", node)\n", 372 | " print(\"Node attributes:\", node_data)\n", 373 | " if i > 5:\n", 374 | " break\n", 375 | " i += 1" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "import matplotlib.pyplot as plt\n", 385 | "import seaborn as sns\n", 386 | "\n", 387 | "sasa, interface_labels = [], []\n", 388 | "for (node, node_data) in graph.nodes(data=True):\n", 389 | " sasa.append(node_data['sasa'])\n", 390 | " interface_labels.append(node_data['interface_label'])\n", 391 | "data = {\n", 392 | " \"sasa\": sasa,\n", 393 | " \"interface_labels\": interface_labels\n", 394 | "}\n", 395 | "\n", 396 | "plt.figure(figsize=(14, 8))\n", 397 | "sns.violinplot(x=\"interface_labels\", y=\"sasa\", data=data)\n", 398 | "plt.title(\"SASA between interface and non-interface residues\")\n", 399 | "plt.tight_layout()\n", 400 | "plt.show()" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "metadata": {}, 406 | "source": [ 407 | "What other node or edge features would you like to include in your graph?" 
408 | ] 409 | } 410 | ], 411 | "metadata": { 412 | "kernelspec": { 413 | "display_name": "Python 3 (ipykernel)", 414 | "language": "python", 415 | "name": "python3" 416 | }, 417 | "language_info": { 418 | "codemirror_mode": { 419 | "name": "ipython", 420 | "version": 3 421 | }, 422 | "file_extension": ".py", 423 | "mimetype": "text/x-python", 424 | "name": "python", 425 | "nbconvert_exporter": "python", 426 | "pygments_lexer": "ipython3", 427 | "version": "3.8.18" 428 | } 429 | }, 430 | "nbformat": 4, 431 | "nbformat_minor": 4 432 | } 433 | -------------------------------------------------------------------------------- /4_Training_and_Tracking.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "editable": true, 8 | "slideshow": { 9 | "slide_type": "" 10 | }, 11 | "tags": [] 12 | }, 13 | "outputs": [], 14 | "source": [ 15 | "# !pip install --use-pep517 \"graphein[extras]\" lightning torch torch-geometric tensorboard nbformat \"jsonargparse[signatures]\" ipywidgets tabulate" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "editable": true, 22 | "slideshow": { 23 | "slide_type": "" 24 | }, 25 | "tags": [] 26 | }, 27 | "source": [ 28 | "There are a variety of free and paid resources available for interactively tracking training performance of deep learning models. Here we'll use [TensorBoard](https://github.com/tensorflow/tensorboard) which is free and open-source. Another popular option with a free tier is [Weights & Biases](https://wandb.ai/site), which has some additional features and integrations. \n", 29 | "\n" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "editable": true, 37 | "slideshow": { 38 | "slide_type": "" 39 | }, 40 | "tags": [] 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "import lightning\n", 45 | "import torch\n", 46 | "from torch import nn\n", 47 | "from torch_geometric import nn as graph_nn\n", 48 | "from src import dataloader\n", 49 | "from lightning.pytorch import callbacks\n", 50 | "from lightning.pytorch.loggers import TensorBoardLogger\n", 51 | "from sklearn import metrics" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": { 57 | "editable": true, 58 | "slideshow": { 59 | "slide_type": "" 60 | }, 61 | "tags": [] 62 | }, 63 | "source": [ 64 | "We can actually visualise TensorBoard within the notebook with some cell magic to tell Jupyter to run the server in the background and display it in the notebook, by looking for files in the `lightning_logs` directory. As we run training, log files will be written to this directory by default and the TensorBoard display will update automatically (refreshes every 30 seconds)." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": { 71 | "editable": true, 72 | "slideshow": { 73 | "slide_type": "" 74 | }, 75 | "tags": [] 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "%load_ext tensorboard\n", 80 | "%tensorboard --logdir lightning_logs/ --bind_all" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": { 86 | "editable": true, 87 | "slideshow": { 88 | "slide_type": "" 89 | }, 90 | "tags": [] 91 | }, 92 | "source": [ 93 | "Let's set up the LightningModule again and this time also log the loss value on our validation set. 
Note that Lightning runs `validation_step` with gradients disabled and the model in evaluation mode automatically, so no weights are updated during validation and we don't need to handle that ourselves. We'll also save the predictions and labels from each step in the `train_step_outputs` and `validation_step_outputs` lists."
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": null,
99 | "metadata": {
100 | "editable": true,
101 | "slideshow": {
102 | "slide_type": ""
103 | },
104 | "tags": []
105 | },
106 | "outputs": [],
107 | "source": [
108 | "class InterfaceModule(lightning.LightningModule):\n",
109 | "    \"\"\"\n",
110 | "    LightningModule wrapping a GAT model.\n",
111 | "    \"\"\"\n",
112 | "    def __init__(self):\n",
113 | "        super().__init__()\n",
114 | "        self.model = graph_nn.GAT(in_channels=20,\n",
115 | "                                  hidden_channels=32,\n",
116 | "                                  num_layers=2,\n",
117 | "                                  heads=2,\n",
118 | "                                  out_channels=1,\n",
119 | "                                  dropout=0.01,\n",
120 | "                                  jk=\"last\", v2=True)\n",
121 | "        self.loss_function = nn.BCEWithLogitsLoss()\n",
122 | "        self.train_step_outputs = []\n",
123 | "        self.validation_step_outputs = []\n",
124 | "\n",
125 | "    def forward(self, node_attributes, edge_index):\n",
126 | "        return self.model(node_attributes, edge_index)\n",
127 | "\n",
128 | "    def training_step(self, batch, batch_idx):\n",
129 | "        y_pred = self(batch.amino_acid_one_hot.float(), batch.edge_index)\n",
130 | "        y_true = batch.interface_label.float().view(-1, 1)\n",
131 | "        loss = self.loss_function(y_pred, y_true)\n",
132 | "        self.log('train_loss', loss, on_step=True, on_epoch=True, sync_dist=True,\n",
133 | "                 batch_size=batch.batch_size)\n",
134 | "        self.train_step_outputs.append((y_pred.detach().cpu(), y_true.detach().cpu())) # SAVE OUTPUTS\n",
135 | "        return loss\n",
136 | "    \n",
137 | "    def validation_step(self, batch, batch_idx):\n",
138 | "        y_pred = self(batch.amino_acid_one_hot.float(), batch.edge_index)\n",
139 | "        y_true = batch.interface_label.float().view(-1, 1)\n",
140 | "        loss = self.loss_function(y_pred, y_true)\n",
141 | "        self.log('val_loss', loss, on_step=True, on_epoch=True, sync_dist=True,\n",
142 | "                 batch_size=batch.batch_size)\n",
143 | "        self.validation_step_outputs.append((y_pred.detach().cpu(), y_true.detach().cpu())) # SAVE OUTPUTS\n",
144 | "        return loss\n",
145 | "\n",
146 | "    def configure_optimizers(self):\n",
147 | "        return torch.optim.Adam(params=self.model.parameters(), lr=0.001, weight_decay=0.0001)"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": null,
153 | "metadata": {
154 | "editable": true,
155 | "slideshow": {
156 | "slide_type": ""
157 | },
158 | "tags": []
159 | },
160 | "outputs": [],
161 | "source": [
162 | "trainer = lightning.Trainer(log_every_n_steps=1, max_epochs=10, accelerator='cpu')"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {
168 | "editable": true,
169 | "slideshow": {
170 | "slide_type": ""
171 | },
172 | "tags": []
173 | },
174 | "source": [
175 | "We add the TensorBoard logger to our Trainer and train the model as before"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {
182 | "editable": true,
183 | "slideshow": {
184 | "slide_type": ""
185 | },
186 | "tags": []
187 | },
188 | "outputs": [],
189 | "source": [
190 | "model = InterfaceModule()\n",
191 | "datamodule = dataloader.ProteinGraphDataModule(\"./test_data\", \"dataset.txt\")\n",
192 | "logger = TensorBoardLogger(\"lightning_logs\", name=\"gat\")\n",
"trainer = lightning.Trainer(log_every_n_steps=1, max_epochs=10, accelerator='cpu', logger=logger)  # recreate the Trainer so the logger is actually attached\n",
193 | "trainer.fit(model=model, datamodule=datamodule)"
194 | ]
195 | },
196 | {
197 | 
"cell_type": "markdown", 198 | "metadata": { 199 | "editable": true, 200 | "slideshow": { 201 | "slide_type": "" 202 | }, 203 | "tags": [] 204 | }, 205 | "source": [ 206 | "Every new run you train will be added to the logger so you can keep track of the improvements made over time." 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "## Adding callbacks" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": { 219 | "editable": true, 220 | "slideshow": { 221 | "slide_type": "" 222 | }, 223 | "tags": [] 224 | }, 225 | "source": [ 226 | "PyTorch Lightning has a number of [built-in callbacks](https://lightning.ai/docs/pytorch/stable/extensions/callbacks.html) which are used to perform actions at various points during training. For example, the `ModelCheckpoint` callback saves the model after each epoch, and the `EarlyStopping` callback stops training if the validation loss has not improved for a certain number of epochs." 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": { 233 | "editable": true, 234 | "slideshow": { 235 | "slide_type": "" 236 | }, 237 | "tags": [] 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "model = InterfaceModule()\n", 242 | "datamodule = dataloader.ProteinGraphDataModule(\"./test_data\", \"dataset.txt\")\n", 243 | "trainer = lightning.Trainer(\n", 244 | " max_epochs=50,\n", 245 | " logger=logger,\n", 246 | " log_every_n_steps=1,\n", 247 | " callbacks=[callbacks.EarlyStopping(monitor=\"val_loss\", patience=2), # stop training if validation loss does not improve for 2 epochs\n", 248 | " callbacks.ModelCheckpoint(monitor=\"val_loss\", save_top_k=1)], # save the best model based on validation loss\n", 249 | " accelerator=\"cpu\",\n", 250 | "\n", 251 | ")\n", 252 | "trainer.fit(model=model, datamodule=datamodule)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": { 258 | "editable": true, 259 | "slideshow": { 260 | "slide_type": "" 261 | }, 262 | "tags": [] 263 | }, 264 | "source": [ 265 | "We can also add custom callbacks to log and track things that we are interested in, such as the precision-recall curve or the ROC-AUC curve. 
\n",
266 | "\n",
267 | "Here's a function to calculate such curves and return the images and AUC values:"
268 | ]
269 | },
270 | {
271 | "cell_type": "code",
272 | "execution_count": null,
273 | "metadata": {
274 | "editable": true,
275 | "slideshow": {
276 | "slide_type": ""
277 | },
278 | "tags": []
279 | },
280 | "outputs": [],
281 | "source": [
282 | "def get_metrics_and_curves(metric_type, y_pred, y_true):\n",
283 | "    \"\"\"\n",
284 | "    Calculate metrics and curves for a given metric type\n",
285 | "    ROC: Receiver Operating Characteristic curve, metric = Area under the curve\n",
286 | "    PR: Precision-Recall curve, metric = Area under the curve (Average precision)\n",
287 | "    CM: Confusion Matrix, metric = F1 score (not implemented below)\n",
288 | "\n",
289 | "    Parameters\n",
290 | "    ----------\n",
291 | "    metric_type : str\n",
292 | "        One of \"ROC\", \"PR\"\n",
293 | "    y_pred : torch.Tensor\n",
294 | "        Predicted labels\n",
295 | "    y_true : torch.Tensor\n",
296 | "        True labels\n",
297 | "\n",
298 | "    Returns\n",
299 | "    -------\n",
300 | "    metric_value : float\n",
301 | "        Value of the metric\n",
302 | "    metric_disp : matplotlib.figure.Figure\n",
303 | "        Figure of the curve\n",
304 | "    \"\"\"\n",
305 | "    y_true = y_true.cpu().detach().numpy()\n",
306 | "    y_pred = y_pred.cpu().detach().numpy()\n",
307 | "    if metric_type == \"ROC\":\n",
308 | "        # Receiver Operating Characteristic Curve\n",
309 | "        fpr, tpr, _ = metrics.roc_curve(y_true, y_pred, pos_label=1)\n",
310 | "        roc_auc = metrics.auc(fpr, tpr)\n",
311 | "        roc_disp = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc).plot()\n",
312 | "        return roc_auc, roc_disp.figure_\n",
313 | "    elif metric_type == \"PR\":\n",
314 | "        # Precision-Recall Curve\n",
315 | "        precision, recall, _ = metrics.precision_recall_curve(y_true, y_pred, pos_label=1)\n",
316 | "        pr_auc = metrics.auc(recall, precision)\n",
317 | "        pr_disp = metrics.PrecisionRecallDisplay(precision=precision, recall=recall, average_precision=pr_auc).plot()\n",
318 | "        return pr_auc, pr_disp.figure_"
319 | ]
320 | },
321 | {
322 | "cell_type": "markdown",
323 | "metadata": {
324 | "editable": true,
325 | "slideshow": {
326 | "slide_type": ""
327 | },
328 | "tags": []
329 | },
330 | "source": [
331 | "To add these to the logger, we can use the built-in hooks of the `Callback` class (here `on_train_epoch_end` and `on_validation_epoch_end`) to generate the images at the end of each training/validation epoch and send them to the logger. Don't forget to clear the `train_step_outputs` and `validation_step_outputs` lists at the end of each epoch so they start empty for the next one."
332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "editable": true, 339 | "slideshow": { 340 | "slide_type": "" 341 | }, 342 | "tags": [] 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "class LogMetrics(callbacks.Callback):\n", 347 | " \"\"\"\n", 348 | " Log metrics and curves for validation and training\n", 349 | "\n", 350 | " Scalars: ROC/val_AUC, ROC/train_AUC, PR/val_AUC, PR/train_AUC\n", 351 | " Images: ROC/val, ROC/train, PR/val, PR/train\n", 352 | " \"\"\"\n", 353 | " def on_train_epoch_end(self, trainer, pl_module):\n", 354 | " outputs = torch.cat([x[0] for x in pl_module.train_step_outputs], dim=0)\n", 355 | " labels = torch.cat([x[1] for x in pl_module.train_step_outputs], dim=0)\n", 356 | " for metric in [\"ROC\", \"PR\"]:\n", 357 | " metric_auc, metric_disp = get_metrics_and_curves(metric, outputs, labels)\n", 358 | " pl_module.log(f\"{metric}/train_AUC\", metric_auc)\n", 359 | " trainer.logger.experiment.add_figure(f\"{metric}/train\", metric_disp, global_step=trainer.global_step)\n", 360 | " pl_module.train_step_outputs.clear()\n", 361 | "\n", 362 | " def on_validation_epoch_end(self, trainer, pl_module):\n", 363 | " outputs = torch.cat([x[0] for x in pl_module.validation_step_outputs], dim=0)\n", 364 | " labels = torch.cat([x[1] for x in pl_module.validation_step_outputs], dim=0)\n", 365 | " for metric in [\"ROC\", \"PR\"]:\n", 366 | " metric_auc, metric_disp = get_metrics_and_curves(metric, outputs, labels)\n", 367 | " pl_module.log(f\"{metric}/val_AUC\", metric_auc)\n", 368 | " trainer.logger.experiment.add_figure(f\"{metric}/val\", metric_disp, global_step=trainer.global_step)\n", 369 | " pl_module.validation_step_outputs.clear()" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": null, 375 | "metadata": { 376 | "editable": true, 377 | "slideshow": { 378 | "slide_type": "" 379 | }, 380 | "tags": [] 381 | }, 382 | "outputs": [], 383 | "source": [ 384 | "logger = TensorBoardLogger(\"lightning_logs\", name=\"gat\")\n", 385 | "model = InterfaceModule()\n", 386 | "datamodule = dataloader.ProteinGraphDataModule(\"./test_data\", \"dataset.txt\")\n", 387 | "trainer = lightning.Trainer(\n", 388 | " max_epochs=20,\n", 389 | " logger=logger,\n", 390 | " log_every_n_steps=1,\n", 391 | " callbacks=[callbacks.EarlyStopping(monitor=\"val_loss\", patience=2),\n", 392 | " callbacks.ModelCheckpoint(monitor=\"val_loss\"),\n", 393 | " LogMetrics()],\n", 394 | " accelerator=\"cpu\",\n", 395 | ")\n", 396 | "trainer.fit(model=model, datamodule=datamodule)" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": { 402 | "editable": true, 403 | "slideshow": { 404 | "slide_type": "" 405 | }, 406 | "tags": [] 407 | }, 408 | "source": [ 409 | "## Moving things to scripts and config files" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": { 415 | "editable": true, 416 | "slideshow": { 417 | "slide_type": "" 418 | }, 419 | "tags": [] 420 | }, 421 | "source": [ 422 | "```sh\n", 423 | "src/\n", 424 | " __init__.py\n", 425 | " dataloader.py\n", 426 | " load_graph\n", 427 | " ProteinDataset\n", 428 | " ProteinGraphDataModule\n", 429 | " models.py\n", 430 | " InterfaceModel\n", 431 | " InterfaceModule\n", 432 | " callbacks.py\n", 433 | " get_metrics_and_curves\n", 434 | " LogMetrics\n", 435 | "train.py\n", 436 | "config.yaml\n", 437 | "```" 438 | ] 439 | }, 440 | { 441 | "cell_type": "markdown", 442 | "metadata": { 443 | "editable": true, 444 | "slideshow": { 445 | 
"slide_type": "" 446 | }, 447 | "tags": [] 448 | }, 449 | "source": [ 450 | "In `config.yaml`:\n", 451 | "\n", 452 | "```yaml\n", 453 | "seed_everything: true\n", 454 | "model:\n", 455 | " class_path: src.models.InterfaceModule\n", 456 | " init_args:\n", 457 | " in_channels: 20\n", 458 | " num_layers: 2\n", 459 | " hidden_channels: 32\n", 460 | " heads: 2\n", 461 | " out_channels: 1\n", 462 | " dropout: 0.01\n", 463 | " jk: last\n", 464 | " v2: true\n", 465 | "data:\n", 466 | " class_path: src.dataloader.ProteinGraphDataModule\n", 467 | " init_args:\n", 468 | " root: ./\n", 469 | " columns:\n", 470 | " - chain_id\n", 471 | " - coords\n", 472 | " - edge_index\n", 473 | " - kind\n", 474 | " - node_id\n", 475 | " - residue_number\n", 476 | " - meiler\n", 477 | " - amino_acid_one_hot\n", 478 | " - interface_label\n", 479 | " batch_size: 32\n", 480 | " num_workers: 4\n", 481 | "optimizer:\n", 482 | " class_path: torch.optim.Adam\n", 483 | " init_args:\n", 484 | " lr: 0.001\n", 485 | " weight_decay: 0.0001\n", 486 | "trainer:\n", 487 | " logger:\n", 488 | " - class_path: lightning.pytorch.loggers.TensorBoardLogger\n", 489 | " init_args:\n", 490 | " save_dir: lightning_logs\n", 491 | " name: interface\n", 492 | " log_graph: true\n", 493 | " enable_checkpointing: true\n", 494 | " callbacks:\n", 495 | " - class_path: lightning.pytorch.callbacks.EarlyStopping\n", 496 | " init_args:\n", 497 | " patience: 5\n", 498 | " monitor: val_loss\n", 499 | " mode: min\n", 500 | " - class_path: lightning.pytorch.callbacks.ModelCheckpoint\n", 501 | " init_args:\n", 502 | " save_top_k: 3\n", 503 | " monitor: val_loss\n", 504 | " mode: min\n", 505 | " filename: \"{epoch:02d}-{val_loss:.2f}\"\n", 506 | " - class_path: src.callbacks.LogMetrics\n", 507 | " enable_progress_bar: true\n", 508 | " max_epochs: -1\n", 509 | " log_every_n_steps: 1\n", 510 | " accelerator: cpu\n", 511 | " strategy: auto\n", 512 | " precision: 32\n", 513 | "```" 514 | ] 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": { 519 | "editable": true, 520 | "slideshow": { 521 | "slide_type": "" 522 | }, 523 | "tags": [] 524 | }, 525 | "source": [ 526 | "And in `train.py`:\n", 527 | "\n", 528 | "```python\n", 529 | "from lightning.pytorch.cli import LightningCLI\n", 530 | "import torch\n", 531 | "import warnings\n", 532 | "warnings.filterwarnings('ignore')\n", 533 | "\n", 534 | "def main():\n", 535 | " \"\"\"\n", 536 | " Run with python main.py fit -c config.yaml\n", 537 | " Or in an sbatch script with srun python main.py fit -c config.yaml\n", 538 | " \"\"\"\n", 539 | " torch.set_float32_matmul_precision('medium')\n", 540 | " cli = LightningCLI(save_config_kwargs={\"overwrite\": True})\n", 541 | "\n", 542 | "if __name__ == '__main__':\n", 543 | " main()\n", 544 | "```" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": null, 550 | "metadata": { 551 | "editable": true, 552 | "slideshow": { 553 | "slide_type": "" 554 | }, 555 | "tags": [] 556 | }, 557 | "outputs": [], 558 | "source": [] 559 | } 560 | ], 561 | "metadata": { 562 | "kernelspec": { 563 | "display_name": "geometric-learning", 564 | "language": "python", 565 | "name": "python3" 566 | }, 567 | "language_info": { 568 | "codemirror_mode": { 569 | "name": "ipython", 570 | "version": 3 571 | }, 572 | "file_extension": ".py", 573 | "mimetype": "text/x-python", 574 | "name": "python", 575 | "nbconvert_exporter": "python", 576 | "pygments_lexer": "ipython3", 577 | "version": "3.8.18" 578 | } 579 | }, 580 | "nbformat": 4, 581 | "nbformat_minor": 4 582 | } 583 
| -------------------------------------------------------------------------------- /2_Graph_Datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# !pip install --use-pep517 \"graphein[extras]\" lightning torch torch-geometric tensorboard nbformat \"jsonargparse[signatures]\" ipywidgets tabulate" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "editable": true, 17 | "slideshow": { 18 | "slide_type": "" 19 | }, 20 | "tags": [] 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import graphein\n", 25 | "graphein.verbose(enabled=False)\n", 26 | "import warnings\n", 27 | "warnings.filterwarnings(\"ignore\")\n", 28 | "from graphein.ml import GraphFormatConvertor\n", 29 | "import torch\n", 30 | "import lightning\n", 31 | "from torch.utils.data import random_split\n", 32 | "from torch_geometric.loader import DataLoader\n", 33 | "from torch_geometric.data import Dataset\n", 34 | "from pathlib import Path\n", 35 | "import pickle\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "import seaborn as sns\n", 38 | "from torch_geometric.utils import to_networkx\n", 39 | "from graphein.protein.visualisation import plotly_protein_structure_graph" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Don't forget to use your `load_graph` function from the previous notebook:" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "from graphein.protein.config import ProteinGraphConfig\n", 56 | "from graphein.protein.graphs import construct_graph\n", 57 | "from graphein.protein.features.nodes import amino_acid as graphein_nodes\n", 58 | "from graphein.protein import edges as graphein_edges\n", 59 | "from graphein.protein.subgraphs import extract_subgraph\n", 60 | "from functools import partial\n", 61 | "\n", 62 | "def load_graph(pdb_id, chain):\n", 63 | " graph_config = ProteinGraphConfig(\n", 64 | " node_metadata_functions = [graphein_nodes.amino_acid_one_hot, graphein_nodes.meiler_embedding],\n", 65 | " edge_construction_functions = [graphein_edges.add_peptide_bonds, \n", 66 | " partial(graphein_edges.add_distance_threshold, \n", 67 | " threshold=8.,\n", 68 | " long_interaction_threshold=2)],\n", 69 | " )\n", 70 | " graph = construct_graph(pdb_code=pdb_id, config=graph_config, verbose=False)\n", 71 | " interface_residues = set()\n", 72 | " for source, target, kind in graph.edges(data=True):\n", 73 | " c1, c2 = source.split(\":\")[0], target.split(\":\")[0]\n", 74 | " if 'distance_threshold' in kind['kind']:\n", 75 | " if c1 == chain and c2 != chain:\n", 76 | " interface_residues.add(source)\n", 77 | " elif c2 == chain and c1 != chain:\n", 78 | " interface_residues.add(target)\n", 79 | " graph = extract_subgraph(graph, chains=chain)\n", 80 | " for node, data in graph.nodes(data=True):\n", 81 | " if node in interface_residues:\n", 82 | " data['interface_label'] = 1\n", 83 | " else:\n", 84 | " data['interface_label'] = 0\n", 85 | " return graph" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "## Converting graphs to deep learning datasets\n", 93 | "\n", 94 | "Deep learning libraries like PyTorch, and by extension PyTorch-Geometric, have some standardized ways of handling data and datasets, in order to 
optimize the operations they perform on the various numeric features involved. In this notebook, we will see how to convert a graph into a torch `Data` object, which is the standard way of representing a graph in PyTorch-Geometric. Then we'll go from a single graph to a `Dataset` of graphs, which is the standard way of representing a dataset in PyTorch. And finally, we'll see how to wrap this `Dataset` into a Lightning `DataModule`, which is the standard way of handling data-related operations in PyTorch-Lightning." 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "### Data \n", 102 | "\n", 103 | "We first need to make a torch `Data` object from our graphs. This is easily done with graphein's conversion functions, specifically the `GraphFormatConvertor`, where you can specify which features of the NetworkX graph you'd like to retain in the `Data` object." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "columns = [\n", 113 | " \"chain_id\",\n", 114 | " \"coords\",\n", 115 | " \"edge_index\",\n", 116 | " \"node_id\",\n", 117 | " \"residue_number\",\n", 118 | " \"amino_acid_one_hot\",\n", 119 | " \"meiler\",\n", 120 | " \"interface_label\"\n", 121 | "]\n", 122 | "convertor = GraphFormatConvertor(src_format=\"nx\", # From NetworkX \n", 123 | " dst_format=\"pyg\", # To PyTorch Geometric\n", 124 | " columns=columns, # The columns to be used\n", 125 | " verbose=None)\n", 126 | "graphein_graph = load_graph(\"1A0G\", \"A\")\n", 127 | "torch_data = convertor(graphein_graph)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "torch_data" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "list(zip(torch_data.node_id[:5], torch_data.interface_label[:5]))" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "torch_data.edge_index.T[:5]" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "For example, here are the amino acid types across interface and non-interface residues:" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "extracted_amino_acids = [s.split(\":\")[1] for s in torch_data.node_id]\n", 171 | "\n", 172 | "data_with_aa = {\"amino acid\": extracted_amino_acids, \"interface labels\": torch_data.interface_label}\n", 173 | "\n", 174 | "plt.figure(figsize=(14, 8))\n", 175 | "sns.countplot(x=\"amino acid\", hue=\"interface labels\", data=data_with_aa, palette=\"Set2\")\n", 176 | "plt.title(\"Distribution of amino acids types across interface and non-interface residues\")\n", 177 | "plt.ylabel(\"Count\")\n", 178 | "plt.xlabel(\"Amino acid\")\n", 179 | "plt.legend(title=\"Interface label\", loc='upper right')\n", 180 | "plt.tight_layout()\n", 181 | "plt.show()" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "We can use the `to_networkx` function to convert the `Data` object back to a NetworkX graph" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": { 195 | "editable": true, 196 | "slideshow": { 197 | 
"slide_type": "" 198 | }, 199 | "tags": [] 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "graphein_graph_again = to_networkx(torch_data, \n", 204 | " node_attrs=[\"chain_id\",\n", 205 | " \"coords\",\n", 206 | " \"node_id\",\n", 207 | " \"residue_number\",\n", 208 | " \"amino_acid_one_hot\",\n", 209 | " \"meiler\",\n", 210 | " \"interface_label\"],\n", 211 | " to_undirected=True)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": { 218 | "editable": true, 219 | "slideshow": { 220 | "slide_type": "" 221 | }, 222 | "tags": [] 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "p = plotly_protein_structure_graph(\n", 227 | " graphein_graph_again,\n", 228 | " colour_edges_by=None,\n", 229 | " colour_nodes_by='interface_label',\n", 230 | " label_node_ids=False,\n", 231 | " plot_title=\"Peptide backbone graph with distance connections. Nodes coloured by interface labels.\",\n", 232 | " node_size_multiplier=1\n", 233 | " )\n", 234 | "p.show()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "### Dataset" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "The `torch_geometric.data.Dataset` class is a standard way of representing a graph dataset in PyTorch. It is an abstract class that you can subclass to create your own dataset. The functions that need to be included are:\n", 249 | "\n", 250 | "- `download()`: this downloads the dataset (in our case from `dataset.txt`) and saves each data point (in our case as a pickle file containing the graphein graph that our `load_graph` function returns) in `self.raw_dir`.\n", 251 | "- `process()`: this processes the data from `self.raw_dir` to torch-geometric `Data` objects (as we did above), and saves them as `.pt` files in `self.processed_dir`.\n", 252 | "- property functions: `raw_file_names`, `processed_file_names` return the names of the raw pickle and processed pt files for each data point.\n", 253 | "- `len()`: this returns the number of graphs in the dataset\n", 254 | "- `get()`: this returns the `Data` object for a given index\n", 255 | "\n", 256 | "See the [documentation](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.data.Dataset.html#torch_geometric.data.Dataset) for more." 
257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "class ProteinDataset(Dataset):\n", 266 | " \"\"\"\n", 267 | " torch-geometric Dataset class for loading protein files as graphs.\n", 268 | " \"\"\"\n", 269 | " def __init__(self, root,\n", 270 | " protein_names: list):\n", 271 | " columns = [\n", 272 | " \"chain_id\",\n", 273 | " \"coords\",\n", 274 | " \"edge_index\",\n", 275 | " \"kind\",\n", 276 | " \"node_id\",\n", 277 | " \"residue_number\",\n", 278 | " \"amino_acid_one_hot\",\n", 279 | " \"meiler\",\n", 280 | " \"interface_label\",\n", 281 | " ]\n", 282 | " self.convertor = GraphFormatConvertor(src_format=\"nx\", dst_format=\"pyg\", columns=columns, verbose=None)\n", 283 | " self.protein_names = protein_names\n", 284 | " super(ProteinDataset, self).__init__(root)\n", 285 | "\n", 286 | " def download(self):\n", 287 | " for protein_name in self.protein_names:\n", 288 | " output = Path(self.raw_dir) / f'{protein_name}.pkl'\n", 289 | " if not output.exists():\n", 290 | " pdb_id, chain = protein_name.split(\"_\")\n", 291 | " graphein_graph = load_graph(pdb_id, chain)\n", 292 | " with open(output, \"wb\") as f:\n", 293 | " pickle.dump(graphein_graph, f)\n", 294 | "\n", 295 | " @property\n", 296 | " def raw_file_names(self):\n", 297 | " return [Path(self.raw_dir) / f\"{protein_name}.pkl\" for protein_name in self.protein_names if (Path(self.raw_dir) / f\"{protein_name}.pkl\").exists()]\n", 298 | "\n", 299 | " @property\n", 300 | " def processed_file_names(self):\n", 301 | " return [Path(self.processed_dir) / f\"{protein_name}.pt\" for protein_name in self.protein_names if (Path(self.processed_dir) / f\"{protein_name}.pt\").exists()]\n", 302 | "\n", 303 | " def process(self):\n", 304 | " for protein_name in self.protein_names:\n", 305 | " output = Path(self.processed_dir) / f'{protein_name}.pt'\n", 306 | " if not output.exists():\n", 307 | " with open(Path(self.raw_dir) / f\"{protein_name}.pkl\", \"rb\") as f:\n", 308 | " graphein_graph = pickle.load(f)\n", 309 | " torch_data = self.convertor(graphein_graph)\n", 310 | " torch.save(torch_data, output)\n", 311 | "\n", 312 | " def len(self):\n", 313 | " return len(self.processed_file_names)\n", 314 | "\n", 315 | " def get(self, idx):\n", 316 | " data = torch.load(self.processed_file_names[idx])\n", 317 | " return data\n" 318 | ] 319 | }, 320 | { 321 | "cell_type": "markdown", 322 | "metadata": {}, 323 | "source": [ 324 | "Let's make our dataset! 
We run it for the first 20 proteins to save time." 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "with open('dataset.txt') as f:\n", 334 | " protein_names = [line.strip() for line in f]\n", 335 | "\n", 336 | "dataset = ProteinDataset(root='./test_data', protein_names=protein_names[:20])\n" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "dataset" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "dataset[0]" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": null, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "all_amino_acids = []\n", 364 | "all_interface_labels = []\n", 365 | "\n", 366 | "for torch_graph in dataset:\n", 367 | " extracted_amino_acids = [s.split(\":\")[1] for s in torch_graph.node_id]\n", 368 | " all_amino_acids.extend(extracted_amino_acids)\n", 369 | " all_interface_labels.extend(torch_graph.interface_label.tolist())\n", 370 | "\n", 371 | "data_with_aa = {\n", 372 | " \"amino acid\": all_amino_acids,\n", 373 | " \"interface labels\": all_interface_labels\n", 374 | "}\n", 375 | "\n", 376 | "plt.figure(figsize=(14, 8))\n", 377 | "sns.countplot(x=\"amino acid\", hue=\"interface labels\", data=data_with_aa, palette=\"Set2\")\n", 378 | "plt.title(\"Distribution of amino acid types across interface and non-interface residues\")\n", 379 | "plt.ylabel(\"Count\")\n", 380 | "plt.xlabel(\"Amino acid\")\n", 381 | "plt.legend(title=\"Interface label\", loc='upper right')\n", 382 | "plt.tight_layout()\n", 383 | "plt.show()" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "Graphein also has a built-in `ProteinGraphDataset` class that combines these steps. It also has some nice features like \n", 391 | "- the ability to load a dataset of proteins from the PDB or the AlphaFold Database, or from a local directory of PDB files\n", 392 | "- the ability to apply custom transformations from your bioinformatics tools of choice to the PDB files (with the `pdb_transform` argument)." 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "### DataModule\n", 400 | "\n", 401 | "Now that we have our `Dataset` ready, we need to specify how the `Data` objects within the created Dataset are split into training, validation and test sets. This is where PyTorch Lightning's DataModule comes in ([documentation](https://lightning.ai/docs/pytorch/stable/data/datamodule.html)). The `DataModule` is a class that encapsulates the logic for loading, batching and splitting the data. It keeps the logic for data loading and batching separate from both the data ingestion and the model and training logic, which makes the code more modular and easier to maintain. It also makes it easier to switch between different datasets and data loading strategies.\n", 402 | "\n", 403 | "To define a `DataModule` the following methods are necessary:\n", 404 | "- `prepare_data()` - this defines the downloading and IO operations that are generally slower and only need to be run once. In our case it just runs the Dataset function once to download and process all the pickle and pt files. 
This is called once at the beginning of training, so all future calls of Dataset in setup (which is called on every node/process) just load the data from the saved files.\n", 405 | "- `setup()` - this defines how to split the dataset. It also takes a `stage` argument (one of `fit,validate,test,predict`).\n", 406 | "- `train_dataloader()` - this returns the `DataLoader` for the training data\n", 407 | "\n", 408 | "and the following are optional:\n", 409 | "- `val_dataloader()` - this returns the `DataLoader` for the validation data\n", 410 | "- `test_dataloader()` - this returns the `DataLoader` for the test data\n", 411 | "- `predict_dataloader()` - this returns the `DataLoader` for the inference data" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": null, 417 | "metadata": { 418 | "editable": true, 419 | "slideshow": { 420 | "slide_type": "" 421 | }, 422 | "tags": [] 423 | }, 424 | "outputs": [], 425 | "source": [ 426 | "class ProteinGraphDataModule(lightning.LightningDataModule):\n", 427 | " def __init__(self, root, dataset_file, batch_size=8):\n", 428 | " super().__init__()\n", 429 | " self.root = root\n", 430 | " self.dataset_file = dataset_file\n", 431 | " with open(dataset_file) as f:\n", 432 | " self.protein_names = [line.strip() for line in f]\n", 433 | " self.protein_names = self.protein_names[:20] # SMALL DATASET FOR TESTING\n", 434 | " self.batch_size = batch_size\n", 435 | "\n", 436 | " def prepare_data(self):\n", 437 | " ProteinDataset(root=self.root, protein_names=self.protein_names)\n", 438 | " \n", 439 | " def setup(self, stage):\n", 440 | " dataset = ProteinDataset(root=self.root, protein_names=self.protein_names)\n", 441 | " # Here we just do a random split of 80/10/10 for train/val/test\n", 442 | " train_idx, val_idx, test_idx = random_split(range(len(dataset)), [0.8, 0.1, 0.1])\n", 443 | " self.train, self.val, self.test = dataset[list(train_idx)], dataset[list(val_idx)], dataset[list(test_idx)]\n", 444 | "\n", 445 | " def train_dataloader(self):\n", 446 | " return DataLoader(self.train, batch_size=self.batch_size)\n", 447 | "\n", 448 | " def val_dataloader(self):\n", 449 | " return DataLoader(self.val, batch_size=self.batch_size)\n", 450 | "\n", 451 | " def test_dataloader(self):\n", 452 | " return DataLoader(self.test, batch_size=self.batch_size)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "The `DataModule` is now ready; give it a try and loop through the dataloaders to see how they work!" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": null, 465 | "metadata": { 466 | "editable": true, 467 | "slideshow": { 468 | "slide_type": "" 469 | }, 470 | "tags": [] 471 | }, 472 | "outputs": [], 473 | "source": [ 474 | "datamodule = ProteinGraphDataModule(\"./test_data\", \"dataset.txt\")\n", 475 | "datamodule.prepare_data()\n", 476 | "datamodule.setup(\"fit\")\n", 477 | "\n", 478 | "train_loader = datamodule.train_dataloader()\n", 479 | "example_train_protein = datamodule.train[0]\n", 480 | "example_train_batch = next(iter(train_loader))" 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "We have an example train data point (`example_train_protein`) but training is almost always done on batches of data points controlled by the `batch_size`. This batch size defines the number of input graphs looked at in each iteration of the training process (one forward and backward pass). The choice of batch size involves a trade-off between training speed and the generalizability of the model.\n", 488 | "\n", 489 | "In the graph neural network setting, a batch essentially combines all the graphs of the individual proteins into a bigger graph, with an additional batch attribute that specifies which protein each node belongs to. Since there are no edges between the different proteins, training on this batch graph is equivalent to training on the individual graphs separately: no information flows between the different proteins. This is what is returned by the `train_dataloader` of the `DataModule`, in `example_train_batch`.\n" 490 | ] 491 | },
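{ "cell_type": "markdown", "metadata": {}, "source": [ "To see this bookkeeping concretely, we can inspect the batch object directly (a quick sketch; `num_graphs`, `batch` and `ptr` are attributes PyTorch Geometric adds when collating graphs into a batch):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(example_train_batch.num_graphs)  # number of proteins combined in this batch\n", "print(example_train_batch.batch[:10])  # which protein each of the first 10 nodes belongs to\n", "print(example_train_batch.ptr)  # cumulative node offsets delimiting each protein" ] },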
492 | { "cell_type": "markdown", "metadata": {}, "source": [ "Let's check what each of these variables contains." ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [ 500 | "example_train_protein" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "execution_count": null, 506 | "metadata": {}, 507 | "outputs": [], 508 | "source": [ 509 | "example_train_batch" 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": { 515 | "editable": true, 516 | "slideshow": { 517 | "slide_type": "" 518 | }, 519 | "tags": [] 520 | }, 521 | "source": [ 522 | "Move the `load_graph`, `ProteinDataset` and `ProteinGraphDataModule` functions and classes to `src/dataloader.py` so that we can use them in the next notebooks." 523 | ] 524 | } 525 | ], 526 | "metadata": { 527 | "kernelspec": { 528 | "display_name": "Python 3 (ipykernel)", 529 | "language": "python", 530 | "name": "python3" 531 | }, 532 | "language_info": { 533 | "codemirror_mode": { 534 | "name": "ipython", 535 | "version": 3 536 | }, 537 | "file_extension": ".py", 538 | "mimetype": "text/x-python", 539 | "name": "python", 540 | "nbconvert_exporter": "python", 541 | "pygments_lexer": "ipython3", 542 | "version": "3.8.18" 543 | } 544 | }, 545 | "nbformat": 4, 546 | "nbformat_minor": 4 547 | } 548 | -------------------------------------------------------------------------------- /3_Geometric_Deep_Learning_Models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "editable": true, 7 | "slideshow": { 8 | "slide_type": "slide" 9 | }, 10 | "tags": [] 11 | }, 12 | "source": [ 13 | "## Deep learning\n", 14 | "\n", 15 | "![](https://www.frontiersin.org/files/Articles/420104/fgene-10-00214-HTML/image_m/fgene-10-00214-g001.jpg)\n", 16 | "\n", 17 | "The network structure of a deep learning model. Here we select a network structure with two hidden layers as an illustration, where X nodes constitute the input layer, Hs for the hidden layers, Y for the output layer, and f(·) denotes an activation function.\n", 18 | "\n", 19 | "[Interactive example](https://cs.stanford.edu/people/karpathy/convnetjs/demo/classify2d.html)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "editable": true, 26 | "slideshow": { 27 | "slide_type": "slide" 28 | }, 29 | "tags": [] 30 | }, 31 | "source": [ 32 | "![](https://www.frontiersin.org/files/Articles/420104/fgene-10-00214-HTML/image_m/fgene-10-00214-g002.jpg)\n", 33 | "\n", 34 | "The general analysis procedure commonly adopted in deep learning, which covers training data preparation, model construction, hyperparameter fine-tuning (in training loop), prediction and performance evaluation. Basically, it still follows the requisite schema in machine learning."
35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": { 40 | "editable": true, 41 | "slideshow": { 42 | "slide_type": "slide" 43 | }, 44 | "tags": [] 45 | }, 46 | "source": [ 47 | "## Geometric Deep Learning Models\n", 48 | "\n", 49 | "Typically, graph neural networks work by passing messages between nodes in the graph wherever there is an edge, and then aggregating these messages at each node.\n", 50 | "\n", 51 | "![](https://www.aritrasen.com/wp-content/uploads/2022/11/msg_1.jpg)\n", 52 | "\n", 53 | "This is done iteratively for a number of layers, and then the final node representations are used to make predictions. This way, the model can learn to take into account the structure of the graph when making predictions, as information flows between connected nodes." 54 | ] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": { 59 | "editable": true, 60 | "slideshow": { 61 | "slide_type": "slide" 62 | }, 63 | "tags": [] 64 | }, 65 | "source": [ 66 | "\n", 67 | "\n", 68 | "![](https://ars.els-cdn.com/content/image/1-s2.0-S2666651021000012-gr2_lrg.jpg)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": { 74 | "editable": true, 75 | "slideshow": { 76 | "slide_type": "slide" 77 | }, 78 | "tags": [] 79 | }, 80 | "source": [ 81 | "Sometimes you may even change the graph structure itself, by pooling nodes or edges, or by adding new edges or nodes. This can be useful for tasks like graph classification, where you want to make a prediction about the entire graph, or for tasks like graph generation, where you want to create new graphs that are similar to the ones you've seen before.\n", 82 | "\n", 83 | "![](https://www.researchgate.net/profile/Lavender-Jiang-2/publication/343441194/figure/fig4/AS:921001206509568@1596595207558/Graph-pooling-and-graph-aggregation-Graph-pooling-left-accepts-a-graph-signal-and.ppm)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "editable": true, 90 | "slideshow": { 91 | "slide_type": "slide" 92 | }, 93 | "tags": [] 94 | }, 95 | "source": [ 96 | "So, depending on the way messages are passed, aggregated, pooled, and transformed, there are a number of different architectures that can be used to build graph neural networks.\n",
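"\n", "To make the message-passing idea concrete, here is a toy sketch (plain PyTorch, not part of the course code) of a single round of sum-aggregation message passing; the PyTorch-Geometric layers introduced below implement this, and much more, efficiently:\n", "\n", "```python\n", "import torch\n", "# toy graph with 3 nodes and directed edges 0->1, 1->2, 2->0, in edge_index format\n", "edge_index = torch.tensor([[0, 1, 2], [1, 2, 0]])\n", "h = torch.randn(3, 4)  # one 4-dimensional feature vector per node\n", "messages = h[edge_index[0]]  # each edge carries its source node's features\n", "h_new = torch.zeros_like(h)\n", "h_new.index_add_(0, edge_index[1], messages)  # sum incoming messages at each target node\n", "```\n",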
"Here's a comprehensive [review](https://www.sciencedirect.com/science/article/pii/S2666651021000012) and a [book](https://arxiv.org/pdf/2104.13478.pdf) to learn more about the different architectures, how they work, and how they can be used for different tasks.\n", 97 | "\n", 98 | "![](https://ars.els-cdn.com/content/image/1-s2.0-S2666651021000012-gr3_lrg.jpg)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "editable": true, 106 | "slideshow": { 107 | "slide_type": "" 108 | }, 109 | "tags": [] 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "from torch_geometric import nn as graph_nn\n", 114 | "from torch import nn\n", 115 | "from src import dataloader\n", 116 | "import torch\n", 117 | "import lightning" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": { 123 | "editable": true, 124 | "slideshow": { 125 | "slide_type": "" 126 | }, 127 | "tags": [] 128 | }, 129 | "source": [ 130 | "## PyTorch-Geometric models and layers\n", 131 | "\n", 132 | "In this notebook, we'll take a look at some of the different architectures and building block layers that are implemented in the `torch-geometric` library, and how they can be used to build and train models for graph-based tasks.\n", 133 | "\n", 134 | "- [torch_geometric.nn](https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html) has a variety of graph layers that can be used to build custom GNN architectures. These include:\n", 135 | " - [Convolutional layers](https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#convolutional-layers): These define how the message passing step is accomplished across edges in the graph. `GCNConv` is a simple example of a graph convolution layer, while `GATConv` is a more complex example with attention mechanisms.\n", 136 | " - [Aggregation Operators](https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#aggregation-operators): These define how messages are aggregated at each node. 
\n", 137 | " - [Pooling layers](https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#id45): These define how nodes are aggregated into a single node.\n", 138 | "\n", 139 | "- [torch_geometric.nn.models](https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#models) has more complex model architectures with a variety of of these layers already defined and combined inside.\n", 140 | "- The [PyGModelHubMixin](https://pytorch-geometric.readthedocs.io/en/latest/modules/nn.html#torch_geometric.nn.model_hub.PyGModelHubMixin) class can be used to load pre-trained models or other model architectures from the [HuggingFace Model Hub](https://huggingface.co/models?pipeline_tag=graph-ml&sort=trending)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "# !pip install --use-pep517 \"graphein[extras]\" lightning torch torch-geometric tensorboard nbformat \"jsonargparse[signatures]\" ipywidgets tabulate" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": { 155 | "editable": true, 156 | "slideshow": { 157 | "slide_type": "" 158 | }, 159 | "tags": [] 160 | }, 161 | "source": [ 162 | "We can load our datamodule from the previous notebook and get a train batch to test out these layers and models:" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "editable": true, 170 | "slideshow": { 171 | "slide_type": "" 172 | }, 173 | "tags": [] 174 | }, 175 | "outputs": [], 176 | "source": [ 177 | "datamodule = dataloader.ProteinGraphDataModule(\"./test_data\", \"dataset.txt\")\n", 178 | "datamodule.prepare_data()\n", 179 | "datamodule.setup(\"fit\")\n", 180 | "\n", 181 | "train_loader = datamodule.train_dataloader()\n", 182 | "example_train_batch = next(iter(train_loader))" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": { 188 | "editable": true, 189 | "slideshow": { 190 | "slide_type": "" 191 | }, 192 | "tags": [] 193 | }, 194 | "source": [ 195 | "## Graph neural network layers" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "Here's how we would define a graph convolutional layer that takes amino acid one hot embeddings as input node features along with the edge index to define the graph, and converts them to a 64-dimensional embedding via convolution operations across the graph:" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "editable": true, 210 | "slideshow": { 211 | "slide_type": "" 212 | }, 213 | "tags": [] 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "layer = graph_nn.GCNConv(in_channels=20, out_channels=1)\n", 218 | "example_output = layer(example_train_batch.amino_acid_one_hot.float(), example_train_batch.edge_index)\n", 219 | "example_output.shape" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": { 225 | "editable": true, 226 | "slideshow": { 227 | "slide_type": "" 228 | }, 229 | "tags": [] 230 | }, 231 | "source": [ 232 | "**Try out some of the other layers in the torch_geometric.nn module!**" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "## Graph neural network models" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "We can also try out some of the pre-defined models in the `torch_geometric.nn.models` module, such as the 
235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "## Graph neural network models" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "We can also try out some of the pre-defined models in the `torch_geometric.nn.models` module, such as the `GAT` model, which applies a series of `GATv2Conv` layers that use attention mechanisms to weight the importance of different nodes in the graph when aggregating information from neighbors, followed by a Linear layer to convert the node embeddings to a 64-dimensional output." 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "editable": true, 254 | "slideshow": { 255 | "slide_type": "" 256 | }, 257 | "tags": [] 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "model = graph_nn.GAT(in_channels=20,\n", 262 | " hidden_channels=32,\n", 263 | " num_layers=3,\n", 264 | " heads=2,\n", 265 | " out_channels=64,\n", 266 | " dropout=0.01,\n", 267 | " jk=\"last\", \n", 268 | " v2=True)\n", 269 | "print(graph_nn.summary(model, example_train_batch.amino_acid_one_hot.float(), example_train_batch.edge_index))\n", 270 | "print(model(example_train_batch.amino_acid_one_hot.float(), example_train_batch.edge_index).shape)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": { 276 | "editable": true, 277 | "slideshow": { 278 | "slide_type": "" 279 | }, 280 | "tags": [] 281 | }, 282 | "source": [ 283 | "**Try out some of the other models in the torch_geometric.nn module!**" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "We can combine layers into custom architectures. Here is an example of a simple architecture that uses a GATConv layer and a GCNConv layer with some activation functions in between, and finally a linear layer to convert the 64-dimensional node embeddings to one value per node." 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "metadata": { 297 | "editable": true, 298 | "slideshow": { 299 | "slide_type": "" 300 | }, 301 | "tags": [] 302 | }, 303 | "outputs": [], 304 | "source": [ 305 | "model = graph_nn.Sequential('x, edge_index', [\n", 306 | " (graph_nn.GATConv(in_channels=20, out_channels=64, heads=2, concat=False), 'x, edge_index -> x'),\n", 307 | " nn.ReLU(inplace=True),\n", 308 | " (graph_nn.GCNConv(in_channels=64, out_channels=64), 'x, edge_index -> x'),\n", 309 | " nn.ReLU(inplace=True),\n", 310 | " nn.Linear(64, 1),\n", 311 | "])\n", 312 | "\n", 313 | "print(graph_nn.summary(model, example_train_batch.amino_acid_one_hot.float(), example_train_batch.edge_index))" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "## Defining losses" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "In order to train such models with our data for our task of interface residue prediction, we need to define a loss function that takes the output of the model (the prediction) and the target labels and computes a loss value that the optimizer can use to update the model parameters. A typical choice for binary classification tasks is the binary cross entropy loss, which is implemented in PyTorch as `torch.nn.BCEWithLogitsLoss`. This loss function takes the raw output of the model and the target labels: it applies the sigmoid function to the model output to get the predicted probabilities, and then computes the binary cross entropy loss between the predicted probabilities and the target labels, defined as\n", 328 | "\n", 329 | "$$\n", 330 | "\\text{loss} = -\\frac{1}{N} \\sum_{i=1}^N \\left[ y_i \\log(p_i) + (1 - y_i) \\log(1 - p_i) \\right]\n", 331 | "$$\n", 332 | "\n", 333 | "where $N$ is the number of residues, $y_i$ is the target label for residue $i$ (1 if it's an interface residue, 0 if not), and $p_i$ is the predicted probability for residue $i$." 334 | ] 335 | },
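{ "cell_type": "markdown", "metadata": {}, "source": [ "As a quick sanity check (a toy sketch, not part of the course code), we can confirm on a small tensor that `BCEWithLogitsLoss` applied to raw logits matches a sigmoid followed by the plain binary cross entropy:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "logits = torch.tensor([0.5, -1.2, 2.0])\n", "labels = torch.tensor([1., 0., 1.])\n", "# the two values should agree up to floating-point error\n", "nn.BCEWithLogitsLoss()(logits, labels), nn.functional.binary_cross_entropy(torch.sigmoid(logits), labels)" ] },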
336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": { 340 | "editable": true, 341 | "slideshow": { 342 | "slide_type": "" 343 | }, 344 | "tags": [] 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "nn.BCEWithLogitsLoss()(model(example_train_batch.amino_acid_one_hot.float(), example_train_batch.edge_index), \n", 349 | " example_train_batch.interface_label.view(-1, 1).float())" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": {}, 355 | "source": [ 356 | "## Training a model" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "metadata": {}, 362 | "source": [ 363 | "Given a model that predicts interface probabilities and a loss that compares them with the true interface labels, what we now need is a training loop that will iterate over the training data in batches, compute the loss, and use an optimizer to update the model parameters based on the loss value.\n", 364 | "\n", 365 | "All of this is encapsulated within the `LightningModule` class in PyTorch Lightning.\n", 366 | "\n", 367 | "![](https://lightningaidev.wpengine.com/wp-content/uploads/2023/10/pl-walk-lit-module.png)" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": { 374 | "editable": true, 375 | "slideshow": { 376 | "slide_type": "" 377 | }, 378 | "tags": [] 379 | }, 380 | "outputs": [], 381 | "source": [ 382 | "class InterfaceModule(lightning.LightningModule):\n", 383 | " \"\"\"\n", 384 | " LightningModule wrapping a GAT model.\n", 385 | " \"\"\"\n", 386 | " def __init__(self, in_channels=20, hidden_channels=32, num_layers=2, heads=2, out_channels=1, dropout=0.01, jk=\"last\"):\n", 387 | " super().__init__()\n", 388 | " self.model = graph_nn.GAT(in_channels=in_channels,\n", 389 | " hidden_channels=hidden_channels,\n", 390 | " num_layers=num_layers,\n", 391 | " heads=heads,\n", 392 | " out_channels=out_channels,\n", 393 | " dropout=dropout,\n", 394 | " jk=jk, v2=True)\n", 395 | " self.loss_function = nn.BCEWithLogitsLoss()\n", 396 | "\n", 397 | " def forward(self, node_attributes, edge_index):\n", 398 | " return self.model(node_attributes, edge_index)\n", 399 | "\n", 400 | " def training_step(self, batch, batch_idx):\n", 401 | " out = self(batch.amino_acid_one_hot.float(), batch.edge_index)\n", 402 | " loss = self.loss_function(out, batch.interface_label.float().view(-1, 1))\n", 403 | " self.log('train_loss', loss, on_step=True, on_epoch=True, sync_dist=True,\n", 404 | " batch_size=batch.batch_size)\n", 405 | " return loss\n", 406 | "\n", 407 | " def configure_optimizers(self):\n", 408 | " return torch.optim.Adam(params=self.model.parameters(), lr=0.001, weight_decay=0.0001)" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "The `Trainer` class then combines the training loop defined in the LightningModule with 
the data loading functions in the LightningDataModule. We set the `max_epochs` to 10, meaning that the training loop will iterate over the entire training data 10 times, updating the model parameters with each batch." 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "metadata": { 422 | "editable": true, 423 | "slideshow": { 424 | "slide_type": "" 425 | }, 426 | "tags": [] 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "trainer = lightning.Trainer(max_epochs=10, accelerator='cpu')\n", 431 | "model = InterfaceModule()\n", 432 | "datamodule = dataloader.ProteinGraphDataModule(\"./test_data\", \"dataset.txt\")\n", 433 | "trainer.fit(model=model, datamodule=datamodule)" 434 | ] 435 | }, 436 | { 437 | "cell_type": "markdown", 438 | "metadata": {}, 439 | "source": [ 440 | "**Congratulations, your model is training!**" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": { 446 | "editable": true, 447 | "slideshow": { 448 | "slide_type": "" 449 | }, 450 | "tags": [] 451 | }, 452 | "source": [ 453 | "The next step is to monitor the performance of the model on the validation data (and maybe even stop training when the performance stops improving). We'd probably like to see some metrics beyond the loss value (like accuracy, precision, recall) and how those change over time, both on the training data and the validation data to make sure the model is learning something useful and not overfitting. All of this needs more complex logging and monitoring than what we've done so far, covered in the next notebook." 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": { 459 | "editable": true, 460 | "slideshow": { 461 | "slide_type": "" 462 | }, 463 | "tags": [] 464 | }, 465 | "source": [ 466 | "### [PeSTo: parameter-free geometric deep learning for accurate prediction of protein binding interfaces](https://www.nature.com/articles/s41467-023-37701-8)\n", 467 | "\n", 468 | "![PeSTO](https://media.springernature.com/full/springer-static/image/art%3A10.1038%2Fs41467-023-37701-8/MediaObjects/41467_2023_37701_Fig1_HTML.png)\n", 469 | "\n", 470 | "a Primary geometric transformer acting on the scalar and vectorial state of an atom at layer t. The interactions between the central atom and the nearest neighbors are encoded. A transformer is used to decode and filter the interactions information and to compute the new state of the central atom (Supplementary Algorithm 1). b The architecture of PeSTo for the prediction of interaction interfaces. The model is composed of multiple layers of geometric transformers with a set number of nearest neighbors (nn) and residual connections. The structure is reduced to a residue representation through an attention-based geometric pooling (Supplementary Algorithm 2). The residue states are collapsed, and the final prediction is computed from a multi-layer perceptron (MLP). 
c Example of application of the primary geometric transformer to all atoms in a structure.\n", 471 | "\n", 472 | "---" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": { 478 | "editable": true, 479 | "slideshow": { 480 | "slide_type": "" 481 | }, 482 | "tags": [] 483 | }, 484 | "source": [ 485 | "### [Structure-based protein function prediction using graph convolutional networks (DeepFri)](https://www.nature.com/articles/s41467-021-23303-9)\n", 486 | "![DeepFri](https://media.springernature.com/full/springer-static/image/art%3A10.1038%2Fs41467-021-23303-9/MediaObjects/41467_2021_23303_Fig1_HTML.png?as=webp)\n", 487 | "a LSTM language model, pre-trained on ~10 million Pfam protein sequences, used for extracting residue-level features of PDB sequence. b Our GCN with three graph convolutional layers for learning complex structure–function relationships.\n", 488 | "\n", 489 | "---" 490 | ] 491 | }, 492 | { 493 | "cell_type": "markdown", 494 | "metadata": { 495 | "editable": true, 496 | "slideshow": { 497 | "slide_type": "" 498 | }, 499 | "tags": [] 500 | }, 501 | "source": [ 502 | "### [Deciphering interaction fingerprints from protein molecular surfaces using geometric deep learning](https://www.nature.com/articles/s41592-019-0666-6)\n", 503 | "![MaSIF](https://media.springernature.com/full/springer-static/esm/art%3A10.1038%2Fs41592-019-0666-6/MediaObjects/41592_2019_666_Fig15_ESM.jpg?as=webp)\n", 504 | "Patches are fed through convolutional layers followed by a series of fully connected layers (FC5, FC4, FC2), and finally a sigmoid cross-entropy loss.\n", 505 | "\n", 506 | "---" 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": { 512 | "editable": true, 513 | "slideshow": { 514 | "slide_type": "" 515 | }, 516 | "tags": [] 517 | }, 518 | "source": [ 519 | "### [ScanNet: an interpretable geometric deep learning model for structure-based protein binding site prediction](https://www.nature.com/articles/s41592-022-01490-7)\n", 520 | "![ScanNet](https://media.springernature.com/full/springer-static/image/art%3A10.1038%2Fs41592-022-01490-7/MediaObjects/41592_2022_1490_Fig1_HTML.png)\n", 521 | "ScanNet inputs are the primary sequence, tertiary structure and, optionally, position–weight matrix computed from a MSA of evolutionarily related proteins. First, for each atom, neighboring atoms are extracted from the structure and positioned in a local coordinate frame (top left). The resulting point cloud is passed through a set of trainable, linear filters detecting specific spatio-chemical arrangements (top middle), yielding an atomic-scale representation (top right). After aggregation of the atomic representation at the amino acid level and concatenation with amino acid attributes, the process is reiterated with amino acids to obtain a representation of an amino acid (bottom). 
The latter is projected and locally averaged for residue-wise classification.\n", 522 | "\n", 523 | "---" 524 | ] 525 | } 526 | ], 527 | "metadata": { 528 | "kernelspec": { 529 | "display_name": "Python 3 (ipykernel)", 530 | "language": "python", 531 | "name": "python3" 532 | }, 533 | "language_info": { 534 | "codemirror_mode": { 535 | "name": "ipython", 536 | "version": 3 537 | }, 538 | "file_extension": ".py", 539 | "mimetype": "text/x-python", 540 | "name": "python", 541 | "nbconvert_exporter": "python", 542 | "pygments_lexer": "ipython3", 543 | "version": "3.8.18" 544 | } 545 | }, 546 | "nbformat": 4, 547 | "nbformat_minor": 4 548 | } 549 | -------------------------------------------------------------------------------- /dataset.txt: -------------------------------------------------------------------------------- 1 | 1A0G_B 2 | 1A0H_D 3 | 1A22_A 4 | 1A6J_B 5 | 1A79_C 6 | 1AA7_A 7 | 1ACB_I 8 | 1AIH_D 9 | 1ARZ_C 10 | 1AVA_C 11 | 1AVO_K 12 | 1AVO_L 13 | 1AZS_AB 14 | 1B0N_A 15 | 1B27_A 16 | 1B27_D 17 | 1B35_B 18 | 1B35_C 19 | 1B4U_C 20 | 1B65_D 21 | 1B6C_B 22 | 1B9L_D 23 | 1B9M_A 24 | 1B9Y_C 25 | 1BBH_A 26 | 1BC5_A 27 | 1BCP_H 28 | 1BCP_L 29 | 1BCR_A 30 | 1BCR_B 31 | 1BDF_D 32 | 1BIH_A 33 | 1BJA_A 34 | 1BO4_A 35 | 1BPL_A 36 | 1BVN_T 37 | 1BVP_2 38 | 1C3X_A 39 | 1C8N_C 40 | 1C8O_A 41 | 1C8U_B 42 | 1CGI_I 43 | 1CLI_A 44 | 1CMV_B 45 | 1CN4_B 46 | 1CQ3_B 47 | 1CQI_B 48 | 1D8H_A 49 | 1D9E_B 50 | 1DB2_A 51 | 1DBQ_B 52 | 1DHK_B 53 | 1DJ8_B 54 | 1DK4_A 55 | 1DLE_A 56 | 1DLT_A 57 | 1DM9_B 58 | 1DML_A 59 | 1DNW_B 60 | 1DPJ_A 61 | 1DQN_A 62 | 1DS8_L 63 | 1DXX_C 64 | 1E3U_A 65 | 1E44_A 66 | 1E4K_C 67 | 1E6E_B 68 | 1E96_B 69 | 1EAI_C 70 | 1EBO_F 71 | 1EER_A 72 | 1EEX_G 73 | 1EF1_D 74 | 1EFV_B 75 | 1EI1_B 76 | 1EK6_A 77 | 1EL6_A 78 | 1EM8_A 79 | 1EM8_B 80 | 1ETE_C 81 | 1EWJ_D 82 | 1EWY_C 83 | 1EXB_ABDC 84 | 1EZI_A 85 | 1EZU_AB 86 | 1F15_B 87 | 1F34_B 88 | 1F36_A 89 | 1F37_B 90 | 1F3M_B 91 | 1F3M_D 92 | 1F3R_B 93 | 1F3V_A 94 | 1F51_AB 95 | 1F51_E 96 | 1F5M_B 97 | 1F5Z_C 98 | 1F8Y_B 99 | 1FAK_HL 100 | 1FB1_A 101 | 1FBV_A 102 | 1FBV_C 103 | 1FCD_D 104 | 1FFW_A 105 | 1FJ1_E 106 | 1FK8_A 107 | 1FLC_F 108 | 1FNE_C 109 | 1FNE_D 110 | 1FOD_1 111 | 1FOD_2 112 | 1FQ1_A 113 | 1FS0_E 114 | 1FS0_G 115 | 1FSK_A 116 | 1FU5_A 117 | 1FYH_A 118 | 1FYH_B 119 | 1FZ0_E 120 | 1G0S_B 121 | 1G0U_Z 122 | 1G31_C 123 | 1G5G_F 124 | 1G5H_A 125 | 1G60_A 126 | 1G64_B 127 | 1G6W_D 128 | 1G8K_B 129 | 1G8Y_G 130 | 1GCQ_C 131 | 1GGP_B 132 | 1GK0_C 133 | 1GLA_F 134 | 1GMW_D 135 | 1GO4_H 136 | 1GPQ_B 137 | 1GPW_A 138 | 1GQA_A 139 | 1GQG_B 140 | 1GRC_A 141 | 1GRI_A 142 | 1GRN_B 143 | 1GT7_P 144 | 1GU9_H 145 | 1GUQ_A 146 | 1GUS_C 147 | 1GVM_F 148 | 1GX7_D 149 | 1GXD_C 150 | 1GXJ_B 151 | 1GZS_B 152 | 1H0D_C 153 | 1H21_C 154 | 1H2I_P 155 | 1H2S_A 156 | 1H2S_B 157 | 1H2U_Y 158 | 1H3L_B 159 | 1H6W_A 160 | 1H9D_A 161 | 1H9D_B 162 | 1HAA_A 163 | 1HCF_AB 164 | 1HE1_B 165 | 1HI9_D 166 | 1HJR_D 167 | 1HKQ_A 168 | 1HXM_C 169 | 1HXM_D 170 | 1HYR_A 171 | 1I1A_A 172 | 1I1D_C 173 | 1I4D_AB 174 | 1I4E_A 175 | 1I50_J 176 | 1I78_A 177 | 1I7Q_D 178 | 1I8L_A 179 | 1IAH_B 180 | 1IAR_A 181 | 1IAR_B 182 | 1IB1_E 183 | 1ICF_I 184 | 1IGU_B 185 | 1II8_A 186 | 1II8_B 187 | 1IJF_B 188 | 1IJK_BC 189 | 1IJL_B 190 | 1IJX_E 191 | 1IK9_A 192 | 1INN_A 193 | 1IQ6_B 194 | 1IR2_7 195 | 1IRA_X 196 | 1IRA_Y 197 | 1IS7_R 198 | 1IWA_F 199 | 1J1D_C 200 | 1J2U_A 201 | 1J2W_B 202 | 1J3K_B 203 | 1J3L_A 204 | 1J3R_A 205 | 1J4U_B 206 | 1J7D_A 207 | 1J7V_L 208 | 1J9I_A 209 | 1JD6_A 210 | 1JFM_E 211 | 1JIW_I 212 | 1JK9_B 213 | 1JK9_C 214 | 1JKG_A 215 | 1JKG_B 216 | 1JMT_A 217 | 1JMV_A 
218 | 1JN9_C 219 | 1JNB_F 220 | 1JNP_A 221 | 1JNZ_B 222 | 1JPY_Y 223 | 1JSU_C 224 | 1JWH_A 225 | 1JWH_CD 226 | 1JX7_E 227 | 1JYI_A 228 | 1JYO_D 229 | 1JYO_E 230 | 1JZD_AB 231 | 1JZD_C 232 | 1JZT_A 233 | 1K0Z_B 234 | 1K2E_A 235 | 1K3S_A 236 | 1K4R_C 237 | 1K50_B 238 | 1K5D_AB 239 | 1K5D_C 240 | 1K66_B 241 | 1K74_AB 242 | 1K74_DE 243 | 1KBQ_A 244 | 1KCA_G 245 | 1KCF_A 246 | 1KI9_C 247 | 1KIG_I 248 | 1KJN_B 249 | 1KKL_ABC 250 | 1KKL_H 251 | 1KNX_E 252 | 1KNY_A 253 | 1KTK_B 254 | 1KTZ_B 255 | 1KVE_C 256 | 1KVE_D 257 | 1KXJ_A 258 | 1KY6_A 259 | 1L0A_A 260 | 1L2W_C 261 | 1L2W_J 262 | 1L4Z_B 263 | 1L5X_B 264 | 1L8C_B 265 | 1L8D_A 266 | 1L8L_A 267 | 1LBH_A 268 | 1LFD_C 269 | 1LHR_B 270 | 1LI1_D 271 | 1LJ2_B 272 | 1LK5_A 273 | 1LKY_D 274 | 1LM7_A 275 | 1LQS_L 276 | 1LQW_A 277 | 1LSS_B 278 | 1LT7_B 279 | 1LUC_A 280 | 1LWI_A 281 | 1LX5_B 282 | 1LZW_B 283 | 1M1E_B 284 | 1M1F_A 285 | 1M27_AB 286 | 1M7E_C 287 | 1M7U_A 288 | 1MAH_F 289 | 1MHP_B 290 | 1MKM_B 291 | 1MOE_A 292 | 1MPS_H 293 | 1MPS_M 294 | 1MR1_D 295 | 1MR9_B 296 | 1MWQ_A 297 | 1MZW_A 298 | 1N0E_A 299 | 1N0L_C 300 | 1N0L_D 301 | 1N1E_A 302 | 1N4M_B 303 | 1N80_D 304 | 1N9J_B 305 | 1N9S_F 306 | 1NA1_A 307 | 1NA6_B 308 | 1NBA_C 309 | 1NF3_A 310 | 1NF3_C 311 | 1NHW_B 312 | 1NHW_D 313 | 1NKQ_E 314 | 1NKV_A 315 | 1NLN_A 316 | 1NN4_C 317 | 1NOV_A 318 | 1NP3_C 319 | 1NP6_B 320 | 1NPO_A 321 | 1NPP_A 322 | 1NSN_S 323 | 1NT2_B 324 | 1NU9_C 325 | 1NVI_D 326 | 1NVI_E 327 | 1NVM_A 328 | 1O1N_A 329 | 1O28_A 330 | 1O63_A 331 | 1O94_C 332 | 1O94_D 333 | 1O9I_A 334 | 1O9Y_D 335 | 1OA8_A 336 | 1OEY_A 337 | 1OEY_J 338 | 1OFU_A 339 | 1OFU_Y 340 | 1OG6_C 341 | 1OIA_A 342 | 1OJH_D 343 | 1OLP_C 344 | 1OMO_B 345 | 1OOQ_A 346 | 1OPH_A 347 | 1OQ1_D 348 | 1OQB_D 349 | 1OQC_D 350 | 1OQJ_A 351 | 1OR6_A 352 | 1OR7_A 353 | 1OR7_C 354 | 1ORQ_C 355 | 1ORY_A 356 | 1OSG_F 357 | 1OSM_C 358 | 1OTW_A 359 | 1OW4_A 360 | 1OX9_D 361 | 1OY1_A 362 | 1OY5_C 363 | 1OYV_I 364 | 1P27_A 365 | 1P27_B 366 | 1P32_A 367 | 1P3H_H 368 | 1P4O_B 369 | 1P4Q_A 370 | 1P4Q_B 371 | 1P69_A 372 | 1P7G_J 373 | 1P8C_E 374 | 1P9O_B 375 | 1PB0_A 376 | 1PBI_B 377 | 1PC6_A 378 | 1PCF_F 379 | 1PIV_3 380 | 1PK1_A 381 | 1PK1_B 382 | 1PM3_A 383 | 1PNB_B 384 | 1POI_D 385 | 1PQZ_A 386 | 1PUG_C 387 | 1PUI_A 388 | 1PVH_A 389 | 1PXV_B 390 | 1PXV_D 391 | 1PYA_F 392 | 1PYT_A 393 | 1PZM_B 394 | 1PZR_A 395 | 1Q05_B 396 | 1Q0K_K 397 | 1Q23_H 398 | 1Q2H_A 399 | 1Q40_C 400 | 1Q40_D 401 | 1Q5H_B 402 | 1Q6I_B 403 | 1Q74_A 404 | 1Q7L_C 405 | 1Q7L_D 406 | 1Q8M_A 407 | 1QA9_A 408 | 1QA9_B 409 | 1QAV_B 410 | 1QB3_A 411 | 1QCB_H 412 | 1QD6_D 413 | 1QE6_D 414 | 1QEX_B 415 | 1QFH_B 416 | 1QG7_B 417 | 1QHH_A 418 | 1QHH_B 419 | 1QIP_D 420 | 1QJG_E 421 | 1QL0_A 422 | 1QOL_G 423 | 1QS4_B 424 | 1QSO_B 425 | 1QXN_B 426 | 1QXO_D 427 | 1QYN_A 428 | 1R15_E 429 | 1R1U_A 430 | 1R30_B 431 | 1R4A_H 432 | 1R52_C 433 | 1R5I_D 434 | 1R5X_A 435 | 1R6Q_C 436 | 1R7H_B 437 | 1R7I_B 438 | 1R8O_B 439 | 1R8S_E 440 | 1RDF_B 441 | 1RF8_A 442 | 1RF8_B 443 | 1RHG_A 444 | 1RHZ_B 445 | 1RK4_A 446 | 1RLB_ABCD 447 | 1RLB_E 448 | 1RO7_C 449 | 1RPN_D 450 | 1RV6_VW 451 | 1RY7_A 452 | 1RY7_B 453 | 1S1Q_A 454 | 1S28_C 455 | 1S3N_A 456 | 1S3Z_A 457 | 1S4C_B 458 | 1S4K_A 459 | 1S59_C 460 | 1S7M_A 461 | 1SBB_B 462 | 1SFK_A 463 | 1SFX_A 464 | 1SG2_C 465 | 1SGJ_A 466 | 1SHS_E 467 | 1SHZ_C 468 | 1SIE_B 469 | 1SKV_A 470 | 1SLQ_E 471 | 1SOT_A 472 | 1SP8_D 473 | 1SPB_P 474 | 1SPI_A 475 | 1SPP_B 476 | 1SQ7_B 477 | 1SQB_D 478 | 1SQE_B 479 | 1SQX_I 480 | 1SR4_A 481 | 1SS4_A 482 | 1STM_D 483 | 1STZ_A 484 | 1SU1_A 485 | 1SUS_C 486 | 1SV0_C 487 | 1SWJ_B 488 | 1SY6_A 489 | 1SYX_A 490 | 
1SYX_B 491 | 1SZH_A 492 | 1T0B_C 493 | 1T0F_A 494 | 1T0P_B 495 | 1T0R_C 496 | 1T1V_A 497 | 1T33_A 498 | 1T3L_A 499 | 1T3U_B 500 | 1T3W_B 501 | 1T62_B 502 | 1T6B_Y 503 | 1T6S_A 504 | 1T70_C 505 | 1T8Q_B 506 | 1T9G_C 507 | 1TA3_B 508 | 1TBA_A 509 | 1TBX_A 510 | 1TDQ_A 511 | 1TDT_A 512 | 1TE1_A 513 | 1TE1_B 514 | 1TH1_C 515 | 1TH7_H 516 | 1THN_C 517 | 1TIJ_A 518 | 1TJO_A 519 | 1TKV_B 520 | 1TLH_B 521 | 1TLJ_A 522 | 1TMQ_B 523 | 1TMX_A 524 | 1TOA_A 525 | 1TQ8_D 526 | 1TU1_B 527 | 1TUE_Q 528 | 1TY4_A 529 | 1TY9_B 530 | 1U07_A 531 | 1U2V_G 532 | 1U4F_D 533 | 1U5U_A 534 | 1U5W_E 535 | 1U69_B 536 | 1U6G_B 537 | 1U6J_G 538 | 1U7H_A 539 | 1U7I_A 540 | 1U7Z_B 541 | 1U8S_B 542 | 1UAD_C 543 | 1UAN_A 544 | 1UD0_A 545 | 1UDI_I 546 | 1UHE_A 547 | 1UI5_B 548 | 1UIJ_A 549 | 1UJ3_C 550 | 1UK4_A 551 | 1UL1_A 552 | 1ULG_D 553 | 1UNN_C 554 | 1UR6_B 555 | 1USV_B 556 | 1USY_A 557 | 1UT4_B 558 | 1UUN_A 559 | 1V1H_E 560 | 1V54_Q 561 | 1V5X_A 562 | 1V8D_A 563 | 1V8H_B 564 | 1V8P_G 565 | 1VB5_B 566 | 1VCB_F 567 | 1VCH_D 568 | 1VDD_A 569 | 1VEA_B 570 | 1VET_A 571 | 1VEU_B 572 | 1VG0_B 573 | 1VGL_D 574 | 1VGO_B 575 | 1VH0_B 576 | 1VH5_B 577 | 1VH6_B 578 | 1VHZ_A 579 | 1VJL_A 580 | 1VJQ_A 581 | 1VP2_A 582 | 1VR9_A 583 | 1VRB_B 584 | 1VS3_A 585 | 1VSG_A 586 | 1VSQ_B 587 | 1VYH_M 588 | 1VZY_A 589 | 1W1I_E 590 | 1W39_C 591 | 1W3B_A 592 | 1W3Z_A 593 | 1W6U_C 594 | 1W9Z_B 595 | 1WD6_A 596 | 1WDJ_A 597 | 1WDW_A 598 | 1WDX_C 599 | 1WDZ_A 600 | 1WEK_D 601 | 1WKQ_A 602 | 1WLP_B 603 | 1WMI_A 604 | 1WMI_B 605 | 1WN1_A 606 | 1WOQ_B 607 | 1WOV_A 608 | 1WP7_A 609 | 1WPX_B 610 | 1WQ1_G 611 | 1WRD_A 612 | 1WRS_R 613 | 1WSC_A 614 | 1WTU_B 615 | 1WWP_B 616 | 1WWR_B 617 | 1WX0_D 618 | 1WYW_A 619 | 1WYZ_D 620 | 1WZ3_B 621 | 1WZ7_B 622 | 1X2H_C 623 | 1X8D_A 624 | 1X9A_B 625 | 1XCR_B 626 | 1XD9_A 627 | 1XDK_F 628 | 1XEB_A 629 | 1XFS_A 630 | 1XG2_B 631 | 1XIW_F 632 | 1XJ7_A 633 | 1XKP_A 634 | 1XKP_B 635 | 1XKP_C 636 | 1XL3_D 637 | 1XLY_A 638 | 1XM3_B 639 | 1XOU_A 640 | 1XOU_B 641 | 1XPJ_A 642 | 1XQH_A 643 | 1XR0_B 644 | 1XSQ_B 645 | 1XT9_A 646 | 1XTG_B 647 | 1XTT_C 648 | 1XUV_A 649 | 1XV2_A 650 | 1XWD_A 651 | 1XX7_B 652 | 1XY7_B 653 | 1XZ8_A 654 | 1Y0B_D 655 | 1Y0G_B 656 | 1Y0H_A 657 | 1Y10_A 658 | 1Y1A_B 659 | 1Y1O_A 660 | 1Y23_D 661 | 1Y2I_E 662 | 1Y43_B 663 | 1Y4M_A 664 | 1Y4Z_C 665 | 1Y6K_R 666 | 1Y7R_B 667 | 1Y82_A 668 | 1Y8T_A 669 | 1Y96_A 670 | 1YAF_A 671 | 1YAV_B 672 | 1YC6_E 673 | 1YCS_B 674 | 1YD8_H 675 | 1YDG_E 676 | 1YEW_I 677 | 1YEW_J 678 | 1YF9_B 679 | 1YJ7_D 680 | 1YJD_C 681 | 1YKE_C 682 | 1YKE_D 683 | 1YLM_B 684 | 1YLQ_A 685 | 1YLX_A 686 | 1YNJ_K 687 | 1YNT_G 688 | 1YOX_C 689 | 1YQF_F 690 | 1YUM_D 691 | 1YWK_A 692 | 1YYV_A 693 | 1Z0K_B 694 | 1Z2X_A 695 | 1Z3G_A 696 | 1Z69_C 697 | 1Z6O_B 698 | 1Z6O_M 699 | 1Z7M_B 700 | 1Z8K_A 701 | 1Z94_C 702 | 1ZBX_A 703 | 1ZC4_D 704 | 1ZDN_A 705 | 1ZFN_A 706 | 1ZHI_B 707 | 1ZKE_F 708 | 1ZM4_B 709 | 1ZNP_B 710 | 1ZOQ_B 711 | 1ZPQ_D 712 | 1ZPS_B 713 | 1ZTX_E 714 | 1ZUD_4 715 | 1ZXM_B 716 | 1ZXZ_A 717 | 1ZY4_A 718 | 1ZY7_A 719 | 2A19_A 720 | 2A1F_C 721 | 2A1S_D 722 | 2A2J_A 723 | 2A2L_B 724 | 2A2O_E 725 | 2A4W_B 726 | 2A5D_B 727 | 2A5T_B 728 | 2A5Z_C 729 | 2A6A_A 730 | 2A6P_B 731 | 2A6Q_F 732 | 2A78_B 733 | 2A7K_H 734 | 2A7U_B 735 | 2A7W_J 736 | 2ABZ_E 737 | 2ACM_A 738 | 2ADF_A 739 | 2ADL_A 740 | 2AF6_B 741 | 2AFF_A 742 | 2AHM_B 743 | 2AHM_F 744 | 2ANU_A 745 | 2AO9_A 746 | 2AP6_B 747 | 2AQX_A 748 | 2ARJ_Q 749 | 2ARK_D 750 | 2ASS_A 751 | 2AST_B 752 | 2AVU_D 753 | 2AVU_F 754 | 2AW2_A 755 | 2AXW_B 756 | 2AZE_A 757 | 2AZE_B 758 | 2B2K_B 759 | 2B3D_A 760 | 2B3S_A 761 | 2B4J_C 762 | 2B5F_A 763 | 
2B5L_C 764 | 2B8W_B 765 | 2B99_B 766 | 2B9Z_B 767 | 2BA0_B 768 | 2BAY_E 769 | 2BE1_A 770 | 2BEX_D 771 | 2BGC_F 772 | 2BGW_A 773 | 2BM8_H 774 | 2BOL_A 775 | 2BQZ_A 776 | 2BTF_P 777 | 2BUJ_A 778 | 2BYC_A 779 | 2BYK_A 780 | 2BYK_B 781 | 2C0L_B 782 | 2C2I_A 783 | 2C2L_D 784 | 2C2X_A 785 | 2C35_D 786 | 2C37_T 787 | 2C41_H 788 | 2C57_A 789 | 2C5J_B 790 | 2C7M_A 791 | 2CAY_A 792 | 2CH8_A 793 | 2CJR_A 794 | 2CLB_A 795 | 2CLY_D 796 | 2CLY_E 797 | 2CN4_A 798 | 2CS7_A 799 | 2CT9_B 800 | 2CW6_C 801 | 2CWO_B 802 | 2CZV_B 803 | 2CZV_D 804 | 2D00_F 805 | 2D13_B 806 | 2D1P_G 807 | 2D2A_A 808 | 2D42_B 809 | 2D4C_B 810 | 2D4U_A 811 | 2D8D_B 812 | 2DB4_B 813 | 2DB7_A 814 | 2DD4_C 815 | 2DD5_J 816 | 2DDK_A 817 | 2DDM_B 818 | 2DDZ_B 819 | 2DE5_E 820 | 2DI3_A 821 | 2DJF_A 822 | 2DM9_B 823 | 2DOI_X 824 | 2DPF_D 825 | 2DPP_B 826 | 2DSP_B 827 | 2DT5_B 828 | 2DUM_C 829 | 2DUP_A 830 | 2DVT_A 831 | 2DWN_C 832 | 2DX7_B 833 | 2DXB_H 834 | 2DYM_G 835 | 2E0N_B 836 | 2E1M_A 837 | 2E1M_B 838 | 2E1M_C 839 | 2E2K_C 840 | 2E4M_A 841 | 2E4M_C 842 | 2E6F_A 843 | 2E7S_G 844 | 2E85_B 845 | 2E9D_A 846 | 2E9X_E 847 | 2E9X_G 848 | 2EAY_B 849 | 2EBY_A 850 | 2ED6_G 851 | 2EF7_A 852 | 2EF8_A 853 | 2EFD_A 854 | 2EIL_F 855 | 2EJ0_B 856 | 2EJG_B 857 | 2EJG_D 858 | 2EK5_A 859 | 2EKD_C 860 | 2EKY_B 861 | 2EQ5_A 862 | 2ERJ_A 863 | 2ERJ_G 864 | 2ERJ_H 865 | 2ES4_A 866 | 2ES4_E 867 | 2ETE_A 868 | 2EWN_A 869 | 2F07_A 870 | 2F08_C 871 | 2F1V_B 872 | 2F2A_C 873 | 2F2F_B 874 | 2F2F_C 875 | 2F3T_F 876 | 2F4E_A 877 | 2F4M_B 878 | 2F5I_B 879 | 2F6L_A 880 | 2F8B_A 881 | 2F9I_C 882 | 2F9I_D 883 | 2F9W_B 884 | 2F9Z_A 885 | 2F9Z_C 886 | 2FBE_C 887 | 2FBK_B 888 | 2FDB_M 889 | 2FDO_A 890 | 2FEW_A 891 | 2FEW_B 892 | 2FG6_E 893 | 2FG8_D 894 | 2FJC_H 895 | 2FKD_G 896 | 2FKW_O 897 | 2FLF_F 898 | 2FMY_C 899 | 2FNP_A 900 | 2FPW_B 901 | 2FQM_E 902 | 2FS5_B 903 | 2FSY_E 904 | 2FTX_A 905 | 2FTX_B 906 | 2FU5_A 907 | 2FUR_A 908 | 2G2U_A 909 | 2G2U_B 910 | 2G38_C 911 | 2G38_D 912 | 2G3K_D 913 | 2G3O_C 914 | 2G42_A 915 | 2G5C_B 916 | 2G77_A 917 | 2G9W_B 918 | 2GA0_C 919 | 2GAF_A 920 | 2GAN_A 921 | 2GED_A 922 | 2GEF_B 923 | 2GHW_C 924 | 2GIA_B 925 | 2GIA_G 926 | 2GJV_D 927 | 2GL0_B 928 | 2GM3_D 929 | 2GMY_B 930 | 2GN8_B 931 | 2GQS_B 932 | 2GS9_A 933 | 2GSV_B 934 | 2GT2_A 935 | 2GTY_B 936 | 2GV5_C 937 | 2GXF_A 938 | 2GZ4_A 939 | 2H4O_B 940 | 2H6B_A 941 | 2H6L_C 942 | 2H7V_C 943 | 2H9E_C 944 | 2H9G_R 945 | 2HA9_B 946 | 2HBV_A 947 | 2HCR_B 948 | 2HD0_I 949 | 2HDW_A 950 | 2HEG_A 951 | 2HEK_A 952 | 2HET_A 953 | 2HEY_G 954 | 2HJ9_C 955 | 2HL3_A 956 | 2HQL_A 957 | 2HRK_A 958 | 2HRK_B 959 | 2HRV_A 960 | 2HSN_A 961 | 2HTB_D 962 | 2HTH_B 963 | 2HU2_A 964 | 2HVW_B 965 | 2HWW_A 966 | 2HXI_B 967 | 2HXO_B 968 | 2HZB_A 969 | 2HZM_G 970 | 2HZS_F 971 | 2I04_A 972 | 2I14_C 973 | 2I15_A 974 | 2I1A_D 975 | 2I2R_D 976 | 2I5B_A 977 | 2I5G_A 978 | 2I79_B 979 | 2I7R_B 980 | 2I9F_D 981 | 2I9L_L 982 | 2IA9_B 983 | 2IAZ_B 984 | 2IBG_B 985 | 2IDE_C 986 | 2IHS_A 987 | 2IHW_A 988 | 2IJ0_B 989 | 2INU_C 990 | 2IOU_D 991 | 2ISJ_C 992 | 2IU9_A 993 | 2IW5_B 994 | 2IWP_B 995 | 2IX2_B 996 | 2IY1_A 997 | 2IYB_A 998 | 2IZ5_B 999 | 2IZZ_B 1000 | 2J0Q_I 1001 | 2J28_Q 1002 | 2J28_R 1003 | 2J4E_F 1004 | 2J4L_F 1005 | 2J59_M 1006 | 2J6R_A 1007 | 2J88_A 1008 | 2JAQ_A 1009 | 2JGZ_B 1010 | 2JKI_S 1011 | 2JOD_A 1012 | 2JW1_A 1013 | 2JZ0_A 1014 | 2K2S_A 1015 | 2K2S_B 1016 | 2K42_A 1017 | 2K7I_B 1018 | 2K7L_A 1019 | 2KA4_B 1020 | 2KHM_B 1021 | 2KLH_B 1022 | 2KSP_A 1023 | 2L0I_A 1024 | 2L0T_B 1025 | 2L14_A 1026 | 2L1C_A 1027 | 2L9S_B 1028 | 2LAG_A 1029 | 2LAG_B 1030 | 2LBU_D 1031 | 2LD7_A 1032 | 2LD7_B 1033 | 2LFH_B 
1034 | 2LFS_A 1035 | 2LFW_A 1036 | 2LFW_B 1037 | 2LKM_B 1038 | 2LOX_A 1039 | 2LP4_A 1040 | 2LPB_A 1041 | 2LQH_B 1042 | 2LUH_A 1043 | 2LUH_B 1044 | 2LVO_C 1045 | 2LXM_A 1046 | 2LY4_A 1047 | 2M0G_A 1048 | 2M5B_A 1049 | 2MEV_1 1050 | 2MJ5_B 1051 | 2MP0_A 1052 | 2MTA_A 1053 | 2MV7_A 1054 | 2MWS_B 1055 | 2N01_B 1056 | 2N1D_A 1057 | 2N2H_B 1058 | 2N73_A 1059 | 2N73_B 1060 | 2NBV_A 1061 | 2NBV_B 1062 | 2NN3_D 1063 | 2NNN_B 1064 | 2NNW_B 1065 | 2NUD_B 1066 | 2NUU_L 1067 | 2NUX_B 1068 | 2NV4_A 1069 | 2NX4_A 1070 | 2NXM_B 1071 | 2NXN_B 1072 | 2NYX_D 1073 | 2NYZ_AB 1074 | 2NZ7_B 1075 | 2O20_B 1076 | 2O3A_B 1077 | 2O42_A 1078 | 2O74_F 1079 | 2O8G_J 1080 | 2O8V_A 1081 | 2OCF_A 1082 | 2ODM_B 1083 | 2OGX_B 1084 | 2OIE_C 1085 | 2OKI_A 1086 | 2OL5_B 1087 | 2OND_A 1088 | 2OOR_C 1089 | 2OPD_B 1090 | 2OPI_B 1091 | 2OQ2_B 1092 | 2OS5_D 1093 | 2OS9_B 1094 | 2OSZ_C 1095 | 2OT3_A 1096 | 2OTP_B 1097 | 2OTX_A 1098 | 2OUL_A 1099 | 2OVI_A 1100 | 2OX7_A 1101 | 2OYY_A 1102 | 2OZA_A 1103 | 2OZN_A 1104 | 2OZN_B 1105 | 2P04_A 1106 | 2P1J_B 1107 | 2P22_A 1108 | 2P22_C 1109 | 2P2C_L 1110 | 2P4W_B 1111 | 2P4Z_B 1112 | 2P5R_A 1113 | 2P5X_A 1114 | 2P62_A 1115 | 2P6I_B 1116 | 2P9L_D 1117 | 2P9L_F 1118 | 2PCC_A 1119 | 2PCC_B 1120 | 2PF4_E 1121 | 2PG1_G 1122 | 2PHD_A 1123 | 2PIF_B 1124 | 2PIH_B 1125 | 2PJW_H 1126 | 2PJW_V 1127 | 2PLA_A 1128 | 2PLG_A 1129 | 2PLR_A 1130 | 2PM7_C 1131 | 2PMS_C 1132 | 2PMV_A 1133 | 2PNH_A 1134 | 2PPY_F 1135 | 2PQA_A 1136 | 2PQA_B 1137 | 2PRO_B 1138 | 2PRU_B 1139 | 2PTT_A 1140 | 2PTT_B 1141 | 2PUK_E 1142 | 2PUL_A 1143 | 2PV2_C 1144 | 2PZD_B 1145 | 2Q0O_B 1146 | 2Q0O_C 1147 | 2Q3F_A 1148 | 2Q5E_E 1149 | 2Q7F_B 1150 | 2Q7N_B 1151 | 2Q97_T 1152 | 2QA7_C 1153 | 2QAZ_C 1154 | 2QGQ_G 1155 | 2QH9_A 1156 | 2QIY_B 1157 | 2QJA_D 1158 | 2QKM_E 1159 | 2QKM_F 1160 | 2QKO_B 1161 | 2QL2_A 1162 | 2QL2_B 1163 | 2QLC_C 1164 | 2QLP_A 1165 | 2QLZ_B 1166 | 2QNF_A 1167 | 2QNU_B 1168 | 2QQ1_A 1169 | 2QV0_B 1170 | 2QYI_D 1171 | 2QYP_B 1172 | 2R19_B 1173 | 2R2C_A 1174 | 2R56_A 1175 | 2R6A_C 1176 | 2R6U_D 1177 | 2R78_C 1178 | 2RA6_C 1179 | 2RBB_A 1180 | 2RG7_B 1181 | 2RGN_B 1182 | 2RHK_C 1183 | 2RK0_A 1184 | 2RK9_B 1185 | 2RL7_D 1186 | 2RP4_C 1187 | 2RVB_B 1188 | 2SPC_A 1189 | 2UTG_B 1190 | 2UV1_B 1191 | 2UZ3_C 1192 | 2V0O_A 1193 | 2V4I_G 1194 | 2V4I_H 1195 | 2V6B_C 1196 | 2V92_B 1197 | 2VDB_B 1198 | 2VDW_E 1199 | 2VDW_F 1200 | 2VE7_B 1201 | 2VE7_D 1202 | 2VEE_A 1203 | 2VEF_A 1204 | 2VER_A 1205 | 2VGL_S 1206 | 2VJF_C 1207 | 2VKO_A 1208 | 2VLQ_B 1209 | 2VPV_A 1210 | 2VQA_A 1211 | 2VSG_A 1212 | 2VUB_A 1213 | 2VX8_B 1214 | 2VXQ_A 1215 | 2W01_A 1216 | 2W0C_C 1217 | 2W1T_A 1218 | 2W2B_B 1219 | 2W2U_B 1220 | 2W2W_A 1221 | 2W4S_C 1222 | 2W6H_G 1223 | 2W6H_H 1224 | 2W84_A 1225 | 2WAM_A 1226 | 2WB1_S 1227 | 2WD5_A 1228 | 2WD5_B 1229 | 2WFF_1 1230 | 2WFF_3 1231 | 2WFL_A 1232 | 2WG3_A 1233 | 2WG5_B 1234 | 2WJ0_A 1235 | 2WJ8_D 1236 | 2WJV_D 1237 | 2WL8_A 1238 | 2WMM_A 1239 | 2WO3_B 1240 | 2WQZ_D 1241 | 2WTH_B 1242 | 2WUS_R 1243 | 2WVE_B 1244 | 2WWB_B 1245 | 2WZP_B 1246 | 2X17_L 1247 | 2X1X_E 1248 | 2X1X_R 1249 | 2X53_W 1250 | 2X6T_B 1251 | 2X7X_A 1252 | 2X8K_B 1253 | 2X9A_A 1254 | 2X9A_D 1255 | 2X9Q_A 1256 | 2XCM_F 1257 | 2XGF_C 1258 | 2XGY_A 1259 | 2XPP_A 1260 | 2XRA_A 1261 | 2XRN_B 1262 | 2XSO_H 1263 | 2XVC_A 1264 | 2XVT_F 1265 | 2XWG_B 1266 | 2XWQ_C 1267 | 2XWX_A 1268 | 2XYK_B 1269 | 2Y1H_B 1270 | 2Y3M_B 1271 | 2Y7R_G 1272 | 2Y8Q_A 1273 | 2Y9X_B 1274 | 2Y9X_F 1275 | 2YC2_A 1276 | 2YC2_D 1277 | 2YF3_D 1278 | 2YGD_D 1279 | 2YGG_A 1280 | 2YIL_C 1281 | 2YJE_M 1282 | 2YM9_C 1283 | 2YVE_A 1284 | 2YVL_B 1285 | 2YVS_B 1286 | 2YWM_A 1287 | 
2YXO_B 1288 | 2YY7_A 1289 | 2YYS_A 1290 | 2YYV_B 1291 | 2YZE_C 1292 | 2YZJ_A 1293 | 2YZR_B 1294 | 2YZS_B 1295 | 2Z0E_A 1296 | 2Z0L_H 1297 | 2Z0P_D 1298 | 2Z0T_C 1299 | 2Z2L_A 1300 | 2Z2L_B 1301 | 2Z2M_C 1302 | 2Z2R_A 1303 | 2Z2S_A 1304 | 2Z2S_B 1305 | 2Z3F_E 1306 | 2Z3J_A 1307 | 2Z3N_B 1308 | 2Z3Q_B 1309 | 2Z3R_G 1310 | 2Z58_B 1311 | 2Z5C_D 1312 | 2Z5C_E 1313 | 2Z6E_D 1314 | 2Z6J_B 1315 | 2Z7E_A 1316 | 2Z8I_C 1317 | 2Z8I_D 1318 | 2Z8M_B 1319 | 2Z9O_A 1320 | 2ZB9_A 1321 | 2ZBC_F 1322 | 2ZC6_D 1323 | 2ZCN_A 1324 | 2ZET_C 1325 | 2ZFW_E 1326 | 2ZG6_B 1327 | 2ZIH_A 1328 | 2ZJD_C 1329 | 2ZME_A 1330 | 2ZME_B 1331 | 2ZME_C 1332 | 2ZMV_B 1333 | 2ZNJ_B 1334 | 2ZNZ_B 1335 | 2ZQK_M 1336 | 2ZRT_C 1337 | 2ZSI_A 1338 | 2ZSI_B 1339 | 2ZSU_A 1340 | 2ZUQ_A 1341 | 2ZV3_B 1342 | 2ZVY_A 1343 | 2ZW3_E 1344 | 2ZXW_U 1345 | 2ZXX_E 1346 | 2ZXX_F 1347 | 2ZYZ_A 1348 | 2ZYZ_B 1349 | 3A1P_C 1350 | 3A1P_D 1351 | 3A1Y_C 1352 | 3A1Y_G 1353 | 3A2W_J 1354 | 3A4S_D 1355 | 3A5I_A 1356 | 3A6M_A 1357 | 3A7O_D 1358 | 3A98_C 1359 | 3A98_D 1360 | 3AAA_AB 1361 | 3AAA_C 1362 | 3AAD_A 1363 | 3AB0_A 1364 | 3ABD_B 1365 | 3AD8_D 1366 | 3AE8_B 1367 | 3AE8_C 1368 | 3AFK_A 1369 | 3AFQ_D 1370 | 3AGF_A 1371 | 3AJ1_C 1372 | 3AJB_A 1373 | 3AJF_A 1374 | 3AJY_A 1375 | 3ALZ_B 1376 | 3ANS_B 1377 | 3AON_A 1378 | 3AON_B 1379 | 3APY_A 1380 | 3AQQ_B 1381 | 3ASO_B 1382 | 3ASO_I 1383 | 3AXG_H 1384 | 3AXJ_A 1385 | 3AXJ_B 1386 | 3AXY_B 1387 | 3AXY_D 1388 | 3AYH_A 1389 | 3AYH_B 1390 | 3B42_B 1391 | 3B5K_B 1392 | 3B6A_A 1393 | 3B83_G 1394 | 3B9I_B 1395 | 3B9K_F 1396 | 3BAL_B 1397 | 3BDB_B 1398 | 3BEG_A 1399 | 3BEG_B 1400 | 3BEY_D 1401 | 3BFW_C 1402 | 3BG2_B 1403 | 3BJB_B 1404 | 3BK3_C 1405 | 3BL5_C 1406 | 3BLH_B 1407 | 3BM2_B 1408 | 3BN3_B 1409 | 3BNV_E 1410 | 3BNW_A 1411 | 3BP8_D 1412 | 3BPD_F 1413 | 3BQB_X 1414 | 3BRC_B 1415 | 3BRJ_D 1416 | 3BRW_B 1417 | 3BS5_A 1418 | 3BS5_B 1419 | 3BT3_B 1420 | 3BTI_A 1421 | 3BU2_C 1422 | 3BUA_D 1423 | 3BX7_C 1424 | 3BXJ_B 1425 | 3BYW_F 1426 | 3C5W_P 1427 | 3C6M_C 1428 | 3C8I_B 1429 | 3C9A_B 1430 | 3C9G_A 1431 | 3CAE_G 1432 | 3CAZ_B 1433 | 3CEW_D 1434 | 3CF4_G 1435 | 3CFI_A 1436 | 3CFI_B 1437 | 3CG8_B 1438 | 3CGY_A 1439 | 3CI0_J 1440 | 3CI0_K 1441 | 3CJH_F 1442 | 3CJH_I 1443 | 3CJI_C 1444 | 3CKD_C 1445 | 3CO2_D 1446 | 3CQ9_C 1447 | 3CQC_A 1448 | 3CQC_B 1449 | 3CQZ_C 1450 | 3CRC_A 1451 | 3CSY_M 1452 | 3CTM_D 1453 | 3CUE_O 1454 | 3CW2_M 1455 | 3D03_C 1456 | 3D0T_D 1457 | 3D0W_A 1458 | 3D12_B 1459 | 3D1E_A 1460 | 3D1M_D 1461 | 3D36_B 1462 | 3D3K_A 1463 | 3D3N_B 1464 | 3D4I_D 1465 | 3D54_D 1466 | 3D5N_H 1467 | 3D5S_C 1468 | 3D6R_B 1469 | 3D72_B 1470 | 3D87_A 1471 | 3D87_B 1472 | 3D8A_G 1473 | 3D8U_B 1474 | 3DAW_B 1475 | 3DCA_C 1476 | 3DDC_B 1477 | 3DDV_B 1478 | 3DFE_A 1479 | 3DGC_L 1480 | 3DGC_R 1481 | 3DGE_B 1482 | 3DGP_A 1483 | 3DGP_B 1484 | 3DH7_C 1485 | 3DI2_B 1486 | 3DI3_A 1487 | 3DKB_D 1488 | 3DKU_A 1489 | 3DKW_I 1490 | 3DNM_A 1491 | 3DOF_A 1492 | 3DOF_B 1493 | 3DP3_B 1494 | 3DPI_A 1495 | 3DPY_A 1496 | 3DRX_C 1497 | 3DSN_B 1498 | 3DTN_A 1499 | 3DTO_C 1500 | 3DTP_F 1501 | 3DUK_B 1502 | 3DVP_B 1503 | 3DZ2_B 1504 | 3DZM_A 1505 | 3E08_D 1506 | 3E0J_D 1507 | 3E17_A 1508 | 3E48_B 1509 | 3E4C_A 1510 | 3E6M_D 1511 | 3E96_B 1512 | 3EAB_A 1513 | 3EAS_B 1514 | 3ECS_H 1515 | 3EFY_A 1516 | 3EG5_B 1517 | 3EGV_A 1518 | 3EH4_B 1519 | 3EMO_C 1520 | 3ENT_A 1521 | 3EPY_A 1522 | 3ES5_A 1523 | 3ESK_A 1524 | 3EUH_B 1525 | 3EUH_E 1526 | 3EUK_E 1527 | 3EVY_B 1528 | 3EZ0_A 1529 | 3EZJ_C 1530 | 3EZQ_C 1531 | 3EZQ_D 1532 | 3F1I_H 1533 | 3F1I_S 1534 | 3F62_A 1535 | 3F6O_A 1536 | 3F7P_C 1537 | 3F8F_A 1538 | 3F8U_B 1539 | 3F90_F 1540 | 3FA4_G 1541 
| 3FAU_B 1542 | 3FBN_D 1543 | 3FCG_A 1544 | 3FD4_B 1545 | 3FF8_D 1546 | 3FFU_B 1547 | 3FJG_D 1548 | 3FJU_B 1549 | 3FK9_A 1550 | 3FKF_A 1551 | 3FLD_B 1552 | 3FLP_N 1553 | 3FMA_C 1554 | 3FN1_B 1555 | 3FOK_H 1556 | 3FPU_A 1557 | 3FPV_A 1558 | 3FQ6_B 1559 | 3FRY_A 1560 | 3FSH_A 1561 | 3FUY_B 1562 | 3FVM_B 1563 | 3FWC_N 1564 | 3FWE_B 1565 | 3FWS_B 1566 | 3FXD_B 1567 | 3FY6_A 1568 | 3G13_A 1569 | 3G33_B 1570 | 3G3B_A 1571 | 3G3Z_B 1572 | 3G5W_E 1573 | 3G67_A 1574 | 3G7K_B 1575 | 3G8L_A 1576 | 3GAA_A 1577 | 3GAS_C 1578 | 3GBU_D 1579 | 3GC3_A 1580 | 3GFK_A 1581 | 3GFK_B 1582 | 3GIF_A 1583 | 3GJB_B 1584 | 3GJO_C 1585 | 3GLC_A 1586 | 3GNI_A 1587 | 3GNI_B 1588 | 3GOR_C 1589 | 3GQI_B 1590 | 3GTY_S 1591 | 3GU3_B 1592 | 3GUY_H 1593 | 3GVI_A 1594 | 3GWH_B 1595 | 3GWL_A 1596 | 3GXW_D 1597 | 3GXY_A 1598 | 3GZ1_A 1599 | 3GZ8_B 1600 | 3H11_A 1601 | 3H2U_A 1602 | 3H2U_B 1603 | 3H3M_A 1604 | 3H4S_E 1605 | 3H6S_E 1606 | 3H90_D 1607 | 3HB1_C 1608 | 3HCS_A 1609 | 3HEF_B 1610 | 3HG9_B 1611 | 3HH0_B 1612 | 3HHH_B 1613 | 3HJP_D 1614 | 3HJU_A 1615 | 3HKL_B 1616 | 3HL4_B 1617 | 3HL6_B 1618 | 3HL9_B 1619 | 3HMX_AB 1620 | 3HO7_A 1621 | 3HPK_A 1622 | 3HQI_A 1623 | 3HQR_A 1624 | 3HR7_B 1625 | 3HRQ_B 1626 | 3HS3_A 1627 | 3HSE_B 1628 | 3HT2_A 1629 | 3HTA_A 1630 | 3HTU_C 1631 | 3HY2_Y 1632 | 3HZ4_A 1633 | 3HZO_A 1634 | 3I5V_A 1635 | 3I71_B 1636 | 3I7U_C 1637 | 3I9S_C 1638 | 3I9V_E 1639 | 3I9V_G 1640 | 3IA0_I 1641 | 3IA3_A 1642 | 3IB6_B 1643 | 3IEC_B 1644 | 3IF8_B 1645 | 3IFQ_C 1646 | 3IGM_B 1647 | 3IKK_A 1648 | 3IMO_D 1649 | 3IOY_A 1650 | 3IR1_C 1651 | 3IX8_D 1652 | 3JVC_A 1653 | 3JXO_B 1654 | 3K1I_B 1655 | 3K1I_C 1656 | 3K25_B 1657 | 3K33_A 1658 | 3K4Y_A 1659 | 3K53_A 1660 | 3K6C_C 1661 | 3K6E_B 1662 | 3K6G_A 1663 | 3K6T_C 1664 | 3K8P_C 1665 | 3KAS_B 1666 | 3KB4_D 1667 | 3KBT_A 1668 | 3KBT_D 1669 | 3KCP_A 1670 | 3KCP_B 1671 | 3KCV_I 1672 | 3KDQ_C 1673 | 3KG8_A 1674 | 3KH8_A 1675 | 3KIH_B 1676 | 3KIK_A 1677 | 3KIN_D 1678 | 3KLS_X 1679 | 3KMH_B 1680 | 3KML_G 1681 | 3KMT_B 1682 | 3KNW_A 1683 | 3KOC_F 1684 | 3KP1_H 1685 | 3KTM_F 1686 | 3KTS_E 1687 | 3KWV_F 1688 | 3KXE_A 1689 | 3KXE_C 1690 | 3KXY_B 1691 | 3KXY_T 1692 | 3KYG_B 1693 | 3KYS_C 1694 | 3KZ1_A 1695 | 3KZH_A 1696 | 3KZI_D 1697 | 3KZI_E 1698 | 3KZQ_A 1699 | 3L0I_C 1700 | 3L0W_B 1701 | 3L6Y_A 1702 | 3L7T_C 1703 | 3L82_A 1704 | 3L82_B 1705 | 3L89_M 1706 | 3L9J_C 1707 | 3L9K_C 1708 | 3LA6_I 1709 | 3LB6_A 1710 | 3LB6_C 1711 | 3LB8_C 1712 | 3LBX_A 1713 | 3LBX_B 1714 | 3LD8_A 1715 | 3LFK_A 1716 | 3LG2_B 1717 | 3LIS_B 1718 | 3LJL_A 1719 | 3LK4_7 1720 | 3LKU_F 1721 | 3LLK_B 1722 | 3LLM_B 1723 | 3LMS_B 1724 | 3LNN_A 1725 | 3LQ6_B 1726 | 3LQV_B 1727 | 3LRJ_D 1728 | 3LRU_A 1729 | 3LSJ_A 1730 | 3LVK_B 1731 | 3LYV_C 1732 | 3M1C_B 1733 | 3M1R_A 1734 | 3M3R_D 1735 | 3M4W_A 1736 | 3M65_A 1737 | 3M6N_B 1738 | 3M85_B 1739 | 3M8E_A 1740 | 3M8J_A 1741 | 3M8N_D 1742 | 3ME4_A 1743 | 3MES_B 1744 | 3MEX_A 1745 | 3MF0_A 1746 | 3MJ7_A 1747 | 3MJ7_B 1748 | 3MJK_Y 1749 | 3MJO_A 1750 | 3MJQ_A 1751 | 3MK7_B 1752 | 3MK7_I 1753 | 3MKQ_F 1754 | 3MKR_B 1755 | 3MLG_A 1756 | 3MOQ_C 1757 | 3MPO_B 1758 | 3MQ0_B 1759 | 3MTS_B 1760 | 3MZK_B 1761 | 3N10_B 1762 | 3N4S_B 1763 | 3N50_F 1764 | 3N7R_B 1765 | 3N7R_C 1766 | 3NAP_B 1767 | 3NAU_B 1768 | 3NE2_A 1769 | 3NEK_B 1770 | 3NFC_E 1771 | 3NGZ_A 1772 | 3NHV_B 1773 | 3NI7_A 1774 | 3NIC_C 1775 | 3NJ2_B 1776 | 3NJC_A 1777 | 3NK3_A 1778 | 3NMZ_C 1779 | 3NO7_A 1780 | 3NPG_C 1781 | 3NR1_B 1782 | 3NRT_C 1783 | 3NUT_C 1784 | 3NWS_D 1785 | 3NXZ_D 1786 | 3NYB_A 1787 | 3NYB_B 1788 | 3O0R_C 1789 | 3O34_A 1790 | 3O61_B 1791 | 3O6B_A 1792 | 3O7X_D 1793 | 3OAK_A 1794 | 3OED_C 
1795 | 3OEU_2 1796 | 3OEU_M 1797 | 3OKJ_D 1798 | 3ON0_A 1799 | 3ONA_A 1800 | 3ONA_B 1801 | 3ONL_A 1802 | 3OR1_E 1803 | 3OSY_E 1804 | 3OTE_A 1805 | 3OWT_A 1806 | 3OYF_A 1807 | 3P01_B 1808 | 3P0C_A 1809 | 3P16_A 1810 | 3P52_B 1811 | 3P71_T 1812 | 3P83_B 1813 | 3P83_F 1814 | 3P8B_C 1815 | 3P8B_D 1816 | 3PCC_P 1817 | 3PCQ_C 1818 | 3PCQ_D 1819 | 3PCS_A 1820 | 3PH0_A 1821 | 3PH0_C 1822 | 3PH1_A 1823 | 3PHF_D 1824 | 3PIM_B 1825 | 3PJS_N 1826 | 3PNR_B 1827 | 3PQK_B 1828 | 3PRB_B 1829 | 3PRU_B 1830 | 3PV6_B 1831 | 3PVR_A 1832 | 3PVR_C 1833 | 3Q0V_B 1834 | 3Q0Y_C 1835 | 3Q4H_A 1836 | 3Q4H_B 1837 | 3Q6A_D 1838 | 3Q7G_A 1839 | 3Q9U_A 1840 | 3QB2_B 1841 | 3QBR_A 1842 | 3QBU_A 1843 | 3QC8_B 1844 | 3QFM_A 1845 | 3QGK_J 1846 | 3QIS_A 1847 | 3QNA_D 1848 | 3QNI_A 1849 | 3QOE_A 1850 | 3QP8_D 1851 | 3QPB_F 1852 | 3QQ2_B 1853 | 3QRF_G 1854 | 3QYF_A 1855 | 3R1F_B 1856 | 3R1G_B 1857 | 3R2J_D 1858 | 3R45_A 1859 | 3R45_B 1860 | 3R4D_A 1861 | 3R4D_B 1862 | 3R4Q_D 1863 | 3R6A_B 1864 | 3R8K_B 1865 | 3RBX_B 1866 | 3RDZ_C 1867 | 3REP_A 1868 | 3RF1_A 1869 | 3RGB_G 1870 | 3RHU_B 1871 | 3RJR_C 1872 | 3RKO_K 1873 | 3RPG_B 1874 | 3RPG_C 1875 | 3RTY_G 1876 | 3RV0_B 1877 | 3RVZ_A 1878 | 3S1M_H 1879 | 3S2D_F 1880 | 3S6L_C 1881 | 3S6N_E 1882 | 3S8V_X 1883 | 3S97_A 1884 | 3S97_C 1885 | 3S9C_A 1886 | 3SCH_A 1887 | 3SF4_A 1888 | 3SF5_C 1889 | 3SGB_E 1890 | 3SGQ_I 1891 | 3SIP_E 1892 | 3SJA_J 1893 | 3SJR_A 1894 | 3SL7_B 1895 | 3SM1_A 1896 | 3SPS_A 1897 | 3SQD_A 1898 | 3STB_C 1899 | 3SYN_A 1900 | 3T2N_B 1901 | 3T30_G 1902 | 3T38_A 1903 | 3T3A_A 1904 | 3T3X_A 1905 | 3T4N_A 1906 | 3TAC_B 1907 | 3TEO_J 1908 | 3TG1_B 1909 | 3TG9_B 1910 | 3TGN_B 1911 | 3THT_B 1912 | 3TIX_A 1913 | 3TL8_A 1914 | 3TL8_B 1915 | 3TMK_F 1916 | 3TND_B 1917 | 3TND_E 1918 | 3TNF_B 1919 | 3TPU_A 1920 | 3TS8_C 1921 | 3TSZ_A 1922 | 3TUZ_F 1923 | 3TW8_C 1924 | 3TXQ_D 1925 | 3TY6_E 1926 | 3U52_E 1927 | 3U5Z_A 1928 | 3U61_H 1929 | 3UC0_B 1930 | 3UIN_D 1931 | 3UKV_B 1932 | 3UL4_B 1933 | 3ULA_D 1934 | 3ULQ_A 1935 | 3ULQ_B 1936 | 3UYO_A 1937 | 3UZ0_A 1938 | 3UZ0_B 1939 | 3UZP_A 1940 | 3V3K_A 1941 | 3V3K_B 1942 | 3V6O_A 1943 | 3V6Z_F 1944 | 3VCB_A 1945 | 3VEP_D 1946 | 3VG8_F 1947 | 3VG9_A 1948 | 3VLB_B 1949 | 3W8I_B 1950 | 3WA5_B 1951 | 3WDG_B 1952 | 3WQB_B 1953 | 3ZET_A 1954 | 3ZQP_C 1955 | 3ZTG_B 1956 | 3ZV0_B 1957 | 3ZV0_D 1958 | 3ZYJ_B 1959 | 4A94_D 1960 | 4AWX_B 1961 | 4B4S_A 1962 | 4BD9_B 1963 | 4BFI_A 1964 | 4BFI_B 1965 | 4BKX_A 1966 | 4BQD_A 1967 | 4C7N_B 1968 | 4CAD_I 1969 | 4CT0_B 1970 | 4DBG_A 1971 | 4DBG_B 1972 | 4DID_B 1973 | 4DX8_A 1974 | 4DX8_J 1975 | 4ETW_A 1976 | 4ETW_B 1977 | 4EXP_A 1978 | 4F0A_B 1979 | 4F2M_F 1980 | 4F37_A 1981 | 4F38_B 1982 | 4F48_A 1983 | 4F48_B 1984 | 4F9L_A 1985 | 4FQ0_B 1986 | 4FQ0_C 1987 | 4FQJ_A 1988 | 4G59_C 1989 | 4G6U_A 1990 | 4G6V_A 1991 | 4G6V_B 1992 | 4G80_T 1993 | 4GAM_AFBGCH 1994 | 4GAM_D 1995 | 4GH7_A 1996 | 4GH7_B 1997 | 4GI3_C 1998 | 4H88_A 1999 | 4HCP_A 2000 | 4HEP_A 2001 | 4HX3_A 2002 | 4HX3_BD 2003 | 4I6L_A 2004 | 4IC7_B 2005 | 4IYP_A 2006 | 4IZ7_B 2007 | 4J4P_A 2008 | 4JCV_E 2009 | 4JEU_B 2010 | 4JW3_A 2011 | 4K1R_A 2012 | 4K24_A 2013 | 4K2U_B 2014 | 4KDI_C 2015 | 4KFZ_A 2016 | 4KT3_A 2017 | 4LLO_B 2018 | 4LU5_A 2019 | 4LU5_IM 2020 | 4M5F_A 2021 | 4M6B_A 2022 | 4MI8_A 2023 | 4MJS_A 2024 | 4MJS_B 2025 | 4MRT_A 2026 | 4MRT_C 2027 | 4MWF_C 2028 | 4N7Z_A 2029 | 4NIQ_A 2030 | 4NL9_A 2031 | 4NOO_A 2032 | 4NOO_B 2033 | 4NP4_A 2034 | 4OFY_A 2035 | 4OII_A 2036 | 4OKV_E 2037 | 4ONS_A 2038 | 4ONS_B 2039 | 4OVN_F 2040 | 4PBW_B 2041 | 4PLJ_B 2042 | 4PLO_B 2043 | 4PW9_A 2044 | 4PW9_B 2045 | 4PZ6_A 2046 | 4Q6I_J 2047 | 4QT8_C 2048 
| 4QXA_B 2049 | 4RDQ_E 2050 | 4RS1_B 2051 | 4RWS_C 2052 | 4RWT_D 2053 | 4TQ1_A 2054 | 4U0Q_B 2055 | 4U1G_A 2056 | 4U4C_B 2057 | 4UEM_A 2058 | 4UEM_B 2059 | 4UF1_A 2060 | 4UI1_D 2061 | 4V2C_A 2062 | 4V2C_B 2063 | 4WFF_A 2064 | 4XAK_A 2065 | 4XLW_B 2066 | 4XWJ_A 2067 | 4XXB_A 2068 | 4YH7_B 2069 | 4YN0_A 2070 | 4YPI_C 2071 | 4YWC_A 2072 | 4ZGQ_A 2073 | 4ZGY_B 2074 | 4ZII_A 2075 | 4ZLT_B 2076 | 4ZRJ_B 2077 | 4ZRP_C 2078 | 4ZSO_E 2079 | 5ABV_B 2080 | 5AYS_A 2081 | 5B64_A 2082 | 5B64_B 2083 | 5B76_A 2084 | 5BNQ_A 2085 | 5C8J_I 2086 | 5CTR_A 2087 | 5CTR_C 2088 | 5CZF_D 2089 | 5CZX_A 2090 | 5D1Z_I 2091 | 5D8J_A 2092 | 5D93_A 2093 | 5DFW_A 2094 | 5DHV_N 2095 | 5DOB_A 2096 | 5DOB_B 2097 | 5EE5_A 2098 | 5EO9_B 2099 | 5ESV_E 2100 | 5F3X_B 2101 | 5F5S_A 2102 | 5F5S_B 2103 | 5FB8_C 2104 | 5GPG_B 2105 | 5H35_E 2106 | 5HBT_B 2107 | 5HDQ_A 2108 | 5IKC_M 2109 | 5IP4_E 2110 | 5IUS_A 2111 | 5IUS_C 2112 | 5J4A_A 2113 | 5J4A_B 2114 | 5JQ6_A 2115 | 5K59_A 2116 | 5KTE_A 2117 | 5LSP_P 2118 | 5LXQ_A 2119 | 5SZJ_B 2120 | 5VEB_X 2121 | 5W2B_A 2122 | 5XBM_C 2123 | 5YOY_C 2124 | 6BPE_D 2125 | 6CK9_B 2126 | 6CYF_Q 2127 | 6ELU_D 2128 | 1A99_D 2129 | 1AUI_A 2130 | 1BO6_A 2131 | 1BOU_B 2132 | 1BPL_B 2133 | 1C7N_D 2134 | 1CS0_B 2135 | 1DM5_F 2136 | 1EFV_A 2137 | 1EHI_A 2138 | 1EV7_A 2139 | 1EWY_A 2140 | 1EZ1_B 2141 | 1F06_A 2142 | 1F6M_A 2143 | 1FFV_C 2144 | 1FR8_A 2145 | 1FT8_C 2146 | 1GP2_BG 2147 | 1GX7_A 2148 | 1H6D_A 2149 | 1HF2_C 2150 | 1I2M_B 2151 | 1IUG_A 2152 | 1IXS_B 2153 | 1IZ1_B 2154 | 1J0E_B 2155 | 1JR3_D 2156 | 1JTD_B 2157 | 1K3R_B 2158 | 1KIZ_B 2159 | 1KYQ_B 2160 | 1LEH_B 2161 | 1M1T_B 2162 | 1M32_D 2163 | 1MB2_F 2164 | 1NCA_N 2165 | 1ND6_B 2166 | 1O4U_B 2167 | 1O57_D 2168 | 1O61_A 2169 | 1OMZ_B 2170 | 1ONW_B 2171 | 1P6X_A 2172 | 1P9E_A 2173 | 1PK8_A 2174 | 1POI_A 2175 | 1R0K_D 2176 | 1R8J_A 2177 | 1RQD_A 2178 | 1SOJ_J 2179 | 1ST0_A 2180 | 1SUW_D 2181 | 1SXJ_D 2182 | 1SXJ_E 2183 | 1SZ2_B 2184 | 1TO6_A 2185 | 1U0R_C 2186 | 1U2V_C 2187 | 1UL1_X 2188 | 1V7C_A 2189 | 1VHK_C 2190 | 1W1W_B 2191 | 1W23_A 2192 | 1WDW_H 2193 | 1WKH_B 2194 | 1WKV_B 2195 | 1WX1_B 2196 | 1X7O_A 2197 | 1X9J_G 2198 | 1XF9_A 2199 | 1XG2_A 2200 | 1XI8_A 2201 | 1XQS_A 2202 | 1XXI_C 2203 | 1XXI_E 2204 | 1Y56_B 2205 | 1Y8Q_A 2206 | 1YKJ_A 2207 | 1Z85_B 2208 | 1ZCT_B 2209 | 1ZH8_A 2210 | 1ZHH_A 2211 | 1ZXO_C 2212 | 2AF4_C 2213 | 2AUN_A 2214 | 2AW6_A 2215 | 2AYO_A 2216 | 2B3Z_D 2217 | 2BP7_B 2218 | 2C0L_A 2219 | 2CE8_A 2220 | 2CH5_A 2221 | 2CVO_B 2222 | 2DG0_E 2223 | 2E2P_A 2224 | 2E5F_A 2225 | 2E7J_B 2226 | 2E89_A 2227 | 2EJW_A 2228 | 2EP5_B 2229 | 2F4M_A 2230 | 2F4N_C 2231 | 2FV2_C 2232 | 2GSZ_F 2233 | 2GVQ_C 2234 | 2GZA_A 2235 | 2HYX_C 2236 | 2I3T_A 2237 | 2I7N_B 2238 | 2IJZ_G 2239 | 2IP2_B 2240 | 2J0Q_B 2241 | 2J5T_G 2242 | 2J6X_G 2243 | 2MTA_HL 2244 | 2OBN_D 2245 | 2OGJ_E 2246 | 2OOR_AB 2247 | 2OZK_B 2248 | 2PMS_A 2249 | 2PP1_A 2250 | 2PVP_A 2251 | 2QDH_A 2252 | 2QFC_A 2253 | 2QGI_A 2254 | 2QXV_A 2255 | 2QYO_A 2256 | 2R87_A 2257 | 2R8Q_B 2258 | 2UX8_C 2259 | 2V9P_G 2260 | 2VN8_A 2261 | 2VUN_C 2262 | 2WPX_B 2263 | 2WUS_A 2264 | 2WVM_A 2265 | 2X2E_D 2266 | 2X5D_D 2267 | 2XWT_C 2268 | 2Y0M_A 2269 | 2Y5B_A 2270 | 2YCH_A 2271 | 2Z4R_B 2272 | 2Z50_A 2273 | 2Z71_C 2274 | 2ZBK_A 2275 | 2ZIU_A 2276 | 2ZIU_B 2277 | 2ZUC_B 2278 | 3AB1_B 2279 | 3AUY_A 2280 | 3B5U_J 2281 | 3BH6_B 2282 | 3BM5_A 2283 | 3BP8_AB 2284 | 3BT1_U 2285 | 3BWO_D 2286 | 3C0B_C 2287 | 3C0K_B 2288 | 3C3J_F 2289 | 3C48_A 2290 | 3CQ6_A 2291 | 3D6K_B 2292 | 3DDM_B 2293 | 3DHW_D 2294 | 3DP7_A 2295 | 3DZ2_A 2296 | 3E5P_C 2297 | 3E9M_A 2298 | 3EIQ_C 2299 | 3ES8_H 2300 | 3EZ6_B 2301 | 3FHC_A 
2302 | 3G8Q_C 2303 | 3GZT_O 2304 | 3H6G_B 2305 | 3H9G_A 2306 | 3HE3_F 2307 | 3HLI_A 2308 | 3HMK_B 2309 | 3HPV_B 2310 | 3HWS_D 2311 | 3HXJ_A 2312 | 3IF8_A 2313 | 3IGF_B 2314 | 3IO1_A 2315 | 3ISL_B 2316 | 3IX1_B 2317 | 3JSK_G 2318 | 3K5H_A 2319 | 3KKI_A 2320 | 3KL9_J 2321 | 3L9W_B 2322 | 3LEE_A 2323 | 3LKU_E 2324 | 3LMA_C 2325 | 3LVK_AC 2326 | 3M2T_A 2327 | 3MCA_B 2328 | 3MGC_A 2329 | 3MKR_A 2330 | 3MZK_D 2331 | 3NND_C 2332 | 3NTQ_A 2333 | 3NVN_A 2334 | 3NVV_B 2335 | 3O5T_A 2336 | 3OQB_D 2337 | 3OV3_B 2338 | 3P9I_C 2339 | 3PUZ_B 2340 | 3PWS_A 2341 | 3QE9_Y 2342 | 3QKW_C 2343 | 3QML_D 2344 | 3QW2_B 2345 | 3R0Q_A 2346 | 3RAM_D 2347 | 3RFH_A 2348 | 3RHF_B 2349 | 3S5U_E 2350 | 3SF5_D 2351 | 3SJA_I 2352 | 3SN6_A 2353 | 3SYL_A 2354 | 3SZP_A 2355 | 3T5P_A 2356 | 3THO_A 2357 | 3THO_B 2358 | 3TII_B 2359 | 3TQC_B 2360 | 3TWO_B 2361 | 3U5Z_B 2362 | 3UI2_A 2363 | 3UK7_A 2364 | 3V5N_C 2365 | 3VGK_E 2366 | 3VH0_D 2367 | 3VH3_A 2368 | 3ZWL_B 2369 | 4BKX_B 2370 | 4DVG_B 2371 | 4V0O_F 2372 | 4XL5_C 2373 | 4Y61_B 2374 | 4YEB_A 2375 | 4ZGY_A 2376 | 4ZRJ_A 2377 | 5BV7_A 2378 | 5TIH_A 2379 | 5XIM_A 2380 | 7MDH_B 2381 | --------------------------------------------------------------------------------