├── .dockerignore ├── Dockerfile ├── LICENCE ├── README.md ├── datasets └── custom │ ├── 1a05B │ ├── 1a05B.pdb │ └── pockets │ │ ├── pocket0_atm.pdb │ │ ├── pocket0_vert.pqr │ │ ├── pocket1_atm.pdb │ │ ├── pocket1_vert.pqr │ │ ├── pocket2_atm.pdb │ │ ├── pocket2_vert.pqr │ │ ├── pocket3_atm.pdb │ │ ├── pocket3_vert.pqr │ │ ├── pocket4_atm.pdb │ │ ├── pocket4_vert.pqr │ │ ├── pocket5_atm.pdb │ │ ├── pocket5_vert.pqr │ │ ├── pocket6_atm.pdb │ │ └── pocket6_vert.pqr │ ├── 1a9t │ ├── 1a9t.pdb │ ├── 1a9t_clean.pdb │ ├── 1a9t_lig_1.pdb │ └── 1a9t_site_1.pdb │ └── pairs.csv ├── datasets_downloader.sh ├── deeplytough ├── datasets │ ├── __init__.py │ ├── custom.py │ ├── prospeccts.py │ ├── toughm1.py │ └── vertex.py ├── engine │ ├── datasets.py │ ├── models.py │ └── predictor.py ├── matchers │ ├── __init__.py │ ├── deeply_tough.py │ ├── pocket_matcher.py │ └── tough_officials.py ├── misc │ ├── cc_ligands.py │ ├── ligand_extract.py │ └── utils.py └── scripts │ ├── custom_evaluation.py │ ├── prospeccts_benchmark.py │ ├── toughm1_benchmark.py │ ├── train.py │ └── vertex_benchmark.py ├── networks ├── deeplytough_prospeccts.pth.tar ├── deeplytough_toughm1_test.pth.tar └── deeplytough_vertex.pth.tar ├── overview.png ├── requirements.txt └── results ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P1.2.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P1.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P2.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P3.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P4.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P5.2.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P5.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P6.2.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P6.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P7.csv ├── ToughM1-DeeplyTough-deeplytough_toughm1_test.csv └── Vertex-DeeplyTough-deeplytough_vertex.csv /.dockerignore: 
-------------------------------------------------------------------------------- 1 | datasets -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04 2 | SHELL ["/bin/bash", "-c"] 3 | 4 | # APT dependencies 5 | RUN apt-get update && apt-get install -y \ 6 | apt-utils \ 7 | bzip2 \ 8 | ca-certificates \ 9 | git \ 10 | curl \ 11 | sysstat \ 12 | wget \ 13 | unzip \ 14 | # for fpocket 15 | libnetcdf-dev && \ 16 | apt-get clean 17 | 18 | RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh -O ~/miniconda.sh && \ 19 | /bin/bash ~/miniconda.sh -b -p /miniconda && \ 20 | rm ~/miniconda.sh && \ 21 | /miniconda/bin/conda clean -tipsy && \ 22 | ln -s /miniconda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ 23 | echo ". /miniconda/etc/profile.d/conda.sh" >> ~/.bashrc && \ 24 | echo "conda activate base" >> ~/.bashrc 25 | ENV PATH=/miniconda/bin:${PATH} 26 | 27 | # setup python 3 env 28 | RUN conda update -y -q conda && \ 29 | conda create -y -n deeplytough python=3.6 30 | 31 | # setup python 2 env 32 | RUN conda create -y -n deeplytough_mgltools python=2.7 33 | RUN conda install -y -n deeplytough_mgltools -c bioconda mgltools=1.5.6 34 | 35 | # Source code 36 | ADD . /app 37 | WORKDIR /app 38 | ENV PYTHONPATH=/app/deeplytough:$PYTHONPATH 39 | 40 | # htmd 41 | RUN apt-get -y install openbabel 42 | RUN source activate deeplytough; \ 43 | curl -LO https://github.com/Acellera/htmd/archive/refs/tags/1.13.10.tar.gz && \ 44 | tar -xvzf 1.13.10.tar.gz && rm 1.13.10.tar.gz && cd htmd-1.13.10 && \ 45 | python setup.py install && \ 46 | cd .. 
&& \ 47 | rm -rf htmd-1.13.10; 48 | 49 | 50 | RUN source activate deeplytough; \ 51 | pip install --upgrade pip; \ 52 | pip install --no-cache-dir -r /app/requirements.txt && \ 53 | pip install --ignore-installed llvmlite==0.28 54 | 55 | # rot covariant convolutions (includes also the 'experiments' code) 56 | RUN source activate deeplytough; \ 57 | git clone https://github.com/mariogeiger/se3cnn && \ 58 | cd se3cnn && \ 59 | git reset --hard 6b976bea4ea17e1bd5655f0f030c6e2bb1637b57 && \ 60 | mv experiments se3cnn; sed -i "s/exclude=\['experiments\*'\]//g" setup.py && \ 61 | python setup.py install && \ 62 | cd .. && \ 63 | rm -rf se3cnn; 64 | RUN source activate deeplytough; \ 65 | git clone https://github.com/AMLab-Amsterdam/lie_learn && \ 66 | cd lie_learn && python setup.py install && cd .. && rm -rf lie_learn 67 | 68 | # fpocket2 69 | RUN curl -LO -k https://netcologne.dl.sourceforge.net/project/fpocket/fpocket2.tar.gz && \ 70 | tar -xvzf fpocket2.tar.gz && rm fpocket2.tar.gz && cd fpocket2 && \ 71 | sed -i 's/\$(LFLAGS) \$\^ -o \$@/\$\^ -o \$@ \$(LFLAGS)/g' makefile && make && \ 72 | mv bin/fpocket bin/fpocket2 && mv bin/dpocket bin/dpocket2 && mv bin/mdpocket bin/mdpocket2 && mv bin/tpocket bin/tpocket2 73 | ENV PATH=/app/fpocket2/bin:${PATH} 74 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | (c) BenevolentAI Limited 2019. All rights reserved. 2 | For licensing enquiries, please contact hello@benevolent.ai 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeeplyTough 2 | 3 | This is the official PyTorch implementation of our paper *DeeplyTough: Learning Structural Comparison of Protein Binding Sites*, available from <https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b00554>. 
4 | 5 | ![DeeplyTough overview figure](overview.png?raw=true "DeeplyTough overview figure.") 6 | 7 | ## Setup 8 | 9 | ### Code setup 10 | 11 | The software is ready for Docker: the image can be created from `Dockerfile` by running `docker build -t deeplytough .` (image size ~4.7GB so you may have to increase the disk space available to docker). The DeeplyTough tool is then accessible within `deeplytough` conda environment inside the container with `source activate deeplytough`. 12 | 13 | Alternatively, environment `deeplytough` can be created inside local [conda](https://conda.io/en/latest/miniconda.html) by executing the following steps from the root of this repository (linux only): 14 | 15 | ```bash 16 | # create new python 3 env and activate 17 | conda create -y -n deeplytough python=3.6 18 | conda activate deeplytough 19 | 20 | # install legacy version of htmd from source 21 | curl -LO https://github.com/Acellera/htmd/archive/refs/tags/1.13.10.tar.gz && \ 22 | tar -xvzf 1.13.10.tar.gz && rm 1.13.10.tar.gz && cd htmd-1.13.10 && \ 23 | python setup.py install && \ 24 | cd .. && \ 25 | rm -rf htmd-1.13.10; 26 | 27 | # install remaining python3 reqs 28 | apt-get -y install openbabel 29 | pip install --upgrade pip && pip install -r requirements.txt && pip install --ignore-installed llvmlite==0.28 30 | 31 | # install legacy se3nn library from source 32 | git clone https://github.com/mariogeiger/se3cnn && cd se3cnn && git reset --hard 6b976bea4ea17e1bd5655f0f030c6e2bb1637b57 && mv experiments se3cnn; sed -i "s/exclude=\['experiments\*'\]//g" setup.py && python setup.py install && cd .. && rm -rf se3cnn 33 | git clone https://github.com/AMLab-Amsterdam/lie_learn && cd lie_learn && python setup.py install && cd .. 
&& rm -rf lie_learn 34 | 35 | # create python2 env used for protein structure preprocessing 36 | conda create -y -n deeplytough_mgltools python=2.7 37 | conda install -y -n deeplytough_mgltools -c bioconda mgltools=1.5.6 38 | ``` 39 | 40 | ### Dataset setup 41 | 42 | #### Training and benchmark datasets 43 | 44 | The tool comes with built-in support for three datasets: TOUGH-M1 (Govindaraj and Brylinski, 2018), Vertex (Chen et al., 2016), and ProSPECCTs (Ehrt et al., 2018). These datasets must be downloaded if one wishes to either retrain the network or evaluate on one of these benchmarks. The datasets can be prepared in two steps: 45 | 46 | 1. Set `STRUCTURE_DATA_DIR` environment variable to a directory that will contain the datasets (about 27 GB): `export STRUCTURE_DATA_DIR=/path_to_a_dir` 47 | 2. Run `datasets_downloader.sh` from the root of this repository and get yourself a coffee 48 | 49 | This will download PDB files, extracted pockets and pre-process input features. It will also download lists of pocket pairs provided by the respective dataset authors. By downloading Prospeccts, you accept their [terms of use](http://www.ccb.tu-dortmund.de/ag-koch/prospeccts/license_en.pdf). 50 | 51 | Note that this is a convenience and we also provide code for data pre-processing: in case one wishes to start from the respective base datasets, pre-processing may be triggered using the `--db_preprocessing 1` flag when running any of our training and evaluation scripts. 
For the TOUGH-M1 dataset in particular, fpocket2 is required and can be installed as follows: 52 | ```bash 53 | curl -O -L https://netcologne.dl.sourceforge.net/project/fpocket/fpocket2.tar.gz && tar -xvzf fpocket2.tar.gz && rm fpocket2.tar.gz && cd fpocket2 && sed -i 's/\$(LFLAGS) \$\^ -o \$@/\$\^ -o \$@ \$(LFLAGS)/g' makefile && make && mv bin/fpocket bin/fpocket2 && mv bin/dpocket bin/dpocket2 && mv bin/mdpocket bin/mdpocket2 && mv bin/tpocket bin/tpocket2 54 | ``` 55 | 56 | #### Custom datasets 57 | 58 | The tool also supports an easy way of computing pocket distances for a user-defined set of pocket pairs. This requires providing i) a set of PDB structures, ii) pockets in PDB format (extracted around bound ligands or detected using any pocket detection algorithm), iii) a CSV file defining the pairing. A toy custom dataset example is provided in `datasets/custom`. The CSV file contains a quadruplet on each line indicating pairs to evaluate: `relative_path_to_pdbA, relative_path_to_pocketA, relative_path_to_pdbB, relative_path_to_pocketB`, where paths are relative to the directory containing the CSV file and the pdb extension may be omitted. `STRUCTURE_DATA_DIR` environment variable must be set to the parent directory containing the custom dataset (in the example `/path_to_this_repository/datasets`). 59 | 60 | ### Environment setup 61 | 62 | To run the evaluation and training scripts, please first set the `DEEPLYTOUGH` environment variable to the directory containing this repository and then update the `PYTHONPATH` and `PATH` variables respectively: 63 | ```bash 64 | export DEEPLYTOUGH=/path_to_this_repository 65 | export PYTHONPATH=$DEEPLYTOUGH/deeplytough:$PYTHONPATH 66 | export PATH=$DEEPLYTOUGH/fpocket2/bin:$PATH 67 | ``` 68 | 69 | ## Evaluation 70 | 71 | We provide pre-trained networks in the `networks` directory in this repository. 
The following commands assume a GPU and a 4-core CPU available; use `--device 'cpu'` if there is no GPU and set `--nworkers` parameter accordingly if there are fewer cores available. 72 | 73 | * Evaluation on TOUGH-M1: 74 | ```bash 75 | python $DEEPLYTOUGH/deeplytough/scripts/toughm1_benchmark.py --output_dir $DEEPLYTOUGH/results --device 'cuda:0' --nworkers 4 --net $DEEPLYTOUGH/networks/deeplytough_toughm1_test.pth.tar 76 | ``` 77 | 78 | * Evaluation on Vertex: 79 | ```bash 80 | python $DEEPLYTOUGH/deeplytough/scripts/vertex_benchmark.py --output_dir $DEEPLYTOUGH/results --device 'cuda:0' --nworkers 4 --net $DEEPLYTOUGH/networks/deeplytough_vertex.pth.tar 81 | ``` 82 | 83 | * Evaluation on ProSPECCTs: 84 | ```bash 85 | python $DEEPLYTOUGH/deeplytough/scripts/prospeccts_benchmark.py --output_dir $DEEPLYTOUGH/results --device 'cuda:0' --nworkers 4 --net $DEEPLYTOUGH/networks/deeplytough_prospeccts.pth.tar 86 | ``` 87 | 88 | * Evaluation on a custom dataset, located in `$STRUCTURE_DATA_DIR/some_custom_name` directory: 89 | ```bash 90 | python $DEEPLYTOUGH/deeplytough/scripts/custom_evaluation.py --dataset_subdir 'some_custom_name' --output_dir $DEEPLYTOUGH/results --device 'cuda:0' --nworkers 4 --net $DEEPLYTOUGH/networks/deeplytough_toughm1_test.pth.tar 91 | ``` 92 | Note that networks `deeplytough_prospeccts.pth.tar` and `deeplytough_vertex.pth.tar` may also be used, producing different results. 93 | 94 | Each of these commands will output to `$DEEPLYTOUGH/results` a CSV file with the resulting similarity scores (negative distances) as well as a pickle file with more detailed results (please see the code). The CSV files are already provided in this repository for convenience. 95 | 96 | 97 | ## Training 98 | 99 | Training requires a GPU with >=11GB of memory and takes about 1.5 days on recent hardware. In addition, at least a 4-core CPU is recommended due to volumetric input pre-processing being an expensive task. 
100 | 101 | * Training for TOUGH-M1 evaluation: 102 | ```bash 103 | python $DEEPLYTOUGH/deeplytough/scripts/train.py --output_dir $DEEPLYTOUGH/results/TTTT_forTough --device 'cuda:0' --seed 4 104 | ``` 105 | 106 | * Training for Vertex evaluation: 107 | ```bash 108 | python $DEEPLYTOUGH/deeplytough/scripts/train.py --output_dir $DEEPLYTOUGH/results/TTTT_forVertex --device 'cuda:0' --db_exclude_vertex 'uniprot' --db_split_strategy 'none' 109 | ``` 110 | 111 | * Training for ProSPECCTs evaluation: 112 | ```bash 113 | python $DEEPLYTOUGH/deeplytough/scripts/train.py --output_dir $DEEPLYTOUGH/results/TTTT_forProspeccts --device 'cuda:0' --db_exclude_prospeccts 'uniprot' --db_split_strategy 'none' --model_config 'se_4_4_4_4_7_3_2_batch_1,se_8_8_8_8_3_1_1_batch_1,se_16_16_16_16_3_1_2_batch_1,se_32_32_32_32_3_0_1_batch_1,se_256_0_0_0_3_0_2_batch_1,r,b,c_128_1' 114 | ``` 115 | 116 | Note that due to non-determinism inherent to the currently established process of training deep networks, it is nearly impossible to exactly reproduce the pre-trained networks in `networks` directory. 117 | 118 | Also note that, for convenience, an output directory name containing "TTTT" will have this substring replaced by the current `datetime`. 119 | 120 | ## Changelog 121 | 122 | - 23.02.2020: Updated code to follow our revised [JCIM paper](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b00554), in particular moving away from the UniProt-based splitting strategy as in our [BioRxiv](https://www.biorxiv.org/content/10.1101/600304v1) paper to a sequence-based clustering approach whereby protein structures sharing more than 30% sequence identity are always allocated to the same testing/training set. We have also made data pre-processing more robust and frozen the versions of several dependencies. 
The old code is kept in `old_bioarxiv_version` branch, though note the legacy splitting behavior can be turned on also in the current `master` by setting `--db_split_strategy` command line argument in the scripts to `uniprot_folds` instead of `seqclust`. 123 | - 08.12.2020: pinned versions of requirements and updated DockerFile and README to reflect build instructions 124 | - 28.09.2021: replaced conda htmd with source build in dockerfile to relieve dependency solver (patched: 2.12.2021, also added biopython fn to remove non-protein atoms instead of VMD which is deprecated) 125 | 126 | ## License Terms 127 | 128 | (c) BenevolentAI Limited 2019. All rights reserved.
129 | For licensing enquiries, please contact hello@benevolent.ai 130 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket0_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 1: 6 | HEADER 0 - Pocket Score : 31.2963 7 | HEADER 1 - Drug Score : 0.7720 8 | HEADER 2 - Number of V. Vertices : 123 9 | HEADER 3 - Mean alpha-sphere radius : 4.5479 10 | HEADER 4 - Mean alpha-sphere SA : 0.5003 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 38.3478 13 | HEADER 7 - Polarity Score : 6 14 | HEADER 8 - Volume Score : 4.2174 15 | HEADER 9 - Real volume (approximation) : 1388.1013 16 | HEADER 10 - Charge Score : -3 17 | HEADER 11 - Local hydrophobic density Score : 50.3896 18 | HEADER 12 - Number of apolar alpha sphere : 77 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.6260 20 | ATOM 777 CB GLN B 108 8.717 39.545 17.097 1.00 0.00 C 0 21 | ATOM 800 CZ PHE B 110 4.139 41.627 12.679 1.00 0.00 C 0 22 | ATOM 798 CE1 PHE B 110 4.062 41.402 14.048 1.00 0.00 C 0 23 | ATOM 778 CG GLN B 108 9.239 40.935 16.764 1.00 0.00 C 0 24 | ATOM 941 OD1 ASP B 128 13.380 37.971 17.376 1.00 0.00 O 0 25 | ATOM 780 OE1 GLN B 108 11.355 40.912 17.879 1.00 0.00 O 0 26 | ATOM 942 OD2 ASP B 128 14.176 39.950 16.880 1.00 0.00 O 0 27 | ATOM 1371 NH1 ARG B 181 14.113 34.782 16.056 1.00 0.00 N 0 28 | ATOM 2306 NH1 ARG B 304 10.055 34.175 7.392 1.00 0.00 N 0 29 | ATOM 2414 OE1 GLU B 317 9.414 37.896 6.723 1.00 0.00 O 0 30 | ATOM 2307 NH2 ARG B 304 11.172 35.688 6.069 1.00 0.00 N 0 31 | ATOM 2305 CZ ARG B 304 11.065 34.463 6.577 1.00 0.00 C 0 32 | ATOM 2415 OE2 GLU B 317 8.282 36.351 7.779 1.00 0.00 O 0 33 | ATOM 776 O GLN B 108 5.777 39.958 18.438 1.00 0.00 O 
0 34 | ATOM 2013 CD PRO B 263 3.471 34.893 14.055 1.00 0.00 C 0 35 | ATOM 2006 CD2 LEU B 262 3.538 36.582 18.127 1.00 0.00 C 0 36 | ATOM 2410 O GLU B 317 5.623 42.496 6.884 1.00 0.00 O 0 37 | ATOM 799 CE2 PHE B 110 4.198 42.930 12.197 1.00 0.00 C 0 38 | ATOM 2411 CB GLU B 317 7.222 39.643 6.556 1.00 0.00 C 0 39 | ATOM 2440 OE1 GLN B 321 6.826 46.002 11.208 1.00 0.00 O 0 40 | ATOM 2254 CD2 LEU B 297 3.915 38.315 9.071 1.00 0.00 C 0 41 | ATOM 2412 CG GLU B 317 7.039 38.173 6.901 1.00 0.00 C 0 42 | ATOM 2431 CG1 VAL B 320 1.710 42.194 9.271 1.00 0.00 C 0 43 | ATOM 2413 CD GLU B 317 8.341 37.432 7.160 1.00 0.00 C 0 44 | ATOM 797 CD2 PHE B 110 4.180 44.005 13.089 1.00 0.00 C 0 45 | ATOM 795 CG PHE B 110 4.100 43.795 14.461 1.00 0.00 C 0 46 | ATOM 796 CD1 PHE B 110 4.044 42.485 14.931 1.00 0.00 C 0 47 | ATOM 766 CG PRO B 106 7.625 33.484 14.857 1.00 0.00 C 0 48 | ATOM 2253 CD1 LEU B 297 2.745 38.570 11.265 1.00 0.00 C 0 49 | ATOM 2012 CG PRO B 263 4.235 34.182 12.937 1.00 0.00 C 0 50 | ATOM 1994 O MET B 261 1.567 37.668 14.307 1.00 0.00 O 0 51 | ATOM 764 O PRO B 106 6.477 34.748 17.432 1.00 0.00 O 0 52 | ATOM 765 CB PRO B 106 8.798 33.959 15.639 1.00 0.00 C 0 53 | ATOM 807 CD PRO B 111 7.053 47.000 15.315 1.00 0.00 C 0 54 | ATOM 785 O ILE B 109 7.420 44.109 17.781 1.00 0.00 O 0 55 | ATOM 791 CA PHE B 110 5.252 45.064 16.357 1.00 0.00 C 0 56 | ATOM 773 N GLN B 108 7.613 38.097 18.746 1.00 0.00 N 0 57 | ATOM 1361 O GLY B 180 19.899 34.614 12.174 1.00 0.00 O 0 58 | ATOM 2353 OE2 GLU B 310 13.419 35.517 4.107 1.00 0.00 O 0 59 | ATOM 2304 NE ARG B 304 11.944 33.524 6.233 1.00 0.00 N 0 60 | ATOM 1367 CG ARG B 181 17.391 33.770 14.986 1.00 0.00 C 0 61 | ATOM 2316 CE1 HIS B 305 12.013 28.487 8.593 1.00 0.00 C 0 62 | ATOM 1368 CD ARG B 181 16.338 33.013 15.800 1.00 0.00 C 0 63 | ATOM 1342 O ALA B 177 18.601 30.684 14.560 1.00 0.00 O 0 64 | ATOM 2303 CD ARG B 304 11.899 32.134 6.677 1.00 0.00 C 0 65 | ATOM 1343 CB ALA B 177 16.484 28.164 14.915 1.00 0.00 C 0 66 | ATOM 1340 CA ALA 
B 177 17.820 28.434 14.246 1.00 0.00 C 0 67 | ATOM 958 CD2 LEU B 130 13.462 30.218 15.915 1.00 0.00 C 0 68 | ATOM 2317 NE2 HIS B 305 10.935 29.188 8.896 1.00 0.00 N 0 69 | ATOM 957 CD1 LEU B 130 11.044 30.704 16.260 1.00 0.00 C 0 70 | ATOM 2280 CE MET B 301 7.937 30.342 11.637 1.00 0.00 C 0 71 | ATOM 746 CB LEU B 104 9.192 27.445 14.321 1.00 0.00 C 0 72 | ATOM 748 CD1 LEU B 104 10.708 26.844 12.446 1.00 0.00 C 0 73 | ATOM 2250 O LEU B 297 3.914 33.886 8.641 1.00 0.00 O 0 74 | ATOM 2251 CB LEU B 297 2.622 36.389 10.004 1.00 0.00 C 0 75 | ATOM 2278 CG MET B 301 6.421 31.672 9.710 1.00 0.00 C 0 76 | ATOM 2279 SD MET B 301 6.249 30.877 11.331 1.00 0.00 S 0 77 | ATOM 1359 CA GLY B 180 20.886 32.446 12.048 1.00 0.00 C 0 78 | ATOM 1331 O ARG B 176 20.283 28.162 13.030 1.00 0.00 O 0 79 | TER 80 | END 81 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket0_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 1: 7 | HEADER 0 - Pocket Score : 31.2963 8 | HEADER 1 - Drug Score : 0.7720 9 | HEADER 2 - Number of V. 
Vertices : 123 10 | HEADER 3 - Mean alpha-sphere radius : 4.5479 11 | HEADER 4 - Mean alpha-sphere SA : 0.5003 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 38.3478 14 | HEADER 7 - Polarity Score : 6 15 | HEADER 8 - Volume Score : 4.2174 16 | HEADER 9 - Real volume (approximation) : 1388.1013 17 | HEADER 10 - Charge Score : -3 18 | HEADER 11 - Local hydrophobic density Score : 50.3896 19 | HEADER 12 - Number of apolar alpha sphere : 77 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.6260 21 | ATOM 6305 APOL STP 1 7.642 39.870 13.292 0.00 3.97 22 | ATOM 7250 POL STP 1 11.528 39.321 14.953 0.00 3.34 23 | ATOM 7250 POL STP 1 11.249 39.039 15.477 0.00 3.05 24 | ATOM 9791 POL STP 1 12.728 37.300 11.225 0.00 5.62 25 | ATOM 13742 POL STP 1 13.128 36.868 11.068 0.00 5.50 26 | ATOM 7250 POL STP 1 12.393 38.151 11.677 0.00 5.79 27 | ATOM 7251 POL STP 1 11.456 38.615 12.023 0.00 5.72 28 | ATOM 7250 POL STP 1 11.957 37.983 11.841 0.00 5.72 29 | ATOM 9791 POL STP 1 12.462 37.277 11.351 0.00 5.58 30 | ATOM 6699 APOL STP 1 5.747 37.969 15.028 0.00 3.95 31 | ATOM 6699 APOL STP 1 5.519 37.990 15.124 0.00 3.86 32 | ATOM 4006 POL STP 1 8.316 41.692 10.537 0.00 4.61 33 | ATOM 6302 APOL STP 1 8.673 41.487 10.849 0.00 4.89 34 | ATOM 6302 APOL STP 1 7.704 41.364 10.400 0.00 4.24 35 | ATOM 6302 APOL STP 1 6.525 40.749 10.006 0.00 3.69 36 | ATOM 6302 APOL STP 1 7.503 39.817 10.800 0.00 4.26 37 | ATOM 4677 APOL STP 1 4.672 41.290 9.593 0.00 3.11 38 | ATOM 6302 APOL STP 1 4.944 41.220 9.672 0.00 3.14 39 | ATOM 6304 APOL STP 1 4.589 41.238 9.677 0.00 3.06 40 | ATOM 6302 APOL STP 1 8.032 39.814 11.067 0.00 4.59 41 | ATOM 4621 APOL STP 1 7.546 38.910 10.921 0.00 4.12 42 | ATOM 6302 APOL STP 1 7.796 39.161 11.384 0.00 4.60 43 | ATOM 6302 APOL STP 1 7.756 39.373 11.193 0.00 4.51 44 | ATOM 3591 APOL STP 1 9.417 40.602 11.386 0.00 5.39 45 | ATOM 6302 APOL STP 1 9.216 40.514 11.437 0.00 5.34 46 | ATOM 3594 POL STP 1 10.014 41.331 11.176 0.00 5.66 47 | ATOM 3595 
POL STP 1 10.162 39.271 11.802 0.00 5.31 48 | ATOM 6302 APOL STP 1 9.407 41.192 11.313 0.00 5.46 49 | ATOM 6302 APOL STP 1 8.701 39.402 11.891 0.00 5.14 50 | ATOM 7251 POL STP 1 11.395 38.535 12.046 0.00 5.72 51 | ATOM 3587 APOL STP 1 8.032 42.870 13.441 0.00 4.03 52 | ATOM 6302 APOL STP 1 8.575 42.104 12.506 0.00 4.47 53 | ATOM 6698 APOL STP 1 7.991 43.375 13.919 0.00 3.95 54 | ATOM 11163 APOL STP 1 7.651 42.609 13.747 0.00 3.80 55 | ATOM 6302 APOL STP 1 7.653 42.603 13.742 0.00 3.80 56 | ATOM 11163 APOL STP 1 7.651 42.609 13.747 0.00 3.80 57 | ATOM 3746 APOL STP 1 7.564 37.909 12.494 0.00 5.02 58 | ATOM 4621 APOL STP 1 7.556 37.902 12.484 0.00 5.01 59 | ATOM 3093 APOL STP 1 6.632 37.886 13.265 0.00 4.42 60 | ATOM 3746 APOL STP 1 7.049 37.934 13.019 0.00 4.69 61 | ATOM 3746 APOL STP 1 6.788 37.872 13.381 0.00 4.51 62 | ATOM 6901 APOL STP 1 5.032 38.032 13.874 0.00 3.51 63 | ATOM 3746 APOL STP 1 6.879 37.626 13.600 0.00 4.39 64 | ATOM 6304 APOL STP 1 7.109 37.973 12.951 0.00 4.72 65 | ATOM 3746 APOL STP 1 7.562 37.910 12.498 0.00 5.02 66 | ATOM 3746 APOL STP 1 6.651 37.386 14.361 0.00 4.05 67 | ATOM 3746 APOL STP 1 8.405 37.754 12.509 0.00 4.94 68 | ATOM 3746 APOL STP 1 7.565 37.911 12.494 0.00 5.02 69 | ATOM 4621 APOL STP 1 7.569 37.929 12.488 0.00 5.02 70 | ATOM 3746 APOL STP 1 7.179 37.163 14.706 0.00 3.71 71 | ATOM 6700 POL STP 1 5.786 37.845 15.151 0.00 3.91 72 | ATOM 6304 APOL STP 1 7.567 37.982 12.481 0.00 5.01 73 | ATOM 6305 APOL STP 1 7.593 38.001 12.462 0.00 5.01 74 | ATOM 6305 APOL STP 1 8.585 39.207 12.002 0.00 5.11 75 | ATOM 4573 POL STP 1 8.067 43.438 13.943 0.00 3.95 76 | ATOM 6698 POL STP 1 7.995 43.398 13.938 0.00 3.95 77 | ATOM 6698 POL STP 1 7.981 43.433 13.950 0.00 3.93 78 | ATOM 6698 APOL STP 1 7.828 43.535 14.093 0.00 3.76 79 | ATOM 11159 APOL STP 1 7.131 43.059 14.835 0.00 3.14 80 | ATOM 11161 APOL STP 1 7.651 42.662 13.790 0.00 3.79 81 | ATOM 6698 APOL STP 1 7.898 43.512 13.935 0.00 3.85 82 | ATOM 11159 APOL STP 1 7.479 42.924 14.499 0.00 
3.49 83 | ATOM 6914 APOL STP 1 5.179 38.020 15.117 0.00 3.72 84 | ATOM 6700 POL STP 1 5.747 37.833 15.185 0.00 3.89 85 | ATOM 9122 POL STP 1 5.955 37.793 15.539 0.00 3.62 86 | ATOM 9122 POL STP 1 5.829 37.718 15.838 0.00 3.43 87 | ATOM 7250 POL STP 1 11.230 37.632 12.286 0.00 5.54 88 | ATOM 7250 POL STP 1 11.187 38.277 12.200 0.00 5.63 89 | ATOM 7250 POL STP 1 11.027 37.831 12.360 0.00 5.54 90 | ATOM 9791 POL STP 1 11.458 36.364 11.885 0.00 5.19 91 | ATOM 13742 POL STP 1 14.686 36.528 10.578 0.00 5.78 92 | ATOM 11921 POL STP 1 15.765 36.323 8.999 0.00 5.49 93 | ATOM 13742 POL STP 1 15.170 36.173 9.823 0.00 5.51 94 | ATOM 13742 POL STP 1 14.683 36.022 10.596 0.00 5.63 95 | ATOM 11659 APOL STP 1 14.977 31.533 11.223 0.00 5.00 96 | ATOM 11934 APOL STP 1 15.591 31.885 10.416 0.00 5.26 97 | ATOM 11934 APOL STP 1 14.409 32.224 11.131 0.00 5.11 98 | ATOM 11634 APOL STP 1 14.982 31.102 11.377 0.00 4.84 99 | ATOM 11634 APOL STP 1 14.957 31.125 11.396 0.00 4.85 100 | ATOM 11659 APOL STP 1 14.915 31.414 11.336 0.00 4.95 101 | ATOM 11652 APOL STP 1 15.403 30.982 13.037 0.00 3.55 102 | ATOM 11663 APOL STP 1 14.288 32.058 11.308 0.00 5.03 103 | ATOM 11934 APOL STP 1 13.718 32.650 11.426 0.00 5.11 104 | ATOM 11934 APOL STP 1 14.360 32.234 11.154 0.00 5.11 105 | ATOM 7215 APOL STP 1 11.962 33.004 12.093 0.00 4.85 106 | ATOM 9791 POL STP 1 11.941 33.248 11.877 0.00 4.95 107 | ATOM 9523 APOL STP 1 12.149 32.858 11.979 0.00 4.92 108 | ATOM 9523 POL STP 1 12.168 32.856 11.954 0.00 4.93 109 | ATOM 7215 APOL STP 1 10.767 29.990 13.132 0.00 3.22 110 | ATOM 7531 APOL STP 1 11.737 30.596 12.433 0.00 3.89 111 | ATOM 9523 APOL STP 1 11.758 30.594 12.433 0.00 3.89 112 | ATOM 9523 APOL STP 1 11.923 31.556 12.233 0.00 4.21 113 | ATOM 9797 APOL STP 1 13.092 29.894 12.056 0.00 3.89 114 | ATOM 9788 POL STP 1 12.117 33.072 11.807 0.00 5.00 115 | ATOM 11934 POL STP 1 13.715 32.651 11.427 0.00 5.11 116 | ATOM 11933 POL STP 1 12.659 33.091 11.552 0.00 5.03 117 | ATOM 9791 APOL STP 1 10.560 33.794 
11.681 0.00 4.34 118 | ATOM 9791 POL STP 1 10.010 35.228 11.635 0.00 4.37 119 | ATOM 4621 APOL STP 1 6.073 35.940 10.508 0.00 3.52 120 | ATOM 4621 APOL STP 1 5.475 36.821 11.500 0.00 3.25 121 | ATOM 3746 APOL STP 1 7.944 33.453 11.760 0.00 3.11 122 | ATOM 3746 APOL STP 1 7.089 33.799 11.838 0.00 3.08 123 | ATOM 9791 APOL STP 1 9.154 34.095 11.251 0.00 3.96 124 | ATOM 3595 POL STP 1 6.595 35.050 10.531 0.00 3.48 125 | ATOM 3746 APOL STP 1 7.630 34.957 11.304 0.00 3.85 126 | ATOM 4621 POL STP 1 6.088 35.940 10.507 0.00 3.53 127 | ATOM 9791 POL STP 1 8.906 34.406 11.199 0.00 3.98 128 | ATOM 11637 APOL STP 1 13.895 29.243 12.051 0.00 4.01 129 | ATOM 11933 POL STP 1 13.690 34.220 10.990 0.00 5.11 130 | ATOM 13742 POL STP 1 13.736 34.416 10.955 0.00 5.13 131 | ATOM 11934 POL STP 1 14.205 34.169 10.883 0.00 5.21 132 | ATOM 13742 POL STP 1 14.458 35.186 10.696 0.00 5.39 133 | ATOM 11934 APOL STP 1 13.968 32.989 11.272 0.00 5.11 134 | ATOM 11934 POL STP 1 15.087 33.967 10.339 0.00 5.19 135 | ATOM 13742 POL STP 1 14.749 35.528 10.506 0.00 5.49 136 | ATOM 11934 APOL STP 1 13.717 32.651 11.427 0.00 5.11 137 | ATOM 11934 POL STP 1 15.731 31.955 10.310 0.00 5.28 138 | ATOM 7573 POL STP 1 17.754 30.252 10.573 0.00 4.10 139 | ATOM 9794 APOL STP 1 17.066 30.040 9.226 0.00 5.32 140 | ATOM 9794 APOL STP 1 16.501 30.793 9.813 0.00 5.19 141 | ATOM 9795 POL STP 1 16.036 31.669 10.005 0.00 5.32 142 | ATOM 11934 APOL STP 1 16.042 31.673 9.985 0.00 5.32 143 | ATOM 11934 POL STP 1 16.004 31.696 10.030 0.00 5.32 144 | TER 145 | END 146 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket1_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 
4 | HEADER 5 | HEADER Information about the pocket 2: 6 | HEADER 0 - Pocket Score : 21.6873 7 | HEADER 1 - Drug Score : 0.7210 8 | HEADER 2 - Number of V. Vertices : 53 9 | HEADER 3 - Mean alpha-sphere radius : 3.3450 10 | HEADER 4 - Mean alpha-sphere SA : 0.4503 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 37.6667 13 | HEADER 7 - Polarity Score : 8 14 | HEADER 8 - Volume Score : 4.2500 15 | HEADER 9 - Real volume (approximation) : 185.8746 16 | HEADER 10 - Charge Score : 0 17 | HEADER 11 - Local hydrophobic density Score : 42.8636 18 | HEADER 12 - Number of apolar alpha sphere : 44 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.8302 20 | ATOM 1430 CA ASP B 189 13.973 21.185 31.449 1.00 0.00 C 0 21 | ATOM 1494 CB THR B 197 13.501 17.149 31.666 1.00 0.00 C 0 22 | ATOM 1425 O VAL B 188 13.512 22.183 28.819 1.00 0.00 O 0 23 | ATOM 1433 CB ASP B 189 15.184 20.254 31.245 1.00 0.00 C 0 24 | ATOM 1521 CG TRP B 200 15.526 17.930 26.868 1.00 0.00 C 0 25 | ATOM 1496 CG2 THR B 197 12.002 16.892 31.556 1.00 0.00 C 0 26 | ATOM 1485 C THR B 196 13.902 13.606 30.584 1.00 0.00 C 0 27 | ATOM 1489 CG2 THR B 196 11.553 12.341 29.126 1.00 0.00 C 0 28 | ATOM 1490 N THR B 197 13.726 14.671 31.366 1.00 0.00 N 0 29 | ATOM 1491 CA THR B 197 14.308 15.975 31.047 1.00 0.00 C 0 30 | ATOM 1213 CD2 TYR B 162 11.175 14.387 25.797 1.00 0.00 C 0 31 | ATOM 1486 O THR B 196 14.602 13.620 29.571 1.00 0.00 O 0 32 | ATOM 1520 CB TRP B 200 15.774 16.677 27.692 1.00 0.00 C 0 33 | ATOM 1879 CD2 PHE B 244 9.816 21.210 30.524 1.00 0.00 C 0 34 | ATOM 1852 N GLY B 241 10.571 20.868 25.539 1.00 0.00 N 0 35 | ATOM 1523 CD2 TRP B 200 14.598 18.077 25.783 1.00 0.00 C 0 36 | ATOM 1853 CA GLY B 241 9.644 19.818 25.147 1.00 0.00 C 0 37 | ATOM 1850 OG1 THR B 240 11.096 22.753 27.463 1.00 0.00 O 0 38 | ATOM 1526 CE3 TRP B 200 13.676 17.219 25.168 1.00 0.00 C 0 39 | ATOM 1881 CE2 PHE B 244 10.050 20.407 31.650 1.00 0.00 C 0 40 | ATOM 1019 OD2 ASP B 138 6.900 17.016 26.918 1.00 
0.00 O 0 41 | ATOM 1018 OD1 ASP B 138 7.244 15.006 27.702 1.00 0.00 O 0 42 | ATOM 1215 CE2 TYR B 162 10.369 15.427 25.351 1.00 0.00 C 0 43 | ATOM 1197 SD MET B 160 8.474 14.361 31.479 1.00 0.00 S 0 44 | ATOM 1198 CE MET B 160 7.306 15.687 31.217 1.00 0.00 C 0 45 | ATOM 1880 CE1 PHE B 244 7.719 20.559 32.224 1.00 0.00 C 0 46 | ATOM 1878 CD1 PHE B 244 7.496 21.360 31.099 1.00 0.00 C 0 47 | ATOM 1882 CZ PHE B 244 9.002 20.082 32.497 1.00 0.00 C 0 48 | ATOM 1877 CG PHE B 244 8.542 21.690 30.238 1.00 0.00 C 0 49 | ATOM 1868 CB MET B 243 4.347 19.098 28.648 1.00 0.00 C 0 50 | ATOM 1872 N PHE B 244 6.199 21.510 28.216 1.00 0.00 N 0 51 | ATOM 1525 CE2 TRP B 200 14.722 19.399 25.304 1.00 0.00 C 0 52 | TER 53 | END 54 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket1_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 2: 7 | HEADER 0 - Pocket Score : 21.6873 8 | HEADER 1 - Drug Score : 0.7210 9 | HEADER 2 - Number of V. 
Vertices : 53 10 | HEADER 3 - Mean alpha-sphere radius : 3.3450 11 | HEADER 4 - Mean alpha-sphere SA : 0.4503 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 37.6667 14 | HEADER 7 - Polarity Score : 8 15 | HEADER 8 - Volume Score : 4.2500 16 | HEADER 9 - Real volume (approximation) : 185.8746 17 | HEADER 10 - Charge Score : 0 18 | HEADER 11 - Local hydrophobic density Score : 42.8636 19 | HEADER 12 - Number of apolar alpha sphere : 44 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.8302 21 | ATOM 15604 APOL STP 2 13.011 19.130 29.293 0.00 3.13 22 | ATOM 15847 APOL STP 2 13.268 18.947 28.970 0.00 3.25 23 | ATOM 15605 APOL STP 2 12.542 19.191 29.398 0.00 3.20 24 | ATOM 15850 APOL STP 2 12.663 18.789 28.689 0.00 3.50 25 | ATOM 14408 APOL STP 2 12.026 15.348 28.926 0.00 3.05 26 | ATOM 14408 APOL STP 2 12.225 15.519 28.738 0.00 3.14 27 | ATOM 15342 APOL STP 2 12.115 15.474 28.673 0.00 3.21 28 | ATOM 14408 APOL STP 2 12.220 15.527 28.700 0.00 3.17 29 | ATOM 15342 APOL STP 2 12.223 15.584 28.610 0.00 3.23 30 | ATOM 15342 APOL STP 2 12.155 15.503 28.655 0.00 3.22 31 | ATOM 13071 APOL STP 2 12.919 17.312 28.649 0.00 3.08 32 | ATOM 15348 APOL STP 2 12.448 16.354 28.242 0.00 3.39 33 | ATOM 15850 APOL STP 2 12.872 17.613 28.586 0.00 3.18 34 | ATOM 15344 APOL STP 2 12.688 15.870 28.246 0.00 3.24 35 | ATOM 15342 APOL STP 2 12.148 15.440 28.649 0.00 3.19 36 | ATOM 9450 POL STP 2 11.828 18.892 28.426 0.00 3.72 37 | ATOM 16806 POL STP 2 11.962 18.808 28.340 0.00 3.74 38 | ATOM 14653 APOL STP 2 10.993 18.446 28.330 0.00 3.72 39 | ATOM 15608 APOL STP 2 12.211 19.346 29.515 0.00 3.20 40 | ATOM 11454 POL STP 2 11.557 19.745 28.327 0.00 3.16 41 | ATOM 14654 APOL STP 2 11.422 18.173 28.048 0.00 3.78 42 | ATOM 13347 APOL STP 2 9.600 17.924 29.099 0.00 3.59 43 | ATOM 14653 APOL STP 2 10.190 18.069 28.454 0.00 3.78 44 | ATOM 13236 POL STP 2 10.083 17.007 28.544 0.00 3.57 45 | ATOM 15342 APOL STP 2 10.361 15.939 28.722 0.00 3.41 46 | ATOM 14654 APOL STP 2 
10.337 17.818 28.285 0.00 3.79 47 | ATOM 13086 APOL STP 2 10.373 15.517 28.914 0.00 3.39 48 | ATOM 15342 APOL STP 2 10.382 15.523 28.900 0.00 3.40 49 | ATOM 13086 APOL STP 2 9.808 16.295 29.231 0.00 3.25 50 | ATOM 13237 POL STP 2 9.605 17.132 29.072 0.00 3.46 51 | ATOM 13347 APOL STP 2 9.413 17.815 29.302 0.00 3.55 52 | ATOM 13347 APOL STP 2 8.220 18.466 29.772 0.00 3.26 53 | ATOM 14656 APOL STP 2 8.215 18.467 29.767 0.00 3.26 54 | ATOM 13347 APOL STP 2 8.700 18.213 29.492 0.00 3.36 55 | ATOM 13349 APOL STP 2 8.221 18.465 29.776 0.00 3.26 56 | ATOM 15714 APOL STP 2 8.220 18.445 29.609 0.00 3.32 57 | ATOM 14654 APOL STP 2 11.286 17.962 28.001 0.00 3.78 58 | ATOM 15344 APOL STP 2 11.890 16.858 28.103 0.00 3.45 59 | ATOM 16806 APOL STP 2 11.682 18.386 28.127 0.00 3.75 60 | ATOM 15714 APOL STP 2 8.203 18.467 29.720 0.00 3.28 61 | ATOM 16249 APOL STP 2 7.030 18.513 30.034 0.00 3.08 62 | ATOM 15348 APOL STP 2 12.430 16.407 28.222 0.00 3.40 63 | ATOM 16799 APOL STP 2 12.565 17.310 28.303 0.00 3.33 64 | ATOM 15608 APOL STP 2 12.197 19.354 29.579 0.00 3.16 65 | ATOM 15714 APOL STP 2 8.125 18.467 29.625 0.00 3.31 66 | ATOM 16247 APOL STP 2 7.290 18.497 29.713 0.00 3.19 67 | ATOM 15714 POL STP 2 7.608 18.960 29.187 0.00 3.07 68 | ATOM 16245 POL STP 2 7.329 18.882 29.308 0.00 3.06 69 | ATOM 15714 POL STP 2 8.544 19.432 28.060 0.00 3.14 70 | ATOM 15714 APOL STP 2 8.834 19.233 28.196 0.00 3.21 71 | ATOM 16803 APOL STP 2 12.536 18.775 28.591 0.00 3.55 72 | ATOM 16799 APOL STP 2 12.609 17.400 28.342 0.00 3.31 73 | ATOM 16805 POL STP 2 12.582 19.626 27.456 0.00 3.04 74 | TER 75 | END 76 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket2_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 
4 | HEADER 5 | HEADER Information about the pocket 3: 6 | HEADER 0 - Pocket Score : 12.3766 7 | HEADER 1 - Drug Score : 0.0193 8 | HEADER 2 - Number of V. Vertices : 57 9 | HEADER 3 - Mean alpha-sphere radius : 4.4969 10 | HEADER 4 - Mean alpha-sphere SA : 0.5756 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 9.3571 13 | HEADER 7 - Polarity Score : 6 14 | HEADER 8 - Volume Score : 3.7857 15 | HEADER 9 - Real volume (approximation) : 826.3259 16 | HEADER 10 - Charge Score : -2 17 | HEADER 11 - Local hydrophobic density Score : 9.0000 18 | HEADER 12 - Number of apolar alpha sphere : 10 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.1754 20 | ATOM 616 CB GLU B 88 -7.798 19.454 20.555 1.00 0.00 C 0 21 | ATOM 1893 OD1 ASP B 246 0.272 23.890 24.629 1.00 0.00 O 0 22 | ATOM 641 CD2 LEU B 91 -4.085 21.703 17.519 1.00 0.00 C 0 23 | ATOM 619 OE1 GLU B 88 -9.595 21.284 22.256 1.00 0.00 O 0 24 | ATOM 615 O GLU B 88 -6.210 17.658 18.666 1.00 0.00 O 0 25 | ATOM 647 CG LEU B 92 -3.208 16.649 19.890 1.00 0.00 C 0 26 | ATOM 648 CD1 LEU B 92 -3.824 15.957 21.103 1.00 0.00 C 0 27 | ATOM 649 CD2 LEU B 92 -1.875 17.298 20.247 1.00 0.00 C 0 28 | ATOM 1037 CE2 TYR B 140 0.432 18.683 23.581 1.00 0.00 C 0 29 | ATOM 1039 OH TYR B 140 0.486 20.519 25.086 1.00 0.00 O 0 30 | ATOM 617 CG GLU B 88 -8.783 19.126 21.659 1.00 0.00 C 0 31 | ATOM 1894 OD2 ASP B 246 0.074 23.492 26.759 1.00 0.00 O 0 32 | ATOM 678 NH1 ARG B 95 -0.155 20.717 19.001 1.00 0.00 N 0 33 | ATOM 2098 CD GLU B 275 -1.551 25.240 14.921 1.00 0.00 C 0 34 | ATOM 760 NH2 ARG B 105 1.030 25.852 18.961 1.00 0.00 N 0 35 | ATOM 2104 O PRO B 276 -2.147 28.875 15.415 1.00 0.00 O 0 36 | ATOM 2099 OE1 GLU B 275 -2.791 25.234 15.085 1.00 0.00 O 0 37 | ATOM 2109 CA ILE B 277 -1.809 30.286 17.686 1.00 0.00 C 0 38 | ATOM 2127 CA GLY B 279 -7.506 29.453 18.831 1.00 0.00 C 0 39 | ATOM 2111 O ILE B 277 -2.574 31.009 19.862 1.00 0.00 O 0 40 | ATOM 1924 OD2 ASP B 250 -1.580 29.386 25.610 1.00 0.00 O 0 41 | ATOM 
1923 OD1 ASP B 250 -0.212 29.633 23.912 1.00 0.00 O 0 42 | ATOM 497 O VAL B 73 -9.673 24.487 18.145 1.00 0.00 O 0 43 | ATOM 759 NH1 ARG B 105 1.532 27.448 20.533 1.00 0.00 N 0 44 | ATOM 679 NH2 ARG B 95 -0.498 22.592 17.730 1.00 0.00 N 0 45 | ATOM 2126 N GLY B 279 -6.317 30.248 18.582 1.00 0.00 N 0 46 | ATOM 2100 OE2 GLU B 275 -0.777 24.549 15.618 1.00 0.00 O 0 47 | ATOM 2114 CG2 ILE B 277 0.561 29.453 17.411 1.00 0.00 C 0 48 | ATOM 2110 C ILE B 277 -2.786 30.982 18.642 1.00 0.00 C 0 49 | ATOM 982 NH1 ARG B 133 2.481 23.584 21.571 1.00 0.00 N 0 50 | ATOM 983 NH2 ARG B 133 2.146 21.508 22.495 1.00 0.00 N 0 51 | ATOM 500 CG2 VAL B 73 -8.065 22.284 16.742 1.00 0.00 C 0 52 | ATOM 2112 CB ILE B 277 -0.389 30.220 18.319 1.00 0.00 C 0 53 | TER 54 | END 55 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket2_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 3: 7 | HEADER 0 - Pocket Score : 12.3766 8 | HEADER 1 - Drug Score : 0.0193 9 | HEADER 2 - Number of V. 
Vertices : 57 10 | HEADER 3 - Mean alpha-sphere radius : 4.4969 11 | HEADER 4 - Mean alpha-sphere SA : 0.5756 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 9.3571 14 | HEADER 7 - Polarity Score : 6 15 | HEADER 8 - Volume Score : 3.7857 16 | HEADER 9 - Real volume (approximation) : 826.3259 17 | HEADER 10 - Charge Score : -2 18 | HEADER 11 - Local hydrophobic density Score : 9.0000 19 | HEADER 12 - Number of apolar alpha sphere : 10 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.1754 21 | ATOM 3140 POL STP 3 -4.634 23.240 22.609 0.00 5.34 22 | ATOM 4191 APOL STP 3 -4.943 18.744 21.164 0.00 3.00 23 | ATOM 14097 APOL STP 3 -4.317 19.580 21.959 0.00 3.76 24 | ATOM 4191 APOL STP 3 -4.300 19.938 20.570 0.00 3.53 25 | ATOM 14095 APOL STP 3 -4.214 20.036 20.747 0.00 3.64 26 | ATOM 16605 APOL STP 3 -4.058 20.092 23.350 0.00 4.71 27 | ATOM 16225 APOL STP 3 -4.567 19.703 24.552 0.00 5.15 28 | ATOM 16230 POL STP 3 -4.836 21.079 24.721 0.00 5.36 29 | ATOM 16605 APOL STP 3 -4.401 19.838 24.218 0.00 5.01 30 | ATOM 16230 POL STP 3 -4.627 21.794 24.172 0.00 5.35 31 | ATOM 16230 POL STP 3 -4.823 21.879 24.891 0.00 5.48 32 | ATOM 15238 APOL STP 3 -3.650 21.348 22.410 0.00 4.92 33 | ATOM 16230 POL STP 3 -3.652 21.436 22.472 0.00 4.98 34 | ATOM 16230 POL STP 3 -3.641 21.365 22.469 0.00 4.96 35 | ATOM 16605 APOL STP 3 -3.751 21.022 22.751 0.00 4.86 36 | ATOM 16605 POL STP 3 -3.455 21.263 22.446 0.00 4.80 37 | ATOM 16230 POL STP 3 -3.714 21.631 22.540 0.00 5.04 38 | ATOM 16230 POL STP 3 -3.604 21.628 22.473 0.00 4.98 39 | ATOM 11206 POL STP 3 -1.814 26.711 17.757 0.00 3.21 40 | ATOM 12786 POL STP 3 -2.442 26.767 18.368 0.00 3.64 41 | ATOM 11180 POL STP 3 -4.244 25.999 21.925 0.00 5.67 42 | ATOM 11180 POL STP 3 -3.844 26.138 21.895 0.00 5.43 43 | ATOM 10449 POL STP 3 -5.071 25.204 22.273 0.00 5.99 44 | ATOM 14125 POL STP 3 -5.419 25.120 21.862 0.00 5.68 45 | ATOM 11180 POL STP 3 -4.082 25.773 21.492 0.00 5.69 46 | ATOM 10473 POL STP 3 -3.313 26.160 
21.711 0.00 5.15 47 | ATOM 11180 POL STP 3 -3.477 26.143 21.683 0.00 5.27 48 | ATOM 11180 POL STP 3 -3.348 26.241 21.641 0.00 5.15 49 | ATOM 5554 POL STP 3 -3.144 24.728 21.238 0.00 4.89 50 | ATOM 15237 POL STP 3 -2.804 23.304 21.468 0.00 4.45 51 | ATOM 11180 POL STP 3 -4.059 25.773 21.477 0.00 5.68 52 | ATOM 5553 POL STP 3 -3.218 25.481 19.338 0.00 4.28 53 | ATOM 5826 POL STP 3 -3.870 26.095 19.883 0.00 4.99 54 | ATOM 9195 POL STP 3 -2.473 25.404 18.616 0.00 3.55 55 | ATOM 10444 POL STP 3 -4.170 26.046 19.652 0.00 4.84 56 | ATOM 10452 POL STP 3 -3.961 25.996 20.489 0.00 5.22 57 | ATOM 11180 POL STP 3 -3.736 26.306 19.716 0.00 4.85 58 | ATOM 11206 POL STP 3 -1.813 26.681 17.761 0.00 3.20 59 | ATOM 11180 POL STP 3 -4.018 25.881 21.023 0.00 5.45 60 | ATOM 11180 POL STP 3 -3.957 26.007 20.542 0.00 5.23 61 | ATOM 12788 POL STP 3 -3.700 26.338 19.676 0.00 4.81 62 | ATOM 12786 POL STP 3 -3.726 26.350 19.662 0.00 4.80 63 | ATOM 11206 POL STP 3 -1.615 26.762 17.622 0.00 3.10 64 | ATOM 12786 POL STP 3 -3.977 27.033 18.431 0.00 3.98 65 | ATOM 12788 POL STP 3 -1.502 27.244 17.932 0.00 3.07 66 | ATOM 12786 POL STP 3 -3.755 26.707 19.534 0.00 4.47 67 | ATOM 5563 POL STP 3 -1.250 23.859 21.190 0.00 3.76 68 | ATOM 15242 POL STP 3 -1.208 23.456 21.261 0.00 3.70 69 | ATOM 8201 POL STP 3 -0.273 25.148 21.768 0.00 3.17 70 | ATOM 17048 POL STP 3 -0.858 22.854 21.574 0.00 3.42 71 | ATOM 17046 POL STP 3 -1.343 21.381 22.227 0.00 3.50 72 | ATOM 17048 POL STP 3 -1.507 21.732 22.256 0.00 3.67 73 | ATOM 14133 POL STP 3 -6.421 23.404 20.544 0.00 4.18 74 | ATOM 14133 APOL STP 3 -6.514 23.267 20.329 0.00 4.03 75 | ATOM 14133 POL STP 3 -8.068 22.450 19.831 0.00 3.09 76 | ATOM 14284 POL STP 3 -2.003 27.475 19.958 0.00 3.58 77 | ATOM 14284 POL STP 3 -2.068 27.443 19.882 0.00 3.60 78 | TER 79 | END 80 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket3_atm.pdb: 
-------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 4: 6 | HEADER 0 - Pocket Score : 9.4369 7 | HEADER 1 - Drug Score : 0.0151 8 | HEADER 2 - Number of V. Vertices : 38 9 | HEADER 3 - Mean alpha-sphere radius : 3.8006 10 | HEADER 4 - Mean alpha-sphere SA : 0.5440 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 2.0000 13 | HEADER 7 - Polarity Score : 6 14 | HEADER 8 - Volume Score : 3.4444 15 | HEADER 9 - Real volume (approximation) : 317.2007 16 | HEADER 10 - Charge Score : 0 17 | HEADER 11 - Local hydrophobic density Score : 0.0000 18 | HEADER 12 - Number of apolar alpha sphere : 1 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.0263 20 | ATOM 455 OD1 ASP B 66 5.860 18.995 -1.981 1.00 0.00 O 0 21 | ATOM 2053 CA GLY B 270 9.573 16.999 3.743 1.00 0.00 C 0 22 | ATOM 17 NZ LYS B 2 8.810 22.073 -3.139 1.00 0.00 N 0 23 | ATOM 2055 O GLY B 270 7.396 17.521 2.913 1.00 0.00 O 0 24 | ATOM 16 CE LYS B 2 7.846 22.729 -2.211 1.00 0.00 C 0 25 | ATOM 2321 O SER B 306 8.842 24.533 0.114 1.00 0.00 O 0 26 | ATOM 452 O ASP B 66 4.941 20.591 2.518 1.00 0.00 O 0 27 | ATOM 2054 C GLY B 270 8.125 17.424 3.903 1.00 0.00 C 0 28 | ATOM 2319 CA SER B 306 8.253 25.040 2.378 1.00 0.00 C 0 29 | ATOM 2311 O HIS B 305 10.268 24.804 4.224 1.00 0.00 O 0 30 | ATOM 2323 OG SER B 306 6.752 23.965 3.951 1.00 0.00 O 0 31 | ATOM 2052 N GLY B 270 10.318 16.859 4.984 1.00 0.00 N 0 32 | ATOM 2059 O ARG B 271 7.338 19.544 7.060 1.00 0.00 O 0 33 | ATOM 2056 N ARG B 271 7.707 17.675 5.144 1.00 0.00 N 0 34 | ATOM 2042 O GLY B 268 10.199 18.322 7.755 1.00 0.00 O 0 35 | ATOM 2071 CB ALA B 272 5.977 22.340 6.659 1.00 0.00 C 0 36 | ATOM 2038 CD2 LEU B 267 8.395 23.797 8.770 1.00 0.00 C 0 37 | ATOM 2037 CD1 LEU B 267 10.611 24.187 9.869 
1.00 0.00 C 0 38 | ATOM 2035 CB LEU B 267 9.419 22.044 10.242 1.00 0.00 C 0 39 | ATOM 2058 C ARG B 271 6.348 19.294 6.366 1.00 0.00 C 0 40 | ATOM 2322 CB SER B 306 6.813 24.658 2.721 1.00 0.00 C 0 41 | ATOM 453 CB ASP B 66 4.528 20.423 -0.652 1.00 0.00 C 0 42 | TER 43 | END 44 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket3_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 4: 7 | HEADER 0 - Pocket Score : 9.4369 8 | HEADER 1 - Drug Score : 0.0151 9 | HEADER 2 - Number of V. Vertices : 38 10 | HEADER 3 - Mean alpha-sphere radius : 3.8006 11 | HEADER 4 - Mean alpha-sphere SA : 0.5440 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 2.0000 14 | HEADER 7 - Polarity Score : 6 15 | HEADER 8 - Volume Score : 3.4444 16 | HEADER 9 - Real volume (approximation) : 317.2007 17 | HEADER 10 - Charge Score : 0 18 | HEADER 11 - Local hydrophobic density Score : 0.0000 19 | HEADER 12 - Number of apolar alpha sphere : 1 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.0263 21 | ATOM 3693 POL STP 4 10.275 18.446 -0.622 0.00 4.65 22 | ATOM 5191 POL STP 4 9.201 19.749 0.107 0.00 4.01 23 | ATOM 7540 POL STP 4 10.191 20.295 0.749 0.00 4.49 24 | ATOM 13751 POL STP 4 8.126 20.353 0.627 0.00 3.71 25 | ATOM 7540 POL STP 4 10.195 20.294 0.750 0.00 4.49 26 | ATOM 7540 POL STP 4 9.783 20.983 2.262 0.00 4.26 27 | ATOM 13746 POL STP 4 8.566 20.763 1.082 0.00 3.90 28 | ATOM 11939 POL STP 4 9.649 21.053 2.369 0.00 4.22 29 | ATOM 7540 POL STP 4 9.800 21.014 2.325 0.00 4.26 30 | ATOM 11939 POL STP 4 9.659 21.053 2.371 0.00 4.23 31 | ATOM 11939 POL STP 4 9.343 21.104 2.598 0.00 4.09 32 | ATOM 13742 POL STP 4 8.851 21.128 2.144 
0.00 3.96 33 | ATOM 3077 POL STP 4 10.232 20.834 4.632 0.00 3.99 34 | ATOM 3722 POL STP 4 9.991 20.867 4.445 0.00 3.95 35 | ATOM 4965 POL STP 4 10.004 20.504 4.732 0.00 3.67 36 | ATOM 4919 POL STP 4 10.250 20.836 4.656 0.00 3.99 37 | ATOM 4966 POL STP 4 9.941 20.648 4.579 0.00 3.76 38 | ATOM 9813 POL STP 4 9.295 21.083 4.126 0.00 3.85 39 | ATOM 4966 POL STP 4 9.948 20.288 4.932 0.00 3.45 40 | ATOM 9813 POL STP 4 8.502 20.917 4.040 0.00 3.52 41 | ATOM 9309 POL STP 4 9.281 22.094 5.987 0.00 3.38 42 | ATOM 9813 POL STP 4 9.017 21.866 5.292 0.00 3.37 43 | ATOM 9313 POL STP 4 10.233 21.732 6.299 0.00 3.71 44 | ATOM 9316 POL STP 4 11.148 21.954 6.726 0.00 3.89 45 | ATOM 11299 POL STP 4 9.939 21.490 7.084 0.00 3.25 46 | ATOM 11300 APOL STP 4 11.120 21.933 6.791 0.00 3.85 47 | ATOM 11939 POL STP 4 9.401 21.116 2.661 0.00 4.10 48 | ATOM 9813 POL STP 4 7.887 21.213 4.577 0.00 3.04 49 | ATOM 9813 POL STP 4 8.349 20.893 4.034 0.00 3.46 50 | ATOM 9813 POL STP 4 8.258 20.866 3.968 0.00 3.45 51 | ATOM 13756 POL STP 4 7.743 20.510 3.866 0.00 3.11 52 | ATOM 13746 POL STP 4 8.060 20.826 3.856 0.00 3.40 53 | ATOM 13746 POL STP 4 8.377 20.896 3.289 0.00 3.54 54 | ATOM 13742 POL STP 4 8.851 21.128 2.145 0.00 3.96 55 | ATOM 11959 POL STP 4 8.236 21.598 1.879 0.00 3.48 56 | ATOM 13742 POL STP 4 8.823 21.148 2.132 0.00 3.94 57 | ATOM 13742 POL STP 4 8.223 21.582 1.870 0.00 3.49 58 | ATOM 13751 POL STP 4 7.865 20.485 0.526 0.00 3.54 59 | TER 60 | END 61 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket4_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 5: 6 | HEADER 0 - Pocket Score : 8.7122 7 | HEADER 1 - Drug Score : 0.0209 8 | HEADER 2 - Number of V. 
Vertices : 40 9 | HEADER 3 - Mean alpha-sphere radius : 3.6301 10 | HEADER 4 - Mean alpha-sphere SA : 0.5389 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 29.0000 13 | HEADER 7 - Polarity Score : 3 14 | HEADER 8 - Volume Score : 2.8889 15 | HEADER 9 - Real volume (approximation) : 199.4893 16 | HEADER 10 - Charge Score : 0 17 | HEADER 11 - Local hydrophobic density Score : 12.0000 18 | HEADER 12 - Number of apolar alpha sphere : 13 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.3250 20 | ATOM 824 CD2 LEU B 113 -0.926 46.988 16.686 1.00 0.00 C 0 21 | ATOM 2532 CB ILE B 333 -4.258 45.069 16.779 1.00 0.00 C 0 22 | ATOM 2534 CG2 ILE B 333 -3.496 43.832 17.235 1.00 0.00 C 0 23 | ATOM 1978 OG SER B 258 -1.675 43.123 19.946 1.00 0.00 O 0 24 | ATOM 2529 CA ILE B 333 -5.333 45.456 17.828 1.00 0.00 C 0 25 | ATOM 2539 O ALA B 334 -8.094 48.772 19.973 1.00 0.00 O 0 26 | ATOM 2530 C ILE B 333 -5.668 46.936 17.681 1.00 0.00 C 0 27 | ATOM 845 CB ALA B 116 -1.310 45.121 22.731 1.00 0.00 C 0 28 | ATOM 2523 O ASP B 332 -6.429 44.034 19.912 1.00 0.00 O 0 29 | ATOM 820 O LEU B 113 1.499 46.382 20.827 1.00 0.00 O 0 30 | ATOM 821 CB LEU B 113 1.040 46.210 18.033 1.00 0.00 C 0 31 | ATOM 818 CA LEU B 113 1.379 47.535 18.715 1.00 0.00 C 0 32 | ATOM 833 N ASP B 115 1.680 49.023 22.867 1.00 0.00 N 0 33 | ATOM 811 O GLN B 112 0.954 50.130 17.759 1.00 0.00 O 0 34 | ATOM 837 CB ASP B 115 -0.176 50.492 23.534 1.00 0.00 C 0 35 | ATOM 2531 O ILE B 333 -4.780 47.757 17.460 1.00 0.00 O 0 36 | ATOM 841 N ALA B 116 -0.202 47.118 23.619 1.00 0.00 N 0 37 | ATOM 819 C LEU B 113 1.959 47.274 20.110 1.00 0.00 C 0 38 | ATOM 2545 CB ALA B 335 -6.147 52.517 20.325 1.00 0.00 C 0 39 | ATOM 840 OD2 ASP B 115 -2.343 50.562 24.551 1.00 0.00 O 0 40 | ATOM 842 CA ALA B 116 -0.967 45.925 23.979 1.00 0.00 C 0 41 | ATOM 2542 CA ALA B 335 -7.116 51.342 20.363 1.00 0.00 C 0 42 | ATOM 2541 N ALA B 335 -7.003 50.562 19.139 1.00 0.00 N 0 43 | ATOM 2538 C ALA B 334 -7.530 49.346 19.037 
1.00 0.00 C 0 44 | TER 45 | END 46 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket4_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 5: 7 | HEADER 0 - Pocket Score : 8.7122 8 | HEADER 1 - Drug Score : 0.0209 9 | HEADER 2 - Number of V. Vertices : 40 10 | HEADER 3 - Mean alpha-sphere radius : 3.6301 11 | HEADER 4 - Mean alpha-sphere SA : 0.5389 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 29.0000 14 | HEADER 7 - Polarity Score : 3 15 | HEADER 8 - Volume Score : 2.8889 16 | HEADER 9 - Real volume (approximation) : 199.4893 17 | HEADER 10 - Charge Score : 0 18 | HEADER 11 - Local hydrophobic density Score : 12.0000 19 | HEADER 12 - Number of apolar alpha sphere : 13 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.3250 21 | ATOM 14280 APOL STP 5 -2.558 46.092 19.256 0.00 3.17 22 | ATOM 14280 APOL STP 5 -2.582 46.082 19.269 0.00 3.17 23 | ATOM 11098 POL STP 5 -4.629 47.306 21.649 0.00 4.12 24 | ATOM 12774 APOL STP 5 -4.247 46.816 21.143 0.00 3.74 25 | ATOM 8974 POL STP 5 -1.372 46.142 19.873 0.00 3.04 26 | ATOM 11084 APOL STP 5 -1.688 46.352 19.764 0.00 3.23 27 | ATOM 11084 APOL STP 5 -1.625 46.713 19.880 0.00 3.28 28 | ATOM 11084 APOL STP 5 -1.573 46.817 19.865 0.00 3.25 29 | ATOM 8811 POL STP 5 -0.484 49.408 20.532 0.00 3.21 30 | ATOM 9034 POL STP 5 -2.089 49.038 20.241 0.00 4.08 31 | ATOM 6700 APOL STP 5 -1.004 48.741 20.774 0.00 3.37 32 | ATOM 8811 APOL STP 5 -0.783 48.945 20.720 0.00 3.27 33 | ATOM 8805 POL STP 5 -0.902 48.741 20.837 0.00 3.30 34 | ATOM 9036 POL STP 5 -2.134 48.740 20.409 0.00 4.08 35 | ATOM 8976 POL STP 5 -1.041 48.474 20.739 0.00 3.29 36 | ATOM 8976 POL STP 5 -1.758 
47.884 20.458 0.00 3.61 37 | ATOM 9036 POL STP 5 -2.145 48.144 20.355 0.00 3.93 38 | ATOM 11084 APOL STP 5 -1.689 46.970 19.955 0.00 3.36 39 | ATOM 9034 POL STP 5 -2.584 50.256 20.068 0.00 4.23 40 | ATOM 11072 POL STP 5 -2.004 48.888 19.796 0.00 3.80 41 | ATOM 9036 POL STP 5 -2.941 49.892 20.503 0.00 4.15 42 | ATOM 9038 POL STP 5 -2.792 48.758 20.900 0.00 4.10 43 | ATOM 11079 APOL STP 5 -2.124 47.872 20.206 0.00 3.82 44 | ATOM 6702 POL STP 5 -3.440 47.940 22.431 0.00 3.55 45 | ATOM 9038 POL STP 5 -3.260 48.383 21.209 0.00 4.09 46 | ATOM 4443 POL STP 5 -3.672 49.509 20.983 0.00 3.95 47 | ATOM 9036 POL STP 5 -3.511 49.523 20.844 0.00 4.02 48 | ATOM 4752 POL STP 5 -4.286 48.590 21.512 0.00 4.11 49 | ATOM 9038 POL STP 5 -4.251 48.555 21.482 0.00 4.13 50 | ATOM 9038 POL STP 5 -4.254 48.034 21.689 0.00 4.27 51 | ATOM 9038 POL STP 5 -4.449 48.554 21.222 0.00 3.86 52 | ATOM 11096 POL STP 5 -5.154 48.057 20.492 0.00 3.07 53 | ATOM 11098 POL STP 5 -4.453 47.598 21.602 0.00 4.16 54 | ATOM 11084 POL STP 5 -2.487 46.504 19.760 0.00 3.48 55 | ATOM 12774 POL STP 5 -2.724 46.401 19.865 0.00 3.44 56 | ATOM 12774 POL STP 5 -2.576 46.309 19.509 0.00 3.34 57 | ATOM 12774 APOL STP 5 -3.946 46.860 20.899 0.00 3.65 58 | ATOM 12774 POL STP 5 -3.676 45.565 20.589 0.00 3.22 59 | ATOM 14280 APOL STP 5 -2.568 46.109 19.274 0.00 3.19 60 | ATOM 14276 APOL STP 5 -2.663 46.267 19.118 0.00 3.07 61 | TER 62 | END 63 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket5_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 6: 6 | HEADER 0 - Pocket Score : 4.1031 7 | HEADER 1 - Drug Score : 0.0270 8 | HEADER 2 - Number of V. 
Vertices : 40 9 | HEADER 3 - Mean alpha-sphere radius : 3.7167 10 | HEADER 4 - Mean alpha-sphere SA : 0.5788 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 33.8000 13 | HEADER 7 - Polarity Score : 2 14 | HEADER 8 - Volume Score : 3.0000 15 | HEADER 9 - Real volume (approximation) : 482.5637 16 | HEADER 10 - Charge Score : 1 17 | HEADER 11 - Local hydrophobic density Score : 18.1905 18 | HEADER 12 - Number of apolar alpha sphere : 21 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.5250 20 | ATOM 25 CE LYS B 3 -0.356 16.396 -4.234 1.00 0.00 C 0 21 | ATOM 443 CB ALA B 64 -0.976 13.625 -0.882 1.00 0.00 C 0 22 | ATOM 424 CD1 LEU B 61 -5.676 16.959 -0.218 1.00 0.00 C 0 23 | ATOM 26 NZ LYS B 3 -0.114 14.921 -4.231 1.00 0.00 N 0 24 | ATOM 273 CG2 THR B 38 -6.349 19.627 -3.455 1.00 0.00 C 0 25 | ATOM 272 OG1 THR B 38 -5.445 19.742 -1.223 1.00 0.00 O 0 26 | ATOM 22 CB LYS B 3 0.384 19.347 -3.225 1.00 0.00 C 0 27 | ATOM 255 CG ARG B 36 -2.968 20.872 -6.723 1.00 0.00 C 0 28 | ATOM 253 O ARG B 36 -2.193 22.460 -3.936 1.00 0.00 O 0 29 | ATOM 27 N ILE B 4 -0.630 22.107 -1.585 1.00 0.00 N 0 30 | ATOM 30 O ILE B 4 -3.268 22.282 -0.555 1.00 0.00 O 0 31 | ATOM 29 C ILE B 4 -2.284 22.155 0.172 1.00 0.00 C 0 32 | ATOM 448 CB ALA B 65 -0.165 18.573 1.157 1.00 0.00 C 0 33 | ATOM 23 CG LYS B 3 0.698 18.673 -4.566 1.00 0.00 C 0 34 | ATOM 35 N ALA B 5 -2.314 21.493 1.326 1.00 0.00 N 0 35 | ATOM 421 O LEU B 61 -2.603 15.972 1.501 1.00 0.00 O 0 36 | ATOM 444 N ALA B 65 0.350 16.276 0.532 1.00 0.00 N 0 37 | ATOM 445 CA ALA B 65 0.758 17.664 0.364 1.00 0.00 C 0 38 | ATOM 39 CB ALA B 5 -3.323 19.651 2.586 1.00 0.00 C 0 39 | ATOM 422 CB LEU B 61 -5.920 15.599 1.837 1.00 0.00 C 0 40 | ATOM 419 CA LEU B 61 -4.665 14.740 1.666 1.00 0.00 C 0 41 | ATOM 414 CG GLN B 60 -6.258 10.451 1.913 1.00 0.00 C 0 42 | ATOM 393 O ALA B 57 -7.759 12.902 3.123 1.00 0.00 O 0 43 | ATOM 415 CD GLN B 60 -6.838 9.281 1.122 1.00 0.00 C 0 44 | ATOM 417 NE2 GLN B 60 -6.059 8.762 0.174 
1.00 0.00 N 0 45 | ATOM 412 O GLN B 60 -2.888 12.603 1.636 1.00 0.00 O 0 46 | ATOM 418 N LEU B 61 -4.813 13.487 2.399 1.00 0.00 N 0 47 | ATOM 411 C GLN B 60 -3.902 12.520 2.328 1.00 0.00 C 0 48 | ATOM 256 CD ARG B 36 -3.715 20.790 -8.045 1.00 0.00 C 0 49 | TER 50 | END 51 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket5_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 6: 7 | HEADER 0 - Pocket Score : 4.1031 8 | HEADER 1 - Drug Score : 0.0270 9 | HEADER 2 - Number of V. Vertices : 40 10 | HEADER 3 - Mean alpha-sphere radius : 3.7167 11 | HEADER 4 - Mean alpha-sphere SA : 0.5788 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 33.8000 14 | HEADER 7 - Polarity Score : 2 15 | HEADER 8 - Volume Score : 3.0000 16 | HEADER 9 - Real volume (approximation) : 482.5637 17 | HEADER 10 - Charge Score : 1 18 | HEADER 11 - Local hydrophobic density Score : 18.1905 19 | HEADER 12 - Number of apolar alpha sphere : 21 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.5250 21 | ATOM 3066 APOL STP 6 -3.844 15.069 -3.098 0.00 3.90 22 | ATOM 3066 APOL STP 6 -5.191 14.844 -4.850 0.00 5.11 23 | ATOM 3186 APOL STP 6 -3.633 17.319 -3.134 0.00 3.58 24 | ATOM 3999 APOL STP 6 -3.018 18.535 -3.446 0.00 3.51 25 | ATOM 5513 APOL STP 6 -2.994 18.703 -3.956 0.00 3.52 26 | ATOM 3999 POL STP 6 -2.562 19.809 -2.367 0.00 3.10 27 | ATOM 11007 POL STP 6 -2.596 19.883 -2.334 0.00 3.06 28 | ATOM 3999 POL STP 6 -2.255 19.336 -1.382 0.00 3.22 29 | ATOM 3999 POL STP 6 -2.989 19.198 -3.492 0.00 3.39 30 | ATOM 11007 POL STP 6 -2.262 19.348 -1.388 0.00 3.21 31 | ATOM 9941 APOL STP 6 -2.118 19.309 -1.251 0.00 3.19 32 | ATOM 9944 APOL STP 6 
-2.226 19.237 -1.278 0.00 3.26 33 | ATOM 5513 APOL STP 6 -2.982 19.139 -3.778 0.00 3.42 34 | ATOM 5513 APOL STP 6 -2.451 18.740 -4.460 0.00 3.15 35 | ATOM 9941 POL STP 6 -2.469 19.242 -0.736 0.00 3.06 36 | ATOM 3066 APOL STP 6 -2.783 16.320 -1.867 0.00 3.39 37 | ATOM 3187 POL STP 6 -2.452 17.764 -1.631 0.00 3.61 38 | ATOM 3994 POL STP 6 -2.010 16.532 -1.549 0.00 3.16 39 | ATOM 3997 POL STP 6 -2.446 17.770 -1.628 0.00 3.61 40 | ATOM 3994 POL STP 6 -2.099 17.513 -1.511 0.00 3.42 41 | ATOM 9944 APOL STP 6 -2.158 17.621 -1.526 0.00 3.48 42 | ATOM 9944 POL STP 6 -2.010 17.520 -1.237 0.00 3.20 43 | ATOM 9944 APOL STP 6 -2.238 17.692 -1.552 0.00 3.52 44 | ATOM 9944 POL STP 6 -2.412 17.856 -1.545 0.00 3.59 45 | ATOM 9942 POL STP 6 -2.888 18.482 -0.181 0.00 3.04 46 | ATOM 9944 APOL STP 6 -2.133 17.663 -1.544 0.00 3.46 47 | ATOM 4870 APOL STP 6 -7.187 13.047 -1.494 0.00 4.38 48 | ATOM 5860 APOL STP 6 -7.970 13.098 -1.554 0.00 4.69 49 | ATOM 3068 APOL STP 6 -7.413 12.827 -2.228 0.00 4.91 50 | ATOM 4872 APOL STP 6 -6.603 12.795 -2.104 0.00 4.66 51 | ATOM 5860 APOL STP 6 -8.326 13.030 -1.858 0.00 5.02 52 | ATOM 5860 APOL STP 6 -7.018 13.054 -0.284 0.00 3.49 53 | ATOM 4872 APOL STP 6 -5.478 12.709 -2.810 0.00 4.98 54 | ATOM 6945 POL STP 6 -5.214 12.533 -2.351 0.00 4.62 55 | ATOM 6945 POL STP 6 -5.389 12.376 -1.558 0.00 4.06 56 | ATOM 11309 POL STP 6 -6.996 13.048 -0.244 0.00 3.46 57 | ATOM 11312 POL STP 6 -6.988 13.078 -0.191 0.00 3.41 58 | ATOM 11309 POL STP 6 -5.358 12.394 -1.031 0.00 3.64 59 | ATOM 11314 POL STP 6 -5.309 12.343 -0.850 0.00 3.48 60 | ATOM 5513 APOL STP 6 -4.545 16.422 -6.430 0.00 4.73 61 | TER 62 | END 63 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket6_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 
3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 7: 6 | HEADER 0 - Pocket Score : 2.1149 7 | HEADER 1 - Drug Score : 0.0379 8 | HEADER 2 - Number of V. Vertices : 37 9 | HEADER 3 - Mean alpha-sphere radius : 4.1927 10 | HEADER 4 - Mean alpha-sphere SA : 0.5667 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 42.3333 13 | HEADER 7 - Polarity Score : 1 14 | HEADER 8 - Volume Score : 4.0000 15 | HEADER 9 - Real volume (approximation) : 776.3873 16 | HEADER 10 - Charge Score : 1 17 | HEADER 11 - Local hydrophobic density Score : 13.0000 18 | HEADER 12 - Number of apolar alpha sphere : 14 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.3784 20 | ATOM 584 CB ALA B 84 -11.074 9.100 21.538 1.00 0.00 C 0 21 | ATOM 1077 CD ARG B 145 -2.317 7.950 25.886 1.00 0.00 C 0 22 | ATOM 1062 OE1 GLN B 143 -1.420 4.559 24.472 1.00 0.00 O 0 23 | ATOM 628 OE1 GLN B 89 -6.663 12.953 20.671 1.00 0.00 O 0 24 | ATOM 1080 NH1 ARG B 145 -1.203 10.176 24.601 1.00 0.00 N 0 25 | ATOM 1063 NE2 GLN B 143 0.328 5.630 23.564 1.00 0.00 N 0 26 | ATOM 655 CG ARG B 93 -4.935 11.740 15.736 1.00 0.00 C 0 27 | ATOM 1046 CD1 PHE B 141 0.777 11.953 20.069 1.00 0.00 C 0 28 | ATOM 1048 CE1 PHE B 141 0.185 12.978 19.333 1.00 0.00 C 0 29 | ATOM 583 O ALA B 84 -10.684 11.299 19.399 1.00 0.00 O 0 30 | ATOM 656 CD ARG B 93 -6.440 11.595 15.874 1.00 0.00 C 0 31 | ATOM 659 NH1 ARG B 93 -9.182 10.995 15.638 1.00 0.00 N 0 32 | ATOM 1081 NH2 ARG B 145 -2.470 11.616 25.854 1.00 0.00 N 0 33 | ATOM 629 NE2 GLN B 89 -7.477 14.433 22.148 1.00 0.00 N 0 34 | ATOM 648 CD1 LEU B 92 -3.824 15.957 21.103 1.00 0.00 C 0 35 | ATOM 1031 O TYR B 140 0.019 12.566 23.885 1.00 0.00 O 0 36 | ATOM 685 CG LYS B 96 1.624 10.982 16.217 1.00 0.00 C 0 37 | ATOM 684 CB LYS B 96 1.025 12.311 15.784 1.00 0.00 C 0 38 | ATOM 651 CA ARG B 93 -3.195 13.379 14.879 1.00 0.00 C 0 39 | ATOM 645 O LEU B 92 -1.154 14.729 16.204 1.00 0.00 O 
0 40 | ATOM 646 CB LEU B 92 -3.030 15.645 18.745 1.00 0.00 C 0 41 | ATOM 624 O GLN B 89 -6.080 14.674 16.644 1.00 0.00 O 0 42 | ATOM 650 N ARG B 93 -3.360 14.480 15.825 1.00 0.00 N 0 43 | ATOM 644 C LEU B 92 -2.319 15.072 16.407 1.00 0.00 C 0 44 | ATOM 1079 CZ ARG B 145 -2.118 10.379 25.538 1.00 0.00 C 0 45 | ATOM 687 CE LYS B 96 3.402 9.900 17.581 1.00 0.00 C 0 46 | ATOM 1043 O PHE B 141 2.752 8.611 23.062 1.00 0.00 O 0 47 | ATOM 1044 CB PHE B 141 2.620 11.055 21.544 1.00 0.00 C 0 48 | ATOM 1041 CA PHE B 141 2.037 10.902 22.960 1.00 0.00 C 0 49 | TER 50 | END 51 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket6_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 7: 7 | HEADER 0 - Pocket Score : 2.1149 8 | HEADER 1 - Drug Score : 0.0379 9 | HEADER 2 - Number of V. 
Vertices : 37 10 | HEADER 3 - Mean alpha-sphere radius : 4.1927 11 | HEADER 4 - Mean alpha-sphere SA : 0.5667 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 42.3333 14 | HEADER 7 - Polarity Score : 1 15 | HEADER 8 - Volume Score : 4.0000 16 | HEADER 9 - Real volume (approximation) : 776.3873 17 | HEADER 10 - Charge Score : 1 18 | HEADER 11 - Local hydrophobic density Score : 13.0000 19 | HEADER 12 - Number of apolar alpha sphere : 14 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.3784 21 | ATOM 5710 POL STP 7 -5.477 7.093 20.879 0.00 5.98 22 | ATOM 16639 POL STP 7 -4.363 7.580 20.418 0.00 5.85 23 | ATOM 16639 POL STP 7 -4.116 7.578 20.074 0.00 5.98 24 | ATOM 16639 POL STP 7 -4.025 7.630 19.984 0.00 5.98 25 | ATOM 16639 POL STP 7 -3.614 7.538 20.994 0.00 5.08 26 | ATOM 16258 APOL STP 7 -3.307 10.207 19.602 0.00 4.47 27 | ATOM 16639 POL STP 7 -3.497 8.595 19.932 0.00 5.44 28 | ATOM 16639 POL STP 7 -3.327 7.920 19.741 0.00 5.76 29 | ATOM 7959 POL STP 7 -6.928 8.410 19.349 0.00 4.74 30 | ATOM 7959 POL STP 7 -7.344 7.959 18.810 0.00 4.76 31 | ATOM 10060 POL STP 7 -4.352 13.260 23.341 0.00 3.54 32 | ATOM 14552 POL STP 7 -3.370 12.475 22.344 0.00 3.72 33 | ATOM 13950 APOL STP 7 -1.837 10.662 17.353 0.00 3.66 34 | ATOM 13952 APOL STP 7 -1.893 11.258 17.228 0.00 3.42 35 | ATOM 16258 APOL STP 7 -1.983 9.735 17.880 0.00 4.16 36 | ATOM 13952 APOL STP 7 -1.740 11.808 17.161 0.00 3.13 37 | ATOM 13947 APOL STP 7 -3.150 12.540 20.449 0.00 3.54 38 | ATOM 14552 POL STP 7 -2.954 12.211 21.447 0.00 3.86 39 | ATOM 5705 POL STP 7 -4.424 12.900 18.549 0.00 3.09 40 | ATOM 13950 APOL STP 7 -3.404 11.946 19.141 0.00 3.74 41 | ATOM 13952 APOL STP 7 -2.938 12.332 18.346 0.00 3.34 42 | ATOM 15298 APOL STP 7 -2.753 12.532 18.245 0.00 3.17 43 | ATOM 13952 APOL STP 7 -2.410 12.049 17.691 0.00 3.21 44 | ATOM 13952 POL STP 7 -2.251 12.168 17.581 0.00 3.11 45 | ATOM 15298 POL STP 7 -2.275 12.274 17.651 0.00 3.06 46 | ATOM 16630 POL STP 7 -3.338 12.175 22.240 
0.00 3.76 47 | ATOM 16637 POL STP 7 -2.961 11.731 21.403 0.00 3.97 48 | ATOM 16630 POL STP 7 -4.004 11.313 22.557 0.00 3.65 49 | ATOM 16637 POL STP 7 -2.967 11.673 21.370 0.00 3.97 50 | ATOM 16637 POL STP 7 -2.932 11.716 21.403 0.00 3.95 51 | ATOM 16257 APOL STP 7 -0.884 7.204 18.759 0.00 5.20 52 | ATOM 16258 APOL STP 7 -2.325 7.140 18.459 0.00 5.95 53 | ATOM 16257 POL STP 7 0.198 7.584 19.587 0.00 4.43 54 | ATOM 16257 APOL STP 7 0.880 8.187 19.810 0.00 3.78 55 | ATOM 16258 APOL STP 7 0.191 8.986 21.157 0.00 3.21 56 | ATOM 16639 POL STP 7 -0.421 8.804 21.358 0.00 3.61 57 | ATOM 16639 POL STP 7 -0.615 8.510 21.158 0.00 3.87 58 | TER 59 | END 60 | -------------------------------------------------------------------------------- /datasets/custom/1a9t/1a9t_lig_1.pdb: -------------------------------------------------------------------------------- 1 | HETATM 1 N1 HPA A 290 26.078 89.932 73.254 1.00 2.02 N 2 | HETATM 2 C2 HPA A 290 25.067 89.042 73.005 1.00 2.07 C 3 | HETATM 3 N3 HPA A 290 23.780 89.402 73.068 1.00 7.39 N 4 | HETATM 4 C4 HPA A 290 23.609 90.739 73.400 1.00 6.34 C 5 | HETATM 5 C5 HPA A 290 24.582 91.671 73.657 1.00 3.04 C 6 | HETATM 6 C6 HPA A 290 25.936 91.296 73.593 1.00 5.62 C 7 | HETATM 7 O6 HPA A 290 26.919 92.035 73.788 1.00 7.06 O 8 | HETATM 8 N7 HPA A 290 24.023 92.876 73.957 1.00 2.84 N 9 | HETATM 9 C8 HPA A 290 22.672 92.685 73.878 1.00 5.21 C 10 | HETATM 10 N9 HPA A 290 22.380 91.387 73.534 1.00 6.23 N 11 | TER 11 HPA A 290 12 | END 13 | -------------------------------------------------------------------------------- /datasets/custom/1a9t/1a9t_site_1.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 N SER A 33 14.515 92.489 75.414 1.00 12.39 N 2 | ATOM 2 CA SER A 33 15.056 93.101 76.625 1.00 15.31 C 3 | ATOM 3 C SER A 33 14.979 94.618 76.598 1.00 15.60 C 4 | ATOM 4 O SER A 33 15.444 95.257 75.645 1.00 14.10 O 5 | ATOM 5 CB SER A 33 16.509 92.672 76.838 1.00 13.75 C 6 | ATOM 6 OG SER A 33 16.602 
91.297 77.171 1.00 15.62 O 7 | ATOM 7 N TYR A 88 18.783 79.740 76.620 1.00 5.41 N 8 | ATOM 8 CA TYR A 88 19.667 79.718 77.792 1.00 4.99 C 9 | ATOM 9 C TYR A 88 18.934 79.583 79.133 1.00 5.48 C 10 | ATOM 10 O TYR A 88 19.565 79.376 80.163 1.00 6.48 O 11 | ATOM 11 CB TYR A 88 20.612 80.934 77.820 1.00 6.31 C 12 | ATOM 12 CG TYR A 88 19.943 82.283 78.007 1.00 4.48 C 13 | ATOM 13 CD1 TYR A 88 19.336 82.629 79.220 1.00 2.02 C 14 | ATOM 14 CD2 TYR A 88 19.900 83.203 76.963 1.00 2.01 C 15 | ATOM 15 CE1 TYR A 88 18.699 83.850 79.383 1.00 2.05 C 16 | ATOM 16 CE2 TYR A 88 19.268 84.422 77.113 1.00 3.36 C 17 | ATOM 17 CZ TYR A 88 18.667 84.740 78.319 1.00 3.76 C 18 | ATOM 18 OH TYR A 88 17.993 85.928 78.434 1.00 8.75 O 19 | ATOM 19 N ASN A 115 14.605 92.990 69.439 1.00 2.18 N 20 | ATOM 20 CA ASN A 115 15.559 92.480 70.406 1.00 2.69 C 21 | ATOM 21 C ASN A 115 16.616 93.526 70.680 1.00 2.06 C 22 | ATOM 22 O ASN A 115 16.671 94.555 70.013 1.00 3.81 O 23 | ATOM 23 CB ASN A 115 16.207 91.180 69.906 1.00 4.04 C 24 | ATOM 24 CG ASN A 115 16.998 91.366 68.616 1.00 5.63 C 25 | ATOM 25 OD1 ASN A 115 17.919 92.175 68.552 1.00 7.94 O 26 | ATOM 26 ND2 ASN A 115 16.648 90.601 67.588 1.00 6.47 N 27 | ATOM 27 N ALA A 116 17.395 93.286 71.720 1.00 2.00 N 28 | ATOM 28 CA ALA A 116 18.491 94.153 72.106 1.00 6.29 C 29 | ATOM 29 C ALA A 116 19.676 93.332 71.611 1.00 5.87 C 30 | ATOM 30 O ALA A 116 19.703 92.118 71.823 1.00 5.76 O 31 | ATOM 31 CB ALA A 116 18.536 94.287 73.627 1.00 2.04 C 32 | ATOM 32 N ALA A 117 20.615 93.955 70.909 1.00 6.43 N 33 | ATOM 33 CA ALA A 117 21.765 93.221 70.391 1.00 4.39 C 34 | ATOM 34 C ALA A 117 23.063 93.977 70.578 1.00 5.22 C 35 | ATOM 35 O ALA A 117 23.070 95.204 70.709 1.00 5.13 O 36 | ATOM 36 CB ALA A 117 21.568 92.903 68.914 1.00 4.02 C 37 | ATOM 37 N GLY A 118 24.161 93.228 70.598 1.00 8.26 N 38 | ATOM 38 CA GLY A 118 25.472 93.826 70.738 1.00 5.53 C 39 | ATOM 39 C GLY A 118 25.914 94.198 69.342 1.00 5.34 C 40 | ATOM 40 O GLY A 118 25.671 93.450 68.391 1.00 
6.80 O 41 | ATOM 41 N GLY A 119 26.542 95.358 69.207 1.00 5.90 N 42 | ATOM 42 CA GLY A 119 26.984 95.806 67.906 1.00 2.94 C 43 | ATOM 43 C GLY A 119 28.329 95.263 67.484 1.00 5.83 C 44 | ATOM 44 O GLY A 119 29.303 95.329 68.237 1.00 7.05 O 45 | ATOM 45 N LEU A 120 28.372 94.702 66.281 1.00 5.80 N 46 | ATOM 46 CA LEU A 120 29.598 94.158 65.721 1.00 4.97 C 47 | ATOM 47 C LEU A 120 30.091 95.111 64.644 1.00 7.66 C 48 | ATOM 48 O LEU A 120 31.286 95.334 64.509 1.00 9.42 O 49 | ATOM 49 CB LEU A 120 29.358 92.767 65.124 1.00 4.31 C 50 | ATOM 50 CG LEU A 120 29.036 91.599 66.066 1.00 5.41 C 51 | ATOM 51 CD1 LEU A 120 28.690 90.361 65.251 1.00 2.70 C 52 | ATOM 52 CD2 LEU A 120 30.213 91.301 66.971 1.00 4.67 C 53 | ATOM 53 N TYR A 192 21.817 89.687 61.568 1.00 3.19 N 54 | ATOM 54 CA TYR A 192 21.799 89.379 62.984 1.00 4.11 C 55 | ATOM 55 C TYR A 192 22.315 87.948 63.161 1.00 4.26 C 56 | ATOM 56 O TYR A 192 21.931 87.051 62.408 1.00 3.82 O 57 | ATOM 57 CB TYR A 192 20.361 89.504 63.497 1.00 2.53 C 58 | ATOM 58 CG TYR A 192 20.143 89.078 64.924 1.00 4.49 C 59 | ATOM 59 CD1 TYR A 192 19.814 87.755 65.235 1.00 4.56 C 60 | ATOM 60 CD2 TYR A 192 20.227 90.000 65.964 1.00 6.21 C 61 | ATOM 61 CE1 TYR A 192 19.571 87.369 66.542 1.00 4.86 C 62 | ATOM 62 CE2 TYR A 192 19.985 89.619 67.279 1.00 3.59 C 63 | ATOM 63 CZ TYR A 192 19.654 88.308 67.556 1.00 4.35 C 64 | ATOM 64 OH TYR A 192 19.377 87.938 68.842 1.00 2.00 O 65 | ATOM 65 N VAL A 193 23.223 87.744 64.109 1.00 5.58 N 66 | ATOM 66 CA VAL A 193 23.742 86.408 64.373 1.00 2.82 C 67 | ATOM 67 C VAL A 193 23.300 85.962 65.764 1.00 6.09 C 68 | ATOM 68 O VAL A 193 23.428 86.705 66.748 1.00 6.78 O 69 | ATOM 69 CB VAL A 193 25.301 86.324 64.231 1.00 6.21 C 70 | ATOM 70 CG1 VAL A 193 26.005 87.179 65.266 1.00 2.00 C 71 | ATOM 71 CG2 VAL A 193 25.765 84.872 64.336 1.00 3.59 C 72 | ATOM 72 N LEU A 195 23.502 83.231 68.937 1.00 2.65 N 73 | ATOM 73 CA LEU A 195 24.305 82.230 69.625 1.00 4.15 C 74 | ATOM 74 C LEU A 195 23.550 81.887 70.916 1.00 
4.51 C 75 | ATOM 75 O LEU A 195 22.630 82.609 71.303 1.00 5.17 O 76 | ATOM 76 CB LEU A 195 25.746 82.704 69.877 1.00 4.28 C 77 | ATOM 77 CG LEU A 195 26.124 84.132 70.274 1.00 10.10 C 78 | ATOM 78 CD1 LEU A 195 25.474 84.536 71.604 1.00 7.30 C 79 | ATOM 79 CD2 LEU A 195 27.656 84.210 70.362 1.00 2.75 C 80 | ATOM 80 N GLY A 197 24.363 81.709 74.454 1.00 4.02 N 81 | ATOM 81 CA GLY A 197 24.412 82.468 75.689 1.00 5.18 C 82 | ATOM 82 C GLY A 197 24.422 81.483 76.849 1.00 6.03 C 83 | ATOM 83 O GLY A 197 24.566 80.281 76.614 1.00 3.01 O 84 | ATOM 84 N PRO A 198 24.188 81.931 78.098 1.00 4.82 N 85 | ATOM 85 CA PRO A 198 23.898 83.297 78.547 1.00 3.78 C 86 | ATOM 86 C PRO A 198 25.108 84.207 78.776 1.00 3.45 C 87 | ATOM 87 O PRO A 198 24.948 85.391 79.052 1.00 7.74 O 88 | ATOM 88 CB PRO A 198 23.149 83.059 79.847 1.00 4.24 C 89 | ATOM 89 CG PRO A 198 23.895 81.905 80.425 1.00 5.69 C 90 | ATOM 90 CD PRO A 198 24.104 80.986 79.228 1.00 4.99 C 91 | ATOM 91 N ASN A 199 26.314 83.663 78.703 1.00 2.05 N 92 | ATOM 92 CA ASN A 199 27.501 84.490 78.907 1.00 4.47 C 93 | ATOM 93 C ASN A 199 27.723 85.410 77.718 1.00 3.88 C 94 | ATOM 94 O ASN A 199 27.227 85.153 76.619 1.00 2.60 O 95 | ATOM 95 CB ASN A 199 28.757 83.627 79.120 1.00 4.63 C 96 | ATOM 96 CG ASN A 199 29.093 82.776 77.911 1.00 7.00 C 97 | ATOM 97 OD1 ASN A 199 28.553 81.681 77.743 1.00 9.89 O 98 | ATOM 98 ND2 ASN A 199 29.985 83.271 77.058 1.00 4.40 N 99 | ATOM 99 N PHE A 200 28.466 86.485 77.944 1.00 4.84 N 100 | ATOM 100 CA PHE A 200 28.782 87.410 76.877 1.00 4.66 C 101 | ATOM 101 C PHE A 200 30.057 86.909 76.193 1.00 4.84 C 102 | ATOM 102 O PHE A 200 30.812 86.132 76.767 1.00 4.81 O 103 | ATOM 103 CB PHE A 200 28.909 88.838 77.404 1.00 3.75 C 104 | ATOM 104 CG PHE A 200 27.583 89.483 77.727 1.00 4.52 C 105 | ATOM 105 CD1 PHE A 200 26.622 89.655 76.739 1.00 4.39 C 106 | ATOM 106 CD2 PHE A 200 27.303 89.931 79.008 1.00 6.31 C 107 | ATOM 107 CE1 PHE A 200 25.402 90.265 77.021 1.00 7.48 C 108 | ATOM 108 CE2 PHE A 200 
26.086 90.543 79.302 1.00 11.38 C 109 | ATOM 109 CZ PHE A 200 25.133 90.712 78.306 1.00 7.63 C 110 | ATOM 110 N GLU A 201 30.291 87.374 74.974 1.00 3.72 N 111 | ATOM 111 CA GLU A 201 31.403 86.932 74.145 1.00 3.15 C 112 | ATOM 112 C GLU A 201 32.828 87.354 74.520 1.00 5.41 C 113 | ATOM 113 O GLU A 201 33.051 88.326 75.245 1.00 5.47 O 114 | ATOM 114 CB GLU A 201 31.124 87.320 72.683 1.00 2.00 C 115 | ATOM 115 CG GLU A 201 29.788 86.812 72.085 1.00 3.69 C 116 | ATOM 116 CD GLU A 201 28.545 87.523 72.639 1.00 6.30 C 117 | ATOM 117 OE1 GLU A 201 28.596 88.751 72.836 1.00 9.20 O 118 | ATOM 118 OE2 GLU A 201 27.513 86.860 72.870 1.00 8.95 O 119 | ATOM 119 N CYS A 206 34.091 87.601 69.116 1.00 6.65 N 120 | ATOM 120 CA CYS A 206 33.538 88.780 68.462 1.00 8.06 C 121 | ATOM 121 C CYS A 206 34.200 89.072 67.120 1.00 6.15 C 122 | ATOM 122 O CYS A 206 33.519 89.394 66.146 1.00 9.13 O 123 | ATOM 123 CB CYS A 206 33.645 89.993 69.385 1.00 9.83 C 124 | ATOM 124 SG CYS A 206 32.446 89.975 70.742 1.00 15.35 S 125 | ATOM 125 N LEU A 209 32.866 86.517 64.677 1.00 7.77 N 126 | ATOM 126 CA LEU A 209 31.486 86.788 64.280 1.00 7.77 C 127 | ATOM 127 C LEU A 209 31.423 87.915 63.262 1.00 8.59 C 128 | ATOM 128 O LEU A 209 30.667 87.846 62.291 1.00 7.66 O 129 | ATOM 129 CB LEU A 209 30.630 87.125 65.504 1.00 8.83 C 130 | ATOM 130 CG LEU A 209 30.424 85.977 66.499 1.00 11.20 C 131 | ATOM 131 CD1 LEU A 209 29.659 86.462 67.720 1.00 9.01 C 132 | ATOM 132 CD2 LEU A 209 29.684 84.836 65.815 1.00 10.52 C 133 | ATOM 133 N ALA A 216 25.297 92.400 64.130 1.00 2.96 N 134 | ATOM 134 CA ALA A 216 24.846 92.455 65.513 1.00 4.05 C 135 | ATOM 135 C ALA A 216 24.717 91.029 66.032 1.00 4.65 C 136 | ATOM 136 O ALA A 216 24.399 90.115 65.276 1.00 3.07 O 137 | ATOM 137 CB ALA A 216 23.504 93.167 65.597 1.00 2.22 C 138 | ATOM 138 N VAL A 217 24.961 90.841 67.321 1.00 5.77 N 139 | ATOM 139 CA VAL A 217 24.882 89.515 67.926 1.00 3.87 C 140 | ATOM 140 C VAL A 217 23.832 89.512 69.044 1.00 7.11 C 141 | ATOM 141 O VAL 
A 217 23.666 90.517 69.749 1.00 5.82 O 142 | ATOM 142 CB VAL A 217 26.266 89.075 68.472 1.00 3.89 C 143 | ATOM 143 CG1 VAL A 217 26.806 90.087 69.488 1.00 4.41 C 144 | ATOM 144 CG2 VAL A 217 26.181 87.686 69.081 1.00 5.16 C 145 | ATOM 145 N GLY A 218 23.124 88.394 69.188 1.00 3.73 N 146 | ATOM 146 CA GLY A 218 22.097 88.279 70.211 1.00 2.92 C 147 | ATOM 147 C GLY A 218 21.813 86.825 70.540 1.00 3.81 C 148 | ATOM 148 O GLY A 218 22.360 85.930 69.888 1.00 2.23 O 149 | ATOM 149 N MET A 219 20.920 86.586 71.500 1.00 3.66 N 150 | ATOM 150 CA MET A 219 20.590 85.230 71.952 1.00 4.39 C 151 | ATOM 151 C MET A 219 19.120 84.827 71.759 1.00 5.56 C 152 | ATOM 152 O MET A 219 18.562 84.067 72.567 1.00 5.99 O 153 | ATOM 153 CB MET A 219 20.949 85.091 73.439 1.00 5.01 C 154 | ATOM 154 CG MET A 219 22.357 85.537 73.799 1.00 8.26 C 155 | ATOM 155 SD MET A 219 22.608 85.739 75.586 1.00 6.21 S 156 | ATOM 156 CE MET A 219 24.294 86.353 75.647 1.00 4.47 C 157 | ATOM 157 N SER A 220 18.485 85.308 70.697 1.00 4.84 N 158 | ATOM 158 CA SER A 220 17.084 84.975 70.462 1.00 3.75 C 159 | ATOM 159 C SER A 220 16.672 85.289 69.029 1.00 3.12 C 160 | ATOM 160 O SER A 220 17.500 85.284 68.121 1.00 2.08 O 161 | ATOM 161 CB SER A 220 16.209 85.800 71.396 1.00 3.99 C 162 | ATOM 162 OG SER A 220 16.190 87.150 70.958 1.00 2.24 O 163 | ATOM 163 N LEU A 240 17.901 96.749 68.655 1.00 6.70 N 164 | ATOM 164 CA LEU A 240 18.266 98.016 69.262 1.00 5.76 C 165 | ATOM 165 C LEU A 240 19.707 97.719 69.663 1.00 5.96 C 166 | ATOM 166 O LEU A 240 19.958 96.738 70.375 1.00 5.20 O 167 | ATOM 167 CB LEU A 240 17.433 98.296 70.516 1.00 4.73 C 168 | ATOM 168 CG LEU A 240 17.969 99.436 71.388 1.00 7.46 C 169 | ATOM 169 CD1 LEU A 240 17.834 100.763 70.667 1.00 6.41 C 170 | ATOM 170 CD2 LEU A 240 17.236 99.477 72.717 1.00 9.88 C 171 | ATOM 171 N ILE A 241 20.655 98.491 69.143 1.00 4.80 N 172 | ATOM 172 CA ILE A 241 22.064 98.285 69.468 1.00 6.05 C 173 | ATOM 173 C ILE A 241 22.291 98.877 70.861 1.00 8.55 C 174 | ATOM 174 O 
ILE A 241 22.377 100.096 71.015 1.00 10.17 O 175 | ATOM 175 CB ILE A 241 22.986 98.949 68.398 1.00 7.13 C 176 | ATOM 176 CG1 ILE A 241 22.646 98.389 67.006 1.00 2.01 C 177 | ATOM 177 CG2 ILE A 241 24.478 98.731 68.734 1.00 2.12 C 178 | ATOM 178 CD1 ILE A 241 22.681 96.864 66.907 1.00 2.13 C 179 | ATOM 179 N THR A 242 22.350 98.012 71.874 1.00 7.24 N 180 | ATOM 180 CA THR A 242 22.520 98.449 73.257 1.00 8.15 C 181 | ATOM 181 C THR A 242 23.952 98.678 73.718 1.00 6.35 C 182 | ATOM 182 O THR A 242 24.190 99.368 74.712 1.00 11.82 O 183 | ATOM 183 CB THR A 242 21.857 97.467 74.246 1.00 10.49 C 184 | ATOM 184 OG1 THR A 242 22.374 96.140 74.047 1.00 5.64 O 185 | ATOM 185 CG2 THR A 242 20.347 97.472 74.061 1.00 9.31 C 186 | ATOM 186 N ASN A 243 24.905 98.113 73.002 1.00 4.12 N 187 | ATOM 187 CA ASN A 243 26.295 98.259 73.386 1.00 7.15 C 188 | ATOM 188 C ASN A 243 27.205 97.873 72.245 1.00 7.48 C 189 | ATOM 189 O ASN A 243 26.810 97.152 71.333 1.00 7.76 O 190 | ATOM 190 CB ASN A 243 26.601 97.360 74.589 1.00 5.38 C 191 | ATOM 191 CG ASN A 243 26.358 95.885 74.294 1.00 7.48 C 192 | ATOM 192 OD1 ASN A 243 25.219 95.465 74.115 1.00 8.43 O 193 | ATOM 193 ND2 ASN A 243 27.428 95.100 74.222 1.00 7.84 N 194 | ATOM 194 N LYS A 244 28.427 98.375 72.299 1.00 10.56 N 195 | ATOM 195 CA LYS A 244 29.428 98.059 71.300 1.00 14.03 C 196 | ATOM 196 C LYS A 244 30.253 96.916 71.870 1.00 13.90 C 197 | ATOM 197 O LYS A 244 30.704 96.961 73.015 1.00 10.72 O 198 | ATOM 198 CB LYS A 244 30.325 99.268 71.012 1.00 19.27 C 199 | ATOM 199 CG LYS A 244 29.640 100.390 70.230 1.00 22.50 C 200 | ATOM 200 CD LYS A 244 29.180 99.895 68.864 1.00 30.25 C 201 | ATOM 201 CE LYS A 244 30.361 99.448 67.999 1.00 30.83 C 202 | ATOM 202 NZ LYS A 244 29.915 98.764 66.757 1.00 28.37 N 203 | ATOM 203 N VAL A 245 30.372 95.858 71.083 1.00 16.13 N 204 | ATOM 204 CA VAL A 245 31.123 94.672 71.463 1.00 16.96 C 205 | ATOM 205 C VAL A 245 32.623 94.977 71.643 1.00 15.53 C 206 | ATOM 206 O VAL A 245 33.207 95.739 70.875 1.00 
18.19 O 207 | ATOM 207 CB VAL A 245 30.894 93.579 70.396 1.00 16.29 C 208 | ATOM 208 CG1 VAL A 245 32.115 92.744 70.209 1.00 17.98 C 209 | ATOM 209 CG2 VAL A 245 29.698 92.727 70.779 1.00 11.89 C 210 | ATOM 210 N ILE A 246 33.232 94.388 72.668 1.00 15.63 N 211 | ATOM 211 CA ILE A 246 34.661 94.585 72.951 1.00 15.09 C 212 | ATOM 212 C ILE A 246 35.473 93.787 71.919 1.00 15.81 C 213 | ATOM 213 O ILE A 246 35.379 92.564 71.860 1.00 12.58 O 214 | ATOM 214 CB ILE A 246 35.043 94.095 74.387 1.00 15.26 C 215 | ATOM 215 CG1 ILE A 246 34.133 94.721 75.450 1.00 11.92 C 216 | ATOM 216 CG2 ILE A 246 36.510 94.422 74.692 1.00 16.57 C 217 | ATOM 217 CD1 ILE A 246 34.313 96.207 75.652 1.00 14.36 C 218 | ATOM 218 N LYS A 254 33.249 95.972 80.008 1.00 43.68 N 219 | ATOM 219 CA LYS A 254 32.662 94.704 79.594 1.00 40.31 C 220 | ATOM 220 C LYS A 254 31.137 94.725 79.636 1.00 36.88 C 221 | ATOM 221 O LYS A 254 30.536 95.453 80.429 1.00 35.77 O 222 | ATOM 222 CB LYS A 254 33.228 93.546 80.426 1.00 42.74 C 223 | ATOM 223 CG LYS A 254 33.221 93.762 81.927 1.00 46.09 C 224 | ATOM 224 CD LYS A 254 34.202 92.812 82.600 1.00 51.45 C 225 | ATOM 225 CE LYS A 254 34.215 92.972 84.118 1.00 55.19 C 226 | ATOM 226 NZ LYS A 254 32.950 92.504 84.763 1.00 57.44 N 227 | ATOM 227 N ALA A 255 30.524 93.976 78.721 1.00 33.21 N 228 | ATOM 228 CA ALA A 255 29.071 93.890 78.613 1.00 30.75 C 229 | ATOM 229 C ALA A 255 28.397 93.424 79.897 1.00 29.14 C 230 | ATOM 230 O ALA A 255 28.917 92.559 80.606 1.00 25.21 O 231 | ATOM 231 CB ALA A 255 28.681 92.978 77.449 1.00 27.15 C 232 | ATOM 232 N ASN A 256 27.233 94.005 80.176 1.00 29.06 N 233 | ATOM 233 CA ASN A 256 26.447 93.679 81.362 1.00 28.32 C 234 | ATOM 234 C ASN A 256 24.971 93.893 81.036 1.00 25.99 C 235 | ATOM 235 O ASN A 256 24.627 94.770 80.246 1.00 27.77 O 236 | ATOM 236 CB ASN A 256 26.868 94.552 82.551 1.00 29.45 C 237 | ATOM 237 CG ASN A 256 26.586 96.022 82.325 1.00 32.50 C 238 | ATOM 238 OD1 ASN A 256 25.561 96.542 82.767 1.00 30.07 O 239 | ATOM 
239 ND2 ASN A 256 27.489 96.698 81.626 1.00 33.15 N 240 | ATOM 240 N HIS A 257 24.108 93.090 81.648 1.00 24.78 N 241 | ATOM 241 CA HIS A 257 22.666 93.148 81.416 1.00 24.77 C 242 | ATOM 242 C HIS A 257 21.972 94.468 81.751 1.00 25.56 C 243 | ATOM 243 O HIS A 257 21.048 94.871 81.043 1.00 23.54 O 244 | ATOM 244 CB HIS A 257 21.973 91.998 82.159 1.00 26.52 C 245 | ATOM 245 CG HIS A 257 20.548 91.774 81.750 1.00 27.80 C 246 | ATOM 246 ND1 HIS A 257 20.155 91.687 80.431 1.00 28.65 N 247 | ATOM 247 CD2 HIS A 257 19.425 91.597 82.487 1.00 28.26 C 248 | ATOM 248 CE1 HIS A 257 18.853 91.463 80.372 1.00 28.83 C 249 | ATOM 249 NE2 HIS A 257 18.387 91.405 81.607 1.00 29.35 N 250 | ATOM 250 N GLU A 259 23.169 97.479 81.560 1.00 28.90 N 251 | ATOM 251 CA GLU A 259 23.504 98.483 80.553 1.00 26.46 C 252 | ATOM 252 C GLU A 259 22.559 98.333 79.356 1.00 24.03 C 253 | ATOM 253 O GLU A 259 22.137 99.330 78.765 1.00 23.08 O 254 | ATOM 254 CB GLU A 259 24.972 98.349 80.110 1.00 26.29 C 255 | ATOM 255 CG GLU A 259 25.163 97.714 78.722 1.00 32.25 C 256 | ATOM 256 CD GLU A 259 26.604 97.331 78.413 1.00 32.55 C 257 | ATOM 257 OE1 GLU A 259 27.496 98.208 78.493 1.00 32.66 O 258 | ATOM 258 OE2 GLU A 259 26.835 96.148 78.071 1.00 30.99 O 259 | ATOM 259 N VAL A 260 22.215 97.095 79.003 1.00 20.99 N 260 | ATOM 260 CA VAL A 260 21.312 96.885 77.874 1.00 20.00 C 261 | ATOM 261 C VAL A 260 19.900 97.347 78.234 1.00 19.97 C 262 | ATOM 262 O VAL A 260 19.222 97.977 77.416 1.00 19.12 O 263 | ATOM 263 CB VAL A 260 21.302 95.410 77.355 1.00 19.07 C 264 | ATOM 264 CG1 VAL A 260 22.686 94.789 77.477 1.00 17.49 C 265 | ATOM 265 CG2 VAL A 260 20.229 94.586 78.032 1.00 16.96 C 266 | ATOM 266 N LEU A 261 19.474 97.066 79.467 1.00 19.35 N 267 | ATOM 267 CA LEU A 261 18.148 97.472 79.932 1.00 20.06 C 268 | ATOM 268 C LEU A 261 18.100 98.989 80.007 1.00 20.50 C 269 | ATOM 269 O LEU A 261 17.112 99.616 79.624 1.00 22.46 O 270 | ATOM 270 CB LEU A 261 17.852 96.889 81.314 1.00 18.97 C 271 | ATOM 271 CG LEU A 261 
#!/usr/bin/env bash
# Download and unpack the TOUGH-M1, Vertex and ProSPECCTs datasets into
# $STRUCTURE_DATA_DIR. Requires wget and unzip on the PATH.

# Fail early — and with a non-zero exit status — if prerequisites are missing.
if [ -z "$STRUCTURE_DATA_DIR" ]; then
    echo "STRUCTURE_DATA_DIR not set"
    exit 1
fi
if ! type wget > /dev/null; then
    echo "wget not installed"
    exit 1
fi
if ! type unzip > /dev/null; then
    echo "unzip not installed"
    exit 1
fi

# Quote the path (it may contain spaces) and abort if the directory is missing.
cd "$STRUCTURE_DATA_DIR" || exit 1

# TOUGH-M1 dataset (-p so a re-run of the script does not fail on mkdir)
mkdir -p TOUGH-M1
wget https://zenodo.org/record/3687317/files/dt_tough.zip?download=1 -O dt_tough.zip && unzip dt_tough.zip
rm dt_tough.zip
wget https://dataverse.harvard.edu/api/access/datafile/:persistentId?persistentId=doi:10.7910/DVN/L7H7JJ/UFO5CB -O official_tough_m1.tar.gz && tar -xvzf official_tough_m1.tar.gz -C TOUGH-M1
rm official_tough_m1.tar.gz
wget https://osf.io/tmgne/download -O TOUGH-M1/TOUGH-M1_positive.list
wget https://osf.io/6dn5s/download -O TOUGH-M1/TOUGH-M1_pocket.list
wget https://osf.io/3aypv/download -O TOUGH-M1/TOUGH-M1_negative.list

# Vertex dataset
mkdir -p Vertex
wget https://zenodo.org/record/3687317/files/dt_vertex.zip?download=1 -O dt_vertex.zip && unzip dt_vertex.zip
rm dt_vertex.zip
wget http://pubs.acs.org/doi/suppl/10.1021/acs.jcim.6b00118/suppl_file/ci6b00118_si_002.zip && unzip ci6b00118_si_002.zip -d Vertex
rm ci6b00118_si_002.zip

# ProSPECCTs: each sub-dataset is served behind a POST-based licence agreement.
mkdir -p prospeccts
for FILE in kahraman_structures.tar.gz identical_structures.tar.gz identical_structures_similar_ligands.tar.gz barelier_structures.tar.gz decoy.tar.gz review_structures.tar.gz NMR_structures.tar.gz
do
    wget www.ewit.ccb.tu-dortmund.de/ag-koch/prospeccts/ --post-data "file=${FILE}&licenseagreement=accept&action=Download" -O "$FILE" && tar -xvzf "$FILE" -C prospeccts
    rm "$FILE"
done
wget https://zenodo.org/record/3687317/files/dt_prospeccts.zip?download=1 -O dt_prospeccts.zip && unzip dt_prospeccts.zip
rm dt_prospeccts.zip
class Custom:
    """An arbitrary user dataset.

    Assumes that the dataset is placed in ``$STRUCTURE_DATA_DIR/relpath``, containing
    a bunch of protein and pocket structures, which are referred to in ``pairs.csv``.
    This file contains a quadruplet on each line indicating matches to evaluate:

        relative_path_to_pdbA, relative_path_to_pocketA, relative_path_to_pdbB, relative_path_to_pocketB
    """

    def __init__(self, relpath='custom'):
        # Dataset location relative to $STRUCTURE_DATA_DIR
        self.relpath = relpath

    def _root(self):
        """Absolute path of the dataset directory."""
        return os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), self.relpath)

    def _read_pairs(self):
        """Parse ``pairs.csv`` into a list of ((pdbA, pocketA), (pdbB, pocketB)) tuples.

        Shared by :meth:`get_structures` and :meth:`evaluate_matching` so the file
        format is validated in exactly one place. Blank lines (e.g. a trailing
        newline) are ignored.

        :raises ValueError: if a non-blank line does not have exactly four
            comma-separated columns (a real exception instead of ``assert``,
            which would be silently stripped under ``python -O``).
        """
        pairs = []
        with open(os.path.join(self._root(), 'pairs.csv')) as f:
            for lineno, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                tokens = [t.strip() for t in line.split(',')]
                if len(tokens) != 4:
                    raise ValueError(f'pairs.csv is expected to have four columns (line {lineno}).')
                pairs.append(((tokens[0], tokens[1]), (tokens[2], tokens[3])))
        return pairs

    def preprocess_once(self):
        """ Computes featurization """
        htmd_featurizer(self.get_structures(), skip_existing=True)

    def get_structures(self):
        """Get list of PDB structures with metainfo.

        :return: list of dicts with keys 'protein', 'pocket', 'protein_htmd'
            (absolute paths) and 'key' (the "pdb,pocket" string as written in
            pairs.csv, used to match entries back in :meth:`evaluate_matching`).
        """
        root = self._root()
        npz_root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'processed/htmd', self.relpath)

        # Collect the unique (protein, pocket) tuples referenced on either side of a pair
        custom_pdbs = set()
        for (pdb_a, pocket_a), (pdb_b, pocket_b) in self._read_pairs():
            custom_pdbs.add((pdb_a, pocket_a))
            custom_pdbs.add((pdb_b, pocket_b))

        entries = []
        for pdb, pocket in custom_pdbs:
            # Default to a '.pdb' extension when the csv omits one
            pdb1 = pdb if os.path.splitext(pdb)[1] != '' else pdb + '.pdb'
            pocket1 = pocket if os.path.splitext(pocket)[1] != '' else pocket + '.pdb'
            entries.append({'protein': os.path.join(root, pdb1),
                            'pocket': os.path.join(root, pocket1),
                            'protein_htmd': os.path.join(npz_root, pdb1.replace('.pdb', '.npz')),
                            'key': pdb + ',' + pocket})

        return entries

    def evaluate_matching(self, descriptor_entries, matcher):
        """
        Compute pocket matching scores on the custom dataset.
        :param descriptor_entries: List of entries
        :param matcher: PocketMatcher instance
        :return: dict with 'pairs' (list of entry tuples, in pairs.csv order)
            and 'scores' (one score per pair)
        """
        target_dict = {d['key']: d for d in descriptor_entries}

        pairs = []
        for (pdb_a, pocket_a), (pdb_b, pocket_b) in self._read_pairs():
            key1 = pdb_a + ',' + pocket_a
            key2 = pdb_b + ',' + pocket_b
            pairs.append((target_dict[key1], target_dict[key2]))

        scores = matcher.pair_match(pairs)
        return {'pairs': pairs, 'scores': scores}
    @staticmethod
    def _extract_pocket_and_get_uniprot(pdbpath):
        """Extract the ligand-defined pocket from a raw ProSPECCTs PDB and map it to UniProt.

        :param pdbpath: path to a raw `.pdb` file; files whose stem contains '_'
            are derived artifacts (e.g. previously extracted `*_site_1`/`*_clean`)
            and are skipped
        :return: (fname, uniprots) where fname is the file stem (or None for skipped
            files) and uniprots is a set of UniProt accessions, or the string 'None'
            when the PDBe lookup failed
        """
        fname = os.path.basename(pdbpath).split('.')[0]
        if '_' in fname:
            # Derived file, not a raw structure — nothing to extract or map
            return None, None

        # 1) Extract the pocket
        detector = PocketFromLigandDetector(include_het_resname=False, save_clean_structure=True,
                                            keep_other_hets=False, min_lig_atoms=1, allowed_lig_names=['LIG'])
        detector.run_one(pdbpath, os.path.dirname(pdbpath))

        # 2) Attempt to map to Uniprots (fails from time to time, return 'None' in that case)
        # ProSPECCTs file stems look like '1abcA': 4-char PDB code plus optional chain id.
        pdb_code = fname[:4].lower()
        query_chain_id = fname[4].upper() if len(fname) > 4 else ''
        result = set()

        # 2b) In the case of NMR structures, Prospeccts has incomplete PDB IDs (e.g. 'cz00A' is really '1cz2 00 A')
        # Therefore for this dataset, try to get the full PDB ID from the raw PDB text
        if "NMR_structures" in pdbpath:
            pdb_code = Prospeccts._get_pdb_code_from_raw_pdb(pdbpath)
            if not pdb_code:
                # 'XXXX' guarantees the PDBe request below fails, yielding 'None'
                pdb_code = 'XXXX'

        try:
            # PDBe SIFTS service: maps a PDB entry to its UniProt families/chains
            r = requests.get(f'http://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_code}')
            fam = r.json()[pdb_code]['UniProt']
        except Exception as e:
            # this logically fails for artificial proteins not in PDB, such as in decoys (P3, P4), but that's fine.
            logger.warning(f'PDB not found {e} {pdb_code} {query_chain_id}')
            return fname, 'None'
        for fam_id in fam.keys():
            for chain in fam[fam_id]['mappings']:
                if not query_chain_id:
                    # No chain in the stem: accept accessions from every chain
                    result.add(fam_id)
                elif chain['chain_id'] == query_chain_id:
                    # Warn when one chain maps to several distinct accessions
                    if len(result) > 0 and fam_id != next(iter(result)):
                        logger.warning(f'Duplicate chain {fam_id} {result}')
                    result.add(fam_id)
        if not result:
            logger.warning(f'Chain not found {pdb_code} chain {query_chain_id}')
        return fname, result
    def get_structures(self, extra_mappings=True):
        """Get list of PDB structures with metainfo.

        :param extra_mappings: when True, load `pdbcode_mappings.pickle` (written by
            `preprocess_once`) and attach uniprot/sequence-cluster info to each entry;
            when False, those fields are filled with the string 'None'
        :return: list of dicts with structure/pocket/ligand paths, the htmd `.npz`
            feature path, the 5-char code, the 4-char PDB code, and the mappings
        """
        dir1, dir2, listfn = self._prospeccts_paths()
        root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'prospeccts', dir1)
        npz_root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'processed/htmd/prospeccts', dir1)

        # Collect the unique structure codes referenced by either column of the pair list
        db_pdbs = set()
        with open(os.path.join(root, listfn)) as f:
            for line in f.readlines():
                tokens = line.split(',')
                db_pdbs.add(tokens[0])
                db_pdbs.add(tokens[1])

        code5_to_seqclusts, code5_to_uniprot = None, None
        if extra_mappings:
            # Produced once by preprocess_once() (run with dbname 'P1')
            mapping = pickle.load(open(os.path.join(os.environ['STRUCTURE_DATA_DIR'], 'prospeccts', 'pdbcode_mappings.pickle'), 'rb'))
            code5_to_seqclusts = mapping['code5_to_seqclusts']
            code5_to_uniprot = mapping['code5_to_uniprot']

        entries = []
        for pdb in db_pdbs:
            entries.append({
                'protein': root + f'/{dir2}/{pdb}_clean.pdb',
                'pocket': root + f'/{dir2}/{pdb}_site_1.pdb',
                'ligand': root + f'/{dir2}/{pdb}_lig_1.pdb',
                'protein_htmd': npz_root + f'/{dir2}/{pdb}_clean.npz',
                'code5': pdb,
                'code': pdb[:4],
                'uniprot': code5_to_uniprot[pdb] if code5_to_uniprot else 'None',
                'seqclusts': code5_to_seqclusts[pdb] if code5_to_seqclusts else 'None',
            })
        return entries
@staticmethod
def _preprocess_worker(entry):
    """
    Per-entry preprocessing worker (run in a process pool by `preprocess_once`).

    Re-runs fpocket2 on the entry's protein file and resolves the entry's chain
    to a UniProt accession via the PDBe SIFTS webservice.

    :param entry: Structure entry dict as produced by `get_structures`; reads
        the 'protein', 'code' and 'code5' keys.
    :return: List `[code5, uniprot_accession, pdb_code + chain_id]`; the last
        two elements are the string 'None' on failure.
    """

    def struct_to_centroid(structure):
        # Mean of all atom coordinates of a Bio.PDB structure/chain
        return np.mean(np.array([atom.get_coord() for atom in structure.get_atoms()]), axis=0)

    def pdb_chain_to_uniprot(pdb_code, query_chain_id):
        """
        Get pdb chain mapping to uniprot accession using the pdbe api
        """
        result = 'None'
        r = requests.get(f'http://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_code}')
        fam = r.json()[pdb_code]['UniProt']

        # A chain may map to several accessions; the last matching one wins, with a warning
        for fam_id in fam.keys():
            for chain in fam[fam_id]['mappings']:
                if chain['chain_id'] == query_chain_id:
                    if result != 'None' and fam_id != result:
                        logger.warning(f'DUPLICATE {fam_id} {result}')
                    result = fam_id
        if result == 'None':
            logger.warning(f'No uniprot accession found for {pdb_code}: {query_chain_id}')
        return result

    # 1) We won't be using provided `.fpocket` files because they don't contain the actual atoms, just
    # Voronoii centers. So we run fpocket2 ourselves, it seems to be equivalent to published results.
    try:
        command = ['fpocket2', '-f', entry['protein']]
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        logger.warning('Calling fpocket2 failed, please make sure it is on the PATH')
        raise e

    # 2) Some chains have been renamed since TOUGH-M1 dataset was released so one cannot directly retrieve
    # uniprot accessions corresponding to a given chain. So we first locate corresponding chains in the
    # original pdb files, get their ids and translate those to uniprot using the SIFTS webservices.
    parser = PDB.PDBParser(PERMISSIVE=True, QUIET=True)
    tough_str = parser.get_structure('t', entry['protein'])
    tough_c = struct_to_centroid(tough_str)

    # 2a) Some structures are now obsolete since TOUGH-M1 was published, for these, get superceding entry
    pdb_code = entry['code'].lower()
    superceded = pdb_check_obsolete(entry['code'])
    if superceded:
        pdb_code = superceded
    # 2b) try to download pdb from RSCB mirror site
    with tempfile.TemporaryDirectory() as tmpdir:
        fname = tmpdir + '/prot.pdb'
        try:
            urllib.request.urlretrieve(f"http://files.rcsb.org/download/{pdb_code}.pdb", fname)
        except:  # noqa: E722 -- best-effort download; any failure just skips the entry
            logger.info(f'Could not download PDB: {pdb_code}')
            return [entry['code5'], 'None', 'None']
        orig_str = parser.get_structure('o', fname)

        # TOUGH authors haven't re-centered the chains so we can roughly find them just by centroids :)
        dists = []
        ids = []
        for model in orig_str:
            for chain in model:
                if len(chain) < 20:  # ignore chains with fewer than 20 residues
                    continue
                dists.append(np.linalg.norm(struct_to_centroid(chain) - tough_c))
                ids.append(chain.id)
        chain_id = ids[np.argmin(dists)]
        # Centroid distance above 5 Angstrom suggests the mapping is wrong; give up on the entry
        if np.min(dists) > 5:
            logger.warning(f"Suspiciously large distance when trying to map tough structure to downloaded one"
                           f"DIST {dists} {ids} {entry['code']} {pdb_code}")
            return [entry['code5'], 'None', 'None']

        uniprot = pdb_chain_to_uniprot(pdb_code.lower(), chain_id)
        return [entry['code5'], uniprot, pdb_code.lower() + chain_id]
def get_structures(self, extra_mappings=True):
    """Return a list of entry dicts (file locations and metadata) for all TOUGH-M1 pockets.

    :param extra_mappings: When True, also attach uniprot and sequence-cluster
        info from the `pdbcode_mappings.pickle` built by `preprocess_once`.
    """
    dataset_root = os.path.join(self.tough_data_dir, 'TOUGH-M1_dataset')
    feature_root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'processed/htmd/TOUGH-M1/TOUGH-M1_dataset')
    mapping_fname = os.path.join(self.tough_data_dir, 'pdbcode_mappings.pickle')

    # Optional code5 -> uniprot / sequence-cluster translation tables
    uniprot_map, seqclust_map = None, None
    if extra_mappings:
        with open(mapping_fname, 'rb') as mf:
            translation = pickle.load(mf)
        uniprot_map = translation['code5_to_uniprot']
        seqclust_map = translation['code5_to_seqclust']

    entries = []
    with open(os.path.join(self.tough_data_dir, 'TOUGH-M1_pocket.list')) as listing:
        for record in listing:
            code5, pocketnr, _ = record.split()
            # fpocket numbers pockets from 0 while the list file counts from 1
            entries.append({
                'protein': dataset_root + f'/{code5}/{code5}.pdb',
                'pocket': dataset_root + f'/{code5}/{code5}_out/pockets/pocket{int(pocketnr)-1}_vert.pqr',
                'ligand': dataset_root + f'/{code5}/{code5}00.pdb',
                'protein_htmd': feature_root + f'/{code5}/{code5}.npz',
                'code5': code5,
                'code': code5[:4],
                'uniprot': uniprot_map[code5] if uniprot_map else 'None',
                'seqclust': seqclust_map[code5] if seqclust_map else 'None'
            })
    return entries
def evaluate_matching(self, descriptor_entries, matcher):
    """
    Evaluate pocket matching on TOUGH-M1 dataset. The evaluation metrics is AUC.

    :param descriptor_entries: List of entries
    :param matcher: PocketMatcher instance
    """
    by_code = {entry['code5']: entry for entry in descriptor_entries}

    def read_pairs(list_path):
        # Keep only pairs where both pockets have descriptors available
        collected = []
        with open(list_path) as handle:
            for row in handle:
                a, b = row.split()[:2]
                if a in by_code and b in by_code:
                    collected.append((by_code[a], by_code[b]))
        return collected

    pos_pairs = read_pairs(os.path.join(self.tough_data_dir, 'TOUGH-M1_positive.list'))
    neg_pairs = read_pairs(os.path.join(self.tough_data_dir, 'TOUGH-M1_negative.list'))
    pairs = pos_pairs + neg_pairs
    positives = [True] * len(pos_pairs) + [False] * len(neg_pairs)

    scores = matcher.pair_match(pairs)

    # Drop pairs whose score could not be computed (nan/inf)
    goodidx = np.flatnonzero(np.isfinite(np.array(scores)))
    if len(goodidx) != len(scores):
        logger.warning(f'Ignoring {len(scores) - len(goodidx)} pairs')
        positives_clean, scores_clean = np.array(positives)[goodidx], np.array(scores)[goodidx]
    else:
        positives_clean, scores_clean = positives, scores

    # Calculate metrics (ROC curve / AUC and precision-recall / average precision)
    fpr, tpr, roc_thresholds = roc_curve(positives_clean, scores_clean)
    auc = roc_auc_score(positives_clean, scores_clean)
    precision, recall, thresholds = precision_recall_curve(positives_clean, scores_clean)
    ap = voc_ap(recall[::-1], precision[::-1])

    return {
        'ap': ap,
        'pr': precision,
        're': recall,
        'th': thresholds,
        'auc': auc,
        'fpr': fpr,
        'tpr': tpr,
        'th_roc': roc_thresholds,
        'pairs': pairs,
        'scores': scores,
        'pos_mask': positives
    }
def get_structures(self, extra_mappings=True):
    """Return a list of entry dicts (file paths and metadata) for the Vertex benchmark set.

    :param extra_mappings: When True, also attach sequence-cluster info from
        the precomputed `pdbcode_mappings.pickle`.
    """
    root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'Vertex')
    npz_root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'processed/htmd/Vertex')

    # Read the unique (pdb code + site, uniprot, ligand component) triples,
    # skipping the two header lines of the tsv file
    triples = set()
    with open(os.path.join(root, 'protein_pairs.tsv')) as tsv:
        for line_nr, line in enumerate(tsv):
            if line_nr <= 1:
                continue
            cols = line.split('\t')
            triples.add((cols[0].lower(), cols[2], cols[1]))
            triples.add((cols[5].lower(), cols[7], cols[6]))

    seqclust_map = None
    if extra_mappings:
        with open(os.path.join(os.environ['STRUCTURE_DATA_DIR'], 'Vertex', 'pdbcode_mappings.pickle'), 'rb') as mf:
            seqclust_map = pickle.load(mf)['code5_to_seqclusts']

    entries = []
    for code5, uniprot, ligand_cc in triples:
        pdb_code = code5[:4]
        site_nr = int(code5[5])  # entries are keyed by pdb code plus site number
        entries.append({
            'protein': root + f'/{pdb_code}/{pdb_code}_clean.pdb',
            'pocket': root + f'/{pdb_code}/{pdb_code}_site_{site_nr}.pdb',
            'ligand': root + f'/{pdb_code}/{pdb_code}_lig_{site_nr}.pdb',
            'protein_htmd': npz_root + f'/{pdb_code}/{pdb_code}_clean.npz',
            'code5': code5,
            'code': code5[:4],
            'lig_cc': ligand_cc,
            'uniprot': uniprot,
            'seqclusts': seqclust_map[code5] if seqclust_map else 'None'
        })
    return entries
def __init__(self, pdb_list, box_size, augm_rot=False, augm_mirror_prob=0.0):
    """
    :param pdb_list: List of pdb files (as dicts, with key 'protein_htmd').
    :param box_size: Patch size
    :param augm_rot: Rotation augmentation
    :param augm_mirror_prob: Mirroring probability for each axis
    """
    self.pdb_list, self.pdb_idx = [], []

    # Keep only entries whose HTMD featurization exists on disk, remembering
    # their position in the original list
    for orig_idx, entry in enumerate(pdb_list):
        if os.path.exists(entry['protein_htmd']):
            self.pdb_list.append(entry)
            self.pdb_idx.append(orig_idx)
        else:
            logging.warning(f"HTMD featurization file not found: {entry['protein_htmd']},"
                            f"corresponding pdb likely could not be parsed")

    assert len(self.pdb_list) > 0, f'No HTMD could be found but {len(pdb_list)}' \
                                   f'PDB files were given, please call preprocess_once() on the dataset'
    logger.info('Dataset size: %d', len(self.pdb_list))

    self._resolution = 1.0
    self._box_size = box_size
    self._augm_rot = augm_rot
    self._augm_mirror_prob = augm_mirror_prob
@staticmethod
def _getOccupancyC(coords, centers, channelsigmas):  # adapted from voxeldescriptors.py in HTMD
    """ Calls the C code to calculate the voxels values for each property """
    grid_centers = centers.astype(np.float64)
    atom_coords = coords.astype(np.float32)
    sigmas = channelsigmas.astype(np.float64)

    n_centers = grid_centers.shape[0]
    n_atoms = atom_coords.shape[0]
    nchannels = sigmas.shape[1]
    occus = np.zeros((n_centers, nchannels), dtype=np.float64)

    # The C routine fills `occus` in place: one value per (grid point, channel)
    occupancylib.descriptor_ext(grid_centers.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                                atom_coords.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
                                sigmas.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                                occus.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                                ctypes.c_int(n_centers),  # n of centers
                                ctypes.c_int(n_atoms),  # n of atoms
                                ctypes.c_int(nchannels))  # n of channels
    return occus
def __init__(self, pos_pairs, neg_pairs, pdb_list, box_size, augm_rot=False, augm_mirror_prob=0.0,
             max_sampling_dist=4.0, augm_robustness=False, augm_decoy_prob=0, db_pairs_limit=0):
    """Initialize a dataset over positive/negative pocket pairs restricted to available pdbs."""
    super().__init__(pdb_list, box_size, augm_rot, augm_mirror_prob)

    self._max_sampling_dist = max_sampling_dist
    self._augm_robustness = augm_robustness
    self._decoy_prob = augm_decoy_prob

    # Map pdb code -> index into self.pdb_list (keyed by code5 when present)
    self._pdb_map = {
        (entry['code5'] if 'code5' in entry else entry['code']): pos
        for pos, entry in enumerate(self.pdb_list)
    }

    # Keep only pairs where both members have a loadable pdb
    def both_known(pair):
        return pair[0] in self._pdb_map and pair[1] in self._pdb_map

    self._pos_pairs = [p for p in pos_pairs if both_known(p)]
    self._neg_pairs = [p for p in neg_pairs if both_known(p)]
    if db_pairs_limit > 0:
        self._pos_pairs = self._pos_pairs[:db_pairs_limit]
        self._neg_pairs = self._neg_pairs[:db_pairs_limit]
    logger.info('Dataset positive pairs: %d, negative pairs: %d', len(self._pos_pairs), len(self._neg_pairs))

    codes_in_use = {code for pair in self._pos_pairs + self._neg_pairs for code in (pair[0], pair[1])}
    logger.info('Effective number of PDB files: %d', len(codes_in_use))
    assert len(self._pos_pairs) > 0 and len(self._neg_pairs) > 0
class PdbPairVoxelizedDataset(PdbTupleVoxelizedDataset):
    """ Dataset of pairs of voxelized pockets """

    def __len__(self):
        # positive pairs as the driving entity; odd indices draw random negatives
        return len(self._pos_pairs) * 2

    def __getitem__(self, idx):
        """Return a sample dict with stacked voxel volumes for one pocket pair and its label.

        Even indices yield the idx//2-th positive pair (target 1); odd indices
        yield a randomly drawn negative pair (target 0), giving a 1:1 mix.
        """
        if idx % 2 == 0:
            cls = 1  # positive class
            pair = self._pos_pairs[idx // 2]
        else:
            cls = 0  # negative class
            pair = random.choice(self._neg_pairs)

        first_vols = self._get_patch(self._pdb_map[pair[0]])
        # BUGFIX: the original `allow_decoy=(cls == 'neg')` compared the int label
        # (0/1) to the string 'neg', which is always False, so decoy augmentation
        # (`augm_decoy_prob`) was silently never applied. Negatives are cls == 0,
        # and per _get_patch's contract decoys go into one member of a negative pair.
        second_vols = self._get_patch(self._pdb_map[pair[1]], allow_decoy=(cls == 0))

        return {'inputs': np.stack(first_vols + second_vols), 'targets': np.array([cls], dtype=np.float32)}
set([entry['code'] for entry in vertex]) 261 | pdb_train = list(filter(lambda entry: entry['code'] not in vertex_pdbs, pdb_train)) 262 | elif exclude_Vertex_from_train == 'seqclust': 263 | vertex_seqclusts = set([c for entry in vertex for c in entry['seqclusts']] + ['None']) 264 | pdb_train = list(filter(lambda entry: entry['seqclust'] not in vertex_seqclusts, pdb_train)) 265 | else: 266 | raise NotImplementedError() 267 | logger.info(f'After Vertex filter {len(pdb_train)}') 268 | 269 | # ProSPECCTS 270 | if exclude_Prospeccts_from_train: 271 | if args.db_preprocessing: 272 | for dbname in Prospeccts.dbnames: 273 | Prospeccts(dbname).preprocess_once() 274 | 275 | # Get ProSPECCTs datasets 276 | all_prospeccts = [entry for dbname in Prospeccts.dbnames for entry in Prospeccts(dbname).get_structures()] 277 | 278 | # Exclude entries from tough training set that exist in the ProSPECCTs sets 279 | logger.info(f'Before Prospeccts filter {len(pdb_train)}') 280 | if exclude_Prospeccts_from_train == 'uniprot': 281 | prospeccts_ups = set([u for entry in all_prospeccts for u in entry['uniprot']] + ['None']) 282 | pdb_train = list(filter(lambda entry: entry['uniprot'] not in prospeccts_ups, pdb_train)) 283 | elif exclude_Prospeccts_from_train == 'pdb': 284 | prospeccts_pdbs = set([entry['code'] for entry in all_prospeccts]) 285 | pdb_train = list(filter(lambda entry: entry['code'].lower() not in prospeccts_pdbs, pdb_train)) 286 | elif exclude_Prospeccts_from_train == 'seqclust': 287 | prospeccts_seqclusts = set([c for entry in all_prospeccts for c in entry['seqclusts']] + ['None']) 288 | pdb_train = list(filter(lambda entry: entry['seqclust'] not in prospeccts_seqclusts, pdb_train)) 289 | else: 290 | raise NotImplementedError() 291 | logger.info(f'After Prospeccts filter {len(pdb_train)}') 292 | 293 | # Read TOUGH-M1 negative and positive pocket pairs 294 | with open(os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'TOUGH-M1', 'TOUGH-M1_positive.list')) as f: 295 | pos_pairs 
import torch
import torch.nn as nn
import torch.nn.functional as nnf


class VoxelNetwork(nn.Module):
    """
    Network for 3D voxel patch classification.

    The architecture is assembled dynamically from a `config` string containing
    a sequence of comma-delimited layer definition tokens `layer_arg1_arg2_...`
    See README.md for examples.
    """

    def __init__(self, config, nfeat):
        """
        Build the network in a flexible way based on the `config` string.

        :param config: Comma-delimited layer tokens, e.g. 'c_8_3_1,b,r,m_2'
        :param nfeat: Number of input channels
        """
        super().__init__()
        # Input standardization parameters; registered as buffers so they are
        # serialized with checkpoints and moved across devices with the model.
        self.register_buffer('scaler_mean', torch.zeros(1, nfeat, 1, 1, 1))
        self.register_buffer('scaler_std', torch.ones(1, nfeat, 1, 1, 1))

        for d, conf in enumerate(config.split(',')):
            conf = conf.strip().split('_')

            if conf[0] == 'b':  # Batch norm
                self.add_module(str(d), nn.BatchNorm3d(nfeat))
            elif conf[0] == 'r':  # ReLU
                self.add_module(str(d), nn.ReLU(True))

            elif conf[0] == 'm':  # Max pooling
                kernel_size = int(conf[1])
                self.add_module(str(d), nn.MaxPool3d(kernel_size))
            elif conf[0] == 'a':  # Avg pooling
                kernel_size = int(conf[1])
                self.add_module(str(d), nn.AvgPool3d(kernel_size))

            elif conf[0] == 'c':  # 3D convolution args: output feat, kernel size, padding, stride
                nfeato = int(conf[1])
                kernel_size = int(conf[2])
                padding = int(conf[3]) if len(conf) > 3 else 0
                stride = int(conf[4]) if len(conf) > 4 else 1
                self.add_module(str(d), nn.Conv3d(nfeat, nfeato, kernel_size, stride, padding))
                nfeat = nfeato

            elif conf[0] == 'se':  # SE(3)-covariant block args: output feat, mult1, mult2, mult3, kernel size, padding, stride, bnnorm, smoothing
                # Imported lazily so that se3cnn is only required when an 'se'
                # block actually appears in the configuration.
                from se3cnn.blocks import GatedBlock

                nfeato = int(conf[1])
                mult1 = int(conf[2])
                mult2 = int(conf[3])
                mult3 = int(conf[4])
                kernel_size = int(conf[5])
                padding = int(conf[6]) if len(conf) > 6 else 0
                stride = int(conf[7]) if len(conf) > 7 else 1
                normalization = conf[8] if len(conf) > 8 else None
                smooth = bool(int(conf[9])) if len(conf) > 9 else False

                if isinstance(nfeat, int):
                    # First SE(3) block: scalar input channels only
                    nfeat = (nfeat,)
                    nfeato = tuple([n for n in (nfeato, mult1, mult2, mult3) if n > 0])
                    activation = (None, nnf.sigmoid)
                elif mult1 <= 0:
                    # Final SE(3) block: collapse back to scalar channels, no gating
                    nfeato = (nfeato,)
                    activation = None
                else:
                    nfeato = tuple([n for n in (nfeato, mult1, mult2, mult3) if n > 0])
                    activation = (nnf.relu, nnf.sigmoid)

                conv = GatedBlock(nfeat, nfeato, size=kernel_size, padding=padding, stride=stride,
                                  activation=activation, normalization=normalization, smooth_stride=smooth)
                self.add_module(str(d), conv)

                if mult1 <= 0:
                    nfeato = nfeato[0]
                nfeat = nfeato

            else:
                raise NotImplementedError('Unknown module: ' + conf[0])

        # Number of output channels after the last configured layer
        self.nfeato = nfeat

    def set_input_scaler(self, scaler):
        """
        Sets input standardization from a fitted sklearn-style scaler.

        :param scaler: Object exposing `mean_` and `scale_` arrays
        :return:
        """
        self.scaler_mean.copy_(torch.Tensor(scaler.mean_).view(1, scaler.mean_.size, 1, 1, 1))
        self.scaler_std.copy_(torch.Tensor(scaler.scale_).view(1, scaler.scale_.size, 1, 1, 1))

    def forward(self, inputs):
        # Standardize, then apply all configured layers in insertion order
        inputs = (inputs - self.scaler_mean) / self.scaler_std
        for module in self._modules.values():
            inputs = module(inputs)
        return inputs


def create_model(args, dataset, device):
    """
    Creates a model from `args.model_config` and moves it to `device`.

    :param args: Namespace with a `model_config` attribute
    :param dataset: Dataset (class or instance) exposing `num_channels`
    :param device: torch.device or device string
    """
    model = VoxelNetwork(args.model_config, dataset.num_channels)
    print(f'Total number of parameters: {sum(p.numel() for p in model.parameters())}')
    print(model)
    return model.to(device)
def load_and_precompute_point_feats(model, args, pdb_list, point_list, device, nworkers, batch_size):
    """
    Compute descriptors for every (pdb, point) pair given.

    Returns a list aligned with `point_list`; entries for points that never
    appear in a batch remain None.
    """
    model.eval()
    device = torch.device(device) if isinstance(device, str) else device

    dataset = PointOfInterestVoxelizedDataset(pdb_list, point_list, box_size=args.patch_size)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=nworkers)

    feats = [None] * len(point_list)
    with torch.no_grad():
        for batch in tqdm(loader):
            voxels = batch['inputs'].squeeze(1).to(device)
            outputs = model(voxels)
            if args.l2_normed_descriptors:
                outputs = nnf.normalize(outputs)
            descriptors = outputs.cpu().float()
            for idx, desc in zip(batch['pdb_idx'], descriptors):
                # flatten any spatial dims into rows of length num_channels
                feats[int(idx)] = desc.view(-1, desc.shape[0])

    return feats


def match_precomputed_point_pairs(descriptors_A, descriptors_B):
    """
    Match pairs of descriptors. Some may be None, then their distance is NaN
    """
    with torch.no_grad():
        distances = []
        for desc_a, desc_b in tqdm(zip(descriptors_A, descriptors_B)):
            if desc_a is None or desc_b is None:
                distances.append(np.nan)
            else:
                distances.append(nnf.pairwise_distance(desc_a, desc_b).numpy())

    return np.squeeze(np.array(distances))


def match_precomputed_points_bipartite(descriptors_A, descriptors_B):
    """
    Matches the Cartesian product of descriptors (bipartite or complete matching, if B is None)
    Some may be None, then their distance is NaN
    """
    with torch.no_grad():

        def assemble(descriptors):
            # Stack descriptors into one (N, nfeat) matrix; missing entries
            # become NaN rows. Returns None if every descriptor is missing.
            first_valid = next((d for d in descriptors if d is not None), None)
            if first_valid is None:
                return None
            matrix = torch.full((len(descriptors), first_valid.shape[1]), np.nan, dtype=torch.float64)
            for row, d in enumerate(descriptors):
                if d is not None:
                    matrix[row, :] = d
            return matrix

        feats_A = assemble(descriptors_A)

        if descriptors_B is None:
            descriptors_B = descriptors_A
            feats_B = feats_A
        else:
            feats_B = assemble(descriptors_B)

        if feats_A is None or feats_B is None:
            return np.full((len(descriptors_A), len(descriptors_B)), np.nan)
        return bag_distances(feats_A, feats_B).numpy()


def bag_euclidean_distances2(x, y=None):
    """
    Squared Euclidean distance matrix: dist[i,j] = ||x[i,:]-y[j,:]||^2.

    :param x: Nxd matrix
    :param y: optional Mxd matrix; if not given then 'y=x' is used
    :return: NxM matrix of squared norms, clamped at zero
    (https://discuss.pytorch.org/t/efficient-distance-matrix-computation/9065/2)
    """
    x_norm2 = (x ** 2).sum(1).view(-1, 1)
    if y is None:
        y_t = torch.transpose(x, 0, 1)
        y_norm2 = x_norm2.view(1, -1)
    else:
        y_t = torch.transpose(y, 0, 1)
        y_norm2 = (y ** 2).sum(1).view(1, -1)

    dist2 = x_norm2 + y_norm2 - 2.0 * torch.mm(x, y_t)
    return torch.clamp(dist2, min=0)


def bag_distances(x, y):
    """Distance matrix between row-vector bags x and y (plain pairwise distance for a single row)."""
    if x.shape[0] == 1:
        return nnf.pairwise_distance(x, y)
    # eps because derivative of sqrt at 0 is nan .. but no gradient if vectors identical due to clamping
    return torch.sqrt(bag_euclidean_distances2(x, y) + 1e-8)
15 | """ 16 | 17 | def __init__(self, model_dir, device='cpu', batch_size=30, nworkers=1): 18 | """ 19 | """ 20 | self.model_dir = model_dir 21 | self.device = device 22 | self.batch_size = batch_size 23 | self.nworkers = nworkers 24 | self.model, self.args = load_model(model_dir, device) 25 | 26 | def precompute_descriptors(self, entries): 27 | """ 28 | Precompute pocket descriptors/features. 29 | 30 | :param entries: List of entries. Required keys: `protein`, `pocket`. 31 | :return: entries but with `descriptor` keys. 32 | """ 33 | 34 | pdb_list, point_list = [], [] 35 | 36 | with concurrent.futures.ProcessPoolExecutor() as executor: 37 | jobs = [executor.submit(center_from_pdb_file, entry['pocket']) for entry in entries] 38 | for job, entry in tqdm(zip(jobs, entries)): 39 | center = job.result() 40 | if center is not None: 41 | pdb_list.append(entry) 42 | point_list.append([center]) 43 | else: 44 | logger.warning('Pocket not found, skipping: ' + os.path.basename(entry['pocket'])) 45 | 46 | feats = load_and_precompute_point_feats(self.model, self.args, pdb_list, point_list, self.device, self.nworkers, self.batch_size) 47 | for entry, feat in zip(pdb_list, feats): 48 | entry['descriptor'] = feat 49 | return pdb_list 50 | 51 | def pair_match(self, entry_pairs): 52 | """ 53 | Computes matches between given pairs of pockets. 54 | 55 | :param entry_pairs: List of tuples. Required keys: `descriptors`. 
56 | :return: np.array, score vector (negative distance) 57 | """ 58 | 59 | featslist_A = [entry['descriptor'] for entry, _ in entry_pairs] 60 | featslist_B = [entry['descriptor'] for _, entry in entry_pairs] 61 | distances = match_precomputed_point_pairs(featslist_A, featslist_B) 62 | return -distances 63 | 64 | def complete_match(self, entries): 65 | 66 | featslist = [entry['descriptor'] for entry in entries] 67 | distances = match_precomputed_points_bipartite(featslist, None) 68 | return -distances 69 | 70 | def bipartite_match(self, entries_a, entries_b): 71 | 72 | featslist_A = [entry['descriptor'] for entry in entries_a] 73 | featslist_B = [entry['descriptor'] for entry in entries_b] 74 | distances = match_precomputed_points_bipartite(featslist_A, featslist_B) 75 | return -distances 76 | -------------------------------------------------------------------------------- /deeplytough/matchers/pocket_matcher.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | 4 | class PocketMatcher(object): 5 | """ 6 | Base class for pocket matcher 7 | """ 8 | 9 | @abstractmethod 10 | def bipartite_match(self, entries_a, entries_b): 11 | """ 12 | Computes all matches between pockets from `entries_a` and pockets from `entries_b`. 13 | 14 | :param entries_a: List of dicts. Required keys: `protein`, `pocket`. 15 | :param entries_b: List of dicts. Required keys: `protein`, `pocket`. 16 | :return: np.array, score matrix 17 | """ 18 | raise NotImplementedError 19 | 20 | @abstractmethod 21 | def pair_match(self, entry_pairs): 22 | """ 23 | Computes matches between given pairs of entries. 24 | 25 | :param entry_pairs: List of tuples of dicts. Required keys: `protein`, `pocket`. 26 | :return: np.array, score vector 27 | """ 28 | raise NotImplementedError 29 | 30 | @abstractmethod 31 | def complete_match(self, entries): 32 | """ 33 | Computes all matches between given `entries`. 
34 | 35 | :param entries: List of dicts. Required keys: `protein`, `pocket`. 36 | :return: np.array, score matrix 37 | """ 38 | raise NotImplementedError 39 | -------------------------------------------------------------------------------- /deeplytough/matchers/tough_officials.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from .pocket_matcher import PocketMatcher 4 | 5 | 6 | class ToughOfficials(PocketMatcher): 7 | """ 8 | Return the precomputed results for several methods in official Tough dataset repo 9 | """ 10 | 11 | def __init__(self, alg_name, score_column): 12 | self.scores = {} 13 | for cls in ['positive', 'negative']: 14 | with open(os.path.join( 15 | os.environ.get('STRUCTURE_DATA_DIR'), 'TOUGH-M1', f'{alg_name}-TOUGH-M1_{cls}.score')) as f: 16 | for line in f.readlines(): 17 | s = line.split() 18 | self.scores[s[0] + s[1]] = float(s[score_column]) 19 | 20 | def pair_match(self, entry_pairs): 21 | 22 | scores = np.full((len(entry_pairs)), np.nan) 23 | 24 | for i, (entry_a, entry_b) in enumerate(entry_pairs): 25 | score = self.scores.get(entry_a['code5'] + entry_b['code5'], None) 26 | if score is None: 27 | score = self.scores.get(entry_a['code5'] + entry_b['code5'], None) 28 | assert score is not None 29 | scores[i] = score 30 | 31 | return scores 32 | 33 | 34 | -------------------------------------------------------------------------------- /deeplytough/misc/cc_ligands.py: -------------------------------------------------------------------------------- 1 | stabilisers = { 2 | "B3P", "PGE", "6JZ", "15P", "PE3", "XPE", "7PE", "M2M", "13P", 3 | "3PP", "PX4", "3OL", "OC9", "AE3", "6JZ", "XPE", "211", "ODI", 4 | "DIA", "PG5", "CXE", "ME2", "P4G", "TOE", "PG5", "PE8", "ZPG", 5 | "PE3", "MXE" 6 | } 7 | 8 | excipients = { 9 | "SO4", "SUL", " CL", " BR", " CA", " MG", " NI", " MN", " CU", "PO4", 10 | " CD", "NH4", " CO", " NA", " K", " ZN", " FE", "AZI", "A", "Ad", "C", 11 | "Cd", 
"CD", "CD2", "G", "Gd", "T", "Td", "A", "Ar", "Cr", "G", "Gr", 12 | "U", "Ur", "YG", "I", "Ir", "CR", "CR2", "CR3", "CAC", "CO2", "CO3", 13 | "CYN", "FS4", "MO6", "NCO", "NO3", "SCN", "SF4", " SE", " PB", "AU", 14 | "AU1", "AU3", "BR", "BR1", "CA", "CA2", "CL", "CL1", "CMP", "CO", "CO3", 15 | "CPR", "CS", "CS1", "CU", "CU1", "CU2", "AG", "AG1", "AL", "AL3", "F", 16 | "F1", "FE", "FE2", "FE3", "IR", "IR3", "K", "K1", "KR", "FUC", "MAN", 17 | "GAL", "MAL", "NAG", "GOL", "MPD", "BGC", "PEG", "EDO", "GLC", "PG4", 18 | "BOG", "HTO", "ACX", "BMA", "FUC-a-L", "GAL-b-D", "GLC-b-D", "GCU", 19 | "GCU-b-D", "CEG", "CEG-b-D", "MAN-b-D", "NAG-b-D", "RIB", "FRC", "FRU", 20 | "XYS", "XLS", "C8E", "CE9", "CRY", "DOX", "EGL", "F6P", "NDG", "NGA", 21 | "P6G", "SIA", "SUC", "XYS", "1PE", "OLC", "POP", "MES", "EPE", "PYR", 22 | "GLC", "CIT", "FLC", "TAR", "HC4", "MYR", "HED", "DTT", "BME", "TRS", 23 | "MPD", "ABA", "ACE", "ACT", "CME", "CSD", "CSO", "DMS", "EOH", "FMT", 24 | "GTT", "HED", "IMD", "IOH", "IPA", "LDA", "LLP", "MYR", "PEP", "PYR", 25 | "PXY", "OXE", "TMT", "TMZ", "2CV", "PLQ", "TAM", "1PG", "12P", "XP4", 26 | "PL3", "PE4", "PEU", "MPG", "B8M", "BOM", "B7M", "2PE", "STE", "DME", 27 | "PLM", "PG0", "PE5", "PG6", "P33", "HEZ", "F23", "DTV", "SDS", "DTU", 28 | "DTD", "MRD", "MRY", "P33", "BU1", "LHG", "D10", "OCT", "LI1", "ETE", 29 | "TZZ", "DEP", "DKA", "OLA", "MRD", "ACD", "MLR", "POG", "BTB", "PC1", 30 | "ACY", " DT", "3GD", "MAE", "CA3", "144", "CP", "0KA", "A71", "UVW", 31 | "BET", "PBU", "UAP", "SER", "CDL", "CEY", "LMN", "J7Z", "DA", "SIN", 32 | " I", "PLC", "BME-BME", "FNE", "FUM", "MAK", " CP", "PAE", "DTL", "HLT", 33 | "ASC", "FPP", "FII", "D1D", "PCT", "TTN", "HDA", "EDO-EDO", "PGA", 34 | "XXD", "INS", "217", "BHL", "16D", "HSE", "OPE", "HCS", "SOR", "SKM", 35 | "KIV", "FCN", "TRA", "TRC", "MTL", "MZP", "KDG", "DHK" 36 | } 37 | 38 | saccharide = { 39 | "NAG", "MAN", "BMA", "FUC", "GAL", "BGC", "GLC", "NDG", "BOG", "SUC", "XYP", "FUL", 40 | "MAL", "GLA", "LMT", 
"A2G", "NGA", "F6P", "XYS", "LAT", "MMA", "DMU", "BNG", "RAM", 41 | "KDO", "CBI", "PRP", "TRE", "SGN", "FBP", "BDP", "IDS", "FRU", "ACR", "CAP", "AHR", 42 | "GCU", "DGD", "G6P", "ADA", "BCD", "LBT", "G2F", "S6P", "LMU", "IPT", "GCS", "FDP", 43 | "CTT", "CBS", "BHG", "5RP", "SGC", "RIB", "KDA", "SCR", "ORP", "MTT", "G6D", "ASG", 44 | "RIP", "NBG", "MAG", "GLO", "FCA", "GMH", "G1P", "AMU", "R5P", "GLP", "6PG", "XMM", 45 | "MGL", "BEM", "ACX", "XLS", "NAA", "GCD", "CTR", "16G", "NGS", "MBG", "LGU", "BGL", 46 | "PA5", "MFU", "GTR", "FU4", "2FP", "SGA", "RER", "GN1", "GLD", "GCV", "G16", "ARA", 47 | "AGL", "XUL", "RNS", "MAV", "HSX", "GYP", "GCO", "ASO", "ARB", "AAL", "149", "TDG", 48 | "RUB", "R1P", "N9S", "MAW", "M6P", "LGC", "IDY", "IDR", "GP1", "GAD", "DAF", "CTO", 49 | "BXP", "AGH", "ABE", "2FG", "147", "SHG", "PSJ", "MXY", "M8C", "GLF", "GCN", "FUB", 50 | "FSI", "FCB", "E4P", "DFX", "DDA", "CT3", "CBK", "10M", "TCB", "QPS", "GUP", "GS1", 51 | "GL0", "G4D", "FUD", "BDR", "ARE", "AIG", "SSG", "RUU", "RAF", "NTP", "NG6", "MAB", 52 | "MA3", "GP4", "GC4", "ERI", "ERE", "DVC", "DQR", "DLG", "DDL", "DAG", "BXY", "B9D", 53 | "AOS", "2M4", "2DG", "1GL", "0MK", "Z6J", "XBP", "SUS", "SIO", "RG1", "QDK", "NTO", 54 | "NHF", "NGC", "MN0", "MFB", "MDM", "MDA", "LOG", "LB2", "LAK", "KO2", "IDU", "IDG", 55 | "HSQ", "GCT", "G4S", "FXP", "FCT", "DR2", "DNO", "BXX", "BDG", "B4G", "B2G", "AXR", 56 | "ABL", "3CM", "2F8", "X2F", "UCD", "TYV", "TOC", "TOA", "TAG", "SUP", "SOL", "SOE", 57 | "SI3", "SGD", "ROR", "RF5", "REL", "RAT", "R2B", "PTQ", "PNW", "PNG", "NGY", "NGE", 58 | "NG1", "MRP", "MLB", "LSM", "LCN", "LAI", "L6S", "KBG", "HSY", "GU9", "GU8", "GU6", 59 | "GU5", "GU4", "GU3", "GU2", "GU1", "GU0", "GNX", "GLS", "G6S", "G3I", "FFC", "F1P", 60 | "EPG", "EBG", "DT6", "DSR", "DR4", "DR3", "DOM", "DGS", "CRA", "CR6", "CEG", "CDR", 61 | "BMX", "BBK", "B6D", "B0D", "AXP", "ARI", "AFP", "ABF", "7JZ", "5SP", "5GF", "3SA", 62 | "2GL", "289", "ZDM", "YO5", "XXR", "XLF", "XDP", "X5S", "X4S", 
"X1X", "VG1", "UDC", 63 | "TMX", "TMR", "TM9", "TM6", "TM5", "T6P", "SHB", "SG7", "SG6", "SG5", "SG4", "SF9", 64 | "SF6", "SDD", "SA0", "RST", "RPA", "RNT", "RGG", "RBL", "RB5", "RAO", "RAE", "R1X", 65 | "QV4", "PSV", "PNA", "P6P", "P53", "P3M", "OX2", "OPM", "NYT", "NXD", "NTF", "NM9", 66 | "NM6", "NGF", "NGB", "N1L", "MUG", "MMN", "MG5", "MDP", "MCU", "MAT", "MA2", "MA1", 67 | "M3M", "M1F", "LXZ", "LXB", "LVZ", "LTM", "LPK", "LOX", "LM2", "LDY", "LAG", "KTU", 68 | "KO1", "KFN", "KDM", "ISL", "IN1", "IDX", "IAB", "HSJ", "HSH", "HSG", "HS2", "HMS", 69 | "HDL", "H2P", "GYV", "GUZ", "GUF", "GTK", "GTH", "GQ4", "GQ2", "GQ1", "GPM", "GPH", 70 | "GLT", "GLG", "GL9", "GL7", "GL6", "GL5", "GL2", "GIV", "GFP", "GFG", "GE1", "GCW", 71 | "GC1", "GAC", "FIX", "FDQ", "F1X", "EJT", "EGA", "EBQ", "EAG", "E5G", "DRI", "DQQ", 72 | "DP5", "DLF", "DIG", "DFR", "DEL", "D6G", "CR1", "CGF", "C5X", "C4X", "C3X", "BXF", 73 | "BGS", "BGP", "B8D", "B16", "AOG", "ALL", "AFR", "AFO", "AFD", "AF1", "ACG", "ABD", 74 | "ABC", "AAO", "A1Q", "6SA", "5MM", "5DI", "50A", "4NN", "4CQ", "48Z", "46Z", "46M", 75 | "46D", "3ZW", "3MG", "3MF", "3LR", "3FM", "3DY", "3DO", "34V", "32O", "2M5", "2FL", 76 | "293", "291", "27C", "26Y", "26W", "26V", "26R", "26Q", "26M", "1S3", "1LL", "1JB", 77 | "1GN", "1BW", "18Y", "18T", "15L", "14T", "0YT", "0XY", "0V4", "0TS", "0NZ", "0BD", 78 | "045" 79 | } 80 | 81 | ignore_list = set() 82 | ignore_list.update(excipients) 83 | ignore_list.update(saccharide) 84 | ignore_list.update(stabilisers) 85 | -------------------------------------------------------------------------------- /deeplytough/misc/ligand_extract.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import Bio.PDB as PDB 4 | import numpy as np 5 | 6 | from misc.cc_ligands import ignore_list 7 | from misc.utils import NonUniqueStructureBuilder 8 | 9 | 10 | def residue_dist_to_ligand(protein_residue, ligand_residue): 11 | """Returns distance from the 
class NearLigandSelect(PDB.Select):
    """Bio.PDB selector keeping residues within a distance threshold of a given ligand residue."""

    def __init__(self, distance_threshold, ligand_residue, keep_lig_in_site, keep_water, keep_other_hets=False):
        self.threshold = distance_threshold
        self.ligand_res = ligand_residue
        self.keep_water = keep_water
        self.keep_lig = keep_lig_in_site
        self.keep_other_hets = keep_other_hets

    def accept_residue(self, residue):
        # The ligand itself is kept or dropped according to the flag
        if residue == self.ligand_res:
            return self.keep_lig
        # Other heteroatom residues are dropped unless explicitly kept
        if not self.keep_other_hets and residue.get_id()[0].strip() != '':
            return False
        if not self.keep_water and residue.resname == 'HOH':
            return False
        return residue_dist_to_ligand(residue, self.ligand_res) < self.threshold


class LigandOnlySelect(PDB.Select):
    """Bio.PDB selector keeping only the given ligand residue."""

    def __init__(self, ligand_residue):
        self.ligand_residue = ligand_residue

    def accept_residue(self, residue):
        # change this to False if you don't want the ligand
        return residue == self.ligand_residue


class ChainOnlySelect(PDB.Select):
    """Bio.PDB selector keeping only standard (non-het, non-water) chain residues."""

    def accept_residue(self, residue):
        return residue.get_id()[0].strip() == ''


def filter_unwanted_het_ids(het_list):
    """Removes HET codes present in the static ignore list (excipients, saccharides, stabilisers)."""
    return [het for het in het_list if het not in ignore_list]
class PocketFromLigandDetector:
    """
    Extracts pockets around a ligand (which is either part of the input PDB file, or already separated in
    a different file).
    """

    def __init__(self, distance_threshold=8.0, ligand_fname_pattern=None, include_het_resname=True,
                 save_clean_structure=False, keep_other_hets=True, min_lig_atoms=-1, allowed_lig_names=None):
        """
        :param distance_threshold: Max distance between residue and ligand
        :param ligand_fname_pattern: A tuple (old, new) used to obtain ligand's file name by replacing `old` with `new`
        :param include_het_resname: Append the HET residue name to output file names
        :param save_clean_structure: Also write a `<name>_clean` file with het/water residues stripped
        :param keep_other_hets: Keep heteroatom residues (other than the ligand) in the extracted site
        :param min_lig_atoms: Minimum atom count for a HET residue to qualify as a ligand (-1 disables)
        :param allowed_lig_names: Optional whitelist of HET residue names
        """
        self.distance_threshold = distance_threshold
        self.ligand_fname_pattern = ('', '') if ligand_fname_pattern is None else ligand_fname_pattern
        self.keep_water = False
        self.include_het_resname = include_het_resname
        self.save_clean_structure = save_clean_structure
        self.keep_other_hets = keep_other_hets
        self.min_lig_atoms = min_lig_atoms
        self.allowed_lig_names = allowed_lig_names

    def run_one(self, pdb_path, output_dir):
        """Extracts all ligand sites from one PDB file into `output_dir`."""
        # parse structure object (permissive flag but let's not lose any atoms by using a custom builder)
        parser = PDB.PDBParser(PERMISSIVE=1, QUIET=True, structure_builder=NonUniqueStructureBuilder())
        # only consider the first model in the pdb file
        structure = parser.get_structure('X', pdb_path)
        model = structure[0]

        # Get ligand (het) to extract the site around
        if self.ligand_fname_pattern[0]:
            ligand_path = pdb_path.replace(self.ligand_fname_pattern[0], self.ligand_fname_pattern[1])
            ligand = parser.get_structure('L', ligand_path)
            het_list = list(ligand.get_residues())
        else:
            # get het entries of interest (filter using static dictionaries)
            het_list = get_het_residues_from_pdb(model, remove_duplicates=False, min_lig_atoms=self.min_lig_atoms,
                                                 allowed_names=self.allowed_lig_names)

        # Setup a PDB writer and load protein (avoid shadowing the stdlib `io` module)
        pdb_writer = PDB.PDBIO()
        pdb_writer.set_structure(model)

        # create output directory and split input pdb_path
        os.makedirs(output_dir, exist_ok=True)
        name, ext = os.path.basename(pdb_path).rsplit('.', 1)

        for n, het in enumerate(het_list):

            # Set name of output site file
            if self.include_het_resname:
                site_name = f"{name}_site_{n+1}_{het.resname}.{ext}"
            else:
                site_name = f"{name}_site_{n+1}.{ext}"
            fname = os.path.join(output_dir, site_name)

            pdb_writer.save(fname, NearLigandSelect(self.distance_threshold, het, keep_lig_in_site=False,
                                                    keep_water=self.keep_water, keep_other_hets=self.keep_other_hets))

            # When the ligand came from the same file, also write it out separately
            if not self.ligand_fname_pattern[0]:
                if self.include_het_resname:
                    lig_name = f"{name}_lig_{n+1}_{het.resname}.{ext}"
                else:
                    lig_name = f"{name}_lig_{n+1}.{ext}"
                pdb_writer.save(os.path.join(output_dir, lig_name), LigandOnlySelect(het))

        if self.save_clean_structure:
            pdb_writer.save(os.path.join(output_dir, f'{name}_clean.{ext}'), ChainOnlySelect())
def failsafe_hull(coords):
    """
    Wrapper of ConvexHull which returns None if hull cannot be computed for given points (e.g. all colinear or too few)
    """
    pts = np.array(coords)
    if pts.shape[0] <= 3:
        return None
    try:
        return ConvexHull(pts)
    except QhullError as e:
        message = str(e)
        # Degenerate inputs are expected; anything else is a real error
        if 'hull precision error' in message or 'input is less than 3-dimensional' in message:
            return None
        raise


def hull_centroid_3d(hull):
    """
    The centroid of a 3D polytope (passes None through). Taken over from
    http://www.alecjacobson.com/weblog/?p=3854 and http://www2.imperial.ac.uk/~rn/centroid.pdf.
    For >nD ones, https://stackoverflow.com/questions/4824141/how-do-i-calculate-a-3d-centroid

    :param hull: scipy.spatial.ConvexHull
    :return: np.array centroid, or None
    """
    if hull is None:
        return None

    a = hull.points[hull.simplices[:, 0], :]
    b = hull.points[hull.simplices[:, 1], :]
    c = hull.points[hull.simplices[:, 2], :]
    normals = np.cross(b - a, c - a)

    # get consistent outer orientation (compensate for the lack of ordering in scipy's facets), assume a convex hull
    interior_point = np.mean(hull.points[hull.vertices, :], axis=0)
    orientation = np.sign(np.sum((a - interior_point) * normals, axis=1, keepdims=True))
    normals = normals * orientation

    vol = np.sum(normals * a) / 6
    return 1 / (2 * vol) * (1 / 24 * np.sum(normals * ((a + b) ** 2 + (b + c) ** 2 + (c + a) ** 2), axis=0))
def structure_to_coord(structure, allow_off_chain=False, allow_hydrogen=False):
    """Flattens a Bio.PDB structure into an (N, 3) array of atom coordinates."""
    coords = []
    for model in structure:
        for chain in model:
            # het/water "chains" have a blank id; skip them unless requested
            if not allow_off_chain and chain.get_id().strip() == '':
                continue
            for residue in chain:
                for atom in residue:
                    if not allow_hydrogen and atom.get_name()[0] == 'H':
                        continue
                    coords.append(atom.get_coord())
    return np.array(coords)


class NonUniqueStructureBuilder(PDB.StructureBuilder.StructureBuilder):
    """This makes PDB more forgiving by being able to load atoms with non-unique names within a residue."""

    @staticmethod
    def _number_to_3char_name(n):
        # Encode n in base 36 (A-Z then 0-9) as exactly three characters
        code = ''
        for _ in range(3):
            n, r = divmod(n, 36)
            code = (chr(ord('A') + r) if r < 26 else chr(ord('0') + r - 26)) + code
        assert n == 0, 'number cannot fit 3 characters'
        return code

    def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname, serial_number=None, element=None):
        # Retry with a synthetic unique name whenever the default builder
        # rejects a duplicate atom name within the residue.
        for attempt in range(10000):
            try:
                return super().init_atom(name, coord, b_factor, occupancy, altloc, fullname, serial_number, element)
            except PDB.PDBExceptions.PDBConstructionException:
                name = name[0] + self._number_to_3char_name(attempt)


def center_from_pdb_file(filepath):
    """ Returns the geometric center of a PDB-file structure """
    parser = PDB.PDBParser(PERMISSIVE=1, QUIET=True, structure_builder=NonUniqueStructureBuilder())
    try:
        pocket = parser.get_structure('pocket', filepath)
    except FileNotFoundError:
        return None
    coords = structure_to_coord(pocket, allow_off_chain=True, allow_hydrogen=True)
    if len(coords) == 0:
        return None
    if len(coords) > 3:
        # convex-hull centroid is robust to uneven atom density
        return hull_centroid_3d(failsafe_hull(coords))
    return np.mean(coords, axis=0)
def htmd_featurizer(pdb_entries, skip_existing=True):
    """
    Ensures that all entries have their HTMD featurization precomputed.

    Pipeline per entry: strip waters/heteroatoms with Biopython, convert the
    PDB to PDBQT with MGLTools' prepare_receptor4 (retrying with an
    obabel-cleaned input on failure), then save per-atom channels and
    coordinates into the entry's `protein_htmd` npz file.

    :param pdb_entries: Iterable of dicts with `protein` and `protein_htmd` paths
    :param skip_existing: If True, entries whose npz output already exists are skipped
    """
    # - note: this is massively hacky but the data also tends to be quite dirty...

    # - Mgltools is Python 2.5 only script destroying Python3 environments, so we have to call another conda env
    # - unaddressed warnings info: http://mgldev.scripps.edu/pipermail/mglsupport/2008-December/000091.html
    # - note: http://autodock.scripps.edu/faqs-help/how-to/how-to-prepare-a-receptor-file-for-autodock4
    # - note: http://mgldev.scripps.edu/pipermail/autodock/2008-April/003946.html
    mgl_command = 'source activate deeplytough_mgltools; pythonsh ' \
                  '$CONDA_PREFIX/MGLToolsPckgs/AutoDockTools/Utilities24/prepare_receptor4.py ' \
                  '-r {} -U nphs_lps_waters -A hydrogens'

    for entry in pdb_entries:
        pdb_path = os.path.abspath(entry['protein'])
        npz_path = os.path.abspath(entry['protein_htmd'])
        if skip_existing and os.path.exists(npz_path):
            continue

        logger.info(f'Pre-processing {pdb_path} with HTMD...')
        if not os.path.exists(pdb_path):
            logger.error(f'{pdb_path} not found, skipping its pre-preprocessing.')
            continue

        output_dir = os.path.dirname(npz_path)
        os.makedirs(output_dir, exist_ok=True)

        def compute_channels():
            # prepare_receptor4 sometimes emits `<name>_model1.pdbqt`; rename it first
            pdbqt_path = os.path.join(output_dir, os.path.basename(pdb_path)) + 'qt'
            if not os.path.exists(pdbqt_path) and os.path.exists(pdbqt_path.replace('.pdb', '_model1.pdb')):
                os.rename(pdbqt_path.replace('.pdb', '_model1.pdb'), pdbqt_path)
            mol = htmdmol.Molecule(pdbqt_path)

            # this no longer works (2/12/2021) – non trivial fix, replaced with earlier `remove_water_and_hets`
            # mol.filter('protein')

            # slaughtered getVoxelDescriptors()
            channels = htmdvox._getAtomtypePropertiesPDBQT(mol)
            sigmas = htmdvox._getRadii(mol)
            channels = sigmas[:, np.newaxis] * channels.astype(float)
            coords = mol.coords[:, :, mol.frame]

            np.savez(npz_path, channels=channels, coords=coords)

        with tempfile.TemporaryDirectory() as tmpdirname:
            # use biopython to remove all non-protein atoms
            pdb_path_tempdir1 = os.path.join(tmpdirname, os.path.basename(pdb_path))  # same name different dir
            remove_water_and_hets(pdb_path, pdb_path_tempdir1)

            # process pdb -> pdbqt (output written to `output_dir`)
            try:
                subprocess.run(['/bin/bash', '-ic', mgl_command.format(pdb_path_tempdir1)], cwd=output_dir, check=True)
            except Exception as err1:
                # Surface the first failure (previously `err1` was silently discarded)
                logger.warning(f'prepare_receptor4 failed on {pdb_path} ({err1}), retrying via obabel')
                try:
                    # Put input through obabel to handle some problematic formattings, its parser seems quite robust
                    # (could go directly to pdbqt with `-xr -xc -h` but somehow the partial charges are all zero)
                    with tempfile.TemporaryDirectory() as tmpdirname2:
                        pdb_path_tempdir2 = os.path.join(tmpdirname2, os.path.basename(pdb_path))
                        subprocess.run(['obabel', pdb_path_tempdir1, '-O', pdb_path_tempdir2, '-h'], check=True)
                        subprocess.run(['/bin/bash', '-ic', mgl_command.format(pdb_path_tempdir2)],
                                       cwd=output_dir, check=True)
                except Exception as err2:
                    logger.exception(err2)
                    continue

            # compute channels
            try:
                compute_channels()
            except Exception as err3:
                logger.exception(err3)
def voc_ap(rec, prec):
    """
    Compute VOC AP given precision and recall.
    Taken from https://github.com/marvis/pytorch-yolo2/blob/master/scripts/voc_eval.py
    Different from scikit's average_precision_score (https://github.com/scikit-learn/scikit-learn/issues/4577)

    :param rec: array-like of recall values (monotonically non-decreasing)
    :param prec: array-like of precision values, same length as `rec`
    :return: float, area under the interpolated precision-recall curve
    """
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # compute the precision envelope (make precision monotonically non-increasing)
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


@lru_cache()
def pdb_check_obsolete(pdb_code):
    """ Check the status of a pdb, if it is obsolete return the superceding PDB ID else return None """
    try:
        r = requests.get(f'https://www.ebi.ac.uk/pdbe/api/pdb/entry/status/{pdb_code}').json()
    except Exception:
        # narrowed from a bare `except:` -- network failures or bad JSON mean "status unknown"
        logger.info(f"Could not check obsolete status of {pdb_code}")
        return None
    if r[pdb_code][0]['status_code'] == 'OBS':
        pdb_code = r[pdb_code][0]['superceded_by'][0]
        return pdb_code
    else:
        return None


class RcsbPdbClusters:
    """Lookup of RCSB sequence-identity cluster IDs for (pdb code, chain) pairs."""

    def __init__(self, identity=30):
        # NOTE(review): assumes STRUCTURE_DATA_DIR is set; os.path.join below
        # would fail on None -- confirm against deployment environment
        self.cluster_dir = os.environ.get('STRUCTURE_DATA_DIR')
        self.identity = identity
        self.clusters = {}  # maps 'PDBCODE_CHAIN' -> cluster index (line number in the file)
        self._fetch_cluster_file()

    def _download_cluster_sets(self, cluster_file_path):
        """Download the RCSB bc-<identity>.out cluster definition file."""
        os.makedirs(os.path.dirname(cluster_file_path), exist_ok=True)
        # Note that the files changes frequently as do the ordering of cluster within
        request.urlretrieve(f'https://cdn.rcsb.org/resources/sequence/clusters/bc-{self.identity}.out', cluster_file_path)

    def _fetch_cluster_file(self):
        """ load cluster file if found else download and load """
        cluster_file_path = os.path.join(self.cluster_dir, f"bc-{self.identity}.out")
        logging.info(f"cluster file path: {cluster_file_path}")
        if not os.path.exists(cluster_file_path):
            logging.warning("Cluster definition not found, will download a fresh one.")
            logging.warning("However, this will very likely lead to silent incompatibilities with any old 'pdbcode_mappings.pickle' files! Please better remove those manually.")
            self._download_cluster_sets(cluster_file_path)

        # context manager closes the handle deterministically (was a leaked open())
        with open(cluster_file_path, 'rb') as cluster_file:
            for n, line in enumerate(cluster_file):
                # each line lists all members of one cluster, whitespace-separated
                for member_id in line.decode('ascii').split():
                    self.clusters[member_id] = n

    def get_seqclust(self, pdbCode, chainId, check_obsolete=True):
        """ Get sequence cluster ID for a pdbcode chain using RCSB mmseq2/blastclust predefined clusters """
        query_str = f"{pdbCode.upper()}_{chainId.upper()}"  # e.g. 1ATP_I
        seqclust = self.clusters.get(query_str, 'None')

        if check_obsolete and seqclust == 'None':
            superceded = pdb_check_obsolete(pdbCode)
            if superceded is not None:
                logging.info(f"Assigning cluster for obsolete entry via superceding: {pdbCode}->{superceded} {chainId}")
                return self.get_seqclust(superceded, chainId, check_obsolete=False)  # assumes chain is same in superceding entry
        if seqclust == 'None':
            logging.info(f"unable to assign cluster to {pdbCode}{chainId}")
        return seqclust
def get_cli_args():
    """Parse command-line arguments for the custom-dataset evaluation script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_subdir', type=str, default='custom', help='Dataset directory within $STRUCTURE_DATA_DIR')
    parser.add_argument('--output_dir', type=str, help='Output directory for result pickle')
    parser.add_argument('--alg', type=str, default='DeeplyTough', help='Algorithm type')
    parser.add_argument('--net', type=str, default='', help='DeeplyTough network filepath')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda:0')
    parser.add_argument('--nworkers', default=1, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--db_preprocessing', default=1, type=int, help='Bool: whether to run preprocessing for the dataset')

    return parser.parse_args()


def main():
    """Evaluate pocket-pair matching on a user-provided (custom) dataset.

    Writes a full-results pickle and a `key1,key2,score` CSV into --output_dir.
    """
    args = get_cli_args()

    database = Custom(args.dataset_subdir)

    if args.db_preprocessing:
        database.preprocess_once()

    # Retrieve structures
    entries = database.get_structures()

    # Get matcher and perform any necessary pre-computations
    if args.alg == 'DeeplyTough':
        matcher = DeeplyTough(args.net, device=args.device, batch_size=args.batch_size, nworkers=args.nworkers)
        entries = matcher.precompute_descriptors(entries)
    else:
        raise NotImplementedError

    # Evaluate pocket pairs
    results = database.evaluate_matching(entries, matcher)
    results['benchmark_args'] = args

    # Format output file names
    fname = f"Custom-{args.alg}-{os.path.basename(os.path.dirname(args.net))}.pickle"
    fname_txt = os.path.join(args.output_dir, fname.replace('.pickle', '.csv'))

    # Make sure output directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    # Write pickle (context manager closes the handle; was a leaked open())
    with open(os.path.join(args.output_dir, fname), 'wb') as f:
        pickle.dump(results, f)

    # Write csv results
    with open(fname_txt, 'w') as f:
        for p, s in zip(results['pairs'], results['scores']):
            f.write(f"{p[0]['key']},{p[1]['key']},{s}\n")

    # Done!
    print(f'Evaluation finished, see {fname_txt}')


if __name__ == '__main__':
    main()
def get_cli_args():
    """Parse command-line arguments for the ProSPECCTs benchmark script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir', type=str, help='Output directory for result pickle')
    parser.add_argument('--alg', type=str, default='DeeplyTough', help='Algorithm type')
    parser.add_argument('--net', type=str, default='', help='DeeplyTough network filepath')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda:0')
    parser.add_argument('--nworkers', default=1, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--dbname', type=str, default='all', help='Prospeccts dataset name (P1 .. P7, or all)')
    parser.add_argument('--db_preprocessing', default=0, type=int, help='Bool: whether to run preprocessing for the dataset')

    return parser.parse_args()


def main():
    """Run the ProSPECCTs benchmark over one or all of its sub-datasets.

    For each sub-dataset, writes a full-results pickle and a
    `code5,code5,score` CSV into --output_dir and prints the AUC.
    """
    args = get_cli_args()

    # BUGFIX: a single name must be wrapped in a list -- iterating the bare
    # string would yield its characters (e.g. 'P1' -> 'P', '1')
    dbnames = [args.dbname] if args.dbname != 'all' else Prospeccts.dbnames

    for dbname in dbnames:
        database = Prospeccts(dbname)

        if args.db_preprocessing:
            database.preprocess_once()

        # Retrieve structures
        entries = database.get_structures()

        # Get matcher and perform any necessary pre-computations
        if args.alg == 'DeeplyTough':
            matcher = DeeplyTough(args.net, device=args.device, batch_size=args.batch_size, nworkers=args.nworkers)
            entries = matcher.precompute_descriptors(entries)
        else:
            raise NotImplementedError

        # Evaluate pocket pairs
        results = database.evaluate_matching(entries, matcher)
        results['benchmark_args'] = args

        # Format output file names
        fname = f"Prospeccts-{args.alg}-{os.path.basename(os.path.dirname(args.net))}-{dbname}.pickle"

        # Make sure output directory exists
        os.makedirs(args.output_dir, exist_ok=True)

        # Write pickle (context manager closes the handle; was a leaked open())
        with open(os.path.join(args.output_dir, fname), 'wb') as f:
            pickle.dump(results, f)

        # Write csv results
        with open(os.path.join(args.output_dir, fname.replace('.pickle', '.csv')), 'w') as f:
            for p, s in zip(results['pairs'], results['scores']):
                f.write(f"{p[0]['code5']},{p[1]['code5']},{s}\n")

        # Done!
        print(f"Testing finished on {dbname}, AUC = {results['auc']}")


if __name__ == '__main__':
    main()
def get_cli_args():
    """Parse command-line arguments for the TOUGH-M1 benchmark script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir', type=str, help='Output directory for result pickle')
    parser.add_argument('--alg', type=str, default='DeeplyTough', help='Algorithm type')
    parser.add_argument('--net', type=str, default='', help='DeeplyTough network filepath')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda:0')
    parser.add_argument('--nworkers', default=1, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--cvfold', default=0, type=int, help='Fold left-out for testing in leave-one-out setting')
    parser.add_argument('--cvseed', default=7, type=int, help='Dataset shuffling seed')
    parser.add_argument('--num_folds', default=5, type=int, help='Num folds')
    parser.add_argument('--db_split_strategy', default='seqclust', help="pdb_folds|uniprot_folds|seqclust")
    parser.add_argument('--db_preprocessing', default=0, type=int, help='Bool: if 1, run preprocessing for the dataset')

    return parser.parse_args()


def main():
    """Run the TOUGH-M1 benchmark on the held-out test fold.

    Supports the DeeplyTough matcher as well as the precomputed 'official'
    baselines (G-LoSA, APoc, SiteEngine). Writes a full-results pickle and a
    `code5,code5,score` CSV into --output_dir and prints the AUC.
    """
    args = get_cli_args()

    database = ToughM1()

    if args.db_preprocessing:
        database.preprocess_once()

    # Retrieve structures (test fold only; train fold is discarded here)
    _, entries = database.get_structures_splits(args.cvfold, strategy=args.db_split_strategy,
                                                n_folds=args.num_folds, seed=args.cvseed)

    # Get matcher and perform any necessary pre-computations
    if args.alg == 'DeeplyTough':
        matcher = DeeplyTough(args.net, device=args.device, batch_size=args.batch_size, nworkers=args.nworkers)
        # sanity check: the network's training split must match the requested fold/seed
        if matcher.args.seed != args.cvseed or matcher.args.cvfold != args.cvfold:
            logger.warning('Likely not evaluating on the test set for this network')
        entries = matcher.precompute_descriptors(entries)
    elif args.alg == 'OfiGlosa':
        matcher = ToughOfficials('G-LoSA', 2)
    elif args.alg == 'OfiApoc':
        matcher = ToughOfficials('APoc', 2)
    elif args.alg == 'OfiSiteEngine':
        matcher = ToughOfficials('SiteEngine', 3)
    else:
        raise NotImplementedError

    # Evaluate pocket pairs
    results = database.evaluate_matching(entries, matcher)
    results['benchmark_args'] = args

    # Format output file names
    fname = f'ToughM1-{args.alg}-{os.path.basename(os.path.dirname(args.net))}.pickle'

    # Make sure output directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    # Write pickle (context manager closes the handle; was a leaked open())
    with open(os.path.join(args.output_dir, fname), 'wb') as f:
        pickle.dump(results, f)

    # Write csv results
    with open(os.path.join(args.output_dir, fname.replace('.pickle', '.csv')), 'w') as f:
        for p, s in zip(results['pairs'], results['scores']):
            f.write(f"{p[0]['code5']},{p[1]['code5']},{s}\n")

    # Done!
    print(f"Testing finished, AUC = {results['auc']}")


if __name__ == '__main__':
    main()
def get_cli_args():
    """Parse and post-process command-line arguments for training.

    Post-processing: `lr_steps` is literal_eval'd into a list, `start_epoch`
    is initialized to 0, and a 'TTTT' placeholder in `output_dir` is replaced
    with the current timestamp.
    """
    parser = argparse.ArgumentParser()

    # Optimization arguments
    parser.add_argument('--wd', default=5.0e-4, type=float, help='Weight decay')
    parser.add_argument('--lr', default=1e-3, type=float, help='Initial learning rate')
    parser.add_argument('--lr_decay', default=0.2, type=float, help='Multiplicative factor used on learning rate at `lr_steps`')
    parser.add_argument('--lr_steps', default='[100,150]', help='List of epochs where the learning rate is decreased by `lr_decay`')
    parser.add_argument('--momentum', default=0.9, type=float, help='Momentum for sgd')
    parser.add_argument('--epochs', default=151, type=int, help='Number of epochs to train. If <=0, only testing will be done.')
    parser.add_argument('--batch_size', default=16, type=int, help='Batch size')
    parser.add_argument('--batch_parts', default=1, type=int, help='Batch can be evaluated sequentially in multiple shards, >=1, very useful in low memory settings, though computation is not strictly equivalent due to batch normalization running statistics.')
    parser.add_argument('--optim', default='adam', help='Optimizer: sgd|adam')
    parser.add_argument('--max_train_samples', default=2500, type=int, help='Max train samples per epoch (good for large datasets)')
    parser.add_argument('--max_test_samples', default=100, type=int, help='Max test samples per epoch (good for large datasets)')

    # Experiment arguments
    parser.add_argument('--device', default='cuda:0', type=str)
    parser.add_argument('--nworkers', default=3, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--test_nth_epoch', default=1, type=int, help='Test each n-th epoch during training')
    parser.add_argument('--resume', default='', help='Loads a previously saved model.')

    # Dataset
    parser.add_argument('--output_dir', default='results', help='Directory to store results')
    parser.add_argument('--cvfold', default=0, type=int, help='Fold left-out for testing in leave-one-out setting')
    parser.add_argument('--num_folds', default=5, type=int, help='Num folds')
    parser.add_argument('--augm_rot', default=1, type=int, help='Training augmentation: Bool, random rotation')
    parser.add_argument('--augm_mirror_prob', default=0, type=float, help='Training augmentation: Probability of mirroring about axes')
    parser.add_argument('--augm_sampling_dist', default=2.0, type=float, help='Training augmentation: Max distance from fpocket centers')
    parser.add_argument('--augm_decoy_prob', default=0.1, type=float, help='Training augmentation: Probability of negative decoy')
    parser.add_argument('--patch_size', default=24, type=int, help='Patch size for training')
    parser.add_argument('--input_normalization', default=1, type=int, help='Bool: whether to normalize input statistics')
    parser.add_argument('--db_exclude_vertex', default='', type=str, help='Whether to exclude Vertex dataset proteins in the training fold: (|seqclust|uniprot|pdb)')
    parser.add_argument('--db_exclude_prospeccts', default='', type=str, help='Whether to exclude Prospeccts dataset proteins in the training fold: (|seqclust|uniprot|pdb)')
    parser.add_argument('--db_split_strategy', default='seqclust', help="pdb_folds|uniprot_folds|seqclust|none")
    parser.add_argument('--db_preprocessing', default=0, type=int, help='Bool: whether to run preprocessing for the dataset')
    parser.add_argument('--db_size_limit', default=0, type=int, help='Artificial restriction of database size, either on # pdbs (>0) or # pairs (<0)')

    # Model
    parser.add_argument('--model_config', default='se_16_16_16_16_7_3_2_batch_1,se_32_32_32_32_3_1_1_batch_1,se_48_48_48_48_3_1_2_batch_1,se_64_64_64_64_3_0_1_batch_1,se_256_0_0_0_3_0_2_batch_1,r,b,c_128_1', help='Defines the model as a sequence of layers.')
    parser.add_argument('--seed', default=1, type=int, help='Seed for random initialisation')
    parser.add_argument('--l2_normed_descriptors', default=1, type=int, help='L2-normalize descriptors/network outputs')
    parser.add_argument('--loss_margin', default=1.0, type=float, help='Margin in hinge losses')
    parser.add_argument('--stability_loss_weight', default=1.0, type=float, help='Weight of augmentation invariance loss')
    parser.add_argument('--stability_loss_squared', default=0, type=int, help='Augmentation invariance loss distances squared (1) or not (0)')

    args = parser.parse_args()
    args.start_epoch = 0
    args.lr_steps = ast.literal_eval(args.lr_steps)
    args.output_dir = args.output_dir.replace('TTTT', datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
    # gradient accumulation requires the batch to split evenly into parts
    assert args.batch_size % args.batch_parts == 0
    return args
def main():
    """Train the DeeplyTough descriptor network on the TOUGH-M1 dataset.

    Sets up datasets, model, optimizer and scheduler (optionally resuming from
    a checkpoint), then alternates training and periodic testing epochs,
    checkpointing the model to `<output_dir>/model.pth.tar` after each test.
    """
    args = get_cli_args()
    print('Will save to ' + args.output_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # record the exact command line for reproducibility (non-flags get quoted)
    with open(os.path.join(args.output_dir, 'cmdline.txt'), 'w') as f:
        f.write(" ".join(["'"+a+"'" if (len(a) == 0 or a[0] != '-') else a for a in sys.argv]))

    set_seed(args.seed)
    device = torch.device(args.device)
    writer = SummaryWriter(args.output_dir)

    train_dataset, test_dataset = create_tough_dataset(
        args, fold_nr=args.cvfold, n_folds=args.num_folds, seed=args.seed,
        exclude_Vertex_from_train=args.db_exclude_vertex, exclude_Prospeccts_from_train=args.db_exclude_prospeccts
    )
    logger.info('Train set size: %d, test set size: %d', len(train_dataset), len(test_dataset))

    # Create model and optimizer (or resume pre-existing)
    if args.resume != '':
        if args.resume == 'RESUME':
            # convenience alias: resume from this run's own checkpoint
            args.resume = args.output_dir + '/model.pth.tar'
        model, optimizer, scheduler = resume(args, train_dataset, device)
    else:
        model = create_model(args, train_dataset, device)
        if args.input_normalization:
            model.set_input_scaler(estimate_scaler(args, train_dataset, nsamples=200))
        optimizer = create_optimizer(args, model)
        scheduler = MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_decay)

    ############
    def train():
        """Run one training epoch; returns mean loss/distance statistics."""
        model.train()

        loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size//args.batch_parts,
                                             num_workers=args.nworkers, shuffle=True, drop_last=True,
                                             worker_init_fn=set_worker_seed)

        # progress bar only when not debugging (tqdm would interleave with debug logs)
        if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
            loader = tqdm(loader, ncols=100)

        loss_buffer, loss_stabil_buffer, pos_dist_buffer, neg_dist_buffer = [], [], [], []
        t0 = time.time()

        for bidx, batch in enumerate(loader):
            # cap the number of samples per epoch (useful for large datasets)
            if 0 < args.max_train_samples < bidx * args.batch_size//args.batch_parts:
                break
            t_loader = 1000*(time.time()-t0)

            inputs = batch['inputs'].to(device)  # dimensions: batch_size x (4 or 2) x 24 x 24 x 24
            targets = batch['targets'].to(device)

            # gradient accumulation: zero grads only at the start of each group of `batch_parts` shards
            if bidx % args.batch_parts == 0:
                optimizer.zero_grad()
                t0 = time.time()

            # fold the pair/tuple dimension into the batch dimension for the network, then restore it
            outputs = model(inputs.view(-1, *inputs.shape[2:]))
            outputs = outputs.view(*inputs.shape[:2], -1)
            loss_joint, loss_match, loss_stabil, pos_dist, neg_dist = compute_loss(args, outputs, targets, True)
            loss_joint.backward()

            # step only after the last shard of the accumulated batch
            if bidx % args.batch_parts == args.batch_parts-1:
                if args.batch_parts > 1:
                    # average the accumulated gradients over the shards
                    for p in model.parameters():
                        p.grad.data.div_(args.batch_parts)
                optimizer.step()

            t_trainer = 1000*(time.time()-t0)
            loss_buffer.append(loss_match.item())
            # loss_stabil may be a plain 0 (int) when the stability loss is disabled
            loss_stabil_buffer.append(loss_stabil.item() if isinstance(loss_stabil, torch.Tensor) else loss_stabil)
            pos_dist_buffer.extend(pos_dist.cpu().numpy().tolist())
            neg_dist_buffer.extend(neg_dist.cpu().numpy().tolist())
            logger.debug('Batch loss %f, Loader time %f ms, Trainer time %f ms.', loss_buffer[-1], t_loader, t_trainer)
            t0 = time.time()

        ret = {'loss': np.mean(loss_buffer), 'loss_stabil': np.mean(loss_stabil_buffer),
               'pos_dist': np.mean(pos_dist_buffer), 'neg_dist': np.mean(neg_dist_buffer)}
        return ret

    ############
    def test():
        """Run one evaluation pass; returns mean loss/distance statistics."""
        model.eval()

        loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size//args.batch_parts,
                                             num_workers=args.nworkers, worker_init_fn=set_worker_seed)

        if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
            loader = tqdm(loader, ncols=100)

        loss_buffer, loss_stabil_buffer, pos_dist_buffer, neg_dist_buffer = [], [], [], []

        with torch.no_grad():
            for bidx, batch in enumerate(loader):
                if 0 < args.max_test_samples < bidx * args.batch_size//args.batch_parts:
                    break
                inputs = batch['inputs'].to(device)
                targets = batch['targets'].to(device)

                outputs = model(inputs.view(-1, *inputs.shape[2:]))
                outputs = outputs.view(*inputs.shape[:2], -1)
                loss_joint, loss_match, loss_stabil, pos_dist, neg_dist = compute_loss(args, outputs, targets, False)

                loss_buffer.append(loss_match.item())
                loss_stabil_buffer.append(loss_stabil.item() if isinstance(loss_stabil, torch.Tensor) else loss_stabil)
                pos_dist_buffer.extend(pos_dist.cpu().numpy().tolist())
                neg_dist_buffer.extend(neg_dist.cpu().numpy().tolist())

        return {'loss': np.mean(loss_buffer), 'loss_stabil': np.mean(loss_stabil_buffer),
                'pos_dist': np.mean(pos_dist_buffer), 'neg_dist': np.mean(neg_dist_buffer)}

    ############
    # Training loop
    for epoch in range(args.start_epoch, args.epochs):
        print(f'Epoch {epoch}/{args.epochs} ({args.output_dir}):')
        # scheduler stepped before the epoch: matches pre-1.1 PyTorch semantics
        # (requirements.txt pins torch==1.0)
        scheduler.step()

        train_stats = train()
        for k, v in train_stats.items():
            writer.add_scalar('train/' + k, v, epoch)
        print(f"-> Train distances: p {train_stats['pos_dist']}, n {train_stats['neg_dist']}, \tLoss: {train_stats['loss']}")

        if (epoch+1) % args.test_nth_epoch == 0 or epoch+1 == args.epochs:
            test_stats = test()
            for k, v in test_stats.items():
                writer.add_scalar('test/' + k, v, epoch)
            print(f"-> Test distances: p {test_stats['pos_dist']}, n {test_stats['neg_dist']}, \tLoss: {test_stats['loss']}")

            torch.save({'epoch': epoch + 1, 'args': args, 'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict()},
                       os.path.join(args.output_dir, 'model.pth.tar'))

        # abort on divergence
        if math.isnan(train_stats['loss']):
            break
def compute_loss(args, outputs, targets, training):
    """
    Computes both stability and contrastive loss.

    :param args: namespace with `l2_normed_descriptors`, `stability_loss_weight`,
        `stability_loss_squared` and `loss_margin`
    :param outputs: descriptors; squeezed to (batch, pair, descriptor_dim)
    :param targets: 0/1 match labels; squeezed to (batch,)
    :param training: if True (and the weight is positive), the stability loss
        over perturbed duplicates is included and the duplicates are stripped
    :return: (joint loss, contrastive loss, stability loss,
              positive-pair distances, negative-pair distances)
    """
    # NOTE(review): squeeze() would also drop a batch dimension of size 1 and
    # trip the assert below -- callers appear to always use batch_size > 1, confirm
    outputs = torch.squeeze(outputs)
    targets = torch.squeeze(targets)
    assert outputs.dim() == 3 and targets.dim() == 1

    if args.l2_normed_descriptors:
        outputs = nnf.normalize(outputs, p=2, dim=2)

    # Stability loss
    if training and args.stability_loss_weight > 0:
        # every odd entry in the batch is a perturbed version of the previous even entry
        a = outputs[:, 0::2].view(-1, outputs.shape[-1])
        b = outputs[:, 1::2].view(-1, outputs.shape[-1])
        if args.stability_loss_squared:
            loss_stabil = nnf.pairwise_distance(a, b).pow(2).mean()
        else:
            loss_stabil = nnf.pairwise_distance(a, b).mean()
        # continue with just the even ones
        outputs = outputs[:, 0::2]
    else:
        loss_stabil = 0

    # Contrastive loss
    assert outputs.shape[1] == 2
    dists = nnf.pairwise_distance(outputs[:, 0], outputs[:, 1]).view(-1)

    # hinge: pull matching pairs together, push non-matching beyond the margin
    pos_loss = dists.pow(2)
    neg_loss = torch.clamp(args.loss_margin - dists, min=0).pow(2)
    loss_match = torch.sum(pos_loss * targets + neg_loss * (1 - targets)) / targets.numel()

    loss_joint = loss_match + args.stability_loss_weight * loss_stabil
    return loss_joint, loss_match, loss_stabil, dists[targets > 0.5].detach(), dists[targets < 0.5].detach()


def resume(args, dataset, device):
    """
    Loads model and optimizer state from a previous checkpoint.

    The model is rebuilt from the *checkpointed* args so the architecture
    matches the stored weights; optimizer/scheduler use the current args.
    """
    print(f"=> loading checkpoint '{args.resume}'")
    checkpoint = torch.load(args.resume, map_location=str(device))

    model = create_model(checkpoint['args'], dataset, device)
    model.load_state_dict(checkpoint['state_dict'])

    optimizer = create_optimizer(args, model)
    optimizer.load_state_dict(checkpoint['optimizer'])
    args.start_epoch = checkpoint['epoch']

    scheduler = MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_decay)
    scheduler.load_state_dict(checkpoint['scheduler'])

    return model, optimizer, scheduler


def create_optimizer(args, model):
    """Create the optimizer selected by `args.optim` ('sgd' or 'adam').

    :raises ValueError: for any other value of `args.optim`
    """
    if args.optim == 'sgd':
        return optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.wd)
    elif args.optim == 'adam':
        return optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
    # BUGFIX: previously fell through and returned None, deferring the crash
    # to the first optimizer call; fail fast with a clear message instead
    raise ValueError(f"Unknown optimizer '{args.optim}' (expected 'sgd' or 'adam')")


def set_seed(seed):
    """Seed python, numpy and torch RNGs for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


def set_worker_seed(worker_id):
    """DataLoader worker init: derive the numpy seed from torch's per-worker seed."""
    np.random.seed(torch.initial_seed() % (2**32 - 1))


def estimate_scaler(args, train_dataset, nsamples):
    """Estimate per-channel input normalization statistics from ~`nsamples` training patches."""
    logger.info('Estimating dataset normalization')
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    bidx = 0
    with tqdm(total=nsamples) as pbar:
        while True:
            # loader is recreated so sampling continues even if the dataset yields fewer than nsamples
            loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.nworkers,
                                                 shuffle=True, drop_last=True, worker_init_fn=set_worker_seed)
            for batch in loader:
                inputs = batch['inputs'].view(-1, *batch['inputs'].shape[2:])
                assert inputs.dim() == 5
                # flatten voxels to (num_voxels, num_channels) for the scaler
                voxels = inputs.transpose(1, 4).contiguous().view(-1, inputs.shape[1]).numpy()
                scaler.partial_fit(voxels)

                bidx += inputs.shape[0]
                pbar.update(inputs.shape[0])
                if bidx >= nsamples:
                    return scaler


if __name__ == "__main__":
    main()
def get_cli_args():
    """Parse command-line arguments for the Vertex benchmark script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir', type=str, help='Output directory for result pickle')
    parser.add_argument('--alg', type=str, default='DeeplyTough', help='Algorithm type')
    parser.add_argument('--net', type=str, default='', help='DeeplyTough network filepath')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda:0')
    parser.add_argument('--nworkers', default=1, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--db_preprocessing', default=0, type=int, help='Bool: whether to run preprocessing for the dataset')

    return parser.parse_args()


def main():
    """Run the Vertex benchmark.

    Writes a full-results pickle (including per-entry descriptors) and a
    `pair0,pair1,score` CSV into --output_dir and prints the AUC.
    """
    args = get_cli_args()

    database = Vertex()

    if args.db_preprocessing:
        database.preprocess_once()

    # Retrieve structures
    entries = database.get_structures()

    # Get matcher and perform any necessary pre-computations
    if args.alg == 'DeeplyTough':
        matcher = DeeplyTough(args.net, device=args.device, batch_size=args.batch_size, nworkers=args.nworkers)
        entries = matcher.precompute_descriptors(entries)
    else:
        raise NotImplementedError

    # Evaluate pocket pairs
    results = database.evaluate_matching(entries, matcher)
    results['benchmark_args'] = args
    results['entries'] = entries  # includes descriptors

    # Format output file names
    fname = f"Vertex-{args.alg}-{os.path.basename(os.path.dirname(args.net))}.pickle"

    # Make sure output directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    # Write pickle (context manager closes the handle; was a leaked open())
    with open(os.path.join(args.output_dir, fname), 'wb') as f:
        pickle.dump(results, f)

    # Write csv results
    with open(os.path.join(args.output_dir, fname.replace('.pickle', '.csv')), 'w') as f:
        for p, s in zip(results['pairs'], results['scores']):
            f.write(f'{p[0]},{p[1]},{s}\n')

    # Done!
    print(f"Testing finished, AUC = {results['auc']}")


if __name__ == '__main__':
    main()
64 | print(f"Testing finished, AUC = {results['auc']}") 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /networks/deeplytough_prospeccts.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenevolentAI/DeeplyTough/7536c5bb7d4e1e7e5d4f6cfacd1a437db03596e9/networks/deeplytough_prospeccts.pth.tar -------------------------------------------------------------------------------- /networks/deeplytough_toughm1_test.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenevolentAI/DeeplyTough/7536c5bb7d4e1e7e5d4f6cfacd1a437db03596e9/networks/deeplytough_toughm1_test.pth.tar -------------------------------------------------------------------------------- /networks/deeplytough_vertex.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenevolentAI/DeeplyTough/7536c5bb7d4e1e7e5d4f6cfacd1a437db03596e9/networks/deeplytough_vertex.pth.tar -------------------------------------------------------------------------------- /overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenevolentAI/DeeplyTough/7536c5bb7d4e1e7e5d4f6cfacd1a437db03596e9/overview.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.2 2 | tqdm==4.36.1 3 | tensorboardx==1.9 4 | scikit-learn==0.20.1 5 | torch==1.0 6 | scipy==1.1.0 7 | pandas==1.1.4 8 | transforms3d==0.3.1 9 | matplotlib==3.1.1 10 | requests==2.22.0 11 | cython==0.29.13 12 | numba==0.43.1 13 | llvmlite==0.28 14 | biopython==1.72 15 | mdtraj==1.9.3 16 | natsort==8.0.0 17 | periodictable==1.6.0 18 | 
-------------------------------------------------------------------------------- /results/Prospeccts-DeeplyTough-deeplytough_prospeccts-P6.2.csv: -------------------------------------------------------------------------------- 1 | 1eyn,1ow4,-0.1210966557264328 2 | 2ans,1eyn,-0.08025316148996353 3 | 2ans,1ow4,-0.07656160742044449 4 | 3fty,1w7h,-0.40553945302963257 5 | 1vyg,1diy,-0.22101353108882904 6 | 3bra,4n7c,-0.7446341514587402 7 | 3cf9,4hkk,-0.3935973346233368 8 | 2qre,1m9n,-0.15306711196899414 9 | 3hig,2gby,-0.5969905257225037 10 | 2rh1,2q6h,-0.4271696209907532 11 | 2wyd,2ovd,-0.21460622549057007 12 | 4nyq,4ia6,-0.31131836771965027 13 | 2g0l,4hki,-0.6067702770233154 14 | 2ph9,1dx6,-0.41628697514533997 15 | 1mv9,1fdq,-0.6037502884864807 16 | 2wd9,3p6h,-0.10641007870435715 17 | 1e6q,1nok,-0.6131260991096497 18 | 2b50,4bvm,-0.46719327569007874 19 | 2x8z,4dpr,-0.2861819863319397 20 | 4hcy,4mhw,-0.06726853549480438 21 | 2vyv,3ew5,-0.08758662641048431 22 | 4kcx,4kd1,-0.768419623374939 23 | 1xql,1pb9,-0.06042586266994476 24 | 2yyj,2yaj,-0.5475000143051147 25 | 1ve7,2i6p,-0.7137326002120972 26 | 2hkk,3pah,-0.7184109687805176 27 | 1b66,1sep,-0.532262921333313 28 | 2nsl,2gqs,-0.062365081161260605 29 | 3ln1,1oq5,-0.5009394288063049 30 | 3dds,2a3b,-0.339447557926178 31 | 3ebp,3blr,-0.3449978232383728 32 | 1pa9,1e2s,-0.1828428953886032 33 | 1mxh,1lcb,-0.3812151849269867 34 | 1mxh,1rf7,-0.12110111862421036 35 | 1rf7,1lcb,-0.4814496636390686 36 | 3ed0,3bqc,-0.15430866181850433 37 | 4cus,3kpu,-0.6722819209098816 38 | 1z9y,2xn5,-0.8759850263595581 39 | 2v6t,2fbz,-0.21715952455997467 40 | 2oyf,4hm0,-0.3283771872520447 41 | 4lzr,4o2b,-0.8234207630157471 42 | 3hlg,1t02,-0.4574362337589264 43 | 3hbg,3etr,-0.059538986533880234 44 | 3hbf,2o63,-0.13629847764968872 45 | 1eyq,2uxu,-0.24603182077407837 46 | 3wl8,2wg9,-0.518446683883667 47 | 3eau,2q1v,-0.9179725646972656 48 | 1tx0,1br6,-0.10462214052677155 49 | 3b00,3p73,-0.6607016921043396 50 | 2o73,2fxl,-0.0228722020983696 51 | 
2otf,2nuv,-0.9171537160873413 52 | 3tvl,4odj,-0.06630491465330124 53 | 3gcl,1oxr,-0.2180204689502716 54 | 2uy4,3hs4,-0.3475250005722046 55 | 2qvd,3d6y,-0.4596768319606781 56 | 1qhy,4cla,-0.7986555099487305 57 | 3t4k,4jhi,-0.6516141295433044 58 | 1s2c,2pix,-0.8661952018737793 59 | 2p1q,2oyf,-0.3495883643627167 60 | 2p1q,4hm0,-0.2747463285923004 61 | 1s4m,1he5,-0.028176935389637947 62 | 2fu7,2lig,-0.8264271020889282 63 | -------------------------------------------------------------------------------- /results/Prospeccts-DeeplyTough-deeplytough_prospeccts-P6.csv: -------------------------------------------------------------------------------- 1 | 1eyn,1ow4,-0.1210966557264328 2 | 2ans,1eyn,-0.08025316148996353 3 | 2ans,1ow4,-0.07656160742044449 4 | 3fty,1w7h,-0.40553945302963257 5 | 1vyg,1diy,-0.22101353108882904 6 | 3bra,4n7c,-0.7446341514587402 7 | 3cf9,4hkk,-0.3935973346233368 8 | 2qre,1m9n,-0.15306711196899414 9 | 3hig,2gby,-0.5351876020431519 10 | 2rh1,2q6h,-0.4271696209907532 11 | 2wyd,2ovd,-0.21460622549057007 12 | 4nyq,4ia6,-0.31131836771965027 13 | 2g0l,4hki,-0.6067702770233154 14 | 2ph9,1dx6,-0.41628697514533997 15 | 1mv9,1fdq,-0.6037502884864807 16 | 2wd9,3p6h,-0.10641007870435715 17 | 1e6q,1nok,-0.6131260991096497 18 | 2b50,4bvm,-0.46719327569007874 19 | 2x8z,4dpr,-0.2861819863319397 20 | 4hcy,4mhw,-0.06726853549480438 21 | 2vyv,3ew5,-0.055992886424064636 22 | 4kcx,4kd1,-0.768419623374939 23 | 1xql,1pb9,-0.06532666087150574 24 | 2yyj,2yaj,-0.5475000143051147 25 | 1ve7,2i6p,-0.7137326002120972 26 | 2hkk,3pah,-0.7184109687805176 27 | 1b66,1sep,-0.532262921333313 28 | 2nsl,2gqs,-0.062365081161260605 29 | 3ln1,1oq5,-0.5009394288063049 30 | 3dds,2a3b,-0.339447557926178 31 | 3ebp,3blr,-0.3087920844554901 32 | 1pa9,1e2s,-0.1828428953886032 33 | 1mxh,1lcb,-0.3812151849269867 34 | 1mxh,1rf7,-0.12110111862421036 35 | 1rf7,1lcb,-0.4814496636390686 36 | 3ed0,3bqc,-0.15430866181850433 37 | 4cus,3kpu,-0.6722819209098816 38 | 1z9y,2xn5,-0.8759850263595581 39 | 
2v6t,2fbz,-0.21715952455997467 40 | 2oyf,4hm0,-0.3283771872520447 41 | 4lzr,4o2b,-0.8234207630157471 42 | 3hlg,1t02,-0.4574362337589264 43 | 3hbg,3etr,-0.059538986533880234 44 | 3hbf,2o63,-0.13629847764968872 45 | 1eyq,2uxu,-0.24603182077407837 46 | 3wl8,2wg9,-0.518446683883667 47 | 3eau,2q1v,-0.9179725646972656 48 | 1tx0,1br6,-0.10462214052677155 49 | 3b00,3p73,-0.6607016921043396 50 | 2o73,2fxl,-0.022316182032227516 51 | 2otf,2nuv,-0.9171537160873413 52 | 3tvl,4odj,-0.06630491465330124 53 | 3gcl,1oxr,-0.21801893413066864 54 | 2uy4,3hs4,-0.3475250005722046 55 | 2qvd,3d6y,-0.4596768319606781 56 | 1qhy,4cla,-0.7986555099487305 57 | 3t4k,4jhi,-0.6516141295433044 58 | 1s2c,2pix,-0.8661952018737793 59 | 2p1q,2oyf,-0.3495883643627167 60 | 2p1q,4hm0,-0.2747463285923004 61 | 1s4m,1he5,-0.028176935389637947 62 | 2fu7,2lig,-0.8264271020889282 63 | --------------------------------------------------------------------------------