├── .dockerignore ├── Dockerfile ├── LICENCE ├── README.md ├── datasets └── custom │ ├── 1a05B │ ├── 1a05B.pdb │ └── pockets │ │ ├── pocket0_atm.pdb │ │ ├── pocket0_vert.pqr │ │ ├── pocket1_atm.pdb │ │ ├── pocket1_vert.pqr │ │ ├── pocket2_atm.pdb │ │ ├── pocket2_vert.pqr │ │ ├── pocket3_atm.pdb │ │ ├── pocket3_vert.pqr │ │ ├── pocket4_atm.pdb │ │ ├── pocket4_vert.pqr │ │ ├── pocket5_atm.pdb │ │ ├── pocket5_vert.pqr │ │ ├── pocket6_atm.pdb │ │ └── pocket6_vert.pqr │ ├── 1a9t │ ├── 1a9t.pdb │ ├── 1a9t_clean.pdb │ ├── 1a9t_lig_1.pdb │ └── 1a9t_site_1.pdb │ └── pairs.csv ├── datasets_downloader.sh ├── deeplytough ├── datasets │ ├── __init__.py │ ├── custom.py │ ├── prospeccts.py │ ├── toughm1.py │ └── vertex.py ├── engine │ ├── datasets.py │ ├── models.py │ └── predictor.py ├── matchers │ ├── __init__.py │ ├── deeply_tough.py │ ├── pocket_matcher.py │ └── tough_officials.py ├── misc │ ├── cc_ligands.py │ ├── ligand_extract.py │ └── utils.py └── scripts │ ├── custom_evaluation.py │ ├── prospeccts_benchmark.py │ ├── toughm1_benchmark.py │ ├── train.py │ └── vertex_benchmark.py ├── networks ├── deeplytough_prospeccts.pth.tar ├── deeplytough_toughm1_test.pth.tar └── deeplytough_vertex.pth.tar ├── overview.png ├── requirements.txt └── results ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P1.2.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P1.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P2.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P3.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P4.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P5.2.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P5.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P6.2.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P6.csv ├── Prospeccts-DeeplyTough-deeplytough_prospeccts-P7.csv ├── ToughM1-DeeplyTough-deeplytough_toughm1_test.csv └── Vertex-DeeplyTough-deeplytough_vertex.csv /.dockerignore: 
-------------------------------------------------------------------------------- 1 | datasets -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:9.0-cudnn7-runtime-ubuntu16.04 2 | SHELL ["/bin/bash", "-c"] 3 | 4 | # APT dependencies 5 | RUN apt-get update && apt-get install -y \ 6 | apt-utils \ 7 | bzip2 \ 8 | ca-certificates \ 9 | git \ 10 | curl \ 11 | sysstat \ 12 | wget \ 13 | unzip \ 14 | # for fpocket 15 | libnetcdf-dev && \ 16 | apt-get clean 17 | 18 | RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh -O ~/miniconda.sh && \ 19 | /bin/bash ~/miniconda.sh -b -p /miniconda && \ 20 | rm ~/miniconda.sh && \ 21 | /miniconda/bin/conda clean -tipsy && \ 22 | ln -s /miniconda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ 23 | echo ". /miniconda/etc/profile.d/conda.sh" >> ~/.bashrc && \ 24 | echo "conda activate base" >> ~/.bashrc 25 | ENV PATH=/miniconda/bin:${PATH} 26 | 27 | # setup python 3 env 28 | RUN conda update -y -q conda && \ 29 | conda create -y -n deeplytough python=3.6 30 | 31 | # setup python 2 env 32 | RUN conda create -y -n deeplytough_mgltools python=2.7 33 | RUN conda install -y -n deeplytough_mgltools -c bioconda mgltools=1.5.6 34 | 35 | # Source code 36 | ADD . /app 37 | WORKDIR /app 38 | ENV PYTHONPATH=/app/deeplytough:$PYTHONPATH 39 | 40 | # htmd 41 | RUN apt-get -y install openbabel 42 | RUN source activate deeplytough; \ 43 | curl -LO https://github.com/Acellera/htmd/archive/refs/tags/1.13.10.tar.gz && \ 44 | tar -xvzf 1.13.10.tar.gz && rm 1.13.10.tar.gz && cd htmd-1.13.10 && \ 45 | python setup.py install && \ 46 | cd .. 
&& \ 47 | rm -rf htmd-1.13.10; 48 | 49 | 50 | RUN source activate deeplytough; \ 51 | pip install --upgrade pip; \ 52 | pip install --no-cache-dir -r /app/requirements.txt && \ 53 | pip install --ignore-installed llvmlite==0.28 54 | 55 | # rot covariant convolutions (includes also the 'experiments' code) 56 | RUN source activate deeplytough; \ 57 | git clone https://github.com/mariogeiger/se3cnn && \ 58 | cd se3cnn && \ 59 | git reset --hard 6b976bea4ea17e1bd5655f0f030c6e2bb1637b57 && \ 60 | mv experiments se3cnn; sed -i "s/exclude=\['experiments\*'\]//g" setup.py && \ 61 | python setup.py install && \ 62 | cd .. && \ 63 | rm -rf se3cnn; 64 | RUN source activate deeplytough; \ 65 | git clone https://github.com/AMLab-Amsterdam/lie_learn && \ 66 | cd lie_learn && python setup.py install && cd .. && rm -rf lie_learn 67 | 68 | # fpocket2 69 | RUN curl -LO -k https://netcologne.dl.sourceforge.net/project/fpocket/fpocket2.tar.gz && \ 70 | tar -xvzf fpocket2.tar.gz && rm fpocket2.tar.gz && cd fpocket2 && \ 71 | sed -i 's/\$(LFLAGS) \$\^ -o \$@/\$\^ -o \$@ \$(LFLAGS)/g' makefile && make && \ 72 | mv bin/fpocket bin/fpocket2 && mv bin/dpocket bin/dpocket2 && mv bin/mdpocket bin/mdpocket2 && mv bin/tpocket bin/tpocket2 73 | ENV PATH=/app/fpocket2/bin:${PATH} 74 | -------------------------------------------------------------------------------- /LICENCE: -------------------------------------------------------------------------------- 1 | (c) BenevolentAI Limited 2019. All rights reserved. 2 | For licensing enquiries, please contact hello@benevolent.ai 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeeplyTough 2 | 3 | This is the official PyTorch implementation of our paper *DeeplyTough: Learning Structural Comparison of Protein Binding Sites*, available from <https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b00554>. 
4 | 5 | ![DeeplyTough overview figure](overview.png?raw=true "DeeplyTough overview figure.") 6 | 7 | ## Setup 8 | 9 | ### Code setup 10 | 11 | The software is ready for Docker: the image can be created from `Dockerfile` by running `docker build -t deeplytough .` (image size ~4.7GB so you may have to increase the disk space available to docker). The DeeplyTough tool is then accessible within `deeplytough` conda environment inside the container with `source activate deeplytough`. 12 | 13 | Alternatively, environment `deeplytough` can be created inside local [conda](https://conda.io/en/latest/miniconda.html) by executing the following steps from the root of this repository (linux only): 14 | 15 | ```bash 16 | # create new python 3 env and activate 17 | conda create -y -n deeplytough python=3.6 18 | conda activate deeplytough 19 | 20 | # install legacy version of htmd from source 21 | curl -LO https://github.com/Acellera/htmd/archive/refs/tags/1.13.10.tar.gz && \ 22 | tar -xvzf 1.13.10.tar.gz && rm 1.13.10.tar.gz && cd htmd-1.13.10 && \ 23 | python setup.py install && \ 24 | cd .. && \ 25 | rm -rf htmd-1.13.10; 26 | 27 | # install remaining python3 reqs 28 | apt-get -y install openbabel 29 | pip install --upgrade pip && pip install -r requirements.txt && pip install --ignore-installed llvmlite==0.28 30 | 31 | # install legacy se3nn library from source 32 | git clone https://github.com/mariogeiger/se3cnn && cd se3cnn && git reset --hard 6b976bea4ea17e1bd5655f0f030c6e2bb1637b57 && mv experiments se3cnn; sed -i "s/exclude=\['experiments\*'\]//g" setup.py && python setup.py install && cd .. && rm -rf se3cnn 33 | git clone https://github.com/AMLab-Amsterdam/lie_learn && cd lie_learn && python setup.py install && cd .. 
&& rm -rf lie_learn 34 | 35 | # create python2 env used for protein structure preprocessing 36 | conda create -y -n deeplytough_mgltools python=2.7 37 | conda install -y -n deeplytough_mgltools -c bioconda mgltools=1.5.6 38 | ``` 39 | 40 | ### Dataset setup 41 | 42 | #### Training and benchmark datasets 43 | 44 | The tool comes with built-in support for three datasets: TOUGH-M1 (Govindaraj and Brylinski, 2018), Vertex (Chen et al., 2016), and ProSPECCTs (Ehrt et al., 2018). These datasets must be downloaded if one wishes to either retrain the network or evaluate on one of these benchmarks. The datasets can be prepared in two steps: 45 | 46 | 1. Set `STRUCTURE_DATA_DIR` environment variable to a directory that will contain the datasets (about 27 GB): `export STRUCTURE_DATA_DIR=/path_to_a_dir` 47 | 2. Run `datasets_downloader.sh` from the root of this repository and get yourself a coffee 48 | 49 | This will download PDB files, extracted pockets and pre-process input features. It will also download lists of pocket pairs provided by the respective dataset authors. By downloading Prospeccts, you accept their [terms of use](http://www.ccb.tu-dortmund.de/ag-koch/prospeccts/license_en.pdf). 50 | 51 | Note that this is a convenience and we also provide code for data pre-processing: in case one wishes to start from the respective base datasets, pre-processing may be triggered using the `--db_preprocessing 1` flag when running any of our training and evaluation scripts. 
For the TOUGH-M1 dataset in particular, fpocket2 is required and can be installed as follows: 52 | ```bash 53 | curl -O -L https://netcologne.dl.sourceforge.net/project/fpocket/fpocket2.tar.gz && tar -xvzf fpocket2.tar.gz && rm fpocket2.tar.gz && cd fpocket2 && sed -i 's/\$(LFLAGS) \$\^ -o \$@/\$\^ -o \$@ \$(LFLAGS)/g' makefile && make && mv bin/fpocket bin/fpocket2 && mv bin/dpocket bin/dpocket2 && mv bin/mdpocket bin/mdpocket2 && mv bin/tpocket bin/tpocket2 54 | ``` 55 | 56 | #### Custom datasets 57 | 58 | The tool also supports an easy way of computing pocket distances for a user-defined set of pocket pairs. This requires providing i) a set of PDB structures, ii) pockets in PDB format (extracted around bound ligands or detected using any pocket detection algorithm), iii) a CSV file defining the pairing. A toy custom dataset example is provided in `datasets/custom`. The CSV file contains a quadruplet on each line indicating pairs to evaluate: `relative_path_to_pdbA, relative_path_to_pocketA, relative_path_to_pdbB, relative_path_to_pocketB`, where paths are relative to the directory containing the CSV file and the pdb extension may be omitted. `STRUCTURE_DATA_DIR` environment variable must be set to the parent directory containing the custom dataset (in the example `/path_to_this_repository/datasets`). 59 | 60 | ### Environment setup 61 | 62 | To run the evaluation and training scripts, please first set the `DEEPLYTOUGH` environment variable to the directory containing this repository and then update the `PYTHONPATH` and `PATH` variables respectively: 63 | ```bash 64 | export DEEPLYTOUGH=/path_to_this_repository 65 | export PYTHONPATH=$DEEPLYTOUGH/deeplytough:$PYTHONPATH 66 | export PATH=$DEEPLYTOUGH/fpocket2/bin:$PATH 67 | ``` 68 | 69 | ## Evaluation 70 | 71 | We provide pre-trained networks in the `networks` directory in this repository. 
The following commands assume a GPU and a 4-core CPU available; use `--device 'cpu'` if there is no GPU and set `--nworkers` parameter accordingly if there are fewer cores available. 72 | 73 | * Evaluation on TOUGH-M1: 74 | ```bash 75 | python $DEEPLYTOUGH/deeplytough/scripts/toughm1_benchmark.py --output_dir $DEEPLYTOUGH/results --device 'cuda:0' --nworkers 4 --net $DEEPLYTOUGH/networks/deeplytough_toughm1_test.pth.tar 76 | ``` 77 | 78 | * Evaluation on Vertex: 79 | ```bash 80 | python $DEEPLYTOUGH/deeplytough/scripts/vertex_benchmark.py --output_dir $DEEPLYTOUGH/results --device 'cuda:0' --nworkers 4 --net $DEEPLYTOUGH/networks/deeplytough_vertex.pth.tar 81 | ``` 82 | 83 | * Evaluation on ProSPECCTs: 84 | ```bash 85 | python $DEEPLYTOUGH/deeplytough/scripts/prospeccts_benchmark.py --output_dir $DEEPLYTOUGH/results --device 'cuda:0' --nworkers 4 --net $DEEPLYTOUGH/networks/deeplytough_prospeccts.pth.tar 86 | ``` 87 | 88 | * Evaluation on a custom dataset, located in `$STRUCTURE_DATA_DIR/some_custom_name` directory: 89 | ```bash 90 | python $DEEPLYTOUGH/deeplytough/scripts/custom_evaluation.py --dataset_subdir 'some_custom_name' --output_dir $DEEPLYTOUGH/results --device 'cuda:0' --nworkers 4 --net $DEEPLYTOUGH/networks/deeplytough_toughm1_test.pth.tar 91 | ``` 92 | Note that networks `deeplytough_prospeccts.pth.tar` and `deeplytough_vertex.pth.tar` may also be used, producing different results. 93 | 94 | Each of these commands will output to `$DEEPLYTOUGH/results` a CSV file with the resulting similarity scores (negative distances) as well as a pickle file with more detailed results (please see the code). The CSV files are already provided in this repository for convenience. 95 | 96 | 97 | ## Training 98 | 99 | Training requires a GPU with >=11GB of memory and takes about 1.5 days on recent hardware. In addition, at least a 4-core CPU is recommended due to volumetric input pre-processing being an expensive task. 
100 | 101 | * Training for TOUGH-M1 evaluation: 102 | ```bash 103 | python $DEEPLYTOUGH/deeplytough/scripts/train.py --output_dir $DEEPLYTOUGH/results/TTTT_forTough --device 'cuda:0' --seed 4 104 | ``` 105 | 106 | * Training for Vertex evaluation: 107 | ```bash 108 | python $DEEPLYTOUGH/deeplytough/scripts/train.py --output_dir $DEEPLYTOUGH/results/TTTT_forVertex --device 'cuda:0' --db_exclude_vertex 'uniprot' --db_split_strategy 'none' 109 | ``` 110 | 111 | * Training for ProSPECCTs evaluation: 112 | ```bash 113 | python $DEEPLYTOUGH/deeplytough/scripts/train.py --output_dir $DEEPLYTOUGH/results/TTTT_forProspeccts --device 'cuda:0' --db_exclude_prospeccts 'uniprot' --db_split_strategy 'none' --model_config 'se_4_4_4_4_7_3_2_batch_1,se_8_8_8_8_3_1_1_batch_1,se_16_16_16_16_3_1_2_batch_1,se_32_32_32_32_3_0_1_batch_1,se_256_0_0_0_3_0_2_batch_1,r,b,c_128_1' 114 | ``` 115 | 116 | Note that due to non-determinism inherent to the currently established process of training deep networks, it is nearly impossible to exactly reproduce the pre-trained networks in `networks` directory. 117 | 118 | Also note that, for convenience, an output directory name containing "TTTT" will have this substring replaced by the current `datetime`. 119 | 120 | ## Changelog 121 | 122 | - 23.02.2020: Updated code to follow our revised [JCIM paper](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.9b00554), in particular moving away from the UniProt-based splitting strategy as in our [BioRxiv](https://www.biorxiv.org/content/10.1101/600304v1) paper to a sequence-based clustering approach whereby protein structures sharing more than 30% sequence identity are always allocated to the same testing/training set. We have also made data pre-processing more robust and frozen the versions of several dependencies. 
The old code is kept in `old_bioarxiv_version` branch, though note the legacy splitting behavior can be turned on also in the current `master` by setting `--db_split_strategy` command line argument in the scripts to `uniprot_folds` instead of `seqclust`. 123 | - 08.12.2020: pinned versions of requirements and updated DockerFile and README to reflect build instructions 124 | - 28.09.2021: replaced conda htmd with source build in dockerfile to relieve dependency solver (patched: 2.12.2021, also added biopython fn to remove non-protein atoms instead of VMD which is deprecated) 125 | 126 | ## License Terms 127 | 128 | (c) BenevolentAI Limited 2019. All rights reserved.
129 | For licensing enquiries, please contact hello@benevolent.ai 130 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket0_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 1: 6 | HEADER 0 - Pocket Score : 31.2963 7 | HEADER 1 - Drug Score : 0.7720 8 | HEADER 2 - Number of V. Vertices : 123 9 | HEADER 3 - Mean alpha-sphere radius : 4.5479 10 | HEADER 4 - Mean alpha-sphere SA : 0.5003 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 38.3478 13 | HEADER 7 - Polarity Score : 6 14 | HEADER 8 - Volume Score : 4.2174 15 | HEADER 9 - Real volume (approximation) : 1388.1013 16 | HEADER 10 - Charge Score : -3 17 | HEADER 11 - Local hydrophobic density Score : 50.3896 18 | HEADER 12 - Number of apolar alpha sphere : 77 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.6260 20 | ATOM 777 CB GLN B 108 8.717 39.545 17.097 1.00 0.00 C 0 21 | ATOM 800 CZ PHE B 110 4.139 41.627 12.679 1.00 0.00 C 0 22 | ATOM 798 CE1 PHE B 110 4.062 41.402 14.048 1.00 0.00 C 0 23 | ATOM 778 CG GLN B 108 9.239 40.935 16.764 1.00 0.00 C 0 24 | ATOM 941 OD1 ASP B 128 13.380 37.971 17.376 1.00 0.00 O 0 25 | ATOM 780 OE1 GLN B 108 11.355 40.912 17.879 1.00 0.00 O 0 26 | ATOM 942 OD2 ASP B 128 14.176 39.950 16.880 1.00 0.00 O 0 27 | ATOM 1371 NH1 ARG B 181 14.113 34.782 16.056 1.00 0.00 N 0 28 | ATOM 2306 NH1 ARG B 304 10.055 34.175 7.392 1.00 0.00 N 0 29 | ATOM 2414 OE1 GLU B 317 9.414 37.896 6.723 1.00 0.00 O 0 30 | ATOM 2307 NH2 ARG B 304 11.172 35.688 6.069 1.00 0.00 N 0 31 | ATOM 2305 CZ ARG B 304 11.065 34.463 6.577 1.00 0.00 C 0 32 | ATOM 2415 OE2 GLU B 317 8.282 36.351 7.779 1.00 0.00 O 0 33 | ATOM 776 O GLN B 108 5.777 39.958 18.438 1.00 0.00 O 
0 34 | ATOM 2013 CD PRO B 263 3.471 34.893 14.055 1.00 0.00 C 0 35 | ATOM 2006 CD2 LEU B 262 3.538 36.582 18.127 1.00 0.00 C 0 36 | ATOM 2410 O GLU B 317 5.623 42.496 6.884 1.00 0.00 O 0 37 | ATOM 799 CE2 PHE B 110 4.198 42.930 12.197 1.00 0.00 C 0 38 | ATOM 2411 CB GLU B 317 7.222 39.643 6.556 1.00 0.00 C 0 39 | ATOM 2440 OE1 GLN B 321 6.826 46.002 11.208 1.00 0.00 O 0 40 | ATOM 2254 CD2 LEU B 297 3.915 38.315 9.071 1.00 0.00 C 0 41 | ATOM 2412 CG GLU B 317 7.039 38.173 6.901 1.00 0.00 C 0 42 | ATOM 2431 CG1 VAL B 320 1.710 42.194 9.271 1.00 0.00 C 0 43 | ATOM 2413 CD GLU B 317 8.341 37.432 7.160 1.00 0.00 C 0 44 | ATOM 797 CD2 PHE B 110 4.180 44.005 13.089 1.00 0.00 C 0 45 | ATOM 795 CG PHE B 110 4.100 43.795 14.461 1.00 0.00 C 0 46 | ATOM 796 CD1 PHE B 110 4.044 42.485 14.931 1.00 0.00 C 0 47 | ATOM 766 CG PRO B 106 7.625 33.484 14.857 1.00 0.00 C 0 48 | ATOM 2253 CD1 LEU B 297 2.745 38.570 11.265 1.00 0.00 C 0 49 | ATOM 2012 CG PRO B 263 4.235 34.182 12.937 1.00 0.00 C 0 50 | ATOM 1994 O MET B 261 1.567 37.668 14.307 1.00 0.00 O 0 51 | ATOM 764 O PRO B 106 6.477 34.748 17.432 1.00 0.00 O 0 52 | ATOM 765 CB PRO B 106 8.798 33.959 15.639 1.00 0.00 C 0 53 | ATOM 807 CD PRO B 111 7.053 47.000 15.315 1.00 0.00 C 0 54 | ATOM 785 O ILE B 109 7.420 44.109 17.781 1.00 0.00 O 0 55 | ATOM 791 CA PHE B 110 5.252 45.064 16.357 1.00 0.00 C 0 56 | ATOM 773 N GLN B 108 7.613 38.097 18.746 1.00 0.00 N 0 57 | ATOM 1361 O GLY B 180 19.899 34.614 12.174 1.00 0.00 O 0 58 | ATOM 2353 OE2 GLU B 310 13.419 35.517 4.107 1.00 0.00 O 0 59 | ATOM 2304 NE ARG B 304 11.944 33.524 6.233 1.00 0.00 N 0 60 | ATOM 1367 CG ARG B 181 17.391 33.770 14.986 1.00 0.00 C 0 61 | ATOM 2316 CE1 HIS B 305 12.013 28.487 8.593 1.00 0.00 C 0 62 | ATOM 1368 CD ARG B 181 16.338 33.013 15.800 1.00 0.00 C 0 63 | ATOM 1342 O ALA B 177 18.601 30.684 14.560 1.00 0.00 O 0 64 | ATOM 2303 CD ARG B 304 11.899 32.134 6.677 1.00 0.00 C 0 65 | ATOM 1343 CB ALA B 177 16.484 28.164 14.915 1.00 0.00 C 0 66 | ATOM 1340 CA ALA 
B 177 17.820 28.434 14.246 1.00 0.00 C 0 67 | ATOM 958 CD2 LEU B 130 13.462 30.218 15.915 1.00 0.00 C 0 68 | ATOM 2317 NE2 HIS B 305 10.935 29.188 8.896 1.00 0.00 N 0 69 | ATOM 957 CD1 LEU B 130 11.044 30.704 16.260 1.00 0.00 C 0 70 | ATOM 2280 CE MET B 301 7.937 30.342 11.637 1.00 0.00 C 0 71 | ATOM 746 CB LEU B 104 9.192 27.445 14.321 1.00 0.00 C 0 72 | ATOM 748 CD1 LEU B 104 10.708 26.844 12.446 1.00 0.00 C 0 73 | ATOM 2250 O LEU B 297 3.914 33.886 8.641 1.00 0.00 O 0 74 | ATOM 2251 CB LEU B 297 2.622 36.389 10.004 1.00 0.00 C 0 75 | ATOM 2278 CG MET B 301 6.421 31.672 9.710 1.00 0.00 C 0 76 | ATOM 2279 SD MET B 301 6.249 30.877 11.331 1.00 0.00 S 0 77 | ATOM 1359 CA GLY B 180 20.886 32.446 12.048 1.00 0.00 C 0 78 | ATOM 1331 O ARG B 176 20.283 28.162 13.030 1.00 0.00 O 0 79 | TER 80 | END 81 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket0_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 1: 7 | HEADER 0 - Pocket Score : 31.2963 8 | HEADER 1 - Drug Score : 0.7720 9 | HEADER 2 - Number of V. 
Vertices : 123 10 | HEADER 3 - Mean alpha-sphere radius : 4.5479 11 | HEADER 4 - Mean alpha-sphere SA : 0.5003 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 38.3478 14 | HEADER 7 - Polarity Score : 6 15 | HEADER 8 - Volume Score : 4.2174 16 | HEADER 9 - Real volume (approximation) : 1388.1013 17 | HEADER 10 - Charge Score : -3 18 | HEADER 11 - Local hydrophobic density Score : 50.3896 19 | HEADER 12 - Number of apolar alpha sphere : 77 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.6260 21 | ATOM 6305 APOL STP 1 7.642 39.870 13.292 0.00 3.97 22 | ATOM 7250 POL STP 1 11.528 39.321 14.953 0.00 3.34 23 | ATOM 7250 POL STP 1 11.249 39.039 15.477 0.00 3.05 24 | ATOM 9791 POL STP 1 12.728 37.300 11.225 0.00 5.62 25 | ATOM 13742 POL STP 1 13.128 36.868 11.068 0.00 5.50 26 | ATOM 7250 POL STP 1 12.393 38.151 11.677 0.00 5.79 27 | ATOM 7251 POL STP 1 11.456 38.615 12.023 0.00 5.72 28 | ATOM 7250 POL STP 1 11.957 37.983 11.841 0.00 5.72 29 | ATOM 9791 POL STP 1 12.462 37.277 11.351 0.00 5.58 30 | ATOM 6699 APOL STP 1 5.747 37.969 15.028 0.00 3.95 31 | ATOM 6699 APOL STP 1 5.519 37.990 15.124 0.00 3.86 32 | ATOM 4006 POL STP 1 8.316 41.692 10.537 0.00 4.61 33 | ATOM 6302 APOL STP 1 8.673 41.487 10.849 0.00 4.89 34 | ATOM 6302 APOL STP 1 7.704 41.364 10.400 0.00 4.24 35 | ATOM 6302 APOL STP 1 6.525 40.749 10.006 0.00 3.69 36 | ATOM 6302 APOL STP 1 7.503 39.817 10.800 0.00 4.26 37 | ATOM 4677 APOL STP 1 4.672 41.290 9.593 0.00 3.11 38 | ATOM 6302 APOL STP 1 4.944 41.220 9.672 0.00 3.14 39 | ATOM 6304 APOL STP 1 4.589 41.238 9.677 0.00 3.06 40 | ATOM 6302 APOL STP 1 8.032 39.814 11.067 0.00 4.59 41 | ATOM 4621 APOL STP 1 7.546 38.910 10.921 0.00 4.12 42 | ATOM 6302 APOL STP 1 7.796 39.161 11.384 0.00 4.60 43 | ATOM 6302 APOL STP 1 7.756 39.373 11.193 0.00 4.51 44 | ATOM 3591 APOL STP 1 9.417 40.602 11.386 0.00 5.39 45 | ATOM 6302 APOL STP 1 9.216 40.514 11.437 0.00 5.34 46 | ATOM 3594 POL STP 1 10.014 41.331 11.176 0.00 5.66 47 | ATOM 3595 
POL STP 1 10.162 39.271 11.802 0.00 5.31 48 | ATOM 6302 APOL STP 1 9.407 41.192 11.313 0.00 5.46 49 | ATOM 6302 APOL STP 1 8.701 39.402 11.891 0.00 5.14 50 | ATOM 7251 POL STP 1 11.395 38.535 12.046 0.00 5.72 51 | ATOM 3587 APOL STP 1 8.032 42.870 13.441 0.00 4.03 52 | ATOM 6302 APOL STP 1 8.575 42.104 12.506 0.00 4.47 53 | ATOM 6698 APOL STP 1 7.991 43.375 13.919 0.00 3.95 54 | ATOM 11163 APOL STP 1 7.651 42.609 13.747 0.00 3.80 55 | ATOM 6302 APOL STP 1 7.653 42.603 13.742 0.00 3.80 56 | ATOM 11163 APOL STP 1 7.651 42.609 13.747 0.00 3.80 57 | ATOM 3746 APOL STP 1 7.564 37.909 12.494 0.00 5.02 58 | ATOM 4621 APOL STP 1 7.556 37.902 12.484 0.00 5.01 59 | ATOM 3093 APOL STP 1 6.632 37.886 13.265 0.00 4.42 60 | ATOM 3746 APOL STP 1 7.049 37.934 13.019 0.00 4.69 61 | ATOM 3746 APOL STP 1 6.788 37.872 13.381 0.00 4.51 62 | ATOM 6901 APOL STP 1 5.032 38.032 13.874 0.00 3.51 63 | ATOM 3746 APOL STP 1 6.879 37.626 13.600 0.00 4.39 64 | ATOM 6304 APOL STP 1 7.109 37.973 12.951 0.00 4.72 65 | ATOM 3746 APOL STP 1 7.562 37.910 12.498 0.00 5.02 66 | ATOM 3746 APOL STP 1 6.651 37.386 14.361 0.00 4.05 67 | ATOM 3746 APOL STP 1 8.405 37.754 12.509 0.00 4.94 68 | ATOM 3746 APOL STP 1 7.565 37.911 12.494 0.00 5.02 69 | ATOM 4621 APOL STP 1 7.569 37.929 12.488 0.00 5.02 70 | ATOM 3746 APOL STP 1 7.179 37.163 14.706 0.00 3.71 71 | ATOM 6700 POL STP 1 5.786 37.845 15.151 0.00 3.91 72 | ATOM 6304 APOL STP 1 7.567 37.982 12.481 0.00 5.01 73 | ATOM 6305 APOL STP 1 7.593 38.001 12.462 0.00 5.01 74 | ATOM 6305 APOL STP 1 8.585 39.207 12.002 0.00 5.11 75 | ATOM 4573 POL STP 1 8.067 43.438 13.943 0.00 3.95 76 | ATOM 6698 POL STP 1 7.995 43.398 13.938 0.00 3.95 77 | ATOM 6698 POL STP 1 7.981 43.433 13.950 0.00 3.93 78 | ATOM 6698 APOL STP 1 7.828 43.535 14.093 0.00 3.76 79 | ATOM 11159 APOL STP 1 7.131 43.059 14.835 0.00 3.14 80 | ATOM 11161 APOL STP 1 7.651 42.662 13.790 0.00 3.79 81 | ATOM 6698 APOL STP 1 7.898 43.512 13.935 0.00 3.85 82 | ATOM 11159 APOL STP 1 7.479 42.924 14.499 0.00 
3.49 83 | ATOM 6914 APOL STP 1 5.179 38.020 15.117 0.00 3.72 84 | ATOM 6700 POL STP 1 5.747 37.833 15.185 0.00 3.89 85 | ATOM 9122 POL STP 1 5.955 37.793 15.539 0.00 3.62 86 | ATOM 9122 POL STP 1 5.829 37.718 15.838 0.00 3.43 87 | ATOM 7250 POL STP 1 11.230 37.632 12.286 0.00 5.54 88 | ATOM 7250 POL STP 1 11.187 38.277 12.200 0.00 5.63 89 | ATOM 7250 POL STP 1 11.027 37.831 12.360 0.00 5.54 90 | ATOM 9791 POL STP 1 11.458 36.364 11.885 0.00 5.19 91 | ATOM 13742 POL STP 1 14.686 36.528 10.578 0.00 5.78 92 | ATOM 11921 POL STP 1 15.765 36.323 8.999 0.00 5.49 93 | ATOM 13742 POL STP 1 15.170 36.173 9.823 0.00 5.51 94 | ATOM 13742 POL STP 1 14.683 36.022 10.596 0.00 5.63 95 | ATOM 11659 APOL STP 1 14.977 31.533 11.223 0.00 5.00 96 | ATOM 11934 APOL STP 1 15.591 31.885 10.416 0.00 5.26 97 | ATOM 11934 APOL STP 1 14.409 32.224 11.131 0.00 5.11 98 | ATOM 11634 APOL STP 1 14.982 31.102 11.377 0.00 4.84 99 | ATOM 11634 APOL STP 1 14.957 31.125 11.396 0.00 4.85 100 | ATOM 11659 APOL STP 1 14.915 31.414 11.336 0.00 4.95 101 | ATOM 11652 APOL STP 1 15.403 30.982 13.037 0.00 3.55 102 | ATOM 11663 APOL STP 1 14.288 32.058 11.308 0.00 5.03 103 | ATOM 11934 APOL STP 1 13.718 32.650 11.426 0.00 5.11 104 | ATOM 11934 APOL STP 1 14.360 32.234 11.154 0.00 5.11 105 | ATOM 7215 APOL STP 1 11.962 33.004 12.093 0.00 4.85 106 | ATOM 9791 POL STP 1 11.941 33.248 11.877 0.00 4.95 107 | ATOM 9523 APOL STP 1 12.149 32.858 11.979 0.00 4.92 108 | ATOM 9523 POL STP 1 12.168 32.856 11.954 0.00 4.93 109 | ATOM 7215 APOL STP 1 10.767 29.990 13.132 0.00 3.22 110 | ATOM 7531 APOL STP 1 11.737 30.596 12.433 0.00 3.89 111 | ATOM 9523 APOL STP 1 11.758 30.594 12.433 0.00 3.89 112 | ATOM 9523 APOL STP 1 11.923 31.556 12.233 0.00 4.21 113 | ATOM 9797 APOL STP 1 13.092 29.894 12.056 0.00 3.89 114 | ATOM 9788 POL STP 1 12.117 33.072 11.807 0.00 5.00 115 | ATOM 11934 POL STP 1 13.715 32.651 11.427 0.00 5.11 116 | ATOM 11933 POL STP 1 12.659 33.091 11.552 0.00 5.03 117 | ATOM 9791 APOL STP 1 10.560 33.794 
11.681 0.00 4.34 118 | ATOM 9791 POL STP 1 10.010 35.228 11.635 0.00 4.37 119 | ATOM 4621 APOL STP 1 6.073 35.940 10.508 0.00 3.52 120 | ATOM 4621 APOL STP 1 5.475 36.821 11.500 0.00 3.25 121 | ATOM 3746 APOL STP 1 7.944 33.453 11.760 0.00 3.11 122 | ATOM 3746 APOL STP 1 7.089 33.799 11.838 0.00 3.08 123 | ATOM 9791 APOL STP 1 9.154 34.095 11.251 0.00 3.96 124 | ATOM 3595 POL STP 1 6.595 35.050 10.531 0.00 3.48 125 | ATOM 3746 APOL STP 1 7.630 34.957 11.304 0.00 3.85 126 | ATOM 4621 POL STP 1 6.088 35.940 10.507 0.00 3.53 127 | ATOM 9791 POL STP 1 8.906 34.406 11.199 0.00 3.98 128 | ATOM 11637 APOL STP 1 13.895 29.243 12.051 0.00 4.01 129 | ATOM 11933 POL STP 1 13.690 34.220 10.990 0.00 5.11 130 | ATOM 13742 POL STP 1 13.736 34.416 10.955 0.00 5.13 131 | ATOM 11934 POL STP 1 14.205 34.169 10.883 0.00 5.21 132 | ATOM 13742 POL STP 1 14.458 35.186 10.696 0.00 5.39 133 | ATOM 11934 APOL STP 1 13.968 32.989 11.272 0.00 5.11 134 | ATOM 11934 POL STP 1 15.087 33.967 10.339 0.00 5.19 135 | ATOM 13742 POL STP 1 14.749 35.528 10.506 0.00 5.49 136 | ATOM 11934 APOL STP 1 13.717 32.651 11.427 0.00 5.11 137 | ATOM 11934 POL STP 1 15.731 31.955 10.310 0.00 5.28 138 | ATOM 7573 POL STP 1 17.754 30.252 10.573 0.00 4.10 139 | ATOM 9794 APOL STP 1 17.066 30.040 9.226 0.00 5.32 140 | ATOM 9794 APOL STP 1 16.501 30.793 9.813 0.00 5.19 141 | ATOM 9795 POL STP 1 16.036 31.669 10.005 0.00 5.32 142 | ATOM 11934 APOL STP 1 16.042 31.673 9.985 0.00 5.32 143 | ATOM 11934 POL STP 1 16.004 31.696 10.030 0.00 5.32 144 | TER 145 | END 146 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket1_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 
4 | HEADER 5 | HEADER Information about the pocket 2: 6 | HEADER 0 - Pocket Score : 21.6873 7 | HEADER 1 - Drug Score : 0.7210 8 | HEADER 2 - Number of V. Vertices : 53 9 | HEADER 3 - Mean alpha-sphere radius : 3.3450 10 | HEADER 4 - Mean alpha-sphere SA : 0.4503 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 37.6667 13 | HEADER 7 - Polarity Score : 8 14 | HEADER 8 - Volume Score : 4.2500 15 | HEADER 9 - Real volume (approximation) : 185.8746 16 | HEADER 10 - Charge Score : 0 17 | HEADER 11 - Local hydrophobic density Score : 42.8636 18 | HEADER 12 - Number of apolar alpha sphere : 44 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.8302 20 | ATOM 1430 CA ASP B 189 13.973 21.185 31.449 1.00 0.00 C 0 21 | ATOM 1494 CB THR B 197 13.501 17.149 31.666 1.00 0.00 C 0 22 | ATOM 1425 O VAL B 188 13.512 22.183 28.819 1.00 0.00 O 0 23 | ATOM 1433 CB ASP B 189 15.184 20.254 31.245 1.00 0.00 C 0 24 | ATOM 1521 CG TRP B 200 15.526 17.930 26.868 1.00 0.00 C 0 25 | ATOM 1496 CG2 THR B 197 12.002 16.892 31.556 1.00 0.00 C 0 26 | ATOM 1485 C THR B 196 13.902 13.606 30.584 1.00 0.00 C 0 27 | ATOM 1489 CG2 THR B 196 11.553 12.341 29.126 1.00 0.00 C 0 28 | ATOM 1490 N THR B 197 13.726 14.671 31.366 1.00 0.00 N 0 29 | ATOM 1491 CA THR B 197 14.308 15.975 31.047 1.00 0.00 C 0 30 | ATOM 1213 CD2 TYR B 162 11.175 14.387 25.797 1.00 0.00 C 0 31 | ATOM 1486 O THR B 196 14.602 13.620 29.571 1.00 0.00 O 0 32 | ATOM 1520 CB TRP B 200 15.774 16.677 27.692 1.00 0.00 C 0 33 | ATOM 1879 CD2 PHE B 244 9.816 21.210 30.524 1.00 0.00 C 0 34 | ATOM 1852 N GLY B 241 10.571 20.868 25.539 1.00 0.00 N 0 35 | ATOM 1523 CD2 TRP B 200 14.598 18.077 25.783 1.00 0.00 C 0 36 | ATOM 1853 CA GLY B 241 9.644 19.818 25.147 1.00 0.00 C 0 37 | ATOM 1850 OG1 THR B 240 11.096 22.753 27.463 1.00 0.00 O 0 38 | ATOM 1526 CE3 TRP B 200 13.676 17.219 25.168 1.00 0.00 C 0 39 | ATOM 1881 CE2 PHE B 244 10.050 20.407 31.650 1.00 0.00 C 0 40 | ATOM 1019 OD2 ASP B 138 6.900 17.016 26.918 1.00 
0.00 O 0 41 | ATOM 1018 OD1 ASP B 138 7.244 15.006 27.702 1.00 0.00 O 0 42 | ATOM 1215 CE2 TYR B 162 10.369 15.427 25.351 1.00 0.00 C 0 43 | ATOM 1197 SD MET B 160 8.474 14.361 31.479 1.00 0.00 S 0 44 | ATOM 1198 CE MET B 160 7.306 15.687 31.217 1.00 0.00 C 0 45 | ATOM 1880 CE1 PHE B 244 7.719 20.559 32.224 1.00 0.00 C 0 46 | ATOM 1878 CD1 PHE B 244 7.496 21.360 31.099 1.00 0.00 C 0 47 | ATOM 1882 CZ PHE B 244 9.002 20.082 32.497 1.00 0.00 C 0 48 | ATOM 1877 CG PHE B 244 8.542 21.690 30.238 1.00 0.00 C 0 49 | ATOM 1868 CB MET B 243 4.347 19.098 28.648 1.00 0.00 C 0 50 | ATOM 1872 N PHE B 244 6.199 21.510 28.216 1.00 0.00 N 0 51 | ATOM 1525 CE2 TRP B 200 14.722 19.399 25.304 1.00 0.00 C 0 52 | TER 53 | END 54 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket1_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 2: 7 | HEADER 0 - Pocket Score : 21.6873 8 | HEADER 1 - Drug Score : 0.7210 9 | HEADER 2 - Number of V. 
Vertices : 53 10 | HEADER 3 - Mean alpha-sphere radius : 3.3450 11 | HEADER 4 - Mean alpha-sphere SA : 0.4503 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 37.6667 14 | HEADER 7 - Polarity Score : 8 15 | HEADER 8 - Volume Score : 4.2500 16 | HEADER 9 - Real volume (approximation) : 185.8746 17 | HEADER 10 - Charge Score : 0 18 | HEADER 11 - Local hydrophobic density Score : 42.8636 19 | HEADER 12 - Number of apolar alpha sphere : 44 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.8302 21 | ATOM 15604 APOL STP 2 13.011 19.130 29.293 0.00 3.13 22 | ATOM 15847 APOL STP 2 13.268 18.947 28.970 0.00 3.25 23 | ATOM 15605 APOL STP 2 12.542 19.191 29.398 0.00 3.20 24 | ATOM 15850 APOL STP 2 12.663 18.789 28.689 0.00 3.50 25 | ATOM 14408 APOL STP 2 12.026 15.348 28.926 0.00 3.05 26 | ATOM 14408 APOL STP 2 12.225 15.519 28.738 0.00 3.14 27 | ATOM 15342 APOL STP 2 12.115 15.474 28.673 0.00 3.21 28 | ATOM 14408 APOL STP 2 12.220 15.527 28.700 0.00 3.17 29 | ATOM 15342 APOL STP 2 12.223 15.584 28.610 0.00 3.23 30 | ATOM 15342 APOL STP 2 12.155 15.503 28.655 0.00 3.22 31 | ATOM 13071 APOL STP 2 12.919 17.312 28.649 0.00 3.08 32 | ATOM 15348 APOL STP 2 12.448 16.354 28.242 0.00 3.39 33 | ATOM 15850 APOL STP 2 12.872 17.613 28.586 0.00 3.18 34 | ATOM 15344 APOL STP 2 12.688 15.870 28.246 0.00 3.24 35 | ATOM 15342 APOL STP 2 12.148 15.440 28.649 0.00 3.19 36 | ATOM 9450 POL STP 2 11.828 18.892 28.426 0.00 3.72 37 | ATOM 16806 POL STP 2 11.962 18.808 28.340 0.00 3.74 38 | ATOM 14653 APOL STP 2 10.993 18.446 28.330 0.00 3.72 39 | ATOM 15608 APOL STP 2 12.211 19.346 29.515 0.00 3.20 40 | ATOM 11454 POL STP 2 11.557 19.745 28.327 0.00 3.16 41 | ATOM 14654 APOL STP 2 11.422 18.173 28.048 0.00 3.78 42 | ATOM 13347 APOL STP 2 9.600 17.924 29.099 0.00 3.59 43 | ATOM 14653 APOL STP 2 10.190 18.069 28.454 0.00 3.78 44 | ATOM 13236 POL STP 2 10.083 17.007 28.544 0.00 3.57 45 | ATOM 15342 APOL STP 2 10.361 15.939 28.722 0.00 3.41 46 | ATOM 14654 APOL STP 2 
10.337 17.818 28.285 0.00 3.79 47 | ATOM 13086 APOL STP 2 10.373 15.517 28.914 0.00 3.39 48 | ATOM 15342 APOL STP 2 10.382 15.523 28.900 0.00 3.40 49 | ATOM 13086 APOL STP 2 9.808 16.295 29.231 0.00 3.25 50 | ATOM 13237 POL STP 2 9.605 17.132 29.072 0.00 3.46 51 | ATOM 13347 APOL STP 2 9.413 17.815 29.302 0.00 3.55 52 | ATOM 13347 APOL STP 2 8.220 18.466 29.772 0.00 3.26 53 | ATOM 14656 APOL STP 2 8.215 18.467 29.767 0.00 3.26 54 | ATOM 13347 APOL STP 2 8.700 18.213 29.492 0.00 3.36 55 | ATOM 13349 APOL STP 2 8.221 18.465 29.776 0.00 3.26 56 | ATOM 15714 APOL STP 2 8.220 18.445 29.609 0.00 3.32 57 | ATOM 14654 APOL STP 2 11.286 17.962 28.001 0.00 3.78 58 | ATOM 15344 APOL STP 2 11.890 16.858 28.103 0.00 3.45 59 | ATOM 16806 APOL STP 2 11.682 18.386 28.127 0.00 3.75 60 | ATOM 15714 APOL STP 2 8.203 18.467 29.720 0.00 3.28 61 | ATOM 16249 APOL STP 2 7.030 18.513 30.034 0.00 3.08 62 | ATOM 15348 APOL STP 2 12.430 16.407 28.222 0.00 3.40 63 | ATOM 16799 APOL STP 2 12.565 17.310 28.303 0.00 3.33 64 | ATOM 15608 APOL STP 2 12.197 19.354 29.579 0.00 3.16 65 | ATOM 15714 APOL STP 2 8.125 18.467 29.625 0.00 3.31 66 | ATOM 16247 APOL STP 2 7.290 18.497 29.713 0.00 3.19 67 | ATOM 15714 POL STP 2 7.608 18.960 29.187 0.00 3.07 68 | ATOM 16245 POL STP 2 7.329 18.882 29.308 0.00 3.06 69 | ATOM 15714 POL STP 2 8.544 19.432 28.060 0.00 3.14 70 | ATOM 15714 APOL STP 2 8.834 19.233 28.196 0.00 3.21 71 | ATOM 16803 APOL STP 2 12.536 18.775 28.591 0.00 3.55 72 | ATOM 16799 APOL STP 2 12.609 17.400 28.342 0.00 3.31 73 | ATOM 16805 POL STP 2 12.582 19.626 27.456 0.00 3.04 74 | TER 75 | END 76 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket2_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 
4 | HEADER 5 | HEADER Information about the pocket 3: 6 | HEADER 0 - Pocket Score : 12.3766 7 | HEADER 1 - Drug Score : 0.0193 8 | HEADER 2 - Number of V. Vertices : 57 9 | HEADER 3 - Mean alpha-sphere radius : 4.4969 10 | HEADER 4 - Mean alpha-sphere SA : 0.5756 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 9.3571 13 | HEADER 7 - Polarity Score : 6 14 | HEADER 8 - Volume Score : 3.7857 15 | HEADER 9 - Real volume (approximation) : 826.3259 16 | HEADER 10 - Charge Score : -2 17 | HEADER 11 - Local hydrophobic density Score : 9.0000 18 | HEADER 12 - Number of apolar alpha sphere : 10 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.1754 20 | ATOM 616 CB GLU B 88 -7.798 19.454 20.555 1.00 0.00 C 0 21 | ATOM 1893 OD1 ASP B 246 0.272 23.890 24.629 1.00 0.00 O 0 22 | ATOM 641 CD2 LEU B 91 -4.085 21.703 17.519 1.00 0.00 C 0 23 | ATOM 619 OE1 GLU B 88 -9.595 21.284 22.256 1.00 0.00 O 0 24 | ATOM 615 O GLU B 88 -6.210 17.658 18.666 1.00 0.00 O 0 25 | ATOM 647 CG LEU B 92 -3.208 16.649 19.890 1.00 0.00 C 0 26 | ATOM 648 CD1 LEU B 92 -3.824 15.957 21.103 1.00 0.00 C 0 27 | ATOM 649 CD2 LEU B 92 -1.875 17.298 20.247 1.00 0.00 C 0 28 | ATOM 1037 CE2 TYR B 140 0.432 18.683 23.581 1.00 0.00 C 0 29 | ATOM 1039 OH TYR B 140 0.486 20.519 25.086 1.00 0.00 O 0 30 | ATOM 617 CG GLU B 88 -8.783 19.126 21.659 1.00 0.00 C 0 31 | ATOM 1894 OD2 ASP B 246 0.074 23.492 26.759 1.00 0.00 O 0 32 | ATOM 678 NH1 ARG B 95 -0.155 20.717 19.001 1.00 0.00 N 0 33 | ATOM 2098 CD GLU B 275 -1.551 25.240 14.921 1.00 0.00 C 0 34 | ATOM 760 NH2 ARG B 105 1.030 25.852 18.961 1.00 0.00 N 0 35 | ATOM 2104 O PRO B 276 -2.147 28.875 15.415 1.00 0.00 O 0 36 | ATOM 2099 OE1 GLU B 275 -2.791 25.234 15.085 1.00 0.00 O 0 37 | ATOM 2109 CA ILE B 277 -1.809 30.286 17.686 1.00 0.00 C 0 38 | ATOM 2127 CA GLY B 279 -7.506 29.453 18.831 1.00 0.00 C 0 39 | ATOM 2111 O ILE B 277 -2.574 31.009 19.862 1.00 0.00 O 0 40 | ATOM 1924 OD2 ASP B 250 -1.580 29.386 25.610 1.00 0.00 O 0 41 | ATOM 
1923 OD1 ASP B 250 -0.212 29.633 23.912 1.00 0.00 O 0 42 | ATOM 497 O VAL B 73 -9.673 24.487 18.145 1.00 0.00 O 0 43 | ATOM 759 NH1 ARG B 105 1.532 27.448 20.533 1.00 0.00 N 0 44 | ATOM 679 NH2 ARG B 95 -0.498 22.592 17.730 1.00 0.00 N 0 45 | ATOM 2126 N GLY B 279 -6.317 30.248 18.582 1.00 0.00 N 0 46 | ATOM 2100 OE2 GLU B 275 -0.777 24.549 15.618 1.00 0.00 O 0 47 | ATOM 2114 CG2 ILE B 277 0.561 29.453 17.411 1.00 0.00 C 0 48 | ATOM 2110 C ILE B 277 -2.786 30.982 18.642 1.00 0.00 C 0 49 | ATOM 982 NH1 ARG B 133 2.481 23.584 21.571 1.00 0.00 N 0 50 | ATOM 983 NH2 ARG B 133 2.146 21.508 22.495 1.00 0.00 N 0 51 | ATOM 500 CG2 VAL B 73 -8.065 22.284 16.742 1.00 0.00 C 0 52 | ATOM 2112 CB ILE B 277 -0.389 30.220 18.319 1.00 0.00 C 0 53 | TER 54 | END 55 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket2_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 3: 7 | HEADER 0 - Pocket Score : 12.3766 8 | HEADER 1 - Drug Score : 0.0193 9 | HEADER 2 - Number of V. 
Vertices : 57 10 | HEADER 3 - Mean alpha-sphere radius : 4.4969 11 | HEADER 4 - Mean alpha-sphere SA : 0.5756 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 9.3571 14 | HEADER 7 - Polarity Score : 6 15 | HEADER 8 - Volume Score : 3.7857 16 | HEADER 9 - Real volume (approximation) : 826.3259 17 | HEADER 10 - Charge Score : -2 18 | HEADER 11 - Local hydrophobic density Score : 9.0000 19 | HEADER 12 - Number of apolar alpha sphere : 10 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.1754 21 | ATOM 3140 POL STP 3 -4.634 23.240 22.609 0.00 5.34 22 | ATOM 4191 APOL STP 3 -4.943 18.744 21.164 0.00 3.00 23 | ATOM 14097 APOL STP 3 -4.317 19.580 21.959 0.00 3.76 24 | ATOM 4191 APOL STP 3 -4.300 19.938 20.570 0.00 3.53 25 | ATOM 14095 APOL STP 3 -4.214 20.036 20.747 0.00 3.64 26 | ATOM 16605 APOL STP 3 -4.058 20.092 23.350 0.00 4.71 27 | ATOM 16225 APOL STP 3 -4.567 19.703 24.552 0.00 5.15 28 | ATOM 16230 POL STP 3 -4.836 21.079 24.721 0.00 5.36 29 | ATOM 16605 APOL STP 3 -4.401 19.838 24.218 0.00 5.01 30 | ATOM 16230 POL STP 3 -4.627 21.794 24.172 0.00 5.35 31 | ATOM 16230 POL STP 3 -4.823 21.879 24.891 0.00 5.48 32 | ATOM 15238 APOL STP 3 -3.650 21.348 22.410 0.00 4.92 33 | ATOM 16230 POL STP 3 -3.652 21.436 22.472 0.00 4.98 34 | ATOM 16230 POL STP 3 -3.641 21.365 22.469 0.00 4.96 35 | ATOM 16605 APOL STP 3 -3.751 21.022 22.751 0.00 4.86 36 | ATOM 16605 POL STP 3 -3.455 21.263 22.446 0.00 4.80 37 | ATOM 16230 POL STP 3 -3.714 21.631 22.540 0.00 5.04 38 | ATOM 16230 POL STP 3 -3.604 21.628 22.473 0.00 4.98 39 | ATOM 11206 POL STP 3 -1.814 26.711 17.757 0.00 3.21 40 | ATOM 12786 POL STP 3 -2.442 26.767 18.368 0.00 3.64 41 | ATOM 11180 POL STP 3 -4.244 25.999 21.925 0.00 5.67 42 | ATOM 11180 POL STP 3 -3.844 26.138 21.895 0.00 5.43 43 | ATOM 10449 POL STP 3 -5.071 25.204 22.273 0.00 5.99 44 | ATOM 14125 POL STP 3 -5.419 25.120 21.862 0.00 5.68 45 | ATOM 11180 POL STP 3 -4.082 25.773 21.492 0.00 5.69 46 | ATOM 10473 POL STP 3 -3.313 26.160 
21.711 0.00 5.15 47 | ATOM 11180 POL STP 3 -3.477 26.143 21.683 0.00 5.27 48 | ATOM 11180 POL STP 3 -3.348 26.241 21.641 0.00 5.15 49 | ATOM 5554 POL STP 3 -3.144 24.728 21.238 0.00 4.89 50 | ATOM 15237 POL STP 3 -2.804 23.304 21.468 0.00 4.45 51 | ATOM 11180 POL STP 3 -4.059 25.773 21.477 0.00 5.68 52 | ATOM 5553 POL STP 3 -3.218 25.481 19.338 0.00 4.28 53 | ATOM 5826 POL STP 3 -3.870 26.095 19.883 0.00 4.99 54 | ATOM 9195 POL STP 3 -2.473 25.404 18.616 0.00 3.55 55 | ATOM 10444 POL STP 3 -4.170 26.046 19.652 0.00 4.84 56 | ATOM 10452 POL STP 3 -3.961 25.996 20.489 0.00 5.22 57 | ATOM 11180 POL STP 3 -3.736 26.306 19.716 0.00 4.85 58 | ATOM 11206 POL STP 3 -1.813 26.681 17.761 0.00 3.20 59 | ATOM 11180 POL STP 3 -4.018 25.881 21.023 0.00 5.45 60 | ATOM 11180 POL STP 3 -3.957 26.007 20.542 0.00 5.23 61 | ATOM 12788 POL STP 3 -3.700 26.338 19.676 0.00 4.81 62 | ATOM 12786 POL STP 3 -3.726 26.350 19.662 0.00 4.80 63 | ATOM 11206 POL STP 3 -1.615 26.762 17.622 0.00 3.10 64 | ATOM 12786 POL STP 3 -3.977 27.033 18.431 0.00 3.98 65 | ATOM 12788 POL STP 3 -1.502 27.244 17.932 0.00 3.07 66 | ATOM 12786 POL STP 3 -3.755 26.707 19.534 0.00 4.47 67 | ATOM 5563 POL STP 3 -1.250 23.859 21.190 0.00 3.76 68 | ATOM 15242 POL STP 3 -1.208 23.456 21.261 0.00 3.70 69 | ATOM 8201 POL STP 3 -0.273 25.148 21.768 0.00 3.17 70 | ATOM 17048 POL STP 3 -0.858 22.854 21.574 0.00 3.42 71 | ATOM 17046 POL STP 3 -1.343 21.381 22.227 0.00 3.50 72 | ATOM 17048 POL STP 3 -1.507 21.732 22.256 0.00 3.67 73 | ATOM 14133 POL STP 3 -6.421 23.404 20.544 0.00 4.18 74 | ATOM 14133 APOL STP 3 -6.514 23.267 20.329 0.00 4.03 75 | ATOM 14133 POL STP 3 -8.068 22.450 19.831 0.00 3.09 76 | ATOM 14284 POL STP 3 -2.003 27.475 19.958 0.00 3.58 77 | ATOM 14284 POL STP 3 -2.068 27.443 19.882 0.00 3.60 78 | TER 79 | END 80 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket3_atm.pdb: 
-------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 4: 6 | HEADER 0 - Pocket Score : 9.4369 7 | HEADER 1 - Drug Score : 0.0151 8 | HEADER 2 - Number of V. Vertices : 38 9 | HEADER 3 - Mean alpha-sphere radius : 3.8006 10 | HEADER 4 - Mean alpha-sphere SA : 0.5440 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 2.0000 13 | HEADER 7 - Polarity Score : 6 14 | HEADER 8 - Volume Score : 3.4444 15 | HEADER 9 - Real volume (approximation) : 317.2007 16 | HEADER 10 - Charge Score : 0 17 | HEADER 11 - Local hydrophobic density Score : 0.0000 18 | HEADER 12 - Number of apolar alpha sphere : 1 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.0263 20 | ATOM 455 OD1 ASP B 66 5.860 18.995 -1.981 1.00 0.00 O 0 21 | ATOM 2053 CA GLY B 270 9.573 16.999 3.743 1.00 0.00 C 0 22 | ATOM 17 NZ LYS B 2 8.810 22.073 -3.139 1.00 0.00 N 0 23 | ATOM 2055 O GLY B 270 7.396 17.521 2.913 1.00 0.00 O 0 24 | ATOM 16 CE LYS B 2 7.846 22.729 -2.211 1.00 0.00 C 0 25 | ATOM 2321 O SER B 306 8.842 24.533 0.114 1.00 0.00 O 0 26 | ATOM 452 O ASP B 66 4.941 20.591 2.518 1.00 0.00 O 0 27 | ATOM 2054 C GLY B 270 8.125 17.424 3.903 1.00 0.00 C 0 28 | ATOM 2319 CA SER B 306 8.253 25.040 2.378 1.00 0.00 C 0 29 | ATOM 2311 O HIS B 305 10.268 24.804 4.224 1.00 0.00 O 0 30 | ATOM 2323 OG SER B 306 6.752 23.965 3.951 1.00 0.00 O 0 31 | ATOM 2052 N GLY B 270 10.318 16.859 4.984 1.00 0.00 N 0 32 | ATOM 2059 O ARG B 271 7.338 19.544 7.060 1.00 0.00 O 0 33 | ATOM 2056 N ARG B 271 7.707 17.675 5.144 1.00 0.00 N 0 34 | ATOM 2042 O GLY B 268 10.199 18.322 7.755 1.00 0.00 O 0 35 | ATOM 2071 CB ALA B 272 5.977 22.340 6.659 1.00 0.00 C 0 36 | ATOM 2038 CD2 LEU B 267 8.395 23.797 8.770 1.00 0.00 C 0 37 | ATOM 2037 CD1 LEU B 267 10.611 24.187 9.869 
1.00 0.00 C 0 38 | ATOM 2035 CB LEU B 267 9.419 22.044 10.242 1.00 0.00 C 0 39 | ATOM 2058 C ARG B 271 6.348 19.294 6.366 1.00 0.00 C 0 40 | ATOM 2322 CB SER B 306 6.813 24.658 2.721 1.00 0.00 C 0 41 | ATOM 453 CB ASP B 66 4.528 20.423 -0.652 1.00 0.00 C 0 42 | TER 43 | END 44 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket3_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 4: 7 | HEADER 0 - Pocket Score : 9.4369 8 | HEADER 1 - Drug Score : 0.0151 9 | HEADER 2 - Number of V. Vertices : 38 10 | HEADER 3 - Mean alpha-sphere radius : 3.8006 11 | HEADER 4 - Mean alpha-sphere SA : 0.5440 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 2.0000 14 | HEADER 7 - Polarity Score : 6 15 | HEADER 8 - Volume Score : 3.4444 16 | HEADER 9 - Real volume (approximation) : 317.2007 17 | HEADER 10 - Charge Score : 0 18 | HEADER 11 - Local hydrophobic density Score : 0.0000 19 | HEADER 12 - Number of apolar alpha sphere : 1 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.0263 21 | ATOM 3693 POL STP 4 10.275 18.446 -0.622 0.00 4.65 22 | ATOM 5191 POL STP 4 9.201 19.749 0.107 0.00 4.01 23 | ATOM 7540 POL STP 4 10.191 20.295 0.749 0.00 4.49 24 | ATOM 13751 POL STP 4 8.126 20.353 0.627 0.00 3.71 25 | ATOM 7540 POL STP 4 10.195 20.294 0.750 0.00 4.49 26 | ATOM 7540 POL STP 4 9.783 20.983 2.262 0.00 4.26 27 | ATOM 13746 POL STP 4 8.566 20.763 1.082 0.00 3.90 28 | ATOM 11939 POL STP 4 9.649 21.053 2.369 0.00 4.22 29 | ATOM 7540 POL STP 4 9.800 21.014 2.325 0.00 4.26 30 | ATOM 11939 POL STP 4 9.659 21.053 2.371 0.00 4.23 31 | ATOM 11939 POL STP 4 9.343 21.104 2.598 0.00 4.09 32 | ATOM 13742 POL STP 4 8.851 21.128 2.144 
0.00 3.96 33 | ATOM 3077 POL STP 4 10.232 20.834 4.632 0.00 3.99 34 | ATOM 3722 POL STP 4 9.991 20.867 4.445 0.00 3.95 35 | ATOM 4965 POL STP 4 10.004 20.504 4.732 0.00 3.67 36 | ATOM 4919 POL STP 4 10.250 20.836 4.656 0.00 3.99 37 | ATOM 4966 POL STP 4 9.941 20.648 4.579 0.00 3.76 38 | ATOM 9813 POL STP 4 9.295 21.083 4.126 0.00 3.85 39 | ATOM 4966 POL STP 4 9.948 20.288 4.932 0.00 3.45 40 | ATOM 9813 POL STP 4 8.502 20.917 4.040 0.00 3.52 41 | ATOM 9309 POL STP 4 9.281 22.094 5.987 0.00 3.38 42 | ATOM 9813 POL STP 4 9.017 21.866 5.292 0.00 3.37 43 | ATOM 9313 POL STP 4 10.233 21.732 6.299 0.00 3.71 44 | ATOM 9316 POL STP 4 11.148 21.954 6.726 0.00 3.89 45 | ATOM 11299 POL STP 4 9.939 21.490 7.084 0.00 3.25 46 | ATOM 11300 APOL STP 4 11.120 21.933 6.791 0.00 3.85 47 | ATOM 11939 POL STP 4 9.401 21.116 2.661 0.00 4.10 48 | ATOM 9813 POL STP 4 7.887 21.213 4.577 0.00 3.04 49 | ATOM 9813 POL STP 4 8.349 20.893 4.034 0.00 3.46 50 | ATOM 9813 POL STP 4 8.258 20.866 3.968 0.00 3.45 51 | ATOM 13756 POL STP 4 7.743 20.510 3.866 0.00 3.11 52 | ATOM 13746 POL STP 4 8.060 20.826 3.856 0.00 3.40 53 | ATOM 13746 POL STP 4 8.377 20.896 3.289 0.00 3.54 54 | ATOM 13742 POL STP 4 8.851 21.128 2.145 0.00 3.96 55 | ATOM 11959 POL STP 4 8.236 21.598 1.879 0.00 3.48 56 | ATOM 13742 POL STP 4 8.823 21.148 2.132 0.00 3.94 57 | ATOM 13742 POL STP 4 8.223 21.582 1.870 0.00 3.49 58 | ATOM 13751 POL STP 4 7.865 20.485 0.526 0.00 3.54 59 | TER 60 | END 61 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket4_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 5: 6 | HEADER 0 - Pocket Score : 8.7122 7 | HEADER 1 - Drug Score : 0.0209 8 | HEADER 2 - Number of V. 
Vertices : 40 9 | HEADER 3 - Mean alpha-sphere radius : 3.6301 10 | HEADER 4 - Mean alpha-sphere SA : 0.5389 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 29.0000 13 | HEADER 7 - Polarity Score : 3 14 | HEADER 8 - Volume Score : 2.8889 15 | HEADER 9 - Real volume (approximation) : 199.4893 16 | HEADER 10 - Charge Score : 0 17 | HEADER 11 - Local hydrophobic density Score : 12.0000 18 | HEADER 12 - Number of apolar alpha sphere : 13 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.3250 20 | ATOM 824 CD2 LEU B 113 -0.926 46.988 16.686 1.00 0.00 C 0 21 | ATOM 2532 CB ILE B 333 -4.258 45.069 16.779 1.00 0.00 C 0 22 | ATOM 2534 CG2 ILE B 333 -3.496 43.832 17.235 1.00 0.00 C 0 23 | ATOM 1978 OG SER B 258 -1.675 43.123 19.946 1.00 0.00 O 0 24 | ATOM 2529 CA ILE B 333 -5.333 45.456 17.828 1.00 0.00 C 0 25 | ATOM 2539 O ALA B 334 -8.094 48.772 19.973 1.00 0.00 O 0 26 | ATOM 2530 C ILE B 333 -5.668 46.936 17.681 1.00 0.00 C 0 27 | ATOM 845 CB ALA B 116 -1.310 45.121 22.731 1.00 0.00 C 0 28 | ATOM 2523 O ASP B 332 -6.429 44.034 19.912 1.00 0.00 O 0 29 | ATOM 820 O LEU B 113 1.499 46.382 20.827 1.00 0.00 O 0 30 | ATOM 821 CB LEU B 113 1.040 46.210 18.033 1.00 0.00 C 0 31 | ATOM 818 CA LEU B 113 1.379 47.535 18.715 1.00 0.00 C 0 32 | ATOM 833 N ASP B 115 1.680 49.023 22.867 1.00 0.00 N 0 33 | ATOM 811 O GLN B 112 0.954 50.130 17.759 1.00 0.00 O 0 34 | ATOM 837 CB ASP B 115 -0.176 50.492 23.534 1.00 0.00 C 0 35 | ATOM 2531 O ILE B 333 -4.780 47.757 17.460 1.00 0.00 O 0 36 | ATOM 841 N ALA B 116 -0.202 47.118 23.619 1.00 0.00 N 0 37 | ATOM 819 C LEU B 113 1.959 47.274 20.110 1.00 0.00 C 0 38 | ATOM 2545 CB ALA B 335 -6.147 52.517 20.325 1.00 0.00 C 0 39 | ATOM 840 OD2 ASP B 115 -2.343 50.562 24.551 1.00 0.00 O 0 40 | ATOM 842 CA ALA B 116 -0.967 45.925 23.979 1.00 0.00 C 0 41 | ATOM 2542 CA ALA B 335 -7.116 51.342 20.363 1.00 0.00 C 0 42 | ATOM 2541 N ALA B 335 -7.003 50.562 19.139 1.00 0.00 N 0 43 | ATOM 2538 C ALA B 334 -7.530 49.346 19.037 
1.00 0.00 C 0 44 | TER 45 | END 46 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket4_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 5: 7 | HEADER 0 - Pocket Score : 8.7122 8 | HEADER 1 - Drug Score : 0.0209 9 | HEADER 2 - Number of V. Vertices : 40 10 | HEADER 3 - Mean alpha-sphere radius : 3.6301 11 | HEADER 4 - Mean alpha-sphere SA : 0.5389 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 29.0000 14 | HEADER 7 - Polarity Score : 3 15 | HEADER 8 - Volume Score : 2.8889 16 | HEADER 9 - Real volume (approximation) : 199.4893 17 | HEADER 10 - Charge Score : 0 18 | HEADER 11 - Local hydrophobic density Score : 12.0000 19 | HEADER 12 - Number of apolar alpha sphere : 13 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.3250 21 | ATOM 14280 APOL STP 5 -2.558 46.092 19.256 0.00 3.17 22 | ATOM 14280 APOL STP 5 -2.582 46.082 19.269 0.00 3.17 23 | ATOM 11098 POL STP 5 -4.629 47.306 21.649 0.00 4.12 24 | ATOM 12774 APOL STP 5 -4.247 46.816 21.143 0.00 3.74 25 | ATOM 8974 POL STP 5 -1.372 46.142 19.873 0.00 3.04 26 | ATOM 11084 APOL STP 5 -1.688 46.352 19.764 0.00 3.23 27 | ATOM 11084 APOL STP 5 -1.625 46.713 19.880 0.00 3.28 28 | ATOM 11084 APOL STP 5 -1.573 46.817 19.865 0.00 3.25 29 | ATOM 8811 POL STP 5 -0.484 49.408 20.532 0.00 3.21 30 | ATOM 9034 POL STP 5 -2.089 49.038 20.241 0.00 4.08 31 | ATOM 6700 APOL STP 5 -1.004 48.741 20.774 0.00 3.37 32 | ATOM 8811 APOL STP 5 -0.783 48.945 20.720 0.00 3.27 33 | ATOM 8805 POL STP 5 -0.902 48.741 20.837 0.00 3.30 34 | ATOM 9036 POL STP 5 -2.134 48.740 20.409 0.00 4.08 35 | ATOM 8976 POL STP 5 -1.041 48.474 20.739 0.00 3.29 36 | ATOM 8976 POL STP 5 -1.758 
47.884 20.458 0.00 3.61 37 | ATOM 9036 POL STP 5 -2.145 48.144 20.355 0.00 3.93 38 | ATOM 11084 APOL STP 5 -1.689 46.970 19.955 0.00 3.36 39 | ATOM 9034 POL STP 5 -2.584 50.256 20.068 0.00 4.23 40 | ATOM 11072 POL STP 5 -2.004 48.888 19.796 0.00 3.80 41 | ATOM 9036 POL STP 5 -2.941 49.892 20.503 0.00 4.15 42 | ATOM 9038 POL STP 5 -2.792 48.758 20.900 0.00 4.10 43 | ATOM 11079 APOL STP 5 -2.124 47.872 20.206 0.00 3.82 44 | ATOM 6702 POL STP 5 -3.440 47.940 22.431 0.00 3.55 45 | ATOM 9038 POL STP 5 -3.260 48.383 21.209 0.00 4.09 46 | ATOM 4443 POL STP 5 -3.672 49.509 20.983 0.00 3.95 47 | ATOM 9036 POL STP 5 -3.511 49.523 20.844 0.00 4.02 48 | ATOM 4752 POL STP 5 -4.286 48.590 21.512 0.00 4.11 49 | ATOM 9038 POL STP 5 -4.251 48.555 21.482 0.00 4.13 50 | ATOM 9038 POL STP 5 -4.254 48.034 21.689 0.00 4.27 51 | ATOM 9038 POL STP 5 -4.449 48.554 21.222 0.00 3.86 52 | ATOM 11096 POL STP 5 -5.154 48.057 20.492 0.00 3.07 53 | ATOM 11098 POL STP 5 -4.453 47.598 21.602 0.00 4.16 54 | ATOM 11084 POL STP 5 -2.487 46.504 19.760 0.00 3.48 55 | ATOM 12774 POL STP 5 -2.724 46.401 19.865 0.00 3.44 56 | ATOM 12774 POL STP 5 -2.576 46.309 19.509 0.00 3.34 57 | ATOM 12774 APOL STP 5 -3.946 46.860 20.899 0.00 3.65 58 | ATOM 12774 POL STP 5 -3.676 45.565 20.589 0.00 3.22 59 | ATOM 14280 APOL STP 5 -2.568 46.109 19.274 0.00 3.19 60 | ATOM 14276 APOL STP 5 -2.663 46.267 19.118 0.00 3.07 61 | TER 62 | END 63 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket5_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 6: 6 | HEADER 0 - Pocket Score : 4.1031 7 | HEADER 1 - Drug Score : 0.0270 8 | HEADER 2 - Number of V. 
Vertices : 40 9 | HEADER 3 - Mean alpha-sphere radius : 3.7167 10 | HEADER 4 - Mean alpha-sphere SA : 0.5788 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 33.8000 13 | HEADER 7 - Polarity Score : 2 14 | HEADER 8 - Volume Score : 3.0000 15 | HEADER 9 - Real volume (approximation) : 482.5637 16 | HEADER 10 - Charge Score : 1 17 | HEADER 11 - Local hydrophobic density Score : 18.1905 18 | HEADER 12 - Number of apolar alpha sphere : 21 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.5250 20 | ATOM 25 CE LYS B 3 -0.356 16.396 -4.234 1.00 0.00 C 0 21 | ATOM 443 CB ALA B 64 -0.976 13.625 -0.882 1.00 0.00 C 0 22 | ATOM 424 CD1 LEU B 61 -5.676 16.959 -0.218 1.00 0.00 C 0 23 | ATOM 26 NZ LYS B 3 -0.114 14.921 -4.231 1.00 0.00 N 0 24 | ATOM 273 CG2 THR B 38 -6.349 19.627 -3.455 1.00 0.00 C 0 25 | ATOM 272 OG1 THR B 38 -5.445 19.742 -1.223 1.00 0.00 O 0 26 | ATOM 22 CB LYS B 3 0.384 19.347 -3.225 1.00 0.00 C 0 27 | ATOM 255 CG ARG B 36 -2.968 20.872 -6.723 1.00 0.00 C 0 28 | ATOM 253 O ARG B 36 -2.193 22.460 -3.936 1.00 0.00 O 0 29 | ATOM 27 N ILE B 4 -0.630 22.107 -1.585 1.00 0.00 N 0 30 | ATOM 30 O ILE B 4 -3.268 22.282 -0.555 1.00 0.00 O 0 31 | ATOM 29 C ILE B 4 -2.284 22.155 0.172 1.00 0.00 C 0 32 | ATOM 448 CB ALA B 65 -0.165 18.573 1.157 1.00 0.00 C 0 33 | ATOM 23 CG LYS B 3 0.698 18.673 -4.566 1.00 0.00 C 0 34 | ATOM 35 N ALA B 5 -2.314 21.493 1.326 1.00 0.00 N 0 35 | ATOM 421 O LEU B 61 -2.603 15.972 1.501 1.00 0.00 O 0 36 | ATOM 444 N ALA B 65 0.350 16.276 0.532 1.00 0.00 N 0 37 | ATOM 445 CA ALA B 65 0.758 17.664 0.364 1.00 0.00 C 0 38 | ATOM 39 CB ALA B 5 -3.323 19.651 2.586 1.00 0.00 C 0 39 | ATOM 422 CB LEU B 61 -5.920 15.599 1.837 1.00 0.00 C 0 40 | ATOM 419 CA LEU B 61 -4.665 14.740 1.666 1.00 0.00 C 0 41 | ATOM 414 CG GLN B 60 -6.258 10.451 1.913 1.00 0.00 C 0 42 | ATOM 393 O ALA B 57 -7.759 12.902 3.123 1.00 0.00 O 0 43 | ATOM 415 CD GLN B 60 -6.838 9.281 1.122 1.00 0.00 C 0 44 | ATOM 417 NE2 GLN B 60 -6.059 8.762 0.174 
1.00 0.00 N 0 45 | ATOM 412 O GLN B 60 -2.888 12.603 1.636 1.00 0.00 O 0 46 | ATOM 418 N LEU B 61 -4.813 13.487 2.399 1.00 0.00 N 0 47 | ATOM 411 C GLN B 60 -3.902 12.520 2.328 1.00 0.00 C 0 48 | ATOM 256 CD ARG B 36 -3.715 20.790 -8.045 1.00 0.00 C 0 49 | TER 50 | END 51 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket5_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 6: 7 | HEADER 0 - Pocket Score : 4.1031 8 | HEADER 1 - Drug Score : 0.0270 9 | HEADER 2 - Number of V. Vertices : 40 10 | HEADER 3 - Mean alpha-sphere radius : 3.7167 11 | HEADER 4 - Mean alpha-sphere SA : 0.5788 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 33.8000 14 | HEADER 7 - Polarity Score : 2 15 | HEADER 8 - Volume Score : 3.0000 16 | HEADER 9 - Real volume (approximation) : 482.5637 17 | HEADER 10 - Charge Score : 1 18 | HEADER 11 - Local hydrophobic density Score : 18.1905 19 | HEADER 12 - Number of apolar alpha sphere : 21 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.5250 21 | ATOM 3066 APOL STP 6 -3.844 15.069 -3.098 0.00 3.90 22 | ATOM 3066 APOL STP 6 -5.191 14.844 -4.850 0.00 5.11 23 | ATOM 3186 APOL STP 6 -3.633 17.319 -3.134 0.00 3.58 24 | ATOM 3999 APOL STP 6 -3.018 18.535 -3.446 0.00 3.51 25 | ATOM 5513 APOL STP 6 -2.994 18.703 -3.956 0.00 3.52 26 | ATOM 3999 POL STP 6 -2.562 19.809 -2.367 0.00 3.10 27 | ATOM 11007 POL STP 6 -2.596 19.883 -2.334 0.00 3.06 28 | ATOM 3999 POL STP 6 -2.255 19.336 -1.382 0.00 3.22 29 | ATOM 3999 POL STP 6 -2.989 19.198 -3.492 0.00 3.39 30 | ATOM 11007 POL STP 6 -2.262 19.348 -1.388 0.00 3.21 31 | ATOM 9941 APOL STP 6 -2.118 19.309 -1.251 0.00 3.19 32 | ATOM 9944 APOL STP 6 
-2.226 19.237 -1.278 0.00 3.26 33 | ATOM 5513 APOL STP 6 -2.982 19.139 -3.778 0.00 3.42 34 | ATOM 5513 APOL STP 6 -2.451 18.740 -4.460 0.00 3.15 35 | ATOM 9941 POL STP 6 -2.469 19.242 -0.736 0.00 3.06 36 | ATOM 3066 APOL STP 6 -2.783 16.320 -1.867 0.00 3.39 37 | ATOM 3187 POL STP 6 -2.452 17.764 -1.631 0.00 3.61 38 | ATOM 3994 POL STP 6 -2.010 16.532 -1.549 0.00 3.16 39 | ATOM 3997 POL STP 6 -2.446 17.770 -1.628 0.00 3.61 40 | ATOM 3994 POL STP 6 -2.099 17.513 -1.511 0.00 3.42 41 | ATOM 9944 APOL STP 6 -2.158 17.621 -1.526 0.00 3.48 42 | ATOM 9944 POL STP 6 -2.010 17.520 -1.237 0.00 3.20 43 | ATOM 9944 APOL STP 6 -2.238 17.692 -1.552 0.00 3.52 44 | ATOM 9944 POL STP 6 -2.412 17.856 -1.545 0.00 3.59 45 | ATOM 9942 POL STP 6 -2.888 18.482 -0.181 0.00 3.04 46 | ATOM 9944 APOL STP 6 -2.133 17.663 -1.544 0.00 3.46 47 | ATOM 4870 APOL STP 6 -7.187 13.047 -1.494 0.00 4.38 48 | ATOM 5860 APOL STP 6 -7.970 13.098 -1.554 0.00 4.69 49 | ATOM 3068 APOL STP 6 -7.413 12.827 -2.228 0.00 4.91 50 | ATOM 4872 APOL STP 6 -6.603 12.795 -2.104 0.00 4.66 51 | ATOM 5860 APOL STP 6 -8.326 13.030 -1.858 0.00 5.02 52 | ATOM 5860 APOL STP 6 -7.018 13.054 -0.284 0.00 3.49 53 | ATOM 4872 APOL STP 6 -5.478 12.709 -2.810 0.00 4.98 54 | ATOM 6945 POL STP 6 -5.214 12.533 -2.351 0.00 4.62 55 | ATOM 6945 POL STP 6 -5.389 12.376 -1.558 0.00 4.06 56 | ATOM 11309 POL STP 6 -6.996 13.048 -0.244 0.00 3.46 57 | ATOM 11312 POL STP 6 -6.988 13.078 -0.191 0.00 3.41 58 | ATOM 11309 POL STP 6 -5.358 12.394 -1.031 0.00 3.64 59 | ATOM 11314 POL STP 6 -5.309 12.343 -0.850 0.00 3.48 60 | ATOM 5513 APOL STP 6 -4.545 16.422 -6.430 0.00 4.73 61 | TER 62 | END 63 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket6_atm.pdb: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pdb format file writen by the programm fpocket. 
3 | HEADER It represents the atoms contacted by the voronoi vertices of the pocket. 4 | HEADER 5 | HEADER Information about the pocket 7: 6 | HEADER 0 - Pocket Score : 2.1149 7 | HEADER 1 - Drug Score : 0.0379 8 | HEADER 2 - Number of V. Vertices : 37 9 | HEADER 3 - Mean alpha-sphere radius : 4.1927 10 | HEADER 4 - Mean alpha-sphere SA : 0.5667 11 | HEADER 5 - Mean B-factor : 0.0000 12 | HEADER 6 - Hydrophobicity Score : 42.3333 13 | HEADER 7 - Polarity Score : 1 14 | HEADER 8 - Volume Score : 4.0000 15 | HEADER 9 - Real volume (approximation) : 776.3873 16 | HEADER 10 - Charge Score : 1 17 | HEADER 11 - Local hydrophobic density Score : 13.0000 18 | HEADER 12 - Number of apolar alpha sphere : 14 19 | HEADER 13 - Proportion of apolar alpha sphere : 0.3784 20 | ATOM 584 CB ALA B 84 -11.074 9.100 21.538 1.00 0.00 C 0 21 | ATOM 1077 CD ARG B 145 -2.317 7.950 25.886 1.00 0.00 C 0 22 | ATOM 1062 OE1 GLN B 143 -1.420 4.559 24.472 1.00 0.00 O 0 23 | ATOM 628 OE1 GLN B 89 -6.663 12.953 20.671 1.00 0.00 O 0 24 | ATOM 1080 NH1 ARG B 145 -1.203 10.176 24.601 1.00 0.00 N 0 25 | ATOM 1063 NE2 GLN B 143 0.328 5.630 23.564 1.00 0.00 N 0 26 | ATOM 655 CG ARG B 93 -4.935 11.740 15.736 1.00 0.00 C 0 27 | ATOM 1046 CD1 PHE B 141 0.777 11.953 20.069 1.00 0.00 C 0 28 | ATOM 1048 CE1 PHE B 141 0.185 12.978 19.333 1.00 0.00 C 0 29 | ATOM 583 O ALA B 84 -10.684 11.299 19.399 1.00 0.00 O 0 30 | ATOM 656 CD ARG B 93 -6.440 11.595 15.874 1.00 0.00 C 0 31 | ATOM 659 NH1 ARG B 93 -9.182 10.995 15.638 1.00 0.00 N 0 32 | ATOM 1081 NH2 ARG B 145 -2.470 11.616 25.854 1.00 0.00 N 0 33 | ATOM 629 NE2 GLN B 89 -7.477 14.433 22.148 1.00 0.00 N 0 34 | ATOM 648 CD1 LEU B 92 -3.824 15.957 21.103 1.00 0.00 C 0 35 | ATOM 1031 O TYR B 140 0.019 12.566 23.885 1.00 0.00 O 0 36 | ATOM 685 CG LYS B 96 1.624 10.982 16.217 1.00 0.00 C 0 37 | ATOM 684 CB LYS B 96 1.025 12.311 15.784 1.00 0.00 C 0 38 | ATOM 651 CA ARG B 93 -3.195 13.379 14.879 1.00 0.00 C 0 39 | ATOM 645 O LEU B 92 -1.154 14.729 16.204 1.00 0.00 O 
0 40 | ATOM 646 CB LEU B 92 -3.030 15.645 18.745 1.00 0.00 C 0 41 | ATOM 624 O GLN B 89 -6.080 14.674 16.644 1.00 0.00 O 0 42 | ATOM 650 N ARG B 93 -3.360 14.480 15.825 1.00 0.00 N 0 43 | ATOM 644 C LEU B 92 -2.319 15.072 16.407 1.00 0.00 C 0 44 | ATOM 1079 CZ ARG B 145 -2.118 10.379 25.538 1.00 0.00 C 0 45 | ATOM 687 CE LYS B 96 3.402 9.900 17.581 1.00 0.00 C 0 46 | ATOM 1043 O PHE B 141 2.752 8.611 23.062 1.00 0.00 O 0 47 | ATOM 1044 CB PHE B 141 2.620 11.055 21.544 1.00 0.00 C 0 48 | ATOM 1041 CA PHE B 141 2.037 10.902 22.960 1.00 0.00 C 0 49 | TER 50 | END 51 | -------------------------------------------------------------------------------- /datasets/custom/1a05B/pockets/pocket6_vert.pqr: -------------------------------------------------------------------------------- 1 | HEADER 2 | HEADER This is a pqr format file writen by the programm fpocket. 3 | HEADER It represent the voronoi vertices of a single pocket found by the 4 | HEADER algorithm. 5 | HEADER 6 | HEADER Information about the pocket 7: 7 | HEADER 0 - Pocket Score : 2.1149 8 | HEADER 1 - Drug Score : 0.0379 9 | HEADER 2 - Number of V. 
Vertices : 37 10 | HEADER 3 - Mean alpha-sphere radius : 4.1927 11 | HEADER 4 - Mean alpha-sphere SA : 0.5667 12 | HEADER 5 - Mean B-factor : 0.0000 13 | HEADER 6 - Hydrophobicity Score : 42.3333 14 | HEADER 7 - Polarity Score : 1 15 | HEADER 8 - Volume Score : 4.0000 16 | HEADER 9 - Real volume (approximation) : 776.3873 17 | HEADER 10 - Charge Score : 1 18 | HEADER 11 - Local hydrophobic density Score : 13.0000 19 | HEADER 12 - Number of apolar alpha sphere : 14 20 | HEADER 13 - Proportion of apolar alpha sphere : 0.3784 21 | ATOM 5710 POL STP 7 -5.477 7.093 20.879 0.00 5.98 22 | ATOM 16639 POL STP 7 -4.363 7.580 20.418 0.00 5.85 23 | ATOM 16639 POL STP 7 -4.116 7.578 20.074 0.00 5.98 24 | ATOM 16639 POL STP 7 -4.025 7.630 19.984 0.00 5.98 25 | ATOM 16639 POL STP 7 -3.614 7.538 20.994 0.00 5.08 26 | ATOM 16258 APOL STP 7 -3.307 10.207 19.602 0.00 4.47 27 | ATOM 16639 POL STP 7 -3.497 8.595 19.932 0.00 5.44 28 | ATOM 16639 POL STP 7 -3.327 7.920 19.741 0.00 5.76 29 | ATOM 7959 POL STP 7 -6.928 8.410 19.349 0.00 4.74 30 | ATOM 7959 POL STP 7 -7.344 7.959 18.810 0.00 4.76 31 | ATOM 10060 POL STP 7 -4.352 13.260 23.341 0.00 3.54 32 | ATOM 14552 POL STP 7 -3.370 12.475 22.344 0.00 3.72 33 | ATOM 13950 APOL STP 7 -1.837 10.662 17.353 0.00 3.66 34 | ATOM 13952 APOL STP 7 -1.893 11.258 17.228 0.00 3.42 35 | ATOM 16258 APOL STP 7 -1.983 9.735 17.880 0.00 4.16 36 | ATOM 13952 APOL STP 7 -1.740 11.808 17.161 0.00 3.13 37 | ATOM 13947 APOL STP 7 -3.150 12.540 20.449 0.00 3.54 38 | ATOM 14552 POL STP 7 -2.954 12.211 21.447 0.00 3.86 39 | ATOM 5705 POL STP 7 -4.424 12.900 18.549 0.00 3.09 40 | ATOM 13950 APOL STP 7 -3.404 11.946 19.141 0.00 3.74 41 | ATOM 13952 APOL STP 7 -2.938 12.332 18.346 0.00 3.34 42 | ATOM 15298 APOL STP 7 -2.753 12.532 18.245 0.00 3.17 43 | ATOM 13952 APOL STP 7 -2.410 12.049 17.691 0.00 3.21 44 | ATOM 13952 POL STP 7 -2.251 12.168 17.581 0.00 3.11 45 | ATOM 15298 POL STP 7 -2.275 12.274 17.651 0.00 3.06 46 | ATOM 16630 POL STP 7 -3.338 12.175 22.240 
0.00 3.76 47 | ATOM 16637 POL STP 7 -2.961 11.731 21.403 0.00 3.97 48 | ATOM 16630 POL STP 7 -4.004 11.313 22.557 0.00 3.65 49 | ATOM 16637 POL STP 7 -2.967 11.673 21.370 0.00 3.97 50 | ATOM 16637 POL STP 7 -2.932 11.716 21.403 0.00 3.95 51 | ATOM 16257 APOL STP 7 -0.884 7.204 18.759 0.00 5.20 52 | ATOM 16258 APOL STP 7 -2.325 7.140 18.459 0.00 5.95 53 | ATOM 16257 POL STP 7 0.198 7.584 19.587 0.00 4.43 54 | ATOM 16257 APOL STP 7 0.880 8.187 19.810 0.00 3.78 55 | ATOM 16258 APOL STP 7 0.191 8.986 21.157 0.00 3.21 56 | ATOM 16639 POL STP 7 -0.421 8.804 21.358 0.00 3.61 57 | ATOM 16639 POL STP 7 -0.615 8.510 21.158 0.00 3.87 58 | TER 59 | END 60 | -------------------------------------------------------------------------------- /datasets/custom/1a9t/1a9t_lig_1.pdb: -------------------------------------------------------------------------------- 1 | HETATM 1 N1 HPA A 290 26.078 89.932 73.254 1.00 2.02 N 2 | HETATM 2 C2 HPA A 290 25.067 89.042 73.005 1.00 2.07 C 3 | HETATM 3 N3 HPA A 290 23.780 89.402 73.068 1.00 7.39 N 4 | HETATM 4 C4 HPA A 290 23.609 90.739 73.400 1.00 6.34 C 5 | HETATM 5 C5 HPA A 290 24.582 91.671 73.657 1.00 3.04 C 6 | HETATM 6 C6 HPA A 290 25.936 91.296 73.593 1.00 5.62 C 7 | HETATM 7 O6 HPA A 290 26.919 92.035 73.788 1.00 7.06 O 8 | HETATM 8 N7 HPA A 290 24.023 92.876 73.957 1.00 2.84 N 9 | HETATM 9 C8 HPA A 290 22.672 92.685 73.878 1.00 5.21 C 10 | HETATM 10 N9 HPA A 290 22.380 91.387 73.534 1.00 6.23 N 11 | TER 11 HPA A 290 12 | END 13 | -------------------------------------------------------------------------------- /datasets/custom/1a9t/1a9t_site_1.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 N SER A 33 14.515 92.489 75.414 1.00 12.39 N 2 | ATOM 2 CA SER A 33 15.056 93.101 76.625 1.00 15.31 C 3 | ATOM 3 C SER A 33 14.979 94.618 76.598 1.00 15.60 C 4 | ATOM 4 O SER A 33 15.444 95.257 75.645 1.00 14.10 O 5 | ATOM 5 CB SER A 33 16.509 92.672 76.838 1.00 13.75 C 6 | ATOM 6 OG SER A 33 16.602 
91.297 77.171 1.00 15.62 O 7 | ATOM 7 N TYR A 88 18.783 79.740 76.620 1.00 5.41 N 8 | ATOM 8 CA TYR A 88 19.667 79.718 77.792 1.00 4.99 C 9 | ATOM 9 C TYR A 88 18.934 79.583 79.133 1.00 5.48 C 10 | ATOM 10 O TYR A 88 19.565 79.376 80.163 1.00 6.48 O 11 | ATOM 11 CB TYR A 88 20.612 80.934 77.820 1.00 6.31 C 12 | ATOM 12 CG TYR A 88 19.943 82.283 78.007 1.00 4.48 C 13 | ATOM 13 CD1 TYR A 88 19.336 82.629 79.220 1.00 2.02 C 14 | ATOM 14 CD2 TYR A 88 19.900 83.203 76.963 1.00 2.01 C 15 | ATOM 15 CE1 TYR A 88 18.699 83.850 79.383 1.00 2.05 C 16 | ATOM 16 CE2 TYR A 88 19.268 84.422 77.113 1.00 3.36 C 17 | ATOM 17 CZ TYR A 88 18.667 84.740 78.319 1.00 3.76 C 18 | ATOM 18 OH TYR A 88 17.993 85.928 78.434 1.00 8.75 O 19 | ATOM 19 N ASN A 115 14.605 92.990 69.439 1.00 2.18 N 20 | ATOM 20 CA ASN A 115 15.559 92.480 70.406 1.00 2.69 C 21 | ATOM 21 C ASN A 115 16.616 93.526 70.680 1.00 2.06 C 22 | ATOM 22 O ASN A 115 16.671 94.555 70.013 1.00 3.81 O 23 | ATOM 23 CB ASN A 115 16.207 91.180 69.906 1.00 4.04 C 24 | ATOM 24 CG ASN A 115 16.998 91.366 68.616 1.00 5.63 C 25 | ATOM 25 OD1 ASN A 115 17.919 92.175 68.552 1.00 7.94 O 26 | ATOM 26 ND2 ASN A 115 16.648 90.601 67.588 1.00 6.47 N 27 | ATOM 27 N ALA A 116 17.395 93.286 71.720 1.00 2.00 N 28 | ATOM 28 CA ALA A 116 18.491 94.153 72.106 1.00 6.29 C 29 | ATOM 29 C ALA A 116 19.676 93.332 71.611 1.00 5.87 C 30 | ATOM 30 O ALA A 116 19.703 92.118 71.823 1.00 5.76 O 31 | ATOM 31 CB ALA A 116 18.536 94.287 73.627 1.00 2.04 C 32 | ATOM 32 N ALA A 117 20.615 93.955 70.909 1.00 6.43 N 33 | ATOM 33 CA ALA A 117 21.765 93.221 70.391 1.00 4.39 C 34 | ATOM 34 C ALA A 117 23.063 93.977 70.578 1.00 5.22 C 35 | ATOM 35 O ALA A 117 23.070 95.204 70.709 1.00 5.13 O 36 | ATOM 36 CB ALA A 117 21.568 92.903 68.914 1.00 4.02 C 37 | ATOM 37 N GLY A 118 24.161 93.228 70.598 1.00 8.26 N 38 | ATOM 38 CA GLY A 118 25.472 93.826 70.738 1.00 5.53 C 39 | ATOM 39 C GLY A 118 25.914 94.198 69.342 1.00 5.34 C 40 | ATOM 40 O GLY A 118 25.671 93.450 68.391 1.00 
6.80 O 41 | ATOM 41 N GLY A 119 26.542 95.358 69.207 1.00 5.90 N 42 | ATOM 42 CA GLY A 119 26.984 95.806 67.906 1.00 2.94 C 43 | ATOM 43 C GLY A 119 28.329 95.263 67.484 1.00 5.83 C 44 | ATOM 44 O GLY A 119 29.303 95.329 68.237 1.00 7.05 O 45 | ATOM 45 N LEU A 120 28.372 94.702 66.281 1.00 5.80 N 46 | ATOM 46 CA LEU A 120 29.598 94.158 65.721 1.00 4.97 C 47 | ATOM 47 C LEU A 120 30.091 95.111 64.644 1.00 7.66 C 48 | ATOM 48 O LEU A 120 31.286 95.334 64.509 1.00 9.42 O 49 | ATOM 49 CB LEU A 120 29.358 92.767 65.124 1.00 4.31 C 50 | ATOM 50 CG LEU A 120 29.036 91.599 66.066 1.00 5.41 C 51 | ATOM 51 CD1 LEU A 120 28.690 90.361 65.251 1.00 2.70 C 52 | ATOM 52 CD2 LEU A 120 30.213 91.301 66.971 1.00 4.67 C 53 | ATOM 53 N TYR A 192 21.817 89.687 61.568 1.00 3.19 N 54 | ATOM 54 CA TYR A 192 21.799 89.379 62.984 1.00 4.11 C 55 | ATOM 55 C TYR A 192 22.315 87.948 63.161 1.00 4.26 C 56 | ATOM 56 O TYR A 192 21.931 87.051 62.408 1.00 3.82 O 57 | ATOM 57 CB TYR A 192 20.361 89.504 63.497 1.00 2.53 C 58 | ATOM 58 CG TYR A 192 20.143 89.078 64.924 1.00 4.49 C 59 | ATOM 59 CD1 TYR A 192 19.814 87.755 65.235 1.00 4.56 C 60 | ATOM 60 CD2 TYR A 192 20.227 90.000 65.964 1.00 6.21 C 61 | ATOM 61 CE1 TYR A 192 19.571 87.369 66.542 1.00 4.86 C 62 | ATOM 62 CE2 TYR A 192 19.985 89.619 67.279 1.00 3.59 C 63 | ATOM 63 CZ TYR A 192 19.654 88.308 67.556 1.00 4.35 C 64 | ATOM 64 OH TYR A 192 19.377 87.938 68.842 1.00 2.00 O 65 | ATOM 65 N VAL A 193 23.223 87.744 64.109 1.00 5.58 N 66 | ATOM 66 CA VAL A 193 23.742 86.408 64.373 1.00 2.82 C 67 | ATOM 67 C VAL A 193 23.300 85.962 65.764 1.00 6.09 C 68 | ATOM 68 O VAL A 193 23.428 86.705 66.748 1.00 6.78 O 69 | ATOM 69 CB VAL A 193 25.301 86.324 64.231 1.00 6.21 C 70 | ATOM 70 CG1 VAL A 193 26.005 87.179 65.266 1.00 2.00 C 71 | ATOM 71 CG2 VAL A 193 25.765 84.872 64.336 1.00 3.59 C 72 | ATOM 72 N LEU A 195 23.502 83.231 68.937 1.00 2.65 N 73 | ATOM 73 CA LEU A 195 24.305 82.230 69.625 1.00 4.15 C 74 | ATOM 74 C LEU A 195 23.550 81.887 70.916 1.00 
4.51 C 75 | ATOM 75 O LEU A 195 22.630 82.609 71.303 1.00 5.17 O 76 | ATOM 76 CB LEU A 195 25.746 82.704 69.877 1.00 4.28 C 77 | ATOM 77 CG LEU A 195 26.124 84.132 70.274 1.00 10.10 C 78 | ATOM 78 CD1 LEU A 195 25.474 84.536 71.604 1.00 7.30 C 79 | ATOM 79 CD2 LEU A 195 27.656 84.210 70.362 1.00 2.75 C 80 | ATOM 80 N GLY A 197 24.363 81.709 74.454 1.00 4.02 N 81 | ATOM 81 CA GLY A 197 24.412 82.468 75.689 1.00 5.18 C 82 | ATOM 82 C GLY A 197 24.422 81.483 76.849 1.00 6.03 C 83 | ATOM 83 O GLY A 197 24.566 80.281 76.614 1.00 3.01 O 84 | ATOM 84 N PRO A 198 24.188 81.931 78.098 1.00 4.82 N 85 | ATOM 85 CA PRO A 198 23.898 83.297 78.547 1.00 3.78 C 86 | ATOM 86 C PRO A 198 25.108 84.207 78.776 1.00 3.45 C 87 | ATOM 87 O PRO A 198 24.948 85.391 79.052 1.00 7.74 O 88 | ATOM 88 CB PRO A 198 23.149 83.059 79.847 1.00 4.24 C 89 | ATOM 89 CG PRO A 198 23.895 81.905 80.425 1.00 5.69 C 90 | ATOM 90 CD PRO A 198 24.104 80.986 79.228 1.00 4.99 C 91 | ATOM 91 N ASN A 199 26.314 83.663 78.703 1.00 2.05 N 92 | ATOM 92 CA ASN A 199 27.501 84.490 78.907 1.00 4.47 C 93 | ATOM 93 C ASN A 199 27.723 85.410 77.718 1.00 3.88 C 94 | ATOM 94 O ASN A 199 27.227 85.153 76.619 1.00 2.60 O 95 | ATOM 95 CB ASN A 199 28.757 83.627 79.120 1.00 4.63 C 96 | ATOM 96 CG ASN A 199 29.093 82.776 77.911 1.00 7.00 C 97 | ATOM 97 OD1 ASN A 199 28.553 81.681 77.743 1.00 9.89 O 98 | ATOM 98 ND2 ASN A 199 29.985 83.271 77.058 1.00 4.40 N 99 | ATOM 99 N PHE A 200 28.466 86.485 77.944 1.00 4.84 N 100 | ATOM 100 CA PHE A 200 28.782 87.410 76.877 1.00 4.66 C 101 | ATOM 101 C PHE A 200 30.057 86.909 76.193 1.00 4.84 C 102 | ATOM 102 O PHE A 200 30.812 86.132 76.767 1.00 4.81 O 103 | ATOM 103 CB PHE A 200 28.909 88.838 77.404 1.00 3.75 C 104 | ATOM 104 CG PHE A 200 27.583 89.483 77.727 1.00 4.52 C 105 | ATOM 105 CD1 PHE A 200 26.622 89.655 76.739 1.00 4.39 C 106 | ATOM 106 CD2 PHE A 200 27.303 89.931 79.008 1.00 6.31 C 107 | ATOM 107 CE1 PHE A 200 25.402 90.265 77.021 1.00 7.48 C 108 | ATOM 108 CE2 PHE A 200 
26.086 90.543 79.302 1.00 11.38 C 109 | ATOM 109 CZ PHE A 200 25.133 90.712 78.306 1.00 7.63 C 110 | ATOM 110 N GLU A 201 30.291 87.374 74.974 1.00 3.72 N 111 | ATOM 111 CA GLU A 201 31.403 86.932 74.145 1.00 3.15 C 112 | ATOM 112 C GLU A 201 32.828 87.354 74.520 1.00 5.41 C 113 | ATOM 113 O GLU A 201 33.051 88.326 75.245 1.00 5.47 O 114 | ATOM 114 CB GLU A 201 31.124 87.320 72.683 1.00 2.00 C 115 | ATOM 115 CG GLU A 201 29.788 86.812 72.085 1.00 3.69 C 116 | ATOM 116 CD GLU A 201 28.545 87.523 72.639 1.00 6.30 C 117 | ATOM 117 OE1 GLU A 201 28.596 88.751 72.836 1.00 9.20 O 118 | ATOM 118 OE2 GLU A 201 27.513 86.860 72.870 1.00 8.95 O 119 | ATOM 119 N CYS A 206 34.091 87.601 69.116 1.00 6.65 N 120 | ATOM 120 CA CYS A 206 33.538 88.780 68.462 1.00 8.06 C 121 | ATOM 121 C CYS A 206 34.200 89.072 67.120 1.00 6.15 C 122 | ATOM 122 O CYS A 206 33.519 89.394 66.146 1.00 9.13 O 123 | ATOM 123 CB CYS A 206 33.645 89.993 69.385 1.00 9.83 C 124 | ATOM 124 SG CYS A 206 32.446 89.975 70.742 1.00 15.35 S 125 | ATOM 125 N LEU A 209 32.866 86.517 64.677 1.00 7.77 N 126 | ATOM 126 CA LEU A 209 31.486 86.788 64.280 1.00 7.77 C 127 | ATOM 127 C LEU A 209 31.423 87.915 63.262 1.00 8.59 C 128 | ATOM 128 O LEU A 209 30.667 87.846 62.291 1.00 7.66 O 129 | ATOM 129 CB LEU A 209 30.630 87.125 65.504 1.00 8.83 C 130 | ATOM 130 CG LEU A 209 30.424 85.977 66.499 1.00 11.20 C 131 | ATOM 131 CD1 LEU A 209 29.659 86.462 67.720 1.00 9.01 C 132 | ATOM 132 CD2 LEU A 209 29.684 84.836 65.815 1.00 10.52 C 133 | ATOM 133 N ALA A 216 25.297 92.400 64.130 1.00 2.96 N 134 | ATOM 134 CA ALA A 216 24.846 92.455 65.513 1.00 4.05 C 135 | ATOM 135 C ALA A 216 24.717 91.029 66.032 1.00 4.65 C 136 | ATOM 136 O ALA A 216 24.399 90.115 65.276 1.00 3.07 O 137 | ATOM 137 CB ALA A 216 23.504 93.167 65.597 1.00 2.22 C 138 | ATOM 138 N VAL A 217 24.961 90.841 67.321 1.00 5.77 N 139 | ATOM 139 CA VAL A 217 24.882 89.515 67.926 1.00 3.87 C 140 | ATOM 140 C VAL A 217 23.832 89.512 69.044 1.00 7.11 C 141 | ATOM 141 O VAL 
A 217 23.666 90.517 69.749 1.00 5.82 O 142 | ATOM 142 CB VAL A 217 26.266 89.075 68.472 1.00 3.89 C 143 | ATOM 143 CG1 VAL A 217 26.806 90.087 69.488 1.00 4.41 C 144 | ATOM 144 CG2 VAL A 217 26.181 87.686 69.081 1.00 5.16 C 145 | ATOM 145 N GLY A 218 23.124 88.394 69.188 1.00 3.73 N 146 | ATOM 146 CA GLY A 218 22.097 88.279 70.211 1.00 2.92 C 147 | ATOM 147 C GLY A 218 21.813 86.825 70.540 1.00 3.81 C 148 | ATOM 148 O GLY A 218 22.360 85.930 69.888 1.00 2.23 O 149 | ATOM 149 N MET A 219 20.920 86.586 71.500 1.00 3.66 N 150 | ATOM 150 CA MET A 219 20.590 85.230 71.952 1.00 4.39 C 151 | ATOM 151 C MET A 219 19.120 84.827 71.759 1.00 5.56 C 152 | ATOM 152 O MET A 219 18.562 84.067 72.567 1.00 5.99 O 153 | ATOM 153 CB MET A 219 20.949 85.091 73.439 1.00 5.01 C 154 | ATOM 154 CG MET A 219 22.357 85.537 73.799 1.00 8.26 C 155 | ATOM 155 SD MET A 219 22.608 85.739 75.586 1.00 6.21 S 156 | ATOM 156 CE MET A 219 24.294 86.353 75.647 1.00 4.47 C 157 | ATOM 157 N SER A 220 18.485 85.308 70.697 1.00 4.84 N 158 | ATOM 158 CA SER A 220 17.084 84.975 70.462 1.00 3.75 C 159 | ATOM 159 C SER A 220 16.672 85.289 69.029 1.00 3.12 C 160 | ATOM 160 O SER A 220 17.500 85.284 68.121 1.00 2.08 O 161 | ATOM 161 CB SER A 220 16.209 85.800 71.396 1.00 3.99 C 162 | ATOM 162 OG SER A 220 16.190 87.150 70.958 1.00 2.24 O 163 | ATOM 163 N LEU A 240 17.901 96.749 68.655 1.00 6.70 N 164 | ATOM 164 CA LEU A 240 18.266 98.016 69.262 1.00 5.76 C 165 | ATOM 165 C LEU A 240 19.707 97.719 69.663 1.00 5.96 C 166 | ATOM 166 O LEU A 240 19.958 96.738 70.375 1.00 5.20 O 167 | ATOM 167 CB LEU A 240 17.433 98.296 70.516 1.00 4.73 C 168 | ATOM 168 CG LEU A 240 17.969 99.436 71.388 1.00 7.46 C 169 | ATOM 169 CD1 LEU A 240 17.834 100.763 70.667 1.00 6.41 C 170 | ATOM 170 CD2 LEU A 240 17.236 99.477 72.717 1.00 9.88 C 171 | ATOM 171 N ILE A 241 20.655 98.491 69.143 1.00 4.80 N 172 | ATOM 172 CA ILE A 241 22.064 98.285 69.468 1.00 6.05 C 173 | ATOM 173 C ILE A 241 22.291 98.877 70.861 1.00 8.55 C 174 | ATOM 174 O 
ILE A 241 22.377 100.096 71.015 1.00 10.17 O 175 | ATOM 175 CB ILE A 241 22.986 98.949 68.398 1.00 7.13 C 176 | ATOM 176 CG1 ILE A 241 22.646 98.389 67.006 1.00 2.01 C 177 | ATOM 177 CG2 ILE A 241 24.478 98.731 68.734 1.00 2.12 C 178 | ATOM 178 CD1 ILE A 241 22.681 96.864 66.907 1.00 2.13 C 179 | ATOM 179 N THR A 242 22.350 98.012 71.874 1.00 7.24 N 180 | ATOM 180 CA THR A 242 22.520 98.449 73.257 1.00 8.15 C 181 | ATOM 181 C THR A 242 23.952 98.678 73.718 1.00 6.35 C 182 | ATOM 182 O THR A 242 24.190 99.368 74.712 1.00 11.82 O 183 | ATOM 183 CB THR A 242 21.857 97.467 74.246 1.00 10.49 C 184 | ATOM 184 OG1 THR A 242 22.374 96.140 74.047 1.00 5.64 O 185 | ATOM 185 CG2 THR A 242 20.347 97.472 74.061 1.00 9.31 C 186 | ATOM 186 N ASN A 243 24.905 98.113 73.002 1.00 4.12 N 187 | ATOM 187 CA ASN A 243 26.295 98.259 73.386 1.00 7.15 C 188 | ATOM 188 C ASN A 243 27.205 97.873 72.245 1.00 7.48 C 189 | ATOM 189 O ASN A 243 26.810 97.152 71.333 1.00 7.76 O 190 | ATOM 190 CB ASN A 243 26.601 97.360 74.589 1.00 5.38 C 191 | ATOM 191 CG ASN A 243 26.358 95.885 74.294 1.00 7.48 C 192 | ATOM 192 OD1 ASN A 243 25.219 95.465 74.115 1.00 8.43 O 193 | ATOM 193 ND2 ASN A 243 27.428 95.100 74.222 1.00 7.84 N 194 | ATOM 194 N LYS A 244 28.427 98.375 72.299 1.00 10.56 N 195 | ATOM 195 CA LYS A 244 29.428 98.059 71.300 1.00 14.03 C 196 | ATOM 196 C LYS A 244 30.253 96.916 71.870 1.00 13.90 C 197 | ATOM 197 O LYS A 244 30.704 96.961 73.015 1.00 10.72 O 198 | ATOM 198 CB LYS A 244 30.325 99.268 71.012 1.00 19.27 C 199 | ATOM 199 CG LYS A 244 29.640 100.390 70.230 1.00 22.50 C 200 | ATOM 200 CD LYS A 244 29.180 99.895 68.864 1.00 30.25 C 201 | ATOM 201 CE LYS A 244 30.361 99.448 67.999 1.00 30.83 C 202 | ATOM 202 NZ LYS A 244 29.915 98.764 66.757 1.00 28.37 N 203 | ATOM 203 N VAL A 245 30.372 95.858 71.083 1.00 16.13 N 204 | ATOM 204 CA VAL A 245 31.123 94.672 71.463 1.00 16.96 C 205 | ATOM 205 C VAL A 245 32.623 94.977 71.643 1.00 15.53 C 206 | ATOM 206 O VAL A 245 33.207 95.739 70.875 1.00 
18.19 O 207 | ATOM 207 CB VAL A 245 30.894 93.579 70.396 1.00 16.29 C 208 | ATOM 208 CG1 VAL A 245 32.115 92.744 70.209 1.00 17.98 C 209 | ATOM 209 CG2 VAL A 245 29.698 92.727 70.779 1.00 11.89 C 210 | ATOM 210 N ILE A 246 33.232 94.388 72.668 1.00 15.63 N 211 | ATOM 211 CA ILE A 246 34.661 94.585 72.951 1.00 15.09 C 212 | ATOM 212 C ILE A 246 35.473 93.787 71.919 1.00 15.81 C 213 | ATOM 213 O ILE A 246 35.379 92.564 71.860 1.00 12.58 O 214 | ATOM 214 CB ILE A 246 35.043 94.095 74.387 1.00 15.26 C 215 | ATOM 215 CG1 ILE A 246 34.133 94.721 75.450 1.00 11.92 C 216 | ATOM 216 CG2 ILE A 246 36.510 94.422 74.692 1.00 16.57 C 217 | ATOM 217 CD1 ILE A 246 34.313 96.207 75.652 1.00 14.36 C 218 | ATOM 218 N LYS A 254 33.249 95.972 80.008 1.00 43.68 N 219 | ATOM 219 CA LYS A 254 32.662 94.704 79.594 1.00 40.31 C 220 | ATOM 220 C LYS A 254 31.137 94.725 79.636 1.00 36.88 C 221 | ATOM 221 O LYS A 254 30.536 95.453 80.429 1.00 35.77 O 222 | ATOM 222 CB LYS A 254 33.228 93.546 80.426 1.00 42.74 C 223 | ATOM 223 CG LYS A 254 33.221 93.762 81.927 1.00 46.09 C 224 | ATOM 224 CD LYS A 254 34.202 92.812 82.600 1.00 51.45 C 225 | ATOM 225 CE LYS A 254 34.215 92.972 84.118 1.00 55.19 C 226 | ATOM 226 NZ LYS A 254 32.950 92.504 84.763 1.00 57.44 N 227 | ATOM 227 N ALA A 255 30.524 93.976 78.721 1.00 33.21 N 228 | ATOM 228 CA ALA A 255 29.071 93.890 78.613 1.00 30.75 C 229 | ATOM 229 C ALA A 255 28.397 93.424 79.897 1.00 29.14 C 230 | ATOM 230 O ALA A 255 28.917 92.559 80.606 1.00 25.21 O 231 | ATOM 231 CB ALA A 255 28.681 92.978 77.449 1.00 27.15 C 232 | ATOM 232 N ASN A 256 27.233 94.005 80.176 1.00 29.06 N 233 | ATOM 233 CA ASN A 256 26.447 93.679 81.362 1.00 28.32 C 234 | ATOM 234 C ASN A 256 24.971 93.893 81.036 1.00 25.99 C 235 | ATOM 235 O ASN A 256 24.627 94.770 80.246 1.00 27.77 O 236 | ATOM 236 CB ASN A 256 26.868 94.552 82.551 1.00 29.45 C 237 | ATOM 237 CG ASN A 256 26.586 96.022 82.325 1.00 32.50 C 238 | ATOM 238 OD1 ASN A 256 25.561 96.542 82.767 1.00 30.07 O 239 | ATOM 
239 ND2 ASN A 256 27.489 96.698 81.626 1.00 33.15 N 240 | ATOM 240 N HIS A 257 24.108 93.090 81.648 1.00 24.78 N 241 | ATOM 241 CA HIS A 257 22.666 93.148 81.416 1.00 24.77 C 242 | ATOM 242 C HIS A 257 21.972 94.468 81.751 1.00 25.56 C 243 | ATOM 243 O HIS A 257 21.048 94.871 81.043 1.00 23.54 O 244 | ATOM 244 CB HIS A 257 21.973 91.998 82.159 1.00 26.52 C 245 | ATOM 245 CG HIS A 257 20.548 91.774 81.750 1.00 27.80 C 246 | ATOM 246 ND1 HIS A 257 20.155 91.687 80.431 1.00 28.65 N 247 | ATOM 247 CD2 HIS A 257 19.425 91.597 82.487 1.00 28.26 C 248 | ATOM 248 CE1 HIS A 257 18.853 91.463 80.372 1.00 28.83 C 249 | ATOM 249 NE2 HIS A 257 18.387 91.405 81.607 1.00 29.35 N 250 | ATOM 250 N GLU A 259 23.169 97.479 81.560 1.00 28.90 N 251 | ATOM 251 CA GLU A 259 23.504 98.483 80.553 1.00 26.46 C 252 | ATOM 252 C GLU A 259 22.559 98.333 79.356 1.00 24.03 C 253 | ATOM 253 O GLU A 259 22.137 99.330 78.765 1.00 23.08 O 254 | ATOM 254 CB GLU A 259 24.972 98.349 80.110 1.00 26.29 C 255 | ATOM 255 CG GLU A 259 25.163 97.714 78.722 1.00 32.25 C 256 | ATOM 256 CD GLU A 259 26.604 97.331 78.413 1.00 32.55 C 257 | ATOM 257 OE1 GLU A 259 27.496 98.208 78.493 1.00 32.66 O 258 | ATOM 258 OE2 GLU A 259 26.835 96.148 78.071 1.00 30.99 O 259 | ATOM 259 N VAL A 260 22.215 97.095 79.003 1.00 20.99 N 260 | ATOM 260 CA VAL A 260 21.312 96.885 77.874 1.00 20.00 C 261 | ATOM 261 C VAL A 260 19.900 97.347 78.234 1.00 19.97 C 262 | ATOM 262 O VAL A 260 19.222 97.977 77.416 1.00 19.12 O 263 | ATOM 263 CB VAL A 260 21.302 95.410 77.355 1.00 19.07 C 264 | ATOM 264 CG1 VAL A 260 22.686 94.789 77.477 1.00 17.49 C 265 | ATOM 265 CG2 VAL A 260 20.229 94.586 78.032 1.00 16.96 C 266 | ATOM 266 N LEU A 261 19.474 97.066 79.467 1.00 19.35 N 267 | ATOM 267 CA LEU A 261 18.148 97.472 79.932 1.00 20.06 C 268 | ATOM 268 C LEU A 261 18.100 98.989 80.007 1.00 20.50 C 269 | ATOM 269 O LEU A 261 17.112 99.616 79.624 1.00 22.46 O 270 | ATOM 270 CB LEU A 261 17.852 96.889 81.314 1.00 18.97 C 271 | ATOM 271 CG LEU A 261 
#!/usr/bin/env bash
# Download and unpack the TOUGH-M1, Vertex and ProSPECCTs datasets into
# $STRUCTURE_DATA_DIR. Requires wget and unzip on the PATH.

# Fail early — and with a non-zero exit status — if prerequisites are missing.
if [ -z "$STRUCTURE_DATA_DIR" ]; then
    echo "STRUCTURE_DATA_DIR not set"
    exit 1
fi
if ! type wget > /dev/null; then
    echo "wget not installed"
    exit 1
fi
if ! type unzip > /dev/null; then
    echo "unzip not installed"
    exit 1
fi

# Quote the path (it may contain spaces) and abort if the directory is missing.
cd "$STRUCTURE_DATA_DIR" || exit 1

# TOUGH-M1 dataset (-p so a re-run of the script does not fail on mkdir)
mkdir -p TOUGH-M1
wget https://zenodo.org/record/3687317/files/dt_tough.zip?download=1 -O dt_tough.zip && unzip dt_tough.zip
rm dt_tough.zip
wget https://dataverse.harvard.edu/api/access/datafile/:persistentId?persistentId=doi:10.7910/DVN/L7H7JJ/UFO5CB -O official_tough_m1.tar.gz && tar -xvzf official_tough_m1.tar.gz -C TOUGH-M1
rm official_tough_m1.tar.gz
wget https://osf.io/tmgne/download -O TOUGH-M1/TOUGH-M1_positive.list
wget https://osf.io/6dn5s/download -O TOUGH-M1/TOUGH-M1_pocket.list
wget https://osf.io/3aypv/download -O TOUGH-M1/TOUGH-M1_negative.list

# Vertex dataset
mkdir -p Vertex
wget https://zenodo.org/record/3687317/files/dt_vertex.zip?download=1 -O dt_vertex.zip && unzip dt_vertex.zip
rm dt_vertex.zip
wget http://pubs.acs.org/doi/suppl/10.1021/acs.jcim.6b00118/suppl_file/ci6b00118_si_002.zip && unzip ci6b00118_si_002.zip -d Vertex
rm ci6b00118_si_002.zip

# ProSPECCTs: each sub-dataset is served behind a POST-based licence agreement.
mkdir -p prospeccts
for FILE in kahraman_structures.tar.gz identical_structures.tar.gz identical_structures_similar_ligands.tar.gz barelier_structures.tar.gz decoy.tar.gz review_structures.tar.gz NMR_structures.tar.gz
do
    wget www.ewit.ccb.tu-dortmund.de/ag-koch/prospeccts/ --post-data "file=${FILE}&licenseagreement=accept&action=Download" -O "$FILE" && tar -xvzf "$FILE" -C prospeccts
    rm "$FILE"
done
wget https://zenodo.org/record/3687317/files/dt_prospeccts.zip?download=1 -O dt_prospeccts.zip && unzip dt_prospeccts.zip
rm dt_prospeccts.zip
class Custom:
    """An arbitrary user dataset.

    Assumes that the dataset is placed in ``$STRUCTURE_DATA_DIR/relpath``, containing
    a bunch of protein and pocket structures, which are referred to in ``pairs.csv``.
    This file contains a quadruplet on each line indicating matches to evaluate:

        relative_path_to_pdbA, relative_path_to_pocketA, relative_path_to_pdbB, relative_path_to_pocketB
    """

    def __init__(self, relpath='custom'):
        # Dataset location relative to $STRUCTURE_DATA_DIR
        self.relpath = relpath

    def _root(self):
        """Absolute path of the dataset directory."""
        return os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), self.relpath)

    def _read_pairs(self):
        """Parse ``pairs.csv`` into a list of ((pdbA, pocketA), (pdbB, pocketB)) tuples.

        Shared by :meth:`get_structures` and :meth:`evaluate_matching` so the file
        format is validated in exactly one place. Blank lines (e.g. a trailing
        newline) are ignored.

        :raises ValueError: if a non-blank line does not have exactly four
            comma-separated columns (a real exception instead of ``assert``,
            which would be silently stripped under ``python -O``).
        """
        pairs = []
        with open(os.path.join(self._root(), 'pairs.csv')) as f:
            for lineno, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                tokens = [t.strip() for t in line.split(',')]
                if len(tokens) != 4:
                    raise ValueError(f'pairs.csv is expected to have four columns (line {lineno}).')
                pairs.append(((tokens[0], tokens[1]), (tokens[2], tokens[3])))
        return pairs

    def preprocess_once(self):
        """ Computes featurization """
        htmd_featurizer(self.get_structures(), skip_existing=True)

    def get_structures(self):
        """Get list of PDB structures with metainfo.

        :return: list of dicts with keys 'protein', 'pocket', 'protein_htmd'
            (absolute paths) and 'key' (the "pdb,pocket" string as written in
            pairs.csv, used to match entries back in :meth:`evaluate_matching`).
        """
        root = self._root()
        npz_root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'processed/htmd', self.relpath)

        # Collect the unique (protein, pocket) tuples referenced on either side of a pair
        custom_pdbs = set()
        for (pdb_a, pocket_a), (pdb_b, pocket_b) in self._read_pairs():
            custom_pdbs.add((pdb_a, pocket_a))
            custom_pdbs.add((pdb_b, pocket_b))

        entries = []
        for pdb, pocket in custom_pdbs:
            # Default to a '.pdb' extension when the csv omits one
            pdb1 = pdb if os.path.splitext(pdb)[1] != '' else pdb + '.pdb'
            pocket1 = pocket if os.path.splitext(pocket)[1] != '' else pocket + '.pdb'
            entries.append({'protein': os.path.join(root, pdb1),
                            'pocket': os.path.join(root, pocket1),
                            'protein_htmd': os.path.join(npz_root, pdb1.replace('.pdb', '.npz')),
                            'key': pdb + ',' + pocket})

        return entries

    def evaluate_matching(self, descriptor_entries, matcher):
        """
        Compute pocket matching scores on the custom dataset.
        :param descriptor_entries: List of entries
        :param matcher: PocketMatcher instance
        :return: dict with 'pairs' (list of entry tuples, in pairs.csv order)
            and 'scores' (one score per pair)
        """
        target_dict = {d['key']: d for d in descriptor_entries}

        pairs = []
        for (pdb_a, pocket_a), (pdb_b, pocket_b) in self._read_pairs():
            key1 = pdb_a + ',' + pocket_a
            key2 = pdb_b + ',' + pocket_b
            pairs.append((target_dict[key1], target_dict[key2]))

        scores = matcher.pair_match(pairs)
        return {'pairs': pairs, 'scores': scores}
    @staticmethod
    def _extract_pocket_and_get_uniprot(pdbpath):
        """Extract the ligand-defined pocket from a raw ProSPECCTs PDB and map it to UniProt.

        :param pdbpath: path to a raw `.pdb` file; files whose stem contains '_'
            are derived artifacts (e.g. previously extracted `*_site_1`/`*_clean`)
            and are skipped
        :return: (fname, uniprots) where fname is the file stem (or None for skipped
            files) and uniprots is a set of UniProt accessions, or the string 'None'
            when the PDBe lookup failed
        """
        fname = os.path.basename(pdbpath).split('.')[0]
        if '_' in fname:
            # Derived file, not a raw structure — nothing to extract or map
            return None, None

        # 1) Extract the pocket
        detector = PocketFromLigandDetector(include_het_resname=False, save_clean_structure=True,
                                            keep_other_hets=False, min_lig_atoms=1, allowed_lig_names=['LIG'])
        detector.run_one(pdbpath, os.path.dirname(pdbpath))

        # 2) Attempt to map to Uniprots (fails from time to time, return 'None' in that case)
        # ProSPECCTs file stems look like '1abcA': 4-char PDB code plus optional chain id.
        pdb_code = fname[:4].lower()
        query_chain_id = fname[4].upper() if len(fname) > 4 else ''
        result = set()

        # 2b) In the case of NMR structures, Prospeccts has incomplete PDB IDs (e.g. 'cz00A' is really '1cz2 00 A')
        # Therefore for this dataset, try to get the full PDB ID from the raw PDB text
        if "NMR_structures" in pdbpath:
            pdb_code = Prospeccts._get_pdb_code_from_raw_pdb(pdbpath)
            if not pdb_code:
                # 'XXXX' guarantees the PDBe request below fails, yielding 'None'
                pdb_code = 'XXXX'

        try:
            # PDBe SIFTS service: maps a PDB entry to its UniProt families/chains
            r = requests.get(f'http://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_code}')
            fam = r.json()[pdb_code]['UniProt']
        except Exception as e:
            # this logically fails for artificial proteins not in PDB, such as in decoys (P3, P4), but that's fine.
            logger.warning(f'PDB not found {e} {pdb_code} {query_chain_id}')
            return fname, 'None'
        for fam_id in fam.keys():
            for chain in fam[fam_id]['mappings']:
                if not query_chain_id:
                    # No chain in the stem: accept accessions from every chain
                    result.add(fam_id)
                elif chain['chain_id'] == query_chain_id:
                    # Warn when one chain maps to several distinct accessions
                    if len(result) > 0 and fam_id != next(iter(result)):
                        logger.warning(f'Duplicate chain {fam_id} {result}')
                    result.add(fam_id)
        if not result:
            logger.warning(f'Chain not found {pdb_code} chain {query_chain_id}')
        return fname, result
    def get_structures(self, extra_mappings=True):
        """Get list of PDB structures with metainfo.

        :param extra_mappings: when True, load `pdbcode_mappings.pickle` (written by
            `preprocess_once`) and attach uniprot/sequence-cluster info to each entry;
            when False, those fields are filled with the string 'None'
        :return: list of dicts with structure/pocket/ligand paths, the htmd `.npz`
            feature path, the 5-char code, the 4-char PDB code, and the mappings
        """
        dir1, dir2, listfn = self._prospeccts_paths()
        root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'prospeccts', dir1)
        npz_root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'processed/htmd/prospeccts', dir1)

        # Collect the unique structure codes referenced by either column of the pair list
        db_pdbs = set()
        with open(os.path.join(root, listfn)) as f:
            for line in f.readlines():
                tokens = line.split(',')
                db_pdbs.add(tokens[0])
                db_pdbs.add(tokens[1])

        code5_to_seqclusts, code5_to_uniprot = None, None
        if extra_mappings:
            # Produced once by preprocess_once() (run with dbname 'P1')
            mapping = pickle.load(open(os.path.join(os.environ['STRUCTURE_DATA_DIR'], 'prospeccts', 'pdbcode_mappings.pickle'), 'rb'))
            code5_to_seqclusts = mapping['code5_to_seqclusts']
            code5_to_uniprot = mapping['code5_to_uniprot']

        entries = []
        for pdb in db_pdbs:
            entries.append({
                'protein': root + f'/{dir2}/{pdb}_clean.pdb',
                'pocket': root + f'/{dir2}/{pdb}_site_1.pdb',
                'ligand': root + f'/{dir2}/{pdb}_lig_1.pdb',
                'protein_htmd': npz_root + f'/{dir2}/{pdb}_clean.npz',
                'code5': pdb,
                'code': pdb[:4],
                'uniprot': code5_to_uniprot[pdb] if code5_to_uniprot else 'None',
                'seqclusts': code5_to_seqclusts[pdb] if code5_to_seqclusts else 'None',
            })
        return entries
@staticmethod
def _preprocess_worker(entry):
    """
    Per-entry preprocessing worker (run in a process pool by `preprocess_once`).

    Re-runs fpocket2 on the entry's protein file and resolves the entry's chain
    to a UniProt accession via the PDBe SIFTS webservice.

    :param entry: Structure entry dict as produced by `get_structures`; reads
        the 'protein', 'code' and 'code5' keys.
    :return: List `[code5, uniprot_accession, pdb_code + chain_id]`; the last
        two elements are the string 'None' on failure.
    """

    def struct_to_centroid(structure):
        # Mean of all atom coordinates of a Bio.PDB structure/chain
        return np.mean(np.array([atom.get_coord() for atom in structure.get_atoms()]), axis=0)

    def pdb_chain_to_uniprot(pdb_code, query_chain_id):
        """
        Get pdb chain mapping to uniprot accession using the pdbe api
        """
        result = 'None'
        r = requests.get(f'http://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{pdb_code}')
        fam = r.json()[pdb_code]['UniProt']

        # A chain may map to several accessions; the last matching one wins, with a warning
        for fam_id in fam.keys():
            for chain in fam[fam_id]['mappings']:
                if chain['chain_id'] == query_chain_id:
                    if result != 'None' and fam_id != result:
                        logger.warning(f'DUPLICATE {fam_id} {result}')
                    result = fam_id
        if result == 'None':
            logger.warning(f'No uniprot accession found for {pdb_code}: {query_chain_id}')
        return result

    # 1) We won't be using provided `.fpocket` files because they don't contain the actual atoms, just
    # Voronoii centers. So we run fpocket2 ourselves, it seems to be equivalent to published results.
    try:
        command = ['fpocket2', '-f', entry['protein']]
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        logger.warning('Calling fpocket2 failed, please make sure it is on the PATH')
        raise e

    # 2) Some chains have been renamed since TOUGH-M1 dataset was released so one cannot directly retrieve
    # uniprot accessions corresponding to a given chain. So we first locate corresponding chains in the
    # original pdb files, get their ids and translate those to uniprot using the SIFTS webservices.
    parser = PDB.PDBParser(PERMISSIVE=True, QUIET=True)
    tough_str = parser.get_structure('t', entry['protein'])
    tough_c = struct_to_centroid(tough_str)

    # 2a) Some structures are now obsolete since TOUGH-M1 was published, for these, get superceding entry
    pdb_code = entry['code'].lower()
    superceded = pdb_check_obsolete(entry['code'])
    if superceded:
        pdb_code = superceded
    # 2b) try to download pdb from RSCB mirror site
    with tempfile.TemporaryDirectory() as tmpdir:
        fname = tmpdir + '/prot.pdb'
        try:
            urllib.request.urlretrieve(f"http://files.rcsb.org/download/{pdb_code}.pdb", fname)
        except:  # noqa: E722 -- best-effort download; any failure just skips the entry
            logger.info(f'Could not download PDB: {pdb_code}')
            return [entry['code5'], 'None', 'None']
        orig_str = parser.get_structure('o', fname)

        # TOUGH authors haven't re-centered the chains so we can roughly find them just by centroids :)
        dists = []
        ids = []
        for model in orig_str:
            for chain in model:
                if len(chain) < 20:  # ignore chains with fewer than 20 residues
                    continue
                dists.append(np.linalg.norm(struct_to_centroid(chain) - tough_c))
                ids.append(chain.id)
        chain_id = ids[np.argmin(dists)]
        # Centroid distance above 5 Angstrom suggests the mapping is wrong; give up on the entry
        if np.min(dists) > 5:
            logger.warning(f"Suspiciously large distance when trying to map tough structure to downloaded one"
                           f"DIST {dists} {ids} {entry['code']} {pdb_code}")
            return [entry['code5'], 'None', 'None']

        uniprot = pdb_chain_to_uniprot(pdb_code.lower(), chain_id)
        return [entry['code5'], uniprot, pdb_code.lower() + chain_id]
def get_structures(self, extra_mappings=True):
    """Return a list of entry dicts (file locations and metadata) for all TOUGH-M1 pockets.

    :param extra_mappings: When True, also attach uniprot and sequence-cluster
        info from the `pdbcode_mappings.pickle` built by `preprocess_once`.
    """
    dataset_root = os.path.join(self.tough_data_dir, 'TOUGH-M1_dataset')
    feature_root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'processed/htmd/TOUGH-M1/TOUGH-M1_dataset')
    mapping_fname = os.path.join(self.tough_data_dir, 'pdbcode_mappings.pickle')

    # Optional code5 -> uniprot / sequence-cluster translation tables
    uniprot_map, seqclust_map = None, None
    if extra_mappings:
        with open(mapping_fname, 'rb') as mf:
            translation = pickle.load(mf)
        uniprot_map = translation['code5_to_uniprot']
        seqclust_map = translation['code5_to_seqclust']

    entries = []
    with open(os.path.join(self.tough_data_dir, 'TOUGH-M1_pocket.list')) as listing:
        for record in listing:
            code5, pocketnr, _ = record.split()
            # fpocket numbers pockets from 0 while the list file counts from 1
            entries.append({
                'protein': dataset_root + f'/{code5}/{code5}.pdb',
                'pocket': dataset_root + f'/{code5}/{code5}_out/pockets/pocket{int(pocketnr)-1}_vert.pqr',
                'ligand': dataset_root + f'/{code5}/{code5}00.pdb',
                'protein_htmd': feature_root + f'/{code5}/{code5}.npz',
                'code5': code5,
                'code': code5[:4],
                'uniprot': uniprot_map[code5] if uniprot_map else 'None',
                'seqclust': seqclust_map[code5] if seqclust_map else 'None'
            })
    return entries
def evaluate_matching(self, descriptor_entries, matcher):
    """
    Evaluate pocket matching on TOUGH-M1 dataset. The evaluation metrics is AUC.

    :param descriptor_entries: List of entries
    :param matcher: PocketMatcher instance
    """
    by_code = {entry['code5']: entry for entry in descriptor_entries}

    def read_pairs(list_path):
        # Keep only pairs where both pockets have descriptors available
        collected = []
        with open(list_path) as handle:
            for row in handle:
                a, b = row.split()[:2]
                if a in by_code and b in by_code:
                    collected.append((by_code[a], by_code[b]))
        return collected

    pos_pairs = read_pairs(os.path.join(self.tough_data_dir, 'TOUGH-M1_positive.list'))
    neg_pairs = read_pairs(os.path.join(self.tough_data_dir, 'TOUGH-M1_negative.list'))
    pairs = pos_pairs + neg_pairs
    positives = [True] * len(pos_pairs) + [False] * len(neg_pairs)

    scores = matcher.pair_match(pairs)

    # Drop pairs whose score could not be computed (nan/inf)
    goodidx = np.flatnonzero(np.isfinite(np.array(scores)))
    if len(goodidx) != len(scores):
        logger.warning(f'Ignoring {len(scores) - len(goodidx)} pairs')
        positives_clean, scores_clean = np.array(positives)[goodidx], np.array(scores)[goodidx]
    else:
        positives_clean, scores_clean = positives, scores

    # Calculate metrics (ROC curve / AUC and precision-recall / average precision)
    fpr, tpr, roc_thresholds = roc_curve(positives_clean, scores_clean)
    auc = roc_auc_score(positives_clean, scores_clean)
    precision, recall, thresholds = precision_recall_curve(positives_clean, scores_clean)
    ap = voc_ap(recall[::-1], precision[::-1])

    return {
        'ap': ap,
        'pr': precision,
        're': recall,
        'th': thresholds,
        'auc': auc,
        'fpr': fpr,
        'tpr': tpr,
        'th_roc': roc_thresholds,
        'pairs': pairs,
        'scores': scores,
        'pos_mask': positives
    }
def get_structures(self, extra_mappings=True):
    """Return a list of entry dicts (file paths and metadata) for the Vertex benchmark set.

    :param extra_mappings: When True, also attach sequence-cluster info from
        the precomputed `pdbcode_mappings.pickle`.
    """
    root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'Vertex')
    npz_root = os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'processed/htmd/Vertex')

    # Read the unique (pdb code + site, uniprot, ligand component) triples,
    # skipping the two header lines of the tsv file
    triples = set()
    with open(os.path.join(root, 'protein_pairs.tsv')) as tsv:
        for line_nr, line in enumerate(tsv):
            if line_nr <= 1:
                continue
            cols = line.split('\t')
            triples.add((cols[0].lower(), cols[2], cols[1]))
            triples.add((cols[5].lower(), cols[7], cols[6]))

    seqclust_map = None
    if extra_mappings:
        with open(os.path.join(os.environ['STRUCTURE_DATA_DIR'], 'Vertex', 'pdbcode_mappings.pickle'), 'rb') as mf:
            seqclust_map = pickle.load(mf)['code5_to_seqclusts']

    entries = []
    for code5, uniprot, ligand_cc in triples:
        pdb_code = code5[:4]
        site_nr = int(code5[5])  # entries are keyed by pdb code plus site number
        entries.append({
            'protein': root + f'/{pdb_code}/{pdb_code}_clean.pdb',
            'pocket': root + f'/{pdb_code}/{pdb_code}_site_{site_nr}.pdb',
            'ligand': root + f'/{pdb_code}/{pdb_code}_lig_{site_nr}.pdb',
            'protein_htmd': npz_root + f'/{pdb_code}/{pdb_code}_clean.npz',
            'code5': code5,
            'code': code5[:4],
            'lig_cc': ligand_cc,
            'uniprot': uniprot,
            'seqclusts': seqclust_map[code5] if seqclust_map else 'None'
        })
    return entries
def __init__(self, pdb_list, box_size, augm_rot=False, augm_mirror_prob=0.0):
    """
    :param pdb_list: List of pdb files (as dicts, with key 'protein_htmd').
    :param box_size: Patch size
    :param augm_rot: Rotation augmentation
    :param augm_mirror_prob: Mirroring probability for each axis
    """
    self.pdb_list, self.pdb_idx = [], []

    # Keep only entries whose HTMD featurization exists on disk, remembering
    # their position in the original list
    for orig_idx, entry in enumerate(pdb_list):
        if os.path.exists(entry['protein_htmd']):
            self.pdb_list.append(entry)
            self.pdb_idx.append(orig_idx)
        else:
            logging.warning(f"HTMD featurization file not found: {entry['protein_htmd']},"
                            f"corresponding pdb likely could not be parsed")

    assert len(self.pdb_list) > 0, f'No HTMD could be found but {len(pdb_list)}' \
                                   f'PDB files were given, please call preprocess_once() on the dataset'
    logger.info('Dataset size: %d', len(self.pdb_list))

    self._resolution = 1.0
    self._box_size = box_size
    self._augm_rot = augm_rot
    self._augm_mirror_prob = augm_mirror_prob
@staticmethod
def _getOccupancyC(coords, centers, channelsigmas):  # adapted from voxeldescriptors.py in HTMD
    """ Calls the C code to calculate the voxels values for each property """
    grid_centers = centers.astype(np.float64)
    atom_coords = coords.astype(np.float32)
    sigmas = channelsigmas.astype(np.float64)

    n_centers = grid_centers.shape[0]
    n_atoms = atom_coords.shape[0]
    nchannels = sigmas.shape[1]
    occus = np.zeros((n_centers, nchannels), dtype=np.float64)

    # The C routine fills `occus` in place: one value per (grid point, channel)
    occupancylib.descriptor_ext(grid_centers.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                                atom_coords.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
                                sigmas.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                                occus.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
                                ctypes.c_int(n_centers),  # n of centers
                                ctypes.c_int(n_atoms),  # n of atoms
                                ctypes.c_int(nchannels))  # n of channels
    return occus
def __init__(self, pos_pairs, neg_pairs, pdb_list, box_size, augm_rot=False, augm_mirror_prob=0.0,
             max_sampling_dist=4.0, augm_robustness=False, augm_decoy_prob=0, db_pairs_limit=0):
    """Initialize a dataset over positive/negative pocket pairs restricted to available pdbs."""
    super().__init__(pdb_list, box_size, augm_rot, augm_mirror_prob)

    self._max_sampling_dist = max_sampling_dist
    self._augm_robustness = augm_robustness
    self._decoy_prob = augm_decoy_prob

    # Map pdb code -> index into self.pdb_list (keyed by code5 when present)
    self._pdb_map = {
        (entry['code5'] if 'code5' in entry else entry['code']): pos
        for pos, entry in enumerate(self.pdb_list)
    }

    # Keep only pairs where both members have a loadable pdb
    def both_known(pair):
        return pair[0] in self._pdb_map and pair[1] in self._pdb_map

    self._pos_pairs = [p for p in pos_pairs if both_known(p)]
    self._neg_pairs = [p for p in neg_pairs if both_known(p)]
    if db_pairs_limit > 0:
        self._pos_pairs = self._pos_pairs[:db_pairs_limit]
        self._neg_pairs = self._neg_pairs[:db_pairs_limit]
    logger.info('Dataset positive pairs: %d, negative pairs: %d', len(self._pos_pairs), len(self._neg_pairs))

    codes_in_use = {code for pair in self._pos_pairs + self._neg_pairs for code in (pair[0], pair[1])}
    logger.info('Effective number of PDB files: %d', len(codes_in_use))
    assert len(self._pos_pairs) > 0 and len(self._neg_pairs) > 0
class PdbPairVoxelizedDataset(PdbTupleVoxelizedDataset):
    """ Dataset of pairs of voxelized pockets """

    def __len__(self):
        # positive pairs as the driving entity; odd indices draw random negatives
        return len(self._pos_pairs) * 2

    def __getitem__(self, idx):
        """Return a sample dict with stacked voxel volumes for one pocket pair and its label.

        Even indices yield the idx//2-th positive pair (target 1); odd indices
        yield a randomly drawn negative pair (target 0), giving a 1:1 mix.
        """
        if idx % 2 == 0:
            cls = 1  # positive class
            pair = self._pos_pairs[idx // 2]
        else:
            cls = 0  # negative class
            pair = random.choice(self._neg_pairs)

        first_vols = self._get_patch(self._pdb_map[pair[0]])
        # BUGFIX: the original `allow_decoy=(cls == 'neg')` compared the int label
        # (0/1) to the string 'neg', which is always False, so decoy augmentation
        # (`augm_decoy_prob`) was silently never applied. Negatives are cls == 0,
        # and per _get_patch's contract decoys go into one member of a negative pair.
        second_vols = self._get_patch(self._pdb_map[pair[1]], allow_decoy=(cls == 0))

        return {'inputs': np.stack(first_vols + second_vols), 'targets': np.array([cls], dtype=np.float32)}
set([entry['code'] for entry in vertex]) 261 | pdb_train = list(filter(lambda entry: entry['code'] not in vertex_pdbs, pdb_train)) 262 | elif exclude_Vertex_from_train == 'seqclust': 263 | vertex_seqclusts = set([c for entry in vertex for c in entry['seqclusts']] + ['None']) 264 | pdb_train = list(filter(lambda entry: entry['seqclust'] not in vertex_seqclusts, pdb_train)) 265 | else: 266 | raise NotImplementedError() 267 | logger.info(f'After Vertex filter {len(pdb_train)}') 268 | 269 | # ProSPECCTS 270 | if exclude_Prospeccts_from_train: 271 | if args.db_preprocessing: 272 | for dbname in Prospeccts.dbnames: 273 | Prospeccts(dbname).preprocess_once() 274 | 275 | # Get ProSPECCTs datasets 276 | all_prospeccts = [entry for dbname in Prospeccts.dbnames for entry in Prospeccts(dbname).get_structures()] 277 | 278 | # Exclude entries from tough training set that exist in the ProSPECCTs sets 279 | logger.info(f'Before Prospeccts filter {len(pdb_train)}') 280 | if exclude_Prospeccts_from_train == 'uniprot': 281 | prospeccts_ups = set([u for entry in all_prospeccts for u in entry['uniprot']] + ['None']) 282 | pdb_train = list(filter(lambda entry: entry['uniprot'] not in prospeccts_ups, pdb_train)) 283 | elif exclude_Prospeccts_from_train == 'pdb': 284 | prospeccts_pdbs = set([entry['code'] for entry in all_prospeccts]) 285 | pdb_train = list(filter(lambda entry: entry['code'].lower() not in prospeccts_pdbs, pdb_train)) 286 | elif exclude_Prospeccts_from_train == 'seqclust': 287 | prospeccts_seqclusts = set([c for entry in all_prospeccts for c in entry['seqclusts']] + ['None']) 288 | pdb_train = list(filter(lambda entry: entry['seqclust'] not in prospeccts_seqclusts, pdb_train)) 289 | else: 290 | raise NotImplementedError() 291 | logger.info(f'After Prospeccts filter {len(pdb_train)}') 292 | 293 | # Read TOUGH-M1 negative and positive pocket pairs 294 | with open(os.path.join(os.environ.get('STRUCTURE_DATA_DIR'), 'TOUGH-M1', 'TOUGH-M1_positive.list')) as f: 295 | pos_pairs 
import torch
import torch.nn as nn
import torch.nn.functional as nnf


class VoxelNetwork(nn.Module):
    """
    Network for 3D voxel patch classification.

    The architecture is assembled dynamically from a `config` string containing
    a sequence of comma-delimited layer definition tokens `layer_arg1_arg2_...`
    See README.md for examples.
    """

    def __init__(self, config, nfeat):
        """
        Build the network in a flexible way based on the `config` string.

        :param config: Comma-delimited layer tokens, e.g. 'c_8_3_1,b,r,m_2'
        :param nfeat: Number of input channels
        """
        super().__init__()
        # Input standardization parameters; registered as buffers so they are
        # serialized with checkpoints and moved across devices with the model.
        self.register_buffer('scaler_mean', torch.zeros(1, nfeat, 1, 1, 1))
        self.register_buffer('scaler_std', torch.ones(1, nfeat, 1, 1, 1))

        for d, conf in enumerate(config.split(',')):
            conf = conf.strip().split('_')

            if conf[0] == 'b':  # Batch norm
                self.add_module(str(d), nn.BatchNorm3d(nfeat))
            elif conf[0] == 'r':  # ReLU
                self.add_module(str(d), nn.ReLU(True))

            elif conf[0] == 'm':  # Max pooling
                kernel_size = int(conf[1])
                self.add_module(str(d), nn.MaxPool3d(kernel_size))
            elif conf[0] == 'a':  # Avg pooling
                kernel_size = int(conf[1])
                self.add_module(str(d), nn.AvgPool3d(kernel_size))

            elif conf[0] == 'c':  # 3D convolution args: output feat, kernel size, padding, stride
                nfeato = int(conf[1])
                kernel_size = int(conf[2])
                padding = int(conf[3]) if len(conf) > 3 else 0
                stride = int(conf[4]) if len(conf) > 4 else 1
                self.add_module(str(d), nn.Conv3d(nfeat, nfeato, kernel_size, stride, padding))
                nfeat = nfeato

            elif conf[0] == 'se':  # SE(3)-covariant block args: output feat, mult1, mult2, mult3, kernel size, padding, stride, bnnorm, smoothing
                # Imported lazily so that se3cnn is only required when an 'se'
                # block actually appears in the configuration.
                from se3cnn.blocks import GatedBlock

                nfeato = int(conf[1])
                mult1 = int(conf[2])
                mult2 = int(conf[3])
                mult3 = int(conf[4])
                kernel_size = int(conf[5])
                padding = int(conf[6]) if len(conf) > 6 else 0
                stride = int(conf[7]) if len(conf) > 7 else 1
                normalization = conf[8] if len(conf) > 8 else None
                smooth = bool(int(conf[9])) if len(conf) > 9 else False

                if isinstance(nfeat, int):
                    # First SE(3) block: scalar input channels only
                    nfeat = (nfeat,)
                    nfeato = tuple([n for n in (nfeato, mult1, mult2, mult3) if n > 0])
                    activation = (None, nnf.sigmoid)
                elif mult1 <= 0:
                    # Final SE(3) block: collapse back to scalar channels, no gating
                    nfeato = (nfeato,)
                    activation = None
                else:
                    nfeato = tuple([n for n in (nfeato, mult1, mult2, mult3) if n > 0])
                    activation = (nnf.relu, nnf.sigmoid)

                conv = GatedBlock(nfeat, nfeato, size=kernel_size, padding=padding, stride=stride,
                                  activation=activation, normalization=normalization, smooth_stride=smooth)
                self.add_module(str(d), conv)

                if mult1 <= 0:
                    nfeato = nfeato[0]
                nfeat = nfeato

            else:
                raise NotImplementedError('Unknown module: ' + conf[0])

        # Number of output channels after the last configured layer
        self.nfeato = nfeat

    def set_input_scaler(self, scaler):
        """
        Sets input standardization from a fitted sklearn-style scaler.

        :param scaler: Object exposing `mean_` and `scale_` arrays
        :return:
        """
        self.scaler_mean.copy_(torch.Tensor(scaler.mean_).view(1, scaler.mean_.size, 1, 1, 1))
        self.scaler_std.copy_(torch.Tensor(scaler.scale_).view(1, scaler.scale_.size, 1, 1, 1))

    def forward(self, inputs):
        # Standardize, then apply all configured layers in insertion order
        inputs = (inputs - self.scaler_mean) / self.scaler_std
        for module in self._modules.values():
            inputs = module(inputs)
        return inputs


def create_model(args, dataset, device):
    """
    Creates a model from `args.model_config` and moves it to `device`.

    :param args: Namespace with a `model_config` attribute
    :param dataset: Dataset (class or instance) exposing `num_channels`
    :param device: torch.device or device string
    """
    model = VoxelNetwork(args.model_config, dataset.num_channels)
    print(f'Total number of parameters: {sum(p.numel() for p in model.parameters())}')
    print(model)
    return model.to(device)
def load_and_precompute_point_feats(model, args, pdb_list, point_list, device, nworkers, batch_size):
    """
    Compute descriptors for every (pdb, point) pair given.

    Returns a list aligned with `point_list`; entries for points that never
    appear in a batch remain None.
    """
    model.eval()
    device = torch.device(device) if isinstance(device, str) else device

    dataset = PointOfInterestVoxelizedDataset(pdb_list, point_list, box_size=args.patch_size)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=nworkers)

    feats = [None] * len(point_list)
    with torch.no_grad():
        for batch in tqdm(loader):
            voxels = batch['inputs'].squeeze(1).to(device)
            outputs = model(voxels)
            if args.l2_normed_descriptors:
                outputs = nnf.normalize(outputs)
            descriptors = outputs.cpu().float()
            for idx, desc in zip(batch['pdb_idx'], descriptors):
                # flatten any spatial dims into rows of length num_channels
                feats[int(idx)] = desc.view(-1, desc.shape[0])

    return feats


def match_precomputed_point_pairs(descriptors_A, descriptors_B):
    """
    Match pairs of descriptors. Some may be None, then their distance is NaN
    """
    with torch.no_grad():
        distances = []
        for desc_a, desc_b in tqdm(zip(descriptors_A, descriptors_B)):
            if desc_a is None or desc_b is None:
                distances.append(np.nan)
            else:
                distances.append(nnf.pairwise_distance(desc_a, desc_b).numpy())

    return np.squeeze(np.array(distances))


def match_precomputed_points_bipartite(descriptors_A, descriptors_B):
    """
    Matches the Cartesian product of descriptors (bipartite or complete matching, if B is None)
    Some may be None, then their distance is NaN
    """
    with torch.no_grad():

        def assemble(descriptors):
            # Stack descriptors into one (N, nfeat) matrix; missing entries
            # become NaN rows. Returns None if every descriptor is missing.
            first_valid = next((d for d in descriptors if d is not None), None)
            if first_valid is None:
                return None
            matrix = torch.full((len(descriptors), first_valid.shape[1]), np.nan, dtype=torch.float64)
            for row, d in enumerate(descriptors):
                if d is not None:
                    matrix[row, :] = d
            return matrix

        feats_A = assemble(descriptors_A)

        if descriptors_B is None:
            descriptors_B = descriptors_A
            feats_B = feats_A
        else:
            feats_B = assemble(descriptors_B)

        if feats_A is None or feats_B is None:
            return np.full((len(descriptors_A), len(descriptors_B)), np.nan)
        return bag_distances(feats_A, feats_B).numpy()


def bag_euclidean_distances2(x, y=None):
    """
    Squared Euclidean distance matrix: dist[i,j] = ||x[i,:]-y[j,:]||^2.

    :param x: Nxd matrix
    :param y: optional Mxd matrix; if not given then 'y=x' is used
    :return: NxM matrix of squared norms, clamped at zero
    (https://discuss.pytorch.org/t/efficient-distance-matrix-computation/9065/2)
    """
    x_norm2 = (x ** 2).sum(1).view(-1, 1)
    if y is None:
        y_t = torch.transpose(x, 0, 1)
        y_norm2 = x_norm2.view(1, -1)
    else:
        y_t = torch.transpose(y, 0, 1)
        y_norm2 = (y ** 2).sum(1).view(1, -1)

    dist2 = x_norm2 + y_norm2 - 2.0 * torch.mm(x, y_t)
    return torch.clamp(dist2, min=0)


def bag_distances(x, y):
    """Distance matrix between row-vector bags x and y (plain pairwise distance for a single row)."""
    if x.shape[0] == 1:
        return nnf.pairwise_distance(x, y)
    # eps because derivative of sqrt at 0 is nan .. but no gradient if vectors identical due to clamping
    return torch.sqrt(bag_euclidean_distances2(x, y) + 1e-8)
15 | """ 16 | 17 | def __init__(self, model_dir, device='cpu', batch_size=30, nworkers=1): 18 | """ 19 | """ 20 | self.model_dir = model_dir 21 | self.device = device 22 | self.batch_size = batch_size 23 | self.nworkers = nworkers 24 | self.model, self.args = load_model(model_dir, device) 25 | 26 | def precompute_descriptors(self, entries): 27 | """ 28 | Precompute pocket descriptors/features. 29 | 30 | :param entries: List of entries. Required keys: `protein`, `pocket`. 31 | :return: entries but with `descriptor` keys. 32 | """ 33 | 34 | pdb_list, point_list = [], [] 35 | 36 | with concurrent.futures.ProcessPoolExecutor() as executor: 37 | jobs = [executor.submit(center_from_pdb_file, entry['pocket']) for entry in entries] 38 | for job, entry in tqdm(zip(jobs, entries)): 39 | center = job.result() 40 | if center is not None: 41 | pdb_list.append(entry) 42 | point_list.append([center]) 43 | else: 44 | logger.warning('Pocket not found, skipping: ' + os.path.basename(entry['pocket'])) 45 | 46 | feats = load_and_precompute_point_feats(self.model, self.args, pdb_list, point_list, self.device, self.nworkers, self.batch_size) 47 | for entry, feat in zip(pdb_list, feats): 48 | entry['descriptor'] = feat 49 | return pdb_list 50 | 51 | def pair_match(self, entry_pairs): 52 | """ 53 | Computes matches between given pairs of pockets. 54 | 55 | :param entry_pairs: List of tuples. Required keys: `descriptors`. 
56 | :return: np.array, score vector (negative distance) 57 | """ 58 | 59 | featslist_A = [entry['descriptor'] for entry, _ in entry_pairs] 60 | featslist_B = [entry['descriptor'] for _, entry in entry_pairs] 61 | distances = match_precomputed_point_pairs(featslist_A, featslist_B) 62 | return -distances 63 | 64 | def complete_match(self, entries): 65 | 66 | featslist = [entry['descriptor'] for entry in entries] 67 | distances = match_precomputed_points_bipartite(featslist, None) 68 | return -distances 69 | 70 | def bipartite_match(self, entries_a, entries_b): 71 | 72 | featslist_A = [entry['descriptor'] for entry in entries_a] 73 | featslist_B = [entry['descriptor'] for entry in entries_b] 74 | distances = match_precomputed_points_bipartite(featslist_A, featslist_B) 75 | return -distances 76 | -------------------------------------------------------------------------------- /deeplytough/matchers/pocket_matcher.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | 3 | 4 | class PocketMatcher(object): 5 | """ 6 | Base class for pocket matcher 7 | """ 8 | 9 | @abstractmethod 10 | def bipartite_match(self, entries_a, entries_b): 11 | """ 12 | Computes all matches between pockets from `entries_a` and pockets from `entries_b`. 13 | 14 | :param entries_a: List of dicts. Required keys: `protein`, `pocket`. 15 | :param entries_b: List of dicts. Required keys: `protein`, `pocket`. 16 | :return: np.array, score matrix 17 | """ 18 | raise NotImplementedError 19 | 20 | @abstractmethod 21 | def pair_match(self, entry_pairs): 22 | """ 23 | Computes matches between given pairs of entries. 24 | 25 | :param entry_pairs: List of tuples of dicts. Required keys: `protein`, `pocket`. 26 | :return: np.array, score vector 27 | """ 28 | raise NotImplementedError 29 | 30 | @abstractmethod 31 | def complete_match(self, entries): 32 | """ 33 | Computes all matches between given `entries`. 
34 | 35 | :param entries: List of dicts. Required keys: `protein`, `pocket`. 36 | :return: np.array, score matrix 37 | """ 38 | raise NotImplementedError 39 | -------------------------------------------------------------------------------- /deeplytough/matchers/tough_officials.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from .pocket_matcher import PocketMatcher 4 | 5 | 6 | class ToughOfficials(PocketMatcher): 7 | """ 8 | Return the precomputed results for several methods in official Tough dataset repo 9 | """ 10 | 11 | def __init__(self, alg_name, score_column): 12 | self.scores = {} 13 | for cls in ['positive', 'negative']: 14 | with open(os.path.join( 15 | os.environ.get('STRUCTURE_DATA_DIR'), 'TOUGH-M1', f'{alg_name}-TOUGH-M1_{cls}.score')) as f: 16 | for line in f.readlines(): 17 | s = line.split() 18 | self.scores[s[0] + s[1]] = float(s[score_column]) 19 | 20 | def pair_match(self, entry_pairs): 21 | 22 | scores = np.full((len(entry_pairs)), np.nan) 23 | 24 | for i, (entry_a, entry_b) in enumerate(entry_pairs): 25 | score = self.scores.get(entry_a['code5'] + entry_b['code5'], None) 26 | if score is None: 27 | score = self.scores.get(entry_a['code5'] + entry_b['code5'], None) 28 | assert score is not None 29 | scores[i] = score 30 | 31 | return scores 32 | 33 | 34 | -------------------------------------------------------------------------------- /deeplytough/misc/cc_ligands.py: -------------------------------------------------------------------------------- 1 | stabilisers = { 2 | "B3P", "PGE", "6JZ", "15P", "PE3", "XPE", "7PE", "M2M", "13P", 3 | "3PP", "PX4", "3OL", "OC9", "AE3", "6JZ", "XPE", "211", "ODI", 4 | "DIA", "PG5", "CXE", "ME2", "P4G", "TOE", "PG5", "PE8", "ZPG", 5 | "PE3", "MXE" 6 | } 7 | 8 | excipients = { 9 | "SO4", "SUL", " CL", " BR", " CA", " MG", " NI", " MN", " CU", "PO4", 10 | " CD", "NH4", " CO", " NA", " K", " ZN", " FE", "AZI", "A", "Ad", "C", 11 | "Cd", 
"CD", "CD2", "G", "Gd", "T", "Td", "A", "Ar", "Cr", "G", "Gr", 12 | "U", "Ur", "YG", "I", "Ir", "CR", "CR2", "CR3", "CAC", "CO2", "CO3", 13 | "CYN", "FS4", "MO6", "NCO", "NO3", "SCN", "SF4", " SE", " PB", "AU", 14 | "AU1", "AU3", "BR", "BR1", "CA", "CA2", "CL", "CL1", "CMP", "CO", "CO3", 15 | "CPR", "CS", "CS1", "CU", "CU1", "CU2", "AG", "AG1", "AL", "AL3", "F", 16 | "F1", "FE", "FE2", "FE3", "IR", "IR3", "K", "K1", "KR", "FUC", "MAN", 17 | "GAL", "MAL", "NAG", "GOL", "MPD", "BGC", "PEG", "EDO", "GLC", "PG4", 18 | "BOG", "HTO", "ACX", "BMA", "FUC-a-L", "GAL-b-D", "GLC-b-D", "GCU", 19 | "GCU-b-D", "CEG", "CEG-b-D", "MAN-b-D", "NAG-b-D", "RIB", "FRC", "FRU", 20 | "XYS", "XLS", "C8E", "CE9", "CRY", "DOX", "EGL", "F6P", "NDG", "NGA", 21 | "P6G", "SIA", "SUC", "XYS", "1PE", "OLC", "POP", "MES", "EPE", "PYR", 22 | "GLC", "CIT", "FLC", "TAR", "HC4", "MYR", "HED", "DTT", "BME", "TRS", 23 | "MPD", "ABA", "ACE", "ACT", "CME", "CSD", "CSO", "DMS", "EOH", "FMT", 24 | "GTT", "HED", "IMD", "IOH", "IPA", "LDA", "LLP", "MYR", "PEP", "PYR", 25 | "PXY", "OXE", "TMT", "TMZ", "2CV", "PLQ", "TAM", "1PG", "12P", "XP4", 26 | "PL3", "PE4", "PEU", "MPG", "B8M", "BOM", "B7M", "2PE", "STE", "DME", 27 | "PLM", "PG0", "PE5", "PG6", "P33", "HEZ", "F23", "DTV", "SDS", "DTU", 28 | "DTD", "MRD", "MRY", "P33", "BU1", "LHG", "D10", "OCT", "LI1", "ETE", 29 | "TZZ", "DEP", "DKA", "OLA", "MRD", "ACD", "MLR", "POG", "BTB", "PC1", 30 | "ACY", " DT", "3GD", "MAE", "CA3", "144", "CP", "0KA", "A71", "UVW", 31 | "BET", "PBU", "UAP", "SER", "CDL", "CEY", "LMN", "J7Z", "DA", "SIN", 32 | " I", "PLC", "BME-BME", "FNE", "FUM", "MAK", " CP", "PAE", "DTL", "HLT", 33 | "ASC", "FPP", "FII", "D1D", "PCT", "TTN", "HDA", "EDO-EDO", "PGA", 34 | "XXD", "INS", "217", "BHL", "16D", "HSE", "OPE", "HCS", "SOR", "SKM", 35 | "KIV", "FCN", "TRA", "TRC", "MTL", "MZP", "KDG", "DHK" 36 | } 37 | 38 | saccharide = { 39 | "NAG", "MAN", "BMA", "FUC", "GAL", "BGC", "GLC", "NDG", "BOG", "SUC", "XYP", "FUL", 40 | "MAL", "GLA", "LMT", 
"A2G", "NGA", "F6P", "XYS", "LAT", "MMA", "DMU", "BNG", "RAM", 41 | "KDO", "CBI", "PRP", "TRE", "SGN", "FBP", "BDP", "IDS", "FRU", "ACR", "CAP", "AHR", 42 | "GCU", "DGD", "G6P", "ADA", "BCD", "LBT", "G2F", "S6P", "LMU", "IPT", "GCS", "FDP", 43 | "CTT", "CBS", "BHG", "5RP", "SGC", "RIB", "KDA", "SCR", "ORP", "MTT", "G6D", "ASG", 44 | "RIP", "NBG", "MAG", "GLO", "FCA", "GMH", "G1P", "AMU", "R5P", "GLP", "6PG", "XMM", 45 | "MGL", "BEM", "ACX", "XLS", "NAA", "GCD", "CTR", "16G", "NGS", "MBG", "LGU", "BGL", 46 | "PA5", "MFU", "GTR", "FU4", "2FP", "SGA", "RER", "GN1", "GLD", "GCV", "G16", "ARA", 47 | "AGL", "XUL", "RNS", "MAV", "HSX", "GYP", "GCO", "ASO", "ARB", "AAL", "149", "TDG", 48 | "RUB", "R1P", "N9S", "MAW", "M6P", "LGC", "IDY", "IDR", "GP1", "GAD", "DAF", "CTO", 49 | "BXP", "AGH", "ABE", "2FG", "147", "SHG", "PSJ", "MXY", "M8C", "GLF", "GCN", "FUB", 50 | "FSI", "FCB", "E4P", "DFX", "DDA", "CT3", "CBK", "10M", "TCB", "QPS", "GUP", "GS1", 51 | "GL0", "G4D", "FUD", "BDR", "ARE", "AIG", "SSG", "RUU", "RAF", "NTP", "NG6", "MAB", 52 | "MA3", "GP4", "GC4", "ERI", "ERE", "DVC", "DQR", "DLG", "DDL", "DAG", "BXY", "B9D", 53 | "AOS", "2M4", "2DG", "1GL", "0MK", "Z6J", "XBP", "SUS", "SIO", "RG1", "QDK", "NTO", 54 | "NHF", "NGC", "MN0", "MFB", "MDM", "MDA", "LOG", "LB2", "LAK", "KO2", "IDU", "IDG", 55 | "HSQ", "GCT", "G4S", "FXP", "FCT", "DR2", "DNO", "BXX", "BDG", "B4G", "B2G", "AXR", 56 | "ABL", "3CM", "2F8", "X2F", "UCD", "TYV", "TOC", "TOA", "TAG", "SUP", "SOL", "SOE", 57 | "SI3", "SGD", "ROR", "RF5", "REL", "RAT", "R2B", "PTQ", "PNW", "PNG", "NGY", "NGE", 58 | "NG1", "MRP", "MLB", "LSM", "LCN", "LAI", "L6S", "KBG", "HSY", "GU9", "GU8", "GU6", 59 | "GU5", "GU4", "GU3", "GU2", "GU1", "GU0", "GNX", "GLS", "G6S", "G3I", "FFC", "F1P", 60 | "EPG", "EBG", "DT6", "DSR", "DR4", "DR3", "DOM", "DGS", "CRA", "CR6", "CEG", "CDR", 61 | "BMX", "BBK", "B6D", "B0D", "AXP", "ARI", "AFP", "ABF", "7JZ", "5SP", "5GF", "3SA", 62 | "2GL", "289", "ZDM", "YO5", "XXR", "XLF", "XDP", "X5S", "X4S", 
"X1X", "VG1", "UDC", 63 | "TMX", "TMR", "TM9", "TM6", "TM5", "T6P", "SHB", "SG7", "SG6", "SG5", "SG4", "SF9", 64 | "SF6", "SDD", "SA0", "RST", "RPA", "RNT", "RGG", "RBL", "RB5", "RAO", "RAE", "R1X", 65 | "QV4", "PSV", "PNA", "P6P", "P53", "P3M", "OX2", "OPM", "NYT", "NXD", "NTF", "NM9", 66 | "NM6", "NGF", "NGB", "N1L", "MUG", "MMN", "MG5", "MDP", "MCU", "MAT", "MA2", "MA1", 67 | "M3M", "M1F", "LXZ", "LXB", "LVZ", "LTM", "LPK", "LOX", "LM2", "LDY", "LAG", "KTU", 68 | "KO1", "KFN", "KDM", "ISL", "IN1", "IDX", "IAB", "HSJ", "HSH", "HSG", "HS2", "HMS", 69 | "HDL", "H2P", "GYV", "GUZ", "GUF", "GTK", "GTH", "GQ4", "GQ2", "GQ1", "GPM", "GPH", 70 | "GLT", "GLG", "GL9", "GL7", "GL6", "GL5", "GL2", "GIV", "GFP", "GFG", "GE1", "GCW", 71 | "GC1", "GAC", "FIX", "FDQ", "F1X", "EJT", "EGA", "EBQ", "EAG", "E5G", "DRI", "DQQ", 72 | "DP5", "DLF", "DIG", "DFR", "DEL", "D6G", "CR1", "CGF", "C5X", "C4X", "C3X", "BXF", 73 | "BGS", "BGP", "B8D", "B16", "AOG", "ALL", "AFR", "AFO", "AFD", "AF1", "ACG", "ABD", 74 | "ABC", "AAO", "A1Q", "6SA", "5MM", "5DI", "50A", "4NN", "4CQ", "48Z", "46Z", "46M", 75 | "46D", "3ZW", "3MG", "3MF", "3LR", "3FM", "3DY", "3DO", "34V", "32O", "2M5", "2FL", 76 | "293", "291", "27C", "26Y", "26W", "26V", "26R", "26Q", "26M", "1S3", "1LL", "1JB", 77 | "1GN", "1BW", "18Y", "18T", "15L", "14T", "0YT", "0XY", "0V4", "0TS", "0NZ", "0BD", 78 | "045" 79 | } 80 | 81 | ignore_list = set() 82 | ignore_list.update(excipients) 83 | ignore_list.update(saccharide) 84 | ignore_list.update(stabilisers) 85 | -------------------------------------------------------------------------------- /deeplytough/misc/ligand_extract.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import Bio.PDB as PDB 4 | import numpy as np 5 | 6 | from misc.cc_ligands import ignore_list 7 | from misc.utils import NonUniqueStructureBuilder 8 | 9 | 10 | def residue_dist_to_ligand(protein_residue, ligand_residue): 11 | """Returns distance from the 
class NearLigandSelect(PDB.Select):
    """Bio.PDB selector keeping residues within a distance threshold of a given ligand residue."""

    def __init__(self, distance_threshold, ligand_residue, keep_lig_in_site, keep_water, keep_other_hets=False):
        self.threshold = distance_threshold
        self.ligand_res = ligand_residue
        self.keep_water = keep_water
        self.keep_lig = keep_lig_in_site
        self.keep_other_hets = keep_other_hets

    def accept_residue(self, residue):
        # The ligand itself is kept or dropped according to the flag
        if residue == self.ligand_res:
            return self.keep_lig
        # Other heteroatom residues are dropped unless explicitly kept
        if not self.keep_other_hets and residue.get_id()[0].strip() != '':
            return False
        if not self.keep_water and residue.resname == 'HOH':
            return False
        return residue_dist_to_ligand(residue, self.ligand_res) < self.threshold


class LigandOnlySelect(PDB.Select):
    """Bio.PDB selector keeping only the given ligand residue."""

    def __init__(self, ligand_residue):
        self.ligand_residue = ligand_residue

    def accept_residue(self, residue):
        # change this to False if you don't want the ligand
        return residue == self.ligand_residue


class ChainOnlySelect(PDB.Select):
    """Bio.PDB selector keeping only standard (non-het, non-water) chain residues."""

    def accept_residue(self, residue):
        return residue.get_id()[0].strip() == ''


def filter_unwanted_het_ids(het_list):
    """Removes HET codes present in the static ignore list (excipients, saccharides, stabilisers)."""
    return [het for het in het_list if het not in ignore_list]
class PocketFromLigandDetector:
    """
    Extracts pockets around a ligand (which is either part of the input PDB file, or already separated in
    a different file).
    """

    def __init__(self, distance_threshold=8.0, ligand_fname_pattern=None, include_het_resname=True,
                 save_clean_structure=False, keep_other_hets=True, min_lig_atoms=-1, allowed_lig_names=None):
        """
        :param distance_threshold: Max distance between residue and ligand
        :param ligand_fname_pattern: A tuple (old, new) used to obtain ligand's file name by replacing `old` with `new`
        :param include_het_resname: Append the HET residue name to output file names
        :param save_clean_structure: Also write a `<name>_clean` file with het/water residues stripped
        :param keep_other_hets: Keep heteroatom residues (other than the ligand) in the extracted site
        :param min_lig_atoms: Minimum atom count for a HET residue to qualify as a ligand (-1 disables)
        :param allowed_lig_names: Optional whitelist of HET residue names
        """
        self.distance_threshold = distance_threshold
        self.ligand_fname_pattern = ('', '') if ligand_fname_pattern is None else ligand_fname_pattern
        self.keep_water = False
        self.include_het_resname = include_het_resname
        self.save_clean_structure = save_clean_structure
        self.keep_other_hets = keep_other_hets
        self.min_lig_atoms = min_lig_atoms
        self.allowed_lig_names = allowed_lig_names

    def run_one(self, pdb_path, output_dir):
        """Extracts all ligand sites from one PDB file into `output_dir`."""
        # parse structure object (permissive flag but let's not lose any atoms by using a custom builder)
        parser = PDB.PDBParser(PERMISSIVE=1, QUIET=True, structure_builder=NonUniqueStructureBuilder())
        # only consider the first model in the pdb file
        structure = parser.get_structure('X', pdb_path)
        model = structure[0]

        # Get ligand (het) to extract the site around
        if self.ligand_fname_pattern[0]:
            ligand_path = pdb_path.replace(self.ligand_fname_pattern[0], self.ligand_fname_pattern[1])
            ligand = parser.get_structure('L', ligand_path)
            het_list = list(ligand.get_residues())
        else:
            # get het entries of interest (filter using static dictionaries)
            het_list = get_het_residues_from_pdb(model, remove_duplicates=False, min_lig_atoms=self.min_lig_atoms,
                                                 allowed_names=self.allowed_lig_names)

        # Setup a PDB writer and load protein (avoid shadowing the stdlib `io` module)
        pdb_writer = PDB.PDBIO()
        pdb_writer.set_structure(model)

        # create output directory and split input pdb_path
        os.makedirs(output_dir, exist_ok=True)
        name, ext = os.path.basename(pdb_path).rsplit('.', 1)

        for n, het in enumerate(het_list):

            # Set name of output site file
            if self.include_het_resname:
                site_name = f"{name}_site_{n+1}_{het.resname}.{ext}"
            else:
                site_name = f"{name}_site_{n+1}.{ext}"
            fname = os.path.join(output_dir, site_name)

            pdb_writer.save(fname, NearLigandSelect(self.distance_threshold, het, keep_lig_in_site=False,
                                                    keep_water=self.keep_water, keep_other_hets=self.keep_other_hets))

            # When the ligand came from the same file, also write it out separately
            if not self.ligand_fname_pattern[0]:
                if self.include_het_resname:
                    lig_name = f"{name}_lig_{n+1}_{het.resname}.{ext}"
                else:
                    lig_name = f"{name}_lig_{n+1}.{ext}"
                pdb_writer.save(os.path.join(output_dir, lig_name), LigandOnlySelect(het))

        if self.save_clean_structure:
            pdb_writer.save(os.path.join(output_dir, f'{name}_clean.{ext}'), ChainOnlySelect())
def failsafe_hull(coords):
    """
    Wrapper of ConvexHull which returns None if hull cannot be computed for given points (e.g. all colinear or too few)
    """
    pts = np.array(coords)
    if pts.shape[0] <= 3:
        return None
    try:
        return ConvexHull(pts)
    except QhullError as e:
        message = str(e)
        # Degenerate inputs are expected; anything else is a real error
        if 'hull precision error' in message or 'input is less than 3-dimensional' in message:
            return None
        raise


def hull_centroid_3d(hull):
    """
    The centroid of a 3D polytope (passes None through). Taken over from
    http://www.alecjacobson.com/weblog/?p=3854 and http://www2.imperial.ac.uk/~rn/centroid.pdf.
    For >nD ones, https://stackoverflow.com/questions/4824141/how-do-i-calculate-a-3d-centroid

    :param hull: scipy.spatial.ConvexHull
    :return: np.array centroid, or None
    """
    if hull is None:
        return None

    a = hull.points[hull.simplices[:, 0], :]
    b = hull.points[hull.simplices[:, 1], :]
    c = hull.points[hull.simplices[:, 2], :]
    normals = np.cross(b - a, c - a)

    # get consistent outer orientation (compensate for the lack of ordering in scipy's facets), assume a convex hull
    interior_point = np.mean(hull.points[hull.vertices, :], axis=0)
    orientation = np.sign(np.sum((a - interior_point) * normals, axis=1, keepdims=True))
    normals = normals * orientation

    vol = np.sum(normals * a) / 6
    return 1 / (2 * vol) * (1 / 24 * np.sum(normals * ((a + b) ** 2 + (b + c) ** 2 + (c + a) ** 2), axis=0))
def structure_to_coord(structure, allow_off_chain=False, allow_hydrogen=False):
    """Flattens a Bio.PDB structure into an (N, 3) array of atom coordinates."""
    coords = []
    for model in structure:
        for chain in model:
            # het/water "chains" have a blank id; skip them unless requested
            if not allow_off_chain and chain.get_id().strip() == '':
                continue
            for residue in chain:
                for atom in residue:
                    if not allow_hydrogen and atom.get_name()[0] == 'H':
                        continue
                    coords.append(atom.get_coord())
    return np.array(coords)


class NonUniqueStructureBuilder(PDB.StructureBuilder.StructureBuilder):
    """This makes PDB more forgiving by being able to load atoms with non-unique names within a residue."""

    @staticmethod
    def _number_to_3char_name(n):
        # Encode n in base 36 (A-Z then 0-9) as exactly three characters
        code = ''
        for _ in range(3):
            n, r = divmod(n, 36)
            code = (chr(ord('A') + r) if r < 26 else chr(ord('0') + r - 26)) + code
        assert n == 0, 'number cannot fit 3 characters'
        return code

    def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname, serial_number=None, element=None):
        # Retry with a synthetic unique name whenever the default builder
        # rejects a duplicate atom name within the residue.
        for attempt in range(10000):
            try:
                return super().init_atom(name, coord, b_factor, occupancy, altloc, fullname, serial_number, element)
            except PDB.PDBExceptions.PDBConstructionException:
                name = name[0] + self._number_to_3char_name(attempt)


def center_from_pdb_file(filepath):
    """ Returns the geometric center of a PDB-file structure """
    parser = PDB.PDBParser(PERMISSIVE=1, QUIET=True, structure_builder=NonUniqueStructureBuilder())
    try:
        pocket = parser.get_structure('pocket', filepath)
    except FileNotFoundError:
        return None
    coords = structure_to_coord(pocket, allow_off_chain=True, allow_hydrogen=True)
    if len(coords) == 0:
        return None
    if len(coords) > 3:
        # convex-hull centroid is robust to uneven atom density
        return hull_centroid_3d(failsafe_hull(coords))
    return np.mean(coords, axis=0)
def htmd_featurizer(pdb_entries, skip_existing=True):
    """
    Ensures that all entries have their HTMD featurization precomputed.

    Pipeline per entry: strip waters/heteroatoms with Biopython, convert the
    PDB to PDBQT with MGLTools' prepare_receptor4 (retrying with an
    obabel-cleaned input on failure), then save per-atom channels and
    coordinates into the entry's `protein_htmd` npz file.

    :param pdb_entries: Iterable of dicts with `protein` and `protein_htmd` paths
    :param skip_existing: If True, entries whose npz output already exists are skipped
    """
    # - note: this is massively hacky but the data also tends to be quite dirty...

    # - Mgltools is Python 2.5 only script destroying Python3 environments, so we have to call another conda env
    # - unaddressed warnings info: http://mgldev.scripps.edu/pipermail/mglsupport/2008-December/000091.html
    # - note: http://autodock.scripps.edu/faqs-help/how-to/how-to-prepare-a-receptor-file-for-autodock4
    # - note: http://mgldev.scripps.edu/pipermail/autodock/2008-April/003946.html
    mgl_command = 'source activate deeplytough_mgltools; pythonsh ' \
                  '$CONDA_PREFIX/MGLToolsPckgs/AutoDockTools/Utilities24/prepare_receptor4.py ' \
                  '-r {} -U nphs_lps_waters -A hydrogens'

    for entry in pdb_entries:
        pdb_path = os.path.abspath(entry['protein'])
        npz_path = os.path.abspath(entry['protein_htmd'])
        if skip_existing and os.path.exists(npz_path):
            continue

        logger.info(f'Pre-processing {pdb_path} with HTMD...')
        if not os.path.exists(pdb_path):
            logger.error(f'{pdb_path} not found, skipping its pre-preprocessing.')
            continue

        output_dir = os.path.dirname(npz_path)
        os.makedirs(output_dir, exist_ok=True)

        def compute_channels():
            # prepare_receptor4 sometimes emits `<name>_model1.pdbqt`; rename it first
            pdbqt_path = os.path.join(output_dir, os.path.basename(pdb_path)) + 'qt'
            if not os.path.exists(pdbqt_path) and os.path.exists(pdbqt_path.replace('.pdb', '_model1.pdb')):
                os.rename(pdbqt_path.replace('.pdb', '_model1.pdb'), pdbqt_path)
            mol = htmdmol.Molecule(pdbqt_path)

            # this no longer works (2/12/2021) – non trivial fix, replaced with earlier `remove_water_and_hets`
            # mol.filter('protein')

            # slaughtered getVoxelDescriptors()
            channels = htmdvox._getAtomtypePropertiesPDBQT(mol)
            sigmas = htmdvox._getRadii(mol)
            channels = sigmas[:, np.newaxis] * channels.astype(float)
            coords = mol.coords[:, :, mol.frame]

            np.savez(npz_path, channels=channels, coords=coords)

        with tempfile.TemporaryDirectory() as tmpdirname:
            # use biopython to remove all non-protein atoms
            pdb_path_tempdir1 = os.path.join(tmpdirname, os.path.basename(pdb_path))  # same name different dir
            remove_water_and_hets(pdb_path, pdb_path_tempdir1)

            # process pdb -> pdbqt (output written to `output_dir`)
            try:
                subprocess.run(['/bin/bash', '-ic', mgl_command.format(pdb_path_tempdir1)], cwd=output_dir, check=True)
            except Exception as err1:
                # Surface the first failure (previously `err1` was silently discarded)
                logger.warning(f'prepare_receptor4 failed on {pdb_path} ({err1}), retrying via obabel')
                try:
                    # Put input through obabel to handle some problematic formattings, its parser seems quite robust
                    # (could go directly to pdbqt with `-xr -xc -h` but somehow the partial charges are all zero)
                    with tempfile.TemporaryDirectory() as tmpdirname2:
                        pdb_path_tempdir2 = os.path.join(tmpdirname2, os.path.basename(pdb_path))
                        subprocess.run(['obabel', pdb_path_tempdir1, '-O', pdb_path_tempdir2, '-h'], check=True)
                        subprocess.run(['/bin/bash', '-ic', mgl_command.format(pdb_path_tempdir2)],
                                       cwd=output_dir, check=True)
                except Exception as err2:
                    logger.exception(err2)
                    continue

            # compute channels
            try:
                compute_channels()
            except Exception as err3:
                logger.exception(err3)
def voc_ap(rec, prec):
    """
    Compute VOC AP given precision and recall.
    Taken from https://github.com/marvis/pytorch-yolo2/blob/master/scripts/voc_eval.py
    Different from scikit's average_precision_score (https://github.com/scikit-learn/scikit-learn/issues/4577)

    :param rec: array-like of recall values (monotonically non-decreasing)
    :param prec: array-like of precision values, same length as `rec`
    :return: float, area under the interpolated precision-recall curve
    """
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))

    # compute the precision envelope (make precision monotonically non-increasing)
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap


@lru_cache()
def pdb_check_obsolete(pdb_code):
    """ Check the status of a pdb, if it is obsolete return the superceding PDB ID else return None """
    try:
        r = requests.get(f'https://www.ebi.ac.uk/pdbe/api/pdb/entry/status/{pdb_code}').json()
    except Exception:
        # narrowed from a bare `except:` -- network failures or bad JSON mean "status unknown"
        logger.info(f"Could not check obsolete status of {pdb_code}")
        return None
    if r[pdb_code][0]['status_code'] == 'OBS':
        pdb_code = r[pdb_code][0]['superceded_by'][0]
        return pdb_code
    else:
        return None


class RcsbPdbClusters:
    """Lookup of RCSB sequence-identity cluster IDs for (pdb code, chain) pairs."""

    def __init__(self, identity=30):
        # NOTE(review): assumes STRUCTURE_DATA_DIR is set; os.path.join below
        # would fail on None -- confirm against deployment environment
        self.cluster_dir = os.environ.get('STRUCTURE_DATA_DIR')
        self.identity = identity
        self.clusters = {}  # maps 'PDBCODE_CHAIN' -> cluster index (line number in the file)
        self._fetch_cluster_file()

    def _download_cluster_sets(self, cluster_file_path):
        """Download the RCSB bc-<identity>.out cluster definition file."""
        os.makedirs(os.path.dirname(cluster_file_path), exist_ok=True)
        # Note that the files changes frequently as do the ordering of cluster within
        request.urlretrieve(f'https://cdn.rcsb.org/resources/sequence/clusters/bc-{self.identity}.out', cluster_file_path)

    def _fetch_cluster_file(self):
        """ load cluster file if found else download and load """
        cluster_file_path = os.path.join(self.cluster_dir, f"bc-{self.identity}.out")
        logging.info(f"cluster file path: {cluster_file_path}")
        if not os.path.exists(cluster_file_path):
            logging.warning("Cluster definition not found, will download a fresh one.")
            logging.warning("However, this will very likely lead to silent incompatibilities with any old 'pdbcode_mappings.pickle' files! Please better remove those manually.")
            self._download_cluster_sets(cluster_file_path)

        # context manager closes the handle deterministically (was a leaked open())
        with open(cluster_file_path, 'rb') as cluster_file:
            for n, line in enumerate(cluster_file):
                # each line lists all members of one cluster, whitespace-separated
                for member_id in line.decode('ascii').split():
                    self.clusters[member_id] = n

    def get_seqclust(self, pdbCode, chainId, check_obsolete=True):
        """ Get sequence cluster ID for a pdbcode chain using RCSB mmseq2/blastclust predefined clusters """
        query_str = f"{pdbCode.upper()}_{chainId.upper()}"  # e.g. 1ATP_I
        seqclust = self.clusters.get(query_str, 'None')

        if check_obsolete and seqclust == 'None':
            superceded = pdb_check_obsolete(pdbCode)
            if superceded is not None:
                logging.info(f"Assigning cluster for obsolete entry via superceding: {pdbCode}->{superceded} {chainId}")
                return self.get_seqclust(superceded, chainId, check_obsolete=False)  # assumes chain is same in superceding entry
        if seqclust == 'None':
            logging.info(f"unable to assign cluster to {pdbCode}{chainId}")
        return seqclust
def get_cli_args():
    """Parse command-line arguments for the custom-dataset evaluation script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_subdir', type=str, default='custom', help='Dataset directory within $STRUCTURE_DATA_DIR')
    parser.add_argument('--output_dir', type=str, help='Output directory for result pickle')
    parser.add_argument('--alg', type=str, default='DeeplyTough', help='Algorithm type')
    parser.add_argument('--net', type=str, default='', help='DeeplyTough network filepath')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda:0')
    parser.add_argument('--nworkers', default=1, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--db_preprocessing', default=1, type=int, help='Bool: whether to run preprocessing for the dataset')

    return parser.parse_args()


def main():
    """Evaluate pocket-pair matching on a user-provided (custom) dataset.

    Writes a full-results pickle and a `key1,key2,score` CSV into --output_dir.
    """
    args = get_cli_args()

    database = Custom(args.dataset_subdir)

    if args.db_preprocessing:
        database.preprocess_once()

    # Retrieve structures
    entries = database.get_structures()

    # Get matcher and perform any necessary pre-computations
    if args.alg == 'DeeplyTough':
        matcher = DeeplyTough(args.net, device=args.device, batch_size=args.batch_size, nworkers=args.nworkers)
        entries = matcher.precompute_descriptors(entries)
    else:
        raise NotImplementedError

    # Evaluate pocket pairs
    results = database.evaluate_matching(entries, matcher)
    results['benchmark_args'] = args

    # Format output file names
    fname = f"Custom-{args.alg}-{os.path.basename(os.path.dirname(args.net))}.pickle"
    fname_txt = os.path.join(args.output_dir, fname.replace('.pickle', '.csv'))

    # Make sure output directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    # Write pickle (context manager closes the handle; was a leaked open())
    with open(os.path.join(args.output_dir, fname), 'wb') as f:
        pickle.dump(results, f)

    # Write csv results
    with open(fname_txt, 'w') as f:
        for p, s in zip(results['pairs'], results['scores']):
            f.write(f"{p[0]['key']},{p[1]['key']},{s}\n")

    # Done!
    print(f'Evaluation finished, see {fname_txt}')


if __name__ == '__main__':
    main()
def get_cli_args():
    """Parse command-line arguments for the ProSPECCTs benchmark script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir', type=str, help='Output directory for result pickle')
    parser.add_argument('--alg', type=str, default='DeeplyTough', help='Algorithm type')
    parser.add_argument('--net', type=str, default='', help='DeeplyTough network filepath')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda:0')
    parser.add_argument('--nworkers', default=1, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--dbname', type=str, default='all', help='Prospeccts dataset name (P1 .. P7, or all)')
    parser.add_argument('--db_preprocessing', default=0, type=int, help='Bool: whether to run preprocessing for the dataset')

    return parser.parse_args()


def main():
    """Run the ProSPECCTs benchmark over one or all of its sub-datasets.

    For each sub-dataset, writes a full-results pickle and a
    `code5,code5,score` CSV into --output_dir and prints the AUC.
    """
    args = get_cli_args()

    # BUGFIX: a single name must be wrapped in a list -- iterating the bare
    # string would yield its characters (e.g. 'P1' -> 'P', '1')
    dbnames = [args.dbname] if args.dbname != 'all' else Prospeccts.dbnames

    for dbname in dbnames:
        database = Prospeccts(dbname)

        if args.db_preprocessing:
            database.preprocess_once()

        # Retrieve structures
        entries = database.get_structures()

        # Get matcher and perform any necessary pre-computations
        if args.alg == 'DeeplyTough':
            matcher = DeeplyTough(args.net, device=args.device, batch_size=args.batch_size, nworkers=args.nworkers)
            entries = matcher.precompute_descriptors(entries)
        else:
            raise NotImplementedError

        # Evaluate pocket pairs
        results = database.evaluate_matching(entries, matcher)
        results['benchmark_args'] = args

        # Format output file names
        fname = f"Prospeccts-{args.alg}-{os.path.basename(os.path.dirname(args.net))}-{dbname}.pickle"

        # Make sure output directory exists
        os.makedirs(args.output_dir, exist_ok=True)

        # Write pickle (context manager closes the handle; was a leaked open())
        with open(os.path.join(args.output_dir, fname), 'wb') as f:
            pickle.dump(results, f)

        # Write csv results
        with open(os.path.join(args.output_dir, fname.replace('.pickle', '.csv')), 'w') as f:
            for p, s in zip(results['pairs'], results['scores']):
                f.write(f"{p[0]['code5']},{p[1]['code5']},{s}\n")

        # Done!
        print(f"Testing finished on {dbname}, AUC = {results['auc']}")


if __name__ == '__main__':
    main()
def get_cli_args():
    """Parse command-line arguments for the TOUGH-M1 benchmark script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir', type=str, help='Output directory for result pickle')
    parser.add_argument('--alg', type=str, default='DeeplyTough', help='Algorithm type')
    parser.add_argument('--net', type=str, default='', help='DeeplyTough network filepath')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda:0')
    parser.add_argument('--nworkers', default=1, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--cvfold', default=0, type=int, help='Fold left-out for testing in leave-one-out setting')
    parser.add_argument('--cvseed', default=7, type=int, help='Dataset shuffling seed')
    parser.add_argument('--num_folds', default=5, type=int, help='Num folds')
    parser.add_argument('--db_split_strategy', default='seqclust', help="pdb_folds|uniprot_folds|seqclust")
    parser.add_argument('--db_preprocessing', default=0, type=int, help='Bool: if 1, run preprocessing for the dataset')

    return parser.parse_args()


def main():
    """Run the TOUGH-M1 benchmark on the held-out test fold.

    Supports the DeeplyTough matcher as well as the precomputed 'official'
    baselines (G-LoSA, APoc, SiteEngine). Writes a full-results pickle and a
    `code5,code5,score` CSV into --output_dir and prints the AUC.
    """
    args = get_cli_args()

    database = ToughM1()

    if args.db_preprocessing:
        database.preprocess_once()

    # Retrieve structures (test fold only; train fold is discarded here)
    _, entries = database.get_structures_splits(args.cvfold, strategy=args.db_split_strategy,
                                                n_folds=args.num_folds, seed=args.cvseed)

    # Get matcher and perform any necessary pre-computations
    if args.alg == 'DeeplyTough':
        matcher = DeeplyTough(args.net, device=args.device, batch_size=args.batch_size, nworkers=args.nworkers)
        # sanity check: the network's training split must match the requested fold/seed
        if matcher.args.seed != args.cvseed or matcher.args.cvfold != args.cvfold:
            logger.warning('Likely not evaluating on the test set for this network')
        entries = matcher.precompute_descriptors(entries)
    elif args.alg == 'OfiGlosa':
        matcher = ToughOfficials('G-LoSA', 2)
    elif args.alg == 'OfiApoc':
        matcher = ToughOfficials('APoc', 2)
    elif args.alg == 'OfiSiteEngine':
        matcher = ToughOfficials('SiteEngine', 3)
    else:
        raise NotImplementedError

    # Evaluate pocket pairs
    results = database.evaluate_matching(entries, matcher)
    results['benchmark_args'] = args

    # Format output file names
    fname = f'ToughM1-{args.alg}-{os.path.basename(os.path.dirname(args.net))}.pickle'

    # Make sure output directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    # Write pickle (context manager closes the handle; was a leaked open())
    with open(os.path.join(args.output_dir, fname), 'wb') as f:
        pickle.dump(results, f)

    # Write csv results
    with open(os.path.join(args.output_dir, fname.replace('.pickle', '.csv')), 'w') as f:
        for p, s in zip(results['pairs'], results['scores']):
            f.write(f"{p[0]['code5']},{p[1]['code5']},{s}\n")

    # Done!
    print(f"Testing finished, AUC = {results['auc']}")


if __name__ == '__main__':
    main()
def get_cli_args():
    """Parse and post-process command-line arguments for training.

    Post-processing: `lr_steps` is literal_eval'd into a list, `start_epoch`
    is initialized to 0, and a 'TTTT' placeholder in `output_dir` is replaced
    with the current timestamp.
    """
    parser = argparse.ArgumentParser()

    # Optimization arguments
    parser.add_argument('--wd', default=5.0e-4, type=float, help='Weight decay')
    parser.add_argument('--lr', default=1e-3, type=float, help='Initial learning rate')
    parser.add_argument('--lr_decay', default=0.2, type=float, help='Multiplicative factor used on learning rate at `lr_steps`')
    parser.add_argument('--lr_steps', default='[100,150]', help='List of epochs where the learning rate is decreased by `lr_decay`')
    parser.add_argument('--momentum', default=0.9, type=float, help='Momentum for sgd')
    parser.add_argument('--epochs', default=151, type=int, help='Number of epochs to train. If <=0, only testing will be done.')
    parser.add_argument('--batch_size', default=16, type=int, help='Batch size')
    parser.add_argument('--batch_parts', default=1, type=int, help='Batch can be evaluated sequentially in multiple shards, >=1, very useful in low memory settings, though computation is not strictly equivalent due to batch normalization running statistics.')
    parser.add_argument('--optim', default='adam', help='Optimizer: sgd|adam')
    parser.add_argument('--max_train_samples', default=2500, type=int, help='Max train samples per epoch (good for large datasets)')
    parser.add_argument('--max_test_samples', default=100, type=int, help='Max test samples per epoch (good for large datasets)')

    # Experiment arguments
    parser.add_argument('--device', default='cuda:0', type=str)
    parser.add_argument('--nworkers', default=3, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--test_nth_epoch', default=1, type=int, help='Test each n-th epoch during training')
    parser.add_argument('--resume', default='', help='Loads a previously saved model.')

    # Dataset
    parser.add_argument('--output_dir', default='results', help='Directory to store results')
    parser.add_argument('--cvfold', default=0, type=int, help='Fold left-out for testing in leave-one-out setting')
    parser.add_argument('--num_folds', default=5, type=int, help='Num folds')
    parser.add_argument('--augm_rot', default=1, type=int, help='Training augmentation: Bool, random rotation')
    parser.add_argument('--augm_mirror_prob', default=0, type=float, help='Training augmentation: Probability of mirroring about axes')
    parser.add_argument('--augm_sampling_dist', default=2.0, type=float, help='Training augmentation: Max distance from fpocket centers')
    parser.add_argument('--augm_decoy_prob', default=0.1, type=float, help='Training augmentation: Probability of negative decoy')
    parser.add_argument('--patch_size', default=24, type=int, help='Patch size for training')
    parser.add_argument('--input_normalization', default=1, type=int, help='Bool: whether to normalize input statistics')
    parser.add_argument('--db_exclude_vertex', default='', type=str, help='Whether to exclude Vertex dataset proteins in the training fold: (|seqclust|uniprot|pdb)')
    parser.add_argument('--db_exclude_prospeccts', default='', type=str, help='Whether to exclude Prospeccts dataset proteins in the training fold: (|seqclust|uniprot|pdb)')
    parser.add_argument('--db_split_strategy', default='seqclust', help="pdb_folds|uniprot_folds|seqclust|none")
    parser.add_argument('--db_preprocessing', default=0, type=int, help='Bool: whether to run preprocessing for the dataset')
    parser.add_argument('--db_size_limit', default=0, type=int, help='Artificial restriction of database size, either on # pdbs (>0) or # pairs (<0)')

    # Model
    parser.add_argument('--model_config', default='se_16_16_16_16_7_3_2_batch_1,se_32_32_32_32_3_1_1_batch_1,se_48_48_48_48_3_1_2_batch_1,se_64_64_64_64_3_0_1_batch_1,se_256_0_0_0_3_0_2_batch_1,r,b,c_128_1', help='Defines the model as a sequence of layers.')
    parser.add_argument('--seed', default=1, type=int, help='Seed for random initialisation')
    parser.add_argument('--l2_normed_descriptors', default=1, type=int, help='L2-normalize descriptors/network outputs')
    parser.add_argument('--loss_margin', default=1.0, type=float, help='Margin in hinge losses')
    parser.add_argument('--stability_loss_weight', default=1.0, type=float, help='Weight of augmentation invariance loss')
    parser.add_argument('--stability_loss_squared', default=0, type=int, help='Augmentation invariance loss distances squared (1) or not (0)')

    args = parser.parse_args()
    args.start_epoch = 0
    args.lr_steps = ast.literal_eval(args.lr_steps)
    args.output_dir = args.output_dir.replace('TTTT', datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
    # gradient accumulation requires the batch to split evenly into parts
    assert args.batch_size % args.batch_parts == 0
    return args
def main():
    """Train the DeeplyTough descriptor network on the TOUGH-M1 dataset.

    Sets up datasets, model, optimizer and scheduler (optionally resuming from
    a checkpoint), then alternates training and periodic testing epochs,
    checkpointing the model to `<output_dir>/model.pth.tar` after each test.
    """
    args = get_cli_args()
    print('Will save to ' + args.output_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # record the exact command line for reproducibility (non-flags get quoted)
    with open(os.path.join(args.output_dir, 'cmdline.txt'), 'w') as f:
        f.write(" ".join(["'"+a+"'" if (len(a) == 0 or a[0] != '-') else a for a in sys.argv]))

    set_seed(args.seed)
    device = torch.device(args.device)
    writer = SummaryWriter(args.output_dir)

    train_dataset, test_dataset = create_tough_dataset(
        args, fold_nr=args.cvfold, n_folds=args.num_folds, seed=args.seed,
        exclude_Vertex_from_train=args.db_exclude_vertex, exclude_Prospeccts_from_train=args.db_exclude_prospeccts
    )
    logger.info('Train set size: %d, test set size: %d', len(train_dataset), len(test_dataset))

    # Create model and optimizer (or resume pre-existing)
    if args.resume != '':
        if args.resume == 'RESUME':
            # convenience alias: resume from this run's own checkpoint
            args.resume = args.output_dir + '/model.pth.tar'
        model, optimizer, scheduler = resume(args, train_dataset, device)
    else:
        model = create_model(args, train_dataset, device)
        if args.input_normalization:
            model.set_input_scaler(estimate_scaler(args, train_dataset, nsamples=200))
        optimizer = create_optimizer(args, model)
        scheduler = MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_decay)

    ############
    def train():
        """Run one training epoch; returns mean loss/distance statistics."""
        model.train()

        loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size//args.batch_parts,
                                             num_workers=args.nworkers, shuffle=True, drop_last=True,
                                             worker_init_fn=set_worker_seed)

        # progress bar only when not debugging (tqdm would interleave with debug logs)
        if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
            loader = tqdm(loader, ncols=100)

        loss_buffer, loss_stabil_buffer, pos_dist_buffer, neg_dist_buffer = [], [], [], []
        t0 = time.time()

        for bidx, batch in enumerate(loader):
            # cap the number of samples per epoch (useful for large datasets)
            if 0 < args.max_train_samples < bidx * args.batch_size//args.batch_parts:
                break
            t_loader = 1000*(time.time()-t0)

            inputs = batch['inputs'].to(device)  # dimensions: batch_size x (4 or 2) x 24 x 24 x 24
            targets = batch['targets'].to(device)

            # gradient accumulation: zero grads only at the start of each group of `batch_parts` shards
            if bidx % args.batch_parts == 0:
                optimizer.zero_grad()
                t0 = time.time()

            # fold the pair/tuple dimension into the batch dimension for the network, then restore it
            outputs = model(inputs.view(-1, *inputs.shape[2:]))
            outputs = outputs.view(*inputs.shape[:2], -1)
            loss_joint, loss_match, loss_stabil, pos_dist, neg_dist = compute_loss(args, outputs, targets, True)
            loss_joint.backward()

            # step only after the last shard of the accumulated batch
            if bidx % args.batch_parts == args.batch_parts-1:
                if args.batch_parts > 1:
                    # average the accumulated gradients over the shards
                    for p in model.parameters():
                        p.grad.data.div_(args.batch_parts)
                optimizer.step()

            t_trainer = 1000*(time.time()-t0)
            loss_buffer.append(loss_match.item())
            # loss_stabil may be a plain 0 (int) when the stability loss is disabled
            loss_stabil_buffer.append(loss_stabil.item() if isinstance(loss_stabil, torch.Tensor) else loss_stabil)
            pos_dist_buffer.extend(pos_dist.cpu().numpy().tolist())
            neg_dist_buffer.extend(neg_dist.cpu().numpy().tolist())
            logger.debug('Batch loss %f, Loader time %f ms, Trainer time %f ms.', loss_buffer[-1], t_loader, t_trainer)
            t0 = time.time()

        ret = {'loss': np.mean(loss_buffer), 'loss_stabil': np.mean(loss_stabil_buffer),
               'pos_dist': np.mean(pos_dist_buffer), 'neg_dist': np.mean(neg_dist_buffer)}
        return ret

    ############
    def test():
        """Run one evaluation pass; returns mean loss/distance statistics."""
        model.eval()

        loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size//args.batch_parts,
                                             num_workers=args.nworkers, worker_init_fn=set_worker_seed)

        if logging.getLogger().getEffectiveLevel() > logging.DEBUG:
            loader = tqdm(loader, ncols=100)

        loss_buffer, loss_stabil_buffer, pos_dist_buffer, neg_dist_buffer = [], [], [], []

        with torch.no_grad():
            for bidx, batch in enumerate(loader):
                if 0 < args.max_test_samples < bidx * args.batch_size//args.batch_parts:
                    break
                inputs = batch['inputs'].to(device)
                targets = batch['targets'].to(device)

                outputs = model(inputs.view(-1, *inputs.shape[2:]))
                outputs = outputs.view(*inputs.shape[:2], -1)
                loss_joint, loss_match, loss_stabil, pos_dist, neg_dist = compute_loss(args, outputs, targets, False)

                loss_buffer.append(loss_match.item())
                loss_stabil_buffer.append(loss_stabil.item() if isinstance(loss_stabil, torch.Tensor) else loss_stabil)
                pos_dist_buffer.extend(pos_dist.cpu().numpy().tolist())
                neg_dist_buffer.extend(neg_dist.cpu().numpy().tolist())

        return {'loss': np.mean(loss_buffer), 'loss_stabil': np.mean(loss_stabil_buffer),
                'pos_dist': np.mean(pos_dist_buffer), 'neg_dist': np.mean(neg_dist_buffer)}

    ############
    # Training loop
    for epoch in range(args.start_epoch, args.epochs):
        print(f'Epoch {epoch}/{args.epochs} ({args.output_dir}):')
        # scheduler stepped before the epoch: matches pre-1.1 PyTorch semantics
        # (requirements.txt pins torch==1.0)
        scheduler.step()

        train_stats = train()
        for k, v in train_stats.items():
            writer.add_scalar('train/' + k, v, epoch)
        print(f"-> Train distances: p {train_stats['pos_dist']}, n {train_stats['neg_dist']}, \tLoss: {train_stats['loss']}")

        if (epoch+1) % args.test_nth_epoch == 0 or epoch+1 == args.epochs:
            test_stats = test()
            for k, v in test_stats.items():
                writer.add_scalar('test/' + k, v, epoch)
            print(f"-> Test distances: p {test_stats['pos_dist']}, n {test_stats['neg_dist']}, \tLoss: {test_stats['loss']}")

            torch.save({'epoch': epoch + 1, 'args': args, 'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict()},
                       os.path.join(args.output_dir, 'model.pth.tar'))

        # abort on divergence
        if math.isnan(train_stats['loss']):
            break
def compute_loss(args, outputs, targets, training):
    """
    Computes both stability and contrastive loss.

    :param args: namespace with `l2_normed_descriptors`, `stability_loss_weight`,
        `stability_loss_squared` and `loss_margin`
    :param outputs: descriptors; squeezed to (batch, pair, descriptor_dim)
    :param targets: 0/1 match labels; squeezed to (batch,)
    :param training: if True (and the weight is positive), the stability loss
        over perturbed duplicates is included and the duplicates are stripped
    :return: (joint loss, contrastive loss, stability loss,
              positive-pair distances, negative-pair distances)
    """
    # NOTE(review): squeeze() would also drop a batch dimension of size 1 and
    # trip the assert below -- callers appear to always use batch_size > 1, confirm
    outputs = torch.squeeze(outputs)
    targets = torch.squeeze(targets)
    assert outputs.dim() == 3 and targets.dim() == 1

    if args.l2_normed_descriptors:
        outputs = nnf.normalize(outputs, p=2, dim=2)

    # Stability loss
    if training and args.stability_loss_weight > 0:
        # every odd entry in the batch is a perturbed version of the previous even entry
        a = outputs[:, 0::2].view(-1, outputs.shape[-1])
        b = outputs[:, 1::2].view(-1, outputs.shape[-1])
        if args.stability_loss_squared:
            loss_stabil = nnf.pairwise_distance(a, b).pow(2).mean()
        else:
            loss_stabil = nnf.pairwise_distance(a, b).mean()
        # continue with just the even ones
        outputs = outputs[:, 0::2]
    else:
        loss_stabil = 0

    # Contrastive loss
    assert outputs.shape[1] == 2
    dists = nnf.pairwise_distance(outputs[:, 0], outputs[:, 1]).view(-1)

    # hinge: pull matching pairs together, push non-matching beyond the margin
    pos_loss = dists.pow(2)
    neg_loss = torch.clamp(args.loss_margin - dists, min=0).pow(2)
    loss_match = torch.sum(pos_loss * targets + neg_loss * (1 - targets)) / targets.numel()

    loss_joint = loss_match + args.stability_loss_weight * loss_stabil
    return loss_joint, loss_match, loss_stabil, dists[targets > 0.5].detach(), dists[targets < 0.5].detach()


def resume(args, dataset, device):
    """
    Loads model and optimizer state from a previous checkpoint.

    The model is rebuilt from the *checkpointed* args so the architecture
    matches the stored weights; optimizer/scheduler use the current args.
    """
    print(f"=> loading checkpoint '{args.resume}'")
    checkpoint = torch.load(args.resume, map_location=str(device))

    model = create_model(checkpoint['args'], dataset, device)
    model.load_state_dict(checkpoint['state_dict'])

    optimizer = create_optimizer(args, model)
    optimizer.load_state_dict(checkpoint['optimizer'])
    args.start_epoch = checkpoint['epoch']

    scheduler = MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_decay)
    scheduler.load_state_dict(checkpoint['scheduler'])

    return model, optimizer, scheduler


def create_optimizer(args, model):
    """Create the optimizer selected by `args.optim` ('sgd' or 'adam').

    :raises ValueError: for any other value of `args.optim`
    """
    if args.optim == 'sgd':
        return optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.wd)
    elif args.optim == 'adam':
        return optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
    # BUGFIX: previously fell through and returned None, deferring the crash
    # to the first optimizer call; fail fast with a clear message instead
    raise ValueError(f"Unknown optimizer '{args.optim}' (expected 'sgd' or 'adam')")


def set_seed(seed):
    """Seed python, numpy and torch RNGs for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)


def set_worker_seed(worker_id):
    """DataLoader worker init: derive the numpy seed from torch's per-worker seed."""
    np.random.seed(torch.initial_seed() % (2**32 - 1))


def estimate_scaler(args, train_dataset, nsamples):
    """Estimate per-channel input normalization statistics from ~`nsamples` training patches."""
    logger.info('Estimating dataset normalization')
    scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
    bidx = 0
    with tqdm(total=nsamples) as pbar:
        while True:
            # loader is recreated so sampling continues even if the dataset yields fewer than nsamples
            loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.nworkers,
                                                 shuffle=True, drop_last=True, worker_init_fn=set_worker_seed)
            for batch in loader:
                inputs = batch['inputs'].view(-1, *batch['inputs'].shape[2:])
                assert inputs.dim() == 5
                # flatten voxels to (num_voxels, num_channels) for the scaler
                voxels = inputs.transpose(1, 4).contiguous().view(-1, inputs.shape[1]).numpy()
                scaler.partial_fit(voxels)

                bidx += inputs.shape[0]
                pbar.update(inputs.shape[0])
                if bidx >= nsamples:
                    return scaler


if __name__ == "__main__":
    main()
def get_cli_args():
    """Parse command-line arguments for the Vertex benchmark script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir', type=str, help='Output directory for result pickle')
    parser.add_argument('--alg', type=str, default='DeeplyTough', help='Algorithm type')
    parser.add_argument('--net', type=str, default='', help='DeeplyTough network filepath')
    parser.add_argument('--device', type=str, default='cpu', help='cpu or cuda:0')
    parser.add_argument('--nworkers', default=1, type=int, help='Num subprocesses to use for data loading. 0 means that the data will be loaded in the main process')
    parser.add_argument('--batch_size', default=30, type=int)
    parser.add_argument('--db_preprocessing', default=0, type=int, help='Bool: whether to run preprocessing for the dataset')

    return parser.parse_args()


def main():
    """Run the Vertex benchmark.

    Writes a full-results pickle (including per-entry descriptors) and a
    `pair0,pair1,score` CSV into --output_dir and prints the AUC.
    """
    args = get_cli_args()

    database = Vertex()

    if args.db_preprocessing:
        database.preprocess_once()

    # Retrieve structures
    entries = database.get_structures()

    # Get matcher and perform any necessary pre-computations
    if args.alg == 'DeeplyTough':
        matcher = DeeplyTough(args.net, device=args.device, batch_size=args.batch_size, nworkers=args.nworkers)
        entries = matcher.precompute_descriptors(entries)
    else:
        raise NotImplementedError

    # Evaluate pocket pairs
    results = database.evaluate_matching(entries, matcher)
    results['benchmark_args'] = args
    results['entries'] = entries  # includes descriptors

    # Format output file names
    fname = f"Vertex-{args.alg}-{os.path.basename(os.path.dirname(args.net))}.pickle"

    # Make sure output directory exists
    os.makedirs(args.output_dir, exist_ok=True)

    # Write pickle (context manager closes the handle; was a leaked open())
    with open(os.path.join(args.output_dir, fname), 'wb') as f:
        pickle.dump(results, f)

    # Write csv results
    with open(os.path.join(args.output_dir, fname.replace('.pickle', '.csv')), 'w') as f:
        for p, s in zip(results['pairs'], results['scores']):
            f.write(f'{p[0]},{p[1]},{s}\n')

    # Done!
    print(f"Testing finished, AUC = {results['auc']}")


if __name__ == '__main__':
    main()
64 | print(f"Testing finished, AUC = {results['auc']}") 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /networks/deeplytough_prospeccts.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenevolentAI/DeeplyTough/7536c5bb7d4e1e7e5d4f6cfacd1a437db03596e9/networks/deeplytough_prospeccts.pth.tar -------------------------------------------------------------------------------- /networks/deeplytough_toughm1_test.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenevolentAI/DeeplyTough/7536c5bb7d4e1e7e5d4f6cfacd1a437db03596e9/networks/deeplytough_toughm1_test.pth.tar -------------------------------------------------------------------------------- /networks/deeplytough_vertex.pth.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenevolentAI/DeeplyTough/7536c5bb7d4e1e7e5d4f6cfacd1a437db03596e9/networks/deeplytough_vertex.pth.tar -------------------------------------------------------------------------------- /overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BenevolentAI/DeeplyTough/7536c5bb7d4e1e7e5d4f6cfacd1a437db03596e9/overview.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.2 2 | tqdm==4.36.1 3 | tensorboardx==1.9 4 | scikit-learn==0.20.1 5 | torch==1.0 6 | scipy==1.1.0 7 | pandas==1.1.4 8 | transforms3d==0.3.1 9 | matplotlib==3.1.1 10 | requests==2.22.0 11 | cython==0.29.13 12 | numba==0.43.1 13 | llvmlite==0.28 14 | biopython==1.72 15 | mdtraj==1.9.3 16 | natsort==8.0.0 17 | periodictable==1.6.0 18 | 
-------------------------------------------------------------------------------- /results/Prospeccts-DeeplyTough-deeplytough_prospeccts-P6.2.csv: -------------------------------------------------------------------------------- 1 | 1eyn,1ow4,-0.1210966557264328 2 | 2ans,1eyn,-0.08025316148996353 3 | 2ans,1ow4,-0.07656160742044449 4 | 3fty,1w7h,-0.40553945302963257 5 | 1vyg,1diy,-0.22101353108882904 6 | 3bra,4n7c,-0.7446341514587402 7 | 3cf9,4hkk,-0.3935973346233368 8 | 2qre,1m9n,-0.15306711196899414 9 | 3hig,2gby,-0.5969905257225037 10 | 2rh1,2q6h,-0.4271696209907532 11 | 2wyd,2ovd,-0.21460622549057007 12 | 4nyq,4ia6,-0.31131836771965027 13 | 2g0l,4hki,-0.6067702770233154 14 | 2ph9,1dx6,-0.41628697514533997 15 | 1mv9,1fdq,-0.6037502884864807 16 | 2wd9,3p6h,-0.10641007870435715 17 | 1e6q,1nok,-0.6131260991096497 18 | 2b50,4bvm,-0.46719327569007874 19 | 2x8z,4dpr,-0.2861819863319397 20 | 4hcy,4mhw,-0.06726853549480438 21 | 2vyv,3ew5,-0.08758662641048431 22 | 4kcx,4kd1,-0.768419623374939 23 | 1xql,1pb9,-0.06042586266994476 24 | 2yyj,2yaj,-0.5475000143051147 25 | 1ve7,2i6p,-0.7137326002120972 26 | 2hkk,3pah,-0.7184109687805176 27 | 1b66,1sep,-0.532262921333313 28 | 2nsl,2gqs,-0.062365081161260605 29 | 3ln1,1oq5,-0.5009394288063049 30 | 3dds,2a3b,-0.339447557926178 31 | 3ebp,3blr,-0.3449978232383728 32 | 1pa9,1e2s,-0.1828428953886032 33 | 1mxh,1lcb,-0.3812151849269867 34 | 1mxh,1rf7,-0.12110111862421036 35 | 1rf7,1lcb,-0.4814496636390686 36 | 3ed0,3bqc,-0.15430866181850433 37 | 4cus,3kpu,-0.6722819209098816 38 | 1z9y,2xn5,-0.8759850263595581 39 | 2v6t,2fbz,-0.21715952455997467 40 | 2oyf,4hm0,-0.3283771872520447 41 | 4lzr,4o2b,-0.8234207630157471 42 | 3hlg,1t02,-0.4574362337589264 43 | 3hbg,3etr,-0.059538986533880234 44 | 3hbf,2o63,-0.13629847764968872 45 | 1eyq,2uxu,-0.24603182077407837 46 | 3wl8,2wg9,-0.518446683883667 47 | 3eau,2q1v,-0.9179725646972656 48 | 1tx0,1br6,-0.10462214052677155 49 | 3b00,3p73,-0.6607016921043396 50 | 2o73,2fxl,-0.0228722020983696 51 | 
2otf,2nuv,-0.9171537160873413 52 | 3tvl,4odj,-0.06630491465330124 53 | 3gcl,1oxr,-0.2180204689502716 54 | 2uy4,3hs4,-0.3475250005722046 55 | 2qvd,3d6y,-0.4596768319606781 56 | 1qhy,4cla,-0.7986555099487305 57 | 3t4k,4jhi,-0.6516141295433044 58 | 1s2c,2pix,-0.8661952018737793 59 | 2p1q,2oyf,-0.3495883643627167 60 | 2p1q,4hm0,-0.2747463285923004 61 | 1s4m,1he5,-0.028176935389637947 62 | 2fu7,2lig,-0.8264271020889282 63 | -------------------------------------------------------------------------------- /results/Prospeccts-DeeplyTough-deeplytough_prospeccts-P6.csv: -------------------------------------------------------------------------------- 1 | 1eyn,1ow4,-0.1210966557264328 2 | 2ans,1eyn,-0.08025316148996353 3 | 2ans,1ow4,-0.07656160742044449 4 | 3fty,1w7h,-0.40553945302963257 5 | 1vyg,1diy,-0.22101353108882904 6 | 3bra,4n7c,-0.7446341514587402 7 | 3cf9,4hkk,-0.3935973346233368 8 | 2qre,1m9n,-0.15306711196899414 9 | 3hig,2gby,-0.5351876020431519 10 | 2rh1,2q6h,-0.4271696209907532 11 | 2wyd,2ovd,-0.21460622549057007 12 | 4nyq,4ia6,-0.31131836771965027 13 | 2g0l,4hki,-0.6067702770233154 14 | 2ph9,1dx6,-0.41628697514533997 15 | 1mv9,1fdq,-0.6037502884864807 16 | 2wd9,3p6h,-0.10641007870435715 17 | 1e6q,1nok,-0.6131260991096497 18 | 2b50,4bvm,-0.46719327569007874 19 | 2x8z,4dpr,-0.2861819863319397 20 | 4hcy,4mhw,-0.06726853549480438 21 | 2vyv,3ew5,-0.055992886424064636 22 | 4kcx,4kd1,-0.768419623374939 23 | 1xql,1pb9,-0.06532666087150574 24 | 2yyj,2yaj,-0.5475000143051147 25 | 1ve7,2i6p,-0.7137326002120972 26 | 2hkk,3pah,-0.7184109687805176 27 | 1b66,1sep,-0.532262921333313 28 | 2nsl,2gqs,-0.062365081161260605 29 | 3ln1,1oq5,-0.5009394288063049 30 | 3dds,2a3b,-0.339447557926178 31 | 3ebp,3blr,-0.3087920844554901 32 | 1pa9,1e2s,-0.1828428953886032 33 | 1mxh,1lcb,-0.3812151849269867 34 | 1mxh,1rf7,-0.12110111862421036 35 | 1rf7,1lcb,-0.4814496636390686 36 | 3ed0,3bqc,-0.15430866181850433 37 | 4cus,3kpu,-0.6722819209098816 38 | 1z9y,2xn5,-0.8759850263595581 39 | 
2v6t,2fbz,-0.21715952455997467 40 | 2oyf,4hm0,-0.3283771872520447 41 | 4lzr,4o2b,-0.8234207630157471 42 | 3hlg,1t02,-0.4574362337589264 43 | 3hbg,3etr,-0.059538986533880234 44 | 3hbf,2o63,-0.13629847764968872 45 | 1eyq,2uxu,-0.24603182077407837 46 | 3wl8,2wg9,-0.518446683883667 47 | 3eau,2q1v,-0.9179725646972656 48 | 1tx0,1br6,-0.10462214052677155 49 | 3b00,3p73,-0.6607016921043396 50 | 2o73,2fxl,-0.022316182032227516 51 | 2otf,2nuv,-0.9171537160873413 52 | 3tvl,4odj,-0.06630491465330124 53 | 3gcl,1oxr,-0.21801893413066864 54 | 2uy4,3hs4,-0.3475250005722046 55 | 2qvd,3d6y,-0.4596768319606781 56 | 1qhy,4cla,-0.7986555099487305 57 | 3t4k,4jhi,-0.6516141295433044 58 | 1s2c,2pix,-0.8661952018737793 59 | 2p1q,2oyf,-0.3495883643627167 60 | 2p1q,4hm0,-0.2747463285923004 61 | 1s4m,1he5,-0.028176935389637947 62 | 2fu7,2lig,-0.8264271020889282 63 | --------------------------------------------------------------------------------