├── utils
├── __pycache__
│ ├── util.cpython-37.pyc
│ ├── util.cpython-38.pyc
│ ├── util.cpython-39.pyc
│ ├── util.cpython-310.pyc
│ ├── parsers.cpython-310.pyc
│ ├── parsers.cpython-37.pyc
│ ├── parsers.cpython-38.pyc
│ ├── parsers.cpython-39.pyc
│ ├── align_pdbs.cpython-310.pyc
│ ├── align_pdbs.cpython-37.pyc
│ ├── align_pdbs.cpython-38.pyc
│ ├── align_pdbs.cpython-39.pyc
│ ├── kinematics.cpython-310.pyc
│ ├── kinematics.cpython-37.pyc
│ ├── kinematics.cpython-38.pyc
│ ├── kinematics.cpython-39.pyc
│ ├── kabsch_align.cpython-37.pyc
│ ├── dunbrack_rotlib.cpython-310.pyc
│ ├── dunbrack_rotlib.cpython-37.pyc
│ ├── dunbrack_rotlib.cpython-38.pyc
│ └── dunbrack_rotlib.cpython-39.pyc
├── kabsch_align.py
├── dunbrack_rotlib.py
├── align_pdbs.py
└── util.py
├── examples
├── P450
│ ├── command
│ └── inputs
│ │ ├── HBA_CYS_P450_nosample.cst
│ │ ├── P450_motif.pdb
│ │ └── HBA_unique.params
└── Kemp_eliminase
│ ├── command
│ └── inputs
│ ├── BIO.params
│ └── BIO_His_ED_oxy_nosample.cst
├── utils.py
├── README.md
├── invrotzyme.py
└── protocol.py
/utils/__pycache__/util.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/util.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/util.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/util.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/util.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/util.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/util.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/parsers.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/parsers.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/parsers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/parsers.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/parsers.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/parsers.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/parsers.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/parsers.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/align_pdbs.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/align_pdbs.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/align_pdbs.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/align_pdbs.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/align_pdbs.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/align_pdbs.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/align_pdbs.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/align_pdbs.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/kinematics.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kinematics.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/kinematics.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kinematics.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/kinematics.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kinematics.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/kinematics.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kinematics.cpython-39.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/kabsch_align.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kabsch_align.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/dunbrack_rotlib.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/dunbrack_rotlib.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/dunbrack_rotlib.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/dunbrack_rotlib.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/dunbrack_rotlib.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/dunbrack_rotlib.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/dunbrack_rotlib.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/dunbrack_rotlib.cpython-39.pyc
--------------------------------------------------------------------------------
/examples/P450/command:
--------------------------------------------------------------------------------
1 | python ../../invrotzyme.py --cstfile inputs/HBA_CYS_P450_nosample.cst --params inputs/HBA_unique.params --motif_for_cst 1:3:inputs/P450_motif.pdb --frac_random_rotamers 0.1 --prefix outputs/
2 |
--------------------------------------------------------------------------------
/examples/Kemp_eliminase/command:
--------------------------------------------------------------------------------
1 | python ../../invrotzyme.py --cstfile inputs/BIO_His_ED_oxy_nosample.cst --params inputs/BIO.params --dunbrack_prob 0.6 --frac_random_rotamers_per_cst 0.5 0.5 0.5 0.5 --secstruct_per_cst H H E --prefix outputs/ --suffix HHE
2 |
--------------------------------------------------------------------------------
/examples/P450/inputs/HBA_CYS_P450_nosample.cst:
--------------------------------------------------------------------------------
1 | # cst constraint descriptor for ferryl intermediate C-H abstraction TS from methoxybiphenyl
2 | # CYS coordinating to the Heme Fe based on P450 geometry
3 | # I. Kalvet, Baker lab, UW, ikalvet@uw.edu
4 |
5 |
6 | #block 1 for CYS coordinated to Fe
7 |
8 | CST::BEGIN
9 |
10 | TEMPLATE:: ATOM_MAP: 1 atom_name: FE1 N4 C19
11 | TEMPLATE:: ATOM_MAP: 1 residue3: HBA
12 |
13 | TEMPLATE:: ATOM_MAP: 2 atom_type: SH1
14 | TEMPLATE:: ATOM_MAP: 2 residue3: CYS
15 |
16 | CONSTRAINT:: distanceAB: 2.5 0.15 100. 1 0
17 | CONSTRAINT:: angle_A: 85.9 5.0 100.0 360. 0
18 | CONSTRAINT:: angle_B: 111.0 5.0 75.0 360. 0
19 | CONSTRAINT:: torsion_A: 84.5 5.0 75.0 360. 0
20 | CONSTRAINT:: torsion_AB: 108.0 15.0 0.0 360. 0
21 | CONSTRAINT:: torsion_B: 82.4 20.0 25.0 360. 0
22 |
23 | ALGORITHM_INFO:: match
24 | MAX_DUNBRACK_ENERGY 5.0
25 | IGNORE_UPSTREAM_PROTON_CHI
26 | ALGORITHM_INFO::END
27 |
28 | CST::END
29 |
30 |
31 |
--------------------------------------------------------------------------------
/examples/Kemp_eliminase/inputs/BIO.params:
--------------------------------------------------------------------------------
1 | NAME BIO
2 | IO_STRING BIO Z
3 | TYPE LIGAND
4 | AA UNK
5 | ATOM C1 aroC X -0.01
6 | ATOM C6 aroC X -0.03
7 | ATOM C5 aroC X 0.13
8 | ATOM N2 Npro X 0.06
9 | ATOM O3 ONH2 X -0.13
10 | ATOM O2 ONH2 X -0.13
11 | ATOM C4 aroC X -0.02
12 | ATOM C3 aroC X 0.04
13 | ATOM C7 aroC X 0.08
14 | ATOM N1 Nhis X -0.09
15 | ATOM O1 ONH2 X -1.05
16 | ATOM C2 aroC X 0.17
17 | ATOM H4 Haro X 0.09
18 | ATOM H2 Haro X 0.06
19 | ATOM H3 Haro X 0.06
20 | ATOM H1 Haro X 0.07
21 | BOND_TYPE O3 N2 2
22 | BOND_TYPE N2 O2 2
23 | BOND_TYPE N2 C5 1
24 | BOND_TYPE H2 C4 1
25 | BOND_TYPE C4 C5 4
26 | BOND_TYPE C4 C3 4
27 | BOND_TYPE C5 C6 4
28 | BOND_TYPE H4 C7 1
29 | BOND_TYPE C3 C7 4
30 | BOND_TYPE C3 C2 4
31 | BOND_TYPE C6 H3 1
32 | BOND_TYPE C6 C1 4
33 | BOND_TYPE C7 N1 4
34 | BOND_TYPE C2 C1 4
35 | BOND_TYPE C2 O1 4
36 | BOND_TYPE C1 H1 1
37 | BOND_TYPE N1 O1 4
38 | CHI 1 C6 C5 N2 O3
39 | NBR_ATOM C4
40 | NBR_RADIUS 4.083104
41 | ICOOR_INTERNAL C1 0.000000 0.000000 0.000000 C1 C6 C5
42 | ICOOR_INTERNAL C6 0.000000 180.000000 1.382716 C1 C6 C5
43 | ICOOR_INTERNAL C5 0.000000 59.182789 1.409222 C6 C1 C5
44 | ICOOR_INTERNAL N2 -179.998004 61.281665 1.447079 C5 C6 C1
45 | ICOOR_INTERNAL O3 -179.984056 61.433633 1.236799 N2 C5 C6
46 | ICOOR_INTERNAL O2 179.986424 61.557617 1.239696 N2 C5 O3
47 | ICOOR_INTERNAL C4 179.889661 57.795737 1.396329 C5 C6 N2
48 | ICOOR_INTERNAL C3 0.095219 63.144774 1.385375 C4 C5 C6
49 | ICOOR_INTERNAL C7 -179.747781 48.470187 1.449305 C3 C4 C5
50 | ICOOR_INTERNAL N1 179.814748 63.412331 1.251719 C7 C3 C4
51 | ICOOR_INTERNAL O1 0.121877 78.864331 1.794487 N1 C7 C3
52 | ICOOR_INTERNAL C2 -0.218402 78.350293 1.303685 O1 N1 C7
53 | ICOOR_INTERNAL H4 179.979921 55.243279 1.277743 C7 C3 N1
54 | ICOOR_INTERNAL H2 -179.886000 56.975238 1.084893 C4 C5 C3
55 | ICOOR_INTERNAL H3 -179.972679 59.482293 1.082576 C6 C1 C5
56 | ICOOR_INTERNAL H1 179.798701 58.039008 1.083917 C1 C6 C5
57 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Aug 25 23:12:52 2024
5 |
6 | @author: indrek
7 | """
8 | import numpy as np
9 |
10 |
# Number of side-chain chi angles for each residue (three-letter name),
# not counting proton chis.
N_chis = dict(
    ALA=0, ARG=4, TRP=2, GLY=0, ASP=2, HIS=2, GLU=3,
    GLN=3, ASN=2, LEU=2, ILE=2, THR=1, VAL=1, SER=1,
    MET=3, CYS=1, PRO=3, LYS=4, PHE=2, TYR=2, CYX=1,
)


# Ideal backbone PHI and PSI per secondary-structure letter. Each entry is a
# pair holding the ideal value and the tolerance used when randomizing it.
# The "-" entry carries the same numbers as "E".
idealized_SS_phi_psi = {
    "H": dict(phi=(-57.0, 10.0), psi=(-47.0, 10.0)),
    "E": dict(phi=(-140.0, 20.0), psi=(130.0, 20.0)),
    "-": dict(phi=(-140.0, 20.0), psi=(130.0, 20.0)),
}
21 |
22 |
def get_dist(a, b):
    """
    a, b (array-like)
        Coordinates of two points with matching shapes.
    Returns the Euclidean distance between a and b.
    Inputs are converted to float arrays first, so plain lists and tuples
    are accepted as well as numpy arrays (consistent with get_angle).
    """
    return np.linalg.norm(np.asarray(a, dtype=float) - np.asarray(b, dtype=float))
25 |
26 |
def get_angle(a1, a2, a3):
    """
    a1, a2, a3 (array-like)
        Each has to contain the coordinates of one point.
    Calculates the angle a1-a2-a3, i.e. the angle at a2 between the vectors
    pointing from a2 to a1 and from a2 to a3.
    Output is in degrees, rounded to one decimal place.
    If a1 or a3 coincides with a2 the vector has zero length and the result
    is NaN.
    """
    a1 = np.asarray(a1, dtype=float)
    a2 = np.asarray(a2, dtype=float)
    a3 = np.asarray(a3, dtype=float)

    ba = a1 - a2
    bc = a3 - a2

    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    # Round-off can leave the cosine marginally outside [-1, 1] for parallel
    # or anti-parallel vectors; arccos would then return NaN.
    cosine_angle = np.clip(cosine_angle, -1.0, 1.0)
    angle = np.arccos(cosine_angle)

    return round(np.degrees(angle), 1)
39 |
40 |
41 |
def get_dihedral(a1, a2, a3, a4):
    """
    a1, a2, a3, a4 (array-like)
        Each has to contain 3 numbers corresponding to X, Y and Z of an atom.
    Solution by 'Praxeolitic' from Stackoverflow:
    https://stackoverflow.com/questions/20305272/dihedral-torsion-angle-from-four-points-in-cartesian-coordinates-in-python#
    1 sqrt, 1 cross product
    Calculates the dihedral/torsion between atoms a1, a2, a3 and a4
    Output is in degrees

    The inputs are converted to float arrays and never modified, so integer
    arrays and plain lists are accepted.
    """
    a1, a2, a3, a4 = (np.asarray(p, dtype=float) for p in (a1, a2, a3, a4))

    b0 = a1 - a2
    b1 = a3 - a2
    b2 = a4 - a3

    # normalize b1 so that it does not influence magnitude of vector
    # rejections that come next (out of place: an in-place divide fails on
    # integer arrays)
    b1 = b1 / np.linalg.norm(b1)

    # vector rejections
    # v = projection of b0 onto plane perpendicular to b1
    #   = b0 minus component that aligns with b1
    # w = projection of b2 onto plane perpendicular to b1
    #   = b2 minus component that aligns with b1
    v = b0 - np.dot(b0, b1) * b1
    w = b2 - np.dot(b2, b1) * b1

    # angle between v and w in a plane is the torsion angle
    # v and w may not be normalized but that's fine since tan is y/x
    x = np.dot(v, w)
    y = np.dot(np.cross(b1, v), w)
    return np.degrees(np.arctan2(y, x))
74 |
75 |
def rmsd(geom, target):
    """
    geom, target (np.array)
        Two arrays of matching (or broadcastable) shape.
    Returns the square root of the mean squared difference between them.
    NOTE: the mean runs over every element of the difference array, not
    over rows.
    """
    squared_error = (geom - target) ** 2
    mean_squared_error = squared_error.mean()
    return np.sqrt(mean_squared_error)
78 |
79 |
80 |
--------------------------------------------------------------------------------
/examples/Kemp_eliminase/inputs/BIO_His_ED_oxy_nosample.cst:
--------------------------------------------------------------------------------
1 | # Rosetta matcher/enzdes CST description for Kemp Eliminase
2 | # Active site consisting of a HIS-GLU/ASP dyad and SER/THR/TYR/GLN/ASN oxyanion hole
3 | # CYS coordinating to the Heme Fe based on UPO geometry
4 | # I. Kalvet, Baker lab, UW, ikalvet@uw.edu
5 |
6 |
7 | ################## CST_1 ( His base ) ###############
8 | CST::BEGIN
9 |
10 | TEMPLATE:: ATOM_MAP: 1 atom_name: C7 N1 O1
11 | TEMPLATE:: ATOM_MAP: 1 residue3: BIO
12 |
13 | TEMPLATE:: ATOM_MAP: 2 atom_type: Nhis
14 | TEMPLATE:: ATOM_MAP: 2 residue1: H
15 |
16 | CONSTRAINT:: distanceAB: 2.68 0.15 100. 1 0
17 | CONSTRAINT:: angle_A: 125.8 5.0 100.0 360. 0
18 | CONSTRAINT:: angle_B: 114.7 5.0 75.0 360. 0
19 | CONSTRAINT:: torsion_A: 180.0 5.0 75.0 360. 0
20 | CONSTRAINT:: torsion_AB: 58.5 45.0 0.0 90. 0
21 | CONSTRAINT:: torsion_B: 180.0 5.0 25.0 360. 0
22 |
23 | CST::END
24 |
25 | ################## CST_2 ( GLU/ASP activating His ) ###############
26 | CST::BEGIN
27 |
28 | TEMPLATE:: ATOM_MAP: 1 atom_type: Ntrp
29 | TEMPLATE:: ATOM_MAP: 1 residue3: HIS
30 |
31 | TEMPLATE:: ATOM_MAP: 2 atom_type: OOC
32 | TEMPLATE:: ATOM_MAP: 2 residue1: ED
33 |
34 |
35 | CONSTRAINT:: distanceAB: 2.62 0.2 100. 1 0
36 | CONSTRAINT:: angle_A: 126.0 15.0 50.0 360. 0
37 | CONSTRAINT:: angle_B: 106.5 25.0 50.0 180. 0
38 | CONSTRAINT:: torsion_A: 0.0 25.0 50.0 180. 0
39 | CONSTRAINT:: torsion_AB: 90.0 10.0 0.0 180. 0
40 | CONSTRAINT:: torsion_B: 180.0 60.0 25.0 360. 0
41 |
42 | ALGORITHM_INFO:: match
43 | SECONDARY_MATCH: UPSTREAM_CST 1
44 | ALGORITHM_INFO::END
45 |
46 | CST::END
47 |
48 |
49 |
50 | ################## CST_3 ( oxyanion hole ) ###############
51 | ############ either SER/THR or TYR or ASN/GLN ##########
52 | VARIABLE_CST::BEGIN
53 |
54 | CST::BEGIN
55 | TEMPLATE:: ATOM_MAP: 1 atom_name: O1 N1 C7
56 | TEMPLATE:: ATOM_MAP: 1 residue3: BIO
57 |
58 | TEMPLATE:: ATOM_MAP: 2 atom_type: OH
59 | TEMPLATE:: ATOM_MAP: 2 residue1: ST
60 |
61 | CONSTRAINT:: distanceAB: 2.81 0.2 80.0 0 0
62 | CONSTRAINT:: angle_A: 150.0 5.0 10.0 360 0
63 | CONSTRAINT:: angle_B: 100.0 5.0 10.0 360 0
64 | CONSTRAINT:: torsion_A: 180.0 10.0 10.0 360 0
65 | CONSTRAINT:: torsion_AB: 71.0 10.0 10.0 90 0
66 | CONSTRAINT:: torsion_B: 180.0 10.0 10.0 120 0
67 |
68 | ALGORITHM_INFO:: match
69 | SECONDARY_MATCH: DOWNSTREAM
70 | ALGORITHM_INFO::END
71 | CST::END
72 |
73 | CST::BEGIN
74 | TEMPLATE:: ATOM_MAP: 1 atom_name: O1 N1 C7
75 | TEMPLATE:: ATOM_MAP: 1 residue3: BIO
76 |
77 | TEMPLATE:: ATOM_MAP: 2 atom_name: OH CZ CE2
78 | TEMPLATE:: ATOM_MAP: 2 residue3: TYR
79 |
80 | CONSTRAINT:: distanceAB: 2.81 0.2 80.0 0 0
81 | CONSTRAINT:: angle_A: 150.0 5.0 10.0 360 0
82 | CONSTRAINT:: angle_B: 100.0 5.0 10.0 360 0
83 | CONSTRAINT:: torsion_A: 180.0 10.0 10.0 360 0
84 | CONSTRAINT:: torsion_AB: 71.0 10.0 10.0 90 0
85 | CONSTRAINT:: torsion_B: 90.0 10.0 10.0 180 0
86 |
87 | ALGORITHM_INFO:: match
88 | SECONDARY_MATCH: DOWNSTREAM
89 | ALGORITHM_INFO::END
90 | CST::END
91 |
92 | CST::BEGIN
93 | TEMPLATE:: ATOM_MAP: 1 atom_name: O1 N1 C7
94 | TEMPLATE:: ATOM_MAP: 1 residue3: BIO
95 |
96 | TEMPLATE:: ATOM_MAP: 2 atom_type: NH2O
97 | TEMPLATE:: ATOM_MAP: 2 residue1: NQ
98 |
99 | CONSTRAINT:: distanceAB: 2.81 0.2 80.0 0 0
100 | CONSTRAINT:: angle_A: 150.0 5.0 10.0 360 0
101 | CONSTRAINT:: angle_B: 100.0 5.0 10.0 360 0
102 | CONSTRAINT:: torsion_A: 180.0 10.0 10.0 360 0
103 | CONSTRAINT:: torsion_AB: 71.0 10.0 10.0 90 0
104 | CONSTRAINT:: torsion_B: 180.0 10.0 10.0 180 0
105 |
106 | ALGORITHM_INFO:: match
107 | SECONDARY_MATCH: DOWNSTREAM
108 | ALGORITHM_INFO::END
109 | CST::END
110 |
111 | VARIABLE_CST::END
112 |
113 |
--------------------------------------------------------------------------------
/utils/kabsch_align.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import copy
3 | #Gyu Rie Lee
4 | #Borrowed kabsch code and modified slightly for superimposition
5 |
6 | #Use kabsch algorithm to align van der Mers with mainchain atoms (or given subset of coord)
7 | #get transformation matrix from xyz1 and xyz2 (could be N-CA-C of residues)
8 | #then use this to align residue+functional group
9 | #xyz1/coord_for_align1 would be the reference
10 | #IMPORTANT: xyz1_in is being copied inside as xyz1 because xyz1_in will be used repeatedly outside of this code
11 |
12 |
def np_kabsch(A,B):
    """
    Numpy version of kabsch algorithm. Superimposes B onto A

    Parameters:
        (A,B)  np.array - shape (N,3) arrays of xyz crds of points
                          (neither input is modified)

    Returns (in this order):
        rms - rmsd between the centered A and the centered+rotated B.
              A small epsilon (1e-6) is added under the square root, so a
              perfect fit reports 1e-3 rather than 0.
        rB  - the centered and rotated B coordinates
        R   - rotation matrix to superimpose B onto A, applied from the
              right: (B - centroid(B)) @ R
    """
    eps = 1e-6

    # work on float copies so the caller's arrays are left untouched
    A = np.array(A, dtype=float)
    B = np.array(B, dtype=float)

    # unpacking also enforces that the input is a 2-D (N, ndim) array
    N, ndim = A.shape

    # move both point sets to their centroids
    A = A - np.mean(A, axis=0, keepdims=True)
    B = B - np.mean(B, axis=0, keepdims=True)

    # computation of the covariance matrix
    C = np.matmul(A.T, B)

    # compute optimal rotation matrix using SVD
    U, S, Vt = np.linalg.svd(C)

    # ensure right handed coordinate system: flip the last axis when the
    # unconstrained solution would be a reflection
    d = np.eye(ndim)
    if np.linalg.det(Vt.T @ U.T) < 0:
        d[-1, -1] = -1.0

    # construct rotation matrix
    R = Vt.T @ d @ U.T

    # get rotated coords
    rB = B @ R

    # calculate rmsd: sum squared deviations over atoms and xyz, divide by N
    rms = np.sqrt(np.sum((A - rB) * (A - rB), axis=(-2, -1)) / N + eps)

    return rms, rB, R
66 |
67 |
def kabsch_align_coords(xyz1, xyz2_in, mobile_coord):
    """
    Superimposes mobile_coord onto the frame of xyz1, using the Kabsch
    rotation that fits xyz2_in onto xyz1.

    Arguments:
        xyz1 (array-like, (L, 3)) :: reference points
        xyz2_in (array-like, (L, 3)) :: points paired one-to-one with xyz1
        mobile_coord (array-like, (M, 3)) :: coordinates to transform

    Returns:
        np.array :: mobile_coord after subtracting the centroid of xyz2_in,
                    rotating, and adding the centroid of xyz1

    Raises:
        AssertionError if the two point sets differ in length or hold fewer
        than 3 points.

    None of the inputs is modified; xyz1 in particular is copied because the
    same reference is reused across calls.
    """
    # float copies: centering below happens in place on these copies only
    xyz1 = np.array(xyz1, dtype=float)
    xyz2 = np.array(xyz2_in, dtype=float)

    # check dimensions
    assert len(xyz1) == len(xyz2)
    L = len(xyz1)
    assert L > 2

    # move both sets of points to their centers of mass (COM)
    COM1 = np.sum(xyz1, axis=0) / float(L)
    COM2 = np.sum(xyz2, axis=0) / float(L)
    xyz1 -= COM1
    xyz2 -= COM2

    # SVD of the covariance matrix
    V, S, Wt = np.linalg.svd(np.dot(np.transpose(xyz2), xyz1))

    # check parity of the transformation: the determinant product is +-1 only
    # up to round-off, so test the sign instead of comparing with -1.0
    if np.linalg.det(V) * np.linalg.det(Wt) < 0.0:
        V[:, -1] = -V[:, -1]

    # U is simply V*Wt
    U = np.dot(V, Wt)

    superimposed_coord = np.dot(np.asarray(mobile_coord, dtype=float) - COM2, U)
    superimposed_coord += COM1
    return superimposed_coord
116 |
def kabsch_rmsd(xyz1_in,xyz2_in):
    """
    RMSD between two paired point sets after optimal superposition
    (translation + proper rotation, no reflection), computed from the
    singular values of the covariance matrix as described by Kabsch.

    Arguments:
        xyz1_in (array-like, (L, 3)) :: first set of points
        xyz2_in (array-like, (L, 3)) :: second set, paired one-to-one with the first

    Returns:
        float :: the minimized RMSD

    Raises:
        AssertionError if the sets differ in length or hold fewer than 3 points.

    Neither input is modified.
    """
    # float copies: centering below happens in place on these copies only
    xyz1 = np.array(xyz1_in, dtype=float)
    xyz2 = np.array(xyz2_in, dtype=float)

    # check dimensions
    assert len(xyz1) == len(xyz2)
    L = len(xyz1)
    assert L > 2

    # move both sets of points to their centers of mass (COM)
    COM1 = np.sum(xyz1, axis=0) / float(L)
    COM2 = np.sum(xyz2, axis=0) / float(L)
    xyz1 -= COM1
    xyz2 -= COM2

    # Initial residual, see Kabsch.
    E0 = np.sum(np.sum(xyz1 * xyz1, axis=0), axis=0) + np.sum(np.sum(xyz2 * xyz2, axis=0), axis=0)

    # SVD of the covariance matrix
    V, S, Wt = np.linalg.svd(np.dot(np.transpose(xyz2), xyz1))

    # check parity of the transformation: the determinant product is +-1 only
    # up to round-off, so test the sign instead of comparing with -1.0.
    # For a reflection the smallest singular value enters with negative sign.
    if np.linalg.det(V) * np.linalg.det(Wt) < 0.0:
        S[-1] = -S[-1]

    # abs() guards against a tiny negative residual caused by round-off
    RMSD = E0 - (2.0 * sum(S))
    RMSD = np.sqrt(abs(RMSD / L))

    return RMSD
156 |
157 |
--------------------------------------------------------------------------------
/utils/dunbrack_rotlib.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | import pandas as pd
3 | import os
4 |
# Comparison symbol (as written in a filter specification) -> name of the
# rich-comparison method that is looked up on the column being filtered.
comparisons = {
    '<=': '__le__', '<': '__lt__', '>': '__gt__', '>=': '__ge__', '=': '__eq__',
}

# (min, max) phi and psi limits per secondary-structure letter.
# NOTE: despite the name, the values are phi/psi ranges, not chi angles.
# "L" has no limits defined (empty tuples); "-" spans the full range.
chi_psi_SS = {
    "H": dict(phi=(-72.0, -50.0), psi=(-50.0, -30.0)),
    "E": dict(phi=(-161.0, -89.0), psi=(109.0, 151.0)),
    "L": dict(phi=(), psi=()),
    "-": dict(phi=(-180.0, 180.0), psi=(-180.0, 180.0)),
}
19 |
20 |
def load_rotamer_df(dunbrack_database):
    """
    Reads a whitespace-separated rotamer library into a DataFrame.

    Arguments:
        dunbrack_database (str or file-like) :: path (or open handle) passed to pandas.read_csv.
                                                The file is read without a header row and is
                                                expected to have 17 columns.

    Returns:
        pandas.DataFrame with the columns listed in `header`, plus chi{n}_min / chi{n}_max
        (chi{n} -/+ std{n}) for n = 1..4.
    """
    header = ["restype", "phi", "psi", "N", "r1", "r2", "r3", "r4", "prob",
              "chi1", "chi2", "chi3", "chi4", "std1", "std2", "std3", "std4"]
    # raw string: "\s" in a normal string literal is an invalid escape sequence
    rotlib = pd.read_csv(dunbrack_database, sep=r"\s+", names=header)
    for n in range(1, 5):
        rotlib[f"chi{n}_min"] = rotlib[f"chi{n}"] - rotlib[f"std{n}"]
        rotlib[f"chi{n}_max"] = rotlib[f"chi{n}"] + rotlib[f"std{n}"]
    return rotlib
28 |
29 |
def filter_rotlib(scores, filters):
    """
    Returns the rows of a DataFrame that pass a set of column filters.

    Arguments:
        scores (pandas.DataFrame) :: table to filter; it is copied, not modified
        filters (dict) :: {column: [value, symbol]} or {column: [[value, symbol], ...]},
                          where symbol is a key of the module-level `comparisons` dict.
                          A column may also map to None (no filtering on that column).
                          Filters for columns missing from `scores` are ignored.

    Returns:
        pandas.DataFrame containing only rows for which every condition holds.

    Raises:
        KeyError if a symbol is not found in `comparisons`.
    """
    filtered_scores = scores.copy()

    for column, spec in filters.items():
        # None must be checked before indexing into the specification
        if spec is None or column not in scores.keys():
            continue

        # a single [value, symbol] pair is treated as a one-item list of pairs
        conditions = spec if isinstance(spec[0], list) else [spec]

        for condition in conditions:
            if condition is None:
                continue
            value, symbol = condition[0], condition[1]
            compare = getattr(filtered_scores[column], comparisons[symbol])
            filtered_scores = filtered_scores.loc[compare(value)]
    return filtered_scores
46 |
47 |
def find_good_rotamers(rotlib, restype, cumulative_prob=1.0, secstruct=None, phi=None, psi=None, keep_only_best=False):
    """
    Selects rotamers of one residue type from a region of phi/psi space.

    Arguments:
        rotlib (pandas.DataFrame)
        restype (str) :: name3 of an amino acid in the rotamer library
        cumulative_prob (float) :: cumulative probability up to which rotamers are returned.
                                   The rotamer that pushes the sum over the cutoff is included too.
        secstruct (str, ('H', 'E', '-')) :: secondary structure type for which rotamers are searched.
                                            Takes precedence over phi/psi when given.
        phi (tuple, (float, float)) :: min and max phi value for defining a subset of the library
        psi (tuple, (float, float)) :: min and max psi value for defining a subset of the library
        keep_only_best (bool) :: only the first rotamer listed for each phi/psi bin is returned
                                 (presumably the most probable one; assumes the library lists
                                 rotamers by decreasing probability within a bin -- TODO confirm)

    Returns:
        pandas.DataFrame with the selected rotamers of every phi/psi bin, re-indexed from 0.

    Raises:
        AssertionError on invalid argument types, unsupported secstruct,
        or when neither secstruct nor both phi and psi are given.
    """
    assert isinstance(phi, (tuple, type(None)))
    assert isinstance(psi, (tuple, type(None)))
    assert secstruct in ("H", "E", "-", None), "Not implemented for other secondary structures yet"
    # assert restype not in ["ALA", "GLY"], "No rotamer library for ALA and GLY"
    assert not all([x is None for x in [secstruct, phi]]), "Must provide either secstruct letter OR phi and psi values"
    assert not all([x is None for x in [secstruct, psi]]), "Must provide either secstruct letter OR phi and psi values"

    if secstruct is not None:
        phi_limits = chi_psi_SS[secstruct]["phi"]
        psi_limits = chi_psi_SS[secstruct]["psi"]
    elif phi is not None and psi is not None:
        phi_limits = phi
        psi_limits = psi
    else:
        print("Both phi and psi need to be defined")
        return None

    filters = {'restype': [restype, '='],
               'phi': [[phi_limits[0], '>='], [phi_limits[1], '<=']],
               'psi': [[psi_limits[0], '>='], [psi_limits[1], '<=']]}

    SS_rotlib = filter_rotlib(rotlib, filters)

    # unique (phi, psi) bins, collected column-wise instead of row by row
    phi_psi_bins = list(set(zip(SS_rotlib["phi"], SS_rotlib["psi"])))

    selected = []
    for bin_phi, bin_psi in phi_psi_bins:
        _df = SS_rotlib.loc[(SS_rotlib["phi"] == bin_phi) & (SS_rotlib["psi"] == bin_psi)]
        if keep_only_best is True:
            # list indexer keeps a one-row DataFrame; a bare iloc[0] would be
            # a Series and corrupt the concatenated result
            _df2 = _df.iloc[[0]]
        elif cumulative_prob == 1.0:
            _df2 = _df.copy()
        else:
            _df2 = _df.loc[_df.prob.cumsum() <= cumulative_prob]

            # Also adding the next rotamer, the one that pushes the cumulative sum over the cutoff.
            # This fixes the issue where no rotamers are returned when the cutoff is lower
            # than the prob of the first rotamer.
            if len(_df2) < len(_df):
                _df2 = pd.concat([_df2, _df.iloc[[len(_df2)]]], ignore_index=True)
        selected.append(_df2)

    if not selected:
        return pd.DataFrame()
    return pd.concat(selected, ignore_index=True)
105 |
106 |
def find_bb_from_inverse(rotlib, chis):
    """
    Finds the library entries whose chi angles are compatible with the given chi values.

    Arguments:
        rotlib (pandas.DataFrame) :: rotamer library with chi{n} and std{n} columns
        chis (list) :: list of chi values; the i-th value is tested against
                       chi{i+1} -/+ std{i+1} (limits included)

    Returns:
        pandas.DataFrame :: copy of the rows of rotlib matching every given chi value,
                            with the original index and columns. An empty chis list
                            matches every row.
    """
    # one boolean per row, narrowed by each chi in turn
    matches = pd.Series([True] * len(rotlib), index=rotlib.index, dtype=bool)
    for n, ch in enumerate(chis, start=1):
        center = rotlib[f"chi{n}"]
        spread = rotlib[f"std{n}"]
        matches &= (center - spread <= ch) & (ch <= center + spread)
    # positional boolean array avoids any index alignment on selection
    return rotlib.loc[matches.to_numpy()].copy()
117 |
118 |
def find_bb_from_inverse_loc(rotlib, chis):
    """
    Finds the library entries whose chi ranges contain the given chi values.
    Arguments:
        rotlib (pandas.DataFrame) :: rotamer library. Preferably for a given amino acid.
                                     Needs chi{n}_min and chi{n}_max columns.
        chis (list) :: list of chi values; the i-th value is tested against chi{i+1}
    Returns:
        pandas.DataFrame :: the rows where every given chi lies within [min, max]
    """
    assert isinstance(rotlib, pd.DataFrame)
    subset = rotlib.copy()
    for number, angle in enumerate(chis, start=1):
        lower = subset[f"chi{number}_min"]
        upper = subset[f"chi{number}_max"]
        within_range = (lower <= angle) & (upper >= angle)
        subset = subset.loc[within_range]
    return subset
131 |
132 |
133 |
134 |
--------------------------------------------------------------------------------
/examples/P450/inputs/P450_motif.pdb:
--------------------------------------------------------------------------------
1 | ATOM 1 N HIS A 363 4.913 -43.057 15.166 1.00 11.88 A N
2 | ATOM 2 CA HIS A 363 4.586 -41.709 15.616 1.00 11.40 A C
3 | ATOM 3 C HIS A 363 3.735 -40.925 14.609 1.00 12.52 A C
4 | ATOM 4 O HIS A 363 3.491 -39.745 14.804 1.00 12.41 A O
5 | ATOM 5 CB HIS A 363 5.847 -40.946 15.976 1.00 12.94 A C
6 | ATOM 6 CG HIS A 363 6.460 -41.421 17.253 1.00 12.72 A C
7 | ATOM 7 CD2 HIS A 363 7.320 -42.431 17.522 1.00 12.58 A C
8 | ATOM 8 ND1 HIS A 363 6.167 -40.861 18.473 1.00 12.29 A N
9 | ATOM 9 CE1 HIS A 363 6.827 -41.482 19.434 1.00 11.30 A C
10 | ATOM 10 NE2 HIS A 363 7.514 -42.464 18.877 1.00 13.49 A N
11 | ATOM 11 N ARG A 364 3.235 -41.553 13.544 1.00 12.12 A N
12 | ATOM 12 CA ARG A 364 2.319 -40.877 12.622 1.00 11.10 A C
13 | ATOM 13 C ARG A 364 1.202 -40.146 13.356 1.00 12.71 A C
14 | ATOM 14 O ARG A 364 0.640 -40.610 14.344 1.00 12.68 A O
15 | ATOM 15 CB ARG A 364 1.685 -41.899 11.649 1.00 11.87 A C
16 | ATOM 16 CG ARG A 364 0.917 -41.342 10.487 1.00 13.05 A C
17 | ATOM 17 CD ARG A 364 0.408 -42.437 9.552 1.00 14.31 A C
18 | ATOM 18 NE ARG A 364 -0.194 -41.802 8.400 1.00 15.24 A N
19 | ATOM 19 CZ ARG A 364 0.121 -42.006 7.135 1.00 15.87 A C
20 | ATOM 20 NH1 ARG A 364 1.008 -42.903 6.794 1.00 15.19 A N1+
21 | ATOM 21 NH2 ARG A 364 -0.453 -41.271 6.209 1.00 16.74 A N
22 | ATOM 22 N CYS A 365 0.870 -38.980 12.842 1.00 11.45 A N
23 | ATOM 23 CA CYS A 365 -0.133 -38.095 13.440 1.00 10.72 A C
24 | ATOM 24 C CYS A 365 -1.398 -38.810 13.866 1.00 11.45 A C
25 | ATOM 25 O CYS A 365 -2.130 -39.345 13.038 1.00 13.42 A O
26 | ATOM 26 CB CYS A 365 -0.499 -37.044 12.396 1.00 10.95 A C
27 | ATOM 27 SG CYS A 365 -1.632 -35.790 12.940 1.00 12.75 A S
28 | ATOM 28 N ALA A 366 -1.739 -38.680 15.149 1.00 12.58 A N
29 | ATOM 29 CA ALA A 366 -2.981 -39.272 15.628 1.00 14.03 A C
30 | ATOM 30 C ALA A 366 -4.183 -38.592 15.020 1.00 15.69 A C
31 | ATOM 31 O ALA A 366 -5.249 -39.210 14.915 1.00 15.05 A O
32 | ATOM 32 CB ALA A 366 -3.101 -39.141 17.134 1.00 13.26 A C
33 | ATOM 33 N GLY A 367 -4.073 -37.328 14.670 1.00 12.82 A N
34 | ATOM 34 CA GLY A 367 -5.151 -36.485 14.210 1.00 14.57 A C
35 | ATOM 35 C GLY A 367 -5.299 -36.322 12.702 1.00 13.52 A C
36 | ATOM 36 O GLY A 367 -5.966 -35.395 12.227 1.00 13.45 A O
37 | ATOM 37 N GLU A 368 -4.747 -37.251 11.929 1.00 14.10 A N
38 | ATOM 38 CA GLU A 368 -4.816 -37.140 10.474 1.00 12.64 A C
39 | ATOM 39 C GLU A 368 -6.252 -37.199 9.966 1.00 15.94 A C
40 | ATOM 40 O GLU A 368 -6.635 -36.418 9.083 1.00 15.41 A O
41 | ATOM 41 CB GLU A 368 -3.961 -38.215 9.828 1.00 15.10 A C
42 | ATOM 42 CG GLU A 368 -3.784 -38.032 8.359 1.00 15.16 A C
43 | ATOM 43 CD GLU A 368 -2.640 -38.795 7.750 1.00 15.22 A C
44 | ATOM 44 OE1 GLU A 368 -2.460 -39.970 8.159 1.00 15.91 A O
45 | ATOM 45 OE2 GLU A 368 -1.967 -38.239 6.860 1.00 16.51 A O1-
46 | ATOM 46 N TRP A 369 -7.044 -38.135 10.472 1.00 16.59 A N
47 | ATOM 47 CA TRP A 369 -8.454 -38.191 10.058 1.00 16.63 A C
48 | ATOM 48 C TRP A 369 -9.248 -36.984 10.533 1.00 15.46 A C
49 | ATOM 49 O TRP A 369 -10.033 -36.427 9.756 1.00 18.41 A O
50 | ATOM 50 CB TRP A 369 -9.036 -39.532 10.485 1.00 18.58 A C
51 | ATOM 51 CG TRP A 369 -8.425 -40.568 9.565 1.00 37.49 A C
52 | ATOM 52 CD1 TRP A 369 -7.501 -41.507 9.903 1.00 40.53 A C
53 | ATOM 53 CD2 TRP A 369 -8.593 -40.683 8.131 1.00 40.81 A C
54 | ATOM 54 CE2 TRP A 369 -7.773 -41.750 7.700 1.00 42.82 A C
55 | ATOM 55 CE3 TRP A 369 -9.366 -40.004 7.180 1.00 41.25 A C
56 | ATOM 56 NE1 TRP A 369 -7.152 -42.253 8.808 1.00 38.35 A N
57 | ATOM 57 CZ2 TRP A 369 -7.710 -42.161 6.367 1.00 49.47 A C
58 | ATOM 58 CZ3 TRP A 369 -9.304 -40.417 5.854 1.00 46.90 A C
59 | ATOM 59 CH2 TRP A 369 -8.470 -41.477 5.461 1.00 41.19 A C
60 | ATOM 60 N VAL A 370 -8.981 -36.486 11.744 1.00 15.47 A N
61 | ATOM 61 CA VAL A 370 -9.591 -35.231 12.183 1.00 16.31 A C
62 | ATOM 62 C VAL A 370 -9.294 -34.108 11.199 1.00 16.02 A C
63 | ATOM 63 O VAL A 370 -10.169 -33.321 10.823 1.00 16.65 A O
64 | ATOM 64 CB VAL A 370 -9.137 -34.851 13.606 1.00 15.82 A C
65 | ATOM 65 CG1 VAL A 370 -9.382 -33.345 13.933 1.00 17.55 A C
66 | ATOM 66 CG2 VAL A 370 -9.801 -35.759 14.636 1.00 17.86 A C
67 | ATOM 67 N THR A 371 -8.020 -33.997 10.805 1.00 13.79 A N
68 | ATOM 68 CA THR A 371 -7.593 -32.922 9.932 1.00 13.46 A C
69 | ATOM 69 C THR A 371 -8.322 -32.993 8.592 1.00 12.99 A C
70 | ATOM 70 O THR A 371 -8.839 -31.976 8.099 1.00 13.80 A O
71 | ATOM 71 CB THR A 371 -6.089 -32.985 9.710 1.00 13.13 A C
72 | ATOM 72 CG2 THR A 371 -5.608 -31.874 8.840 1.00 14.33 A C
73 | ATOM 73 OG1 THR A 371 -5.358 -32.902 10.943 1.00 13.94 A O
74 | TER
75 | END
76 |
--------------------------------------------------------------------------------
/utils/align_pdbs.py:
--------------------------------------------------------------------------------
1 | import os,sys
2 | sys.path.append(os.path.dirname(os.path.realpath(__file__)))
3 | import kabsch_align
4 | import util
5 | import numpy as np
6 | import pyrosetta as pyr
7 | import pyrosetta.rosetta
8 |
9 |
def find_atom_idx(atom, mapping):
    """
    Return the 0-based position of `atom` inside `mapping`.

    Every entry of `mapping` is whitespace-stripped before it is compared
    with `atom`. Entries that have no `.strip()` method (for example None)
    are reported on stdout and skipped.

    Raises KeyError when no entry matches.
    """
    for position, entry in enumerate(mapping):
        try:
            candidate = entry.strip()
        except AttributeError:
            # Non-string entry: report it and keep searching
            print('This is atom ',entry)
            continue
        if candidate == atom:
            return position

    raise KeyError(f'Could not find atom {atom} in mapping {mapping}')
19 |
20 |
def align_pose_to_residue(ref_residue, mobile_pose, ref_atoms):
    """
    Superimpose a pose onto a reference residue and return the moved copy.

    ref_atoms["atoms1"] names the atoms taken from `ref_residue` and
    ref_atoms["atoms2"] lists the matching atoms taken from `mobile_pose`.
    The transform derived from these two atom sets is applied to all parsed
    coordinates of the pose. `mobile_pose` itself is not modified; a clone
    carrying the new coordinates is returned.
    """
    target_xyz, _ = get_xyz_stack_residue(ref_residue, ref_atoms["atoms1"])
    mobile_xyz, mobile_all_xyz = get_xyz_stack_pose(mobile_pose, ref_atoms["atoms2"])

    # Kabsch fit of the mobile atoms onto the target atoms.
    # NOTE(review): R is used as a right-multiplied rotation on centered
    # coordinates - confirm against kabsch_align.np_kabsch.
    rmsd, _, R = kabsch_align.np_kabsch(target_xyz, mobile_xyz)
    print('RMSD between atoms: ',rmsd)

    # Centroids of only the atoms that were used for the fit
    target_center = np.mean(target_xyz, axis=0, keepdims=True)
    mobile_center = np.mean(mobile_xyz, axis=0, keepdims=True)

    # Center the whole pose on the mobile centroid, rotate it, then move it
    # to where the target atoms sit
    moved_xyz = (mobile_all_xyz - mobile_center) @ R
    moved_xyz += target_center

    out_pose = mobile_pose.clone()
    for res_no, res_coords in enumerate(moved_xyz, start=1):
        for atom_no, atom_xyz in enumerate(res_coords, start=1):
            # NaN marks the unused tail of the fixed-size atom axis
            if np.isnan(atom_xyz[0]):
                break
            out_pose.residue(res_no).set_xyz(atom_no, pyrosetta.rosetta.numeric.xyzVector_double_t(*atom_xyz))
    return out_pose
60 |
61 |
def align_residue_to_residue(ref_residue, mobile_residue, ref_atoms):
    """
    Superimpose one residue onto another and return the moved copy.

    ref_atoms["atoms1"] names the atoms taken from `ref_residue` and
    ref_atoms["atoms2"] names the matching atoms taken from `mobile_residue`.
    The fit RMSD is printed only when it exceeds 0.1. `mobile_residue` is
    not modified; a clone carrying the new coordinates is returned.
    """
    target_xyz, _ = get_xyz_stack_residue(ref_residue, ref_atoms["atoms1"])
    mobile_xyz, mobile_all_xyz = get_xyz_stack_residue(mobile_residue, ref_atoms["atoms2"])

    # Kabsch fit of the mobile atoms onto the target atoms.
    # NOTE(review): R is used as a right-multiplied rotation on centered
    # coordinates - confirm against kabsch_align.np_kabsch.
    rmsd, _, R = kabsch_align.np_kabsch(target_xyz, mobile_xyz)
    if rmsd > 0.1:
        print('RMSD between atoms: ',rmsd)

    # Centroids of only the atoms that were used for the fit
    target_center = np.mean(target_xyz, axis=0, keepdims=True)
    mobile_center = np.mean(mobile_xyz, axis=0, keepdims=True)

    # Center the residue on the mobile centroid, rotate it, then move it
    # to where the target atoms sit
    moved_xyz = (mobile_all_xyz - mobile_center) @ R
    moved_xyz += target_center

    out_residue = mobile_residue.clone()
    for atom_no, atom_xyz in enumerate(moved_xyz[0], start=1):
        # NaN marks the unused tail of the fixed-size atom axis
        if np.isnan(atom_xyz[0]):
            break
        out_residue.set_xyz(atom_no, pyrosetta.rosetta.numeric.xyzVector_double_t(*atom_xyz))
    return out_residue
102 |
103 |
def get_xyz_stack_residue(residue, atoms_list):
    """
    Extracts the xyz crds of every atom named in atoms_list from one residue.

    atoms_list format: [atomname, atomname, ...]
    Each name is whitespace-stripped before it is looked up.

    Returns (xyz_out, xyz_all):
        xyz_out - array with one row of coordinates per requested atom
        xyz_all - the full coordinate array from parse_residue_coords
    Returns (None, None) for ligand or virtual residues.

    Raises KeyError (from find_atom_idx) if an atom name is not found in
    the atom-name table chosen for this residue.
    """
    if residue.is_ligand() or residue.is_virtual_residue():
        return None, None

    xyz_all = parse_residue_coords(residue)
    aa_index = util.alpha_1.index(residue.name1())

    # The atom-name table depends only on the residue, so it is picked
    # once instead of once per requested atom.
    if residue.is_lower_terminus():
        atom_names = util.aa2longH_Nterm[aa_index]
    elif residue.is_upper_terminus():
        atom_names = util.aa2longH_Cterm[aa_index]
    else:
        atom_names = util.aa2longH[aa_index]

    # Position of each requested atom in the table -> its coordinates
    xyz_out = [xyz_all[0, find_atom_idx(atom.strip(), atom_names), :]
               for atom in atoms_list]

    return np.array(xyz_out), xyz_all
138 |
139 |
def get_xyz_stack_pose(pose, atoms_list):
    """
    Collects the xyz crds of the requested atoms from a pose.

    atoms_list format: [(resno, atomname), (resno, atomname), ...]
    where resno is the 1-based residue number in the pose.

    Returns (xyz_out, xyz_all):
        xyz_out - array with one row of coordinates per requested atom
        xyz_all - the full coordinate array from parse_pose_coords
    """

    xyz_all = parse_pose_coords(pose)

    # Amino acid index of every residue that is neither ligand nor virtual.
    # NOTE(review): this list is indexed by resno-1 below, which presumably
    # relies on ligand/virtual residues coming after all others - confirm.
    seq = []
    for r in pose.residues:
        if r.is_ligand() or r.is_virtual_residue():
            continue
        seq.append(util.alpha_1.index(r.name1()))

    xyz_out = []
    for resn, atom in atoms_list:
        aa_index = seq[resn-1]

        # Terminal residues use their own atom-name tables
        target = pose.residue(resn)
        if target.is_lower_terminus():
            atom_names = util.aa2longH_Nterm[aa_index]
        elif target.is_upper_terminus():
            atom_names = util.aa2longH_Cterm[aa_index]
        else:
            atom_names = util.aa2longH[aa_index]

        # Position of the atom in the table -> its coordinates
        atom_pos = find_atom_idx(atom.strip(), atom_names)
        xyz_out.append(xyz_all[resn-1, atom_pos, :])

    return np.array(xyz_out), xyz_all
171 |
172 |
def parse_pose_coords(pose):
    """
    Builds a float32 array of shape (N, 26, 3) with the atom coordinates of
    every residue in the pose that is neither a ligand nor a virtual residue.

    N is the number of such residues. Rows are addressed by seqpos()-1 and
    atom slots that are not filled stay NaN. If a coordinate cannot be
    stored (IndexError), the residue name, seqpos and atom count are printed
    and the program exits with status 1.
    """
    kept = [r for r in pose.residues if not (r.is_ligand() or r.is_virtual_residue())]
    xyz = np.full((len(kept), 26, 3), np.nan, dtype=np.float32)

    for r in kept:
        for atom_no in range(1, r.natoms() + 1):
            try:
                xyz[r.seqpos() - 1][atom_no - 1] = r.xyz(atom_no)
            except IndexError:
                print(r.name())
                print(r.seqpos())
                print(r.natoms())
                sys.exit(1)
    return xyz
189 |
190 |
def parse_residue_coords(residue):
    """
    Builds a float32 array of shape (1, 26, 3) with the atom coordinates of
    a single residue. Atom slots that are not filled stay NaN.

    Returns None for ligand or virtual residues.
    """
    if residue.is_ligand() or residue.is_virtual_residue():
        return None

    xyz = np.full((1, 26, 3), np.nan, dtype=np.float32)
    for atom_no in range(1, residue.natoms() + 1):
        xyz[0][atom_no - 1] = residue.xyz(atom_no)
    return xyz
199 |
200 |
201 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # InvrotZyme
2 |
3 | Script for building inverse rotamer assemblies out of a Rosetta matcher/enzdes constraint file.
4 |
5 | This script will place sidechains according to the constraint file definitions, sample backbone positions, and optionally grow out extended backbone stubs (idealized helix or strand).
6 | This script will perform an exhaustive analysis of all allowed rotamers and CST samplings.
7 |
8 | You can also provide a motif PDB that will serve as a host for a particular constrained catalytic residue. That residue must exist in the PDB file, and only the rotamer will then be used for that residue.
9 |
10 | The purpose of this tool is to find combinations of inverse rotamers that can be placed (on small extended backbones) without clashes. The outputs of this script can subsequently be used as inputs for RFdiffusion All-Atom to create protein backbones that host these active sites.
11 |
12 |
13 |
14 | ## Examples
15 |
16 | A few usage examples are provided in `examples/`
17 |
18 | **Kemp eliminase example:**
19 | Places three catalytic residues around a benzisoxazole substrate. A HIS-GLU/ASP dyad on one side, and a SER/THR/TYR/GLN/ASN H-bond donor on the other side.
20 | `cd examples/Kemp_eliminase ; python ../../invrotzyme.py --cstfile inputs/BIO_His_ED_oxy_nosample.cst --params inputs/BIO.params --dunbrack_prob 0.6 --frac_random_rotamers_per_cst 0.5 0.5 0.5 0.5 --secstruct_per_cst H H E --prefix outputs/ --suffix HHE`
21 |
22 |
23 | **P450 example:**
24 | Places a custom Heme ligand in complex with a substrate against a CYS-containing motif from a cytochrome P450 enzyme.
25 | `cd examples/P450 ; python ../../invrotzyme.py --cstfile inputs/HBA_CYS_P450_nosample.cst --params inputs/HBA_unique.params --motif_for_cst 1:3:inputs/P450_motif.pdb --frac_random_rotamers 0.1 --prefix outputs/`
26 |
27 |
28 | ## Usage
29 |
30 | First prepare a matcher/enzdes Constraint file according to the standard format outlined in Rosetta documentation:
31 | https://docs.rosettacommons.org/docs/latest/rosetta_basics/file_types/match-cstfile-format
32 |
33 | This script requires all six degrees of freedom to be defined, so you must provide distance, 2 angles, and 3 torsions for each interaction.
34 |
35 | You can then run the script using many of the options below, perhaps taking inspiration from the provided examples.
36 |
37 | ```
38 | options:
39 | -h, --help show this help message and exit
40 | --cstfile CSTFILE CST file used for matching. Keep sampling to minimum to avoid combinatorial explosion.
41 | --params PARAMS [PARAMS ...]
42 | params files used by ligands and residues
43 | --keep_his_tautomer KEEP_HIS_TAUTOMER
44 | Per cst, should a specific HIS tautomer (HIS or HIS_D) be used. Keeps only one the requested HIS tautomers. Format: 'cst_no:HIS/HIS_D,..'
45 | --dunbrack_prob DUNBRACK_PROB
46 | Cumulative Dunbrack probability of used rotamers for any residue. As used by the -packing:dunbrack_prob_... flag in Rosetta.
47 | --dunbrack_prob_per_cst DUNBRACK_PROB_PER_CST [DUNBRACK_PROB_PER_CST ...]
48 | Cumulative Dunbrack probability of used rotamers for each CST residue.
49 | --N_len N_LEN Number of residues added to the stub N-term
50 | --C_len C_LEN Number of residues added to the stub C-term
51 | --N_len_per_cst N_LEN_PER_CST [N_LEN_PER_CST ...]
52 | Number of residues added to the stub N-term, per CST
53 | --C_len_per_cst C_LEN_PER_CST [C_LEN_PER_CST ...]
54 | Number of residues added to the stub C-term, per CST
55 | --prune_ligand_rotamers PRUNE_LIGAND_ROTAMERS
56 | Prunes the set of used ligand rotamers based on clashcheck, AND rmsd similarity cutoff.
57 | --max_random_rotamers MAX_RANDOM_ROTAMERS
58 | Number of random rotamers picked for each residue for the sampling. Reasonable number would be below 20 for quick sampling.
59 | --max_random_rotamers_per_cst MAX_RANDOM_ROTAMERS_PER_CST [MAX_RANDOM_ROTAMERS_PER_CST ...]
60 | Number of random rotamers picked for each CST block for the sampling. First value is for the ligand.
61 | --frac_random_rotamers FRAC_RANDOM_ROTAMERS
62 | Fraction of rotamers that are randomly picked for each residue for the sampling.
63 | --frac_random_rotamers_per_cst FRAC_RANDOM_ROTAMERS_PER_CST [FRAC_RANDOM_ROTAMERS_PER_CST ...]
64 | Fraction of rotamers that are randomly picked for each CST block for the sampling. First value is for the ligand.
65 | --secstruct SECSTRUCT
66 | What secondary structure stub should be generated for each residue.
67 | --secstruct_per_cst SECSTRUCT_PER_CST [SECSTRUCT_PER_CST ...]
68 | Per CST, what secondary structure stub should be generated for reaach residue.
69 | --motif_for_cst MOTIF_FOR_CST [MOTIF_FOR_CST ...]
70 | Per CST, an external motif that should be used, instead of inverse rotamers. Only works for the first CST right now.
71 | Format: cst_no:resno_in_motif:filepath ...
72 | --use_best_rotamer_cstids USE_BEST_ROTAMER_CSTIDS [USE_BEST_ROTAMER_CSTIDS ...]
73 | CST ID's that should only use the best rotamer from each secondary structure bin. Numbering starts from 1.
74 | --extra_chi EXTRA_CHI
75 | Enables extra CHI sampling on a given level for all CST's. Input format: chino:level,chino2:level2
76 | --extra_chi_per_cst EXTRA_CHI_PER_CST [EXTRA_CHI_PER_CST ...]
77 | Enables extra CHI sampling on a given level for specific CST's. Input format: CSTNO1-chino:level,chino2:level2 CSTNO2-chino:level,chino2:level2
78 | Sampling levels:
79 | 0 Default original dihedral only; same as using no flag at all
80 | 1 +/- one standard deviation (sd); 3 samples
81 | 2 +/- 0.5 sd; 3 samples
82 | 3 +/- 1 & 2 sd; 5 samples
83 | 4 +/- 0.5 & 1 sd; 5 samples
84 | 5 +/- 0.5, 1, 1.5 & 2 sd; 9 samples
85 | 6 +/- 0.33, 0.67, 1 sd; 7 samples
86 | 7 +/- 0.25, 0.5, 0.75, 1, 1.25 & 1.5 sd; 13 samples.
87 | --suffix SUFFIX Suffix to be added to the end of output PDB files
88 | --prefix PREFIX Prefix to be added to the beginning of output PDB files
89 | --tip_atom Inverse rotamers will be pre-selected based on whether the tip atoms are placed geometrically differently. Rotamer diversity is ignored.
90 | --debug Debug mode. Printing more stuff out and running single-threaded
91 | ```
92 |
93 | The script runs by default on multiple CPU cores using python multiprocessing. When submitted as a Slurm job, it will adjust the number of cores based on the environment variable `SLURM_CPUS_ON_NODE`.
94 |
95 |
96 | ### Best practices
97 |
98 | Keep conformational sampling levels in the CST file to a minimum to avoid combinatorial explosion. Only sample torsions that are expected to lead to different valid assemblies.
99 |
100 | It's possible to limit the sampling by randomly picking rotamers for each residue, and limiting how the sidechain placements are sampled in the CST file.
101 | It's possible to control the length of the generated idealized backbone stub (from zero to ...).
102 | It's possible to control most of the parameters separately for each constraint block.
103 | By using the `--tip_atom` argument, it is possible to skip the inverse rotamer clash analysis, and only output assemblies based on their unique placement of catalytic atoms.
104 |
105 | The output PDB files of this script will also contain the `REMARK 666 ...` lines which are required by the Rosetta enzdes constraint parser. As such, the outputs are suitable for building more complex enzyme design pipelines.
106 | For example, the published all-atom diffusion pipeline (https://github.com/ikalvet/heme_binder_diffusion) is directly compatible with the outputs of this script.
107 |
108 |
109 | ### Requirements
110 |
111 | Python packages that are required:
112 | ```
113 | pyrosetta
114 | numpy
115 | pandas
116 | scipy
117 | ```
118 |
--------------------------------------------------------------------------------
/examples/P450/inputs/HBA_unique.params:
--------------------------------------------------------------------------------
1 | NAME HBA
2 | IO_STRING HBA Z
3 | TYPE LIGAND
4 | AA UNK
5 | ATOM FE1 Fe3p X 3.00
6 | ATOM N2 Npro X -0.37
7 | ATOM C33 aroC X -0.11
8 | ATOM C32 aroC X -0.11
9 | ATOM C34 CH3 X -0.27
10 | ATOM H8 Hapo X 0.10
11 | ATOM H9 Hapo X 0.10
12 | ATOM H10 Hapo X 0.10
13 | ATOM C2 aroC X -0.11
14 | ATOM C3 CH2 X -0.18
15 | ATOM C4 CH2 X -0.18
16 | ATOM C5 COO X 0.62
17 | ATOM O1 OOC X -0.76
18 | ATOM O3 OOC X -0.76
19 | ATOM H27 Hapo X 0.10
20 | ATOM H28 Hapo X 0.10
21 | ATOM H21 Hapo X 0.10
22 | ATOM H25 Hapo X 0.10
23 | ATOM C1 aroC X -0.11
24 | ATOM C28 aroC X -0.11
25 | ATOM C6 aroC X -0.11
26 | ATOM C7 aroC X -0.11
27 | ATOM C8 CH2 X -0.18
28 | ATOM C9 CH2 X -0.18
29 | ATOM C10 COO X 0.62
30 | ATOM O2 OOC X -0.76
31 | ATOM O4 OOC X -0.76
32 | ATOM H29 Hapo X 0.10
33 | ATOM H30 Hapo X 0.10
34 | ATOM H26 Hapo X 0.10
35 | ATOM H3 Hapo X 0.10
36 | ATOM C11 aroC X -0.11
37 | ATOM C12 aroC X -0.11
38 | ATOM N1 Npro X -0.37
39 | ATOM C31 aroC X -0.11
40 | ATOM C14 aroC X -0.11
41 | ATOM N4 Npro X -0.37
42 | ATOM C19 aroC X -0.11
43 | ATOM C30 aroC X -0.11
44 | ATOM C21 aroC X -0.11
45 | ATOM N3 Npro X -0.37
46 | ATOM C26 aroC X -0.11
47 | ATOM C29 aroC X -0.11
48 | ATOM H20 Haro X 0.12
49 | ATOM C25 aroC X -0.11
50 | ATOM C27 CH3 X -0.27
51 | ATOM H13 Hapo X 0.10
52 | ATOM H12 Hapo X 0.10
53 | ATOM H11 Hapo X 0.10
54 | ATOM C22 aroC X -0.11
55 | ATOM C23 aroC X -0.11
56 | ATOM C24 aroC X -0.11
57 | ATOM H5 Haro X 0.12
58 | ATOM H4 Haro X 0.12
59 | ATOM H1 Haro X 0.12
60 | ATOM H24 Haro X 0.12
61 | ATOM C18 aroC X -0.11
62 | ATOM C15 aroC X -0.11
63 | ATOM C16 aroC X -0.11
64 | ATOM C17 aroC X -0.11
65 | ATOM H6 Haro X 0.12
66 | ATOM H7 Haro X 0.12
67 | ATOM H2 Haro X 0.12
68 | ATOM C20 CH3 X -0.27
69 | ATOM H14 Hapo X 0.10
70 | ATOM H15 Hapo X 0.10
71 | ATOM H16 Hapo X 0.10
72 | ATOM H23 Haro X 0.12
73 | ATOM C13 CH3 X -0.27
74 | ATOM H19 Hapo X 0.10
75 | ATOM H18 Hapo X 0.10
76 | ATOM H17 Hapo X 0.10
77 | ATOM H22 Haro X 0.12
78 | ATOM O5 OH X -0.66
79 | ATOM C35 CH2 X -0.18
80 | ATOM O6 OH X -0.66
81 | ATOM C36 aroC X -0.11
82 | ATOM C38 aroC X -0.11
83 | ATOM C40 aroC X -0.11
84 | ATOM C41 aroC X -0.11
85 | ATOM C39 aroC X -0.11
86 | ATOM C37 aroC X -0.11
87 | ATOM H34 Haro X 0.12
88 | ATOM H37 Haro X 0.12
89 | ATOM C42 aroC X -0.11
90 | ATOM C44 aroC X -0.11
91 | ATOM C46 aroC X -0.11
92 | ATOM C47 aroC X -0.11
93 | ATOM C45 aroC X -0.11
94 | ATOM C43 aroC X -0.11
95 | ATOM H38 Haro X 0.12
96 | ATOM H40 Haro X 0.12
97 | ATOM H42 Haro X 0.12
98 | ATOM H41 Haro X 0.12
99 | ATOM H39 Haro X 0.12
100 | ATOM H36 Haro X 0.12
101 | ATOM H35 Haro X 0.12
102 | ATOM H33 Hapo X 0.10
103 | ATOM H32 Hapo X 0.10
104 | ATOM H31 Hpol X 0.43
105 | BOND_TYPE O1 C5 4
106 | BOND_TYPE O3 C5 4
107 | BOND_TYPE C5 C4 1
108 | BOND_TYPE H8 C34 1
109 | BOND_TYPE H21 C3 1
110 | BOND_TYPE C4 C3 1
111 | BOND_TYPE C4 H27 1
112 | BOND_TYPE C4 H28 1
113 | BOND_TYPE H9 C34 1
114 | BOND_TYPE C34 H10 1
115 | BOND_TYPE C34 C32 1
116 | BOND_TYPE C3 H25 1
117 | BOND_TYPE C3 C2 1
118 | BOND_TYPE O2 C10 4
119 | BOND_TYPE C32 C2 4
120 | BOND_TYPE C32 C33 4
121 | BOND_TYPE C2 C1 4
122 | BOND_TYPE O4 C10 4
123 | BOND_TYPE C10 C9 1
124 | BOND_TYPE H20 C29 1
125 | BOND_TYPE H22 C28 1
126 | BOND_TYPE C33 C29 2
127 | BOND_TYPE C33 N2 4
128 | BOND_TYPE C1 C28 2
129 | BOND_TYPE C1 N2 4
130 | BOND_TYPE H26 C8 1
131 | BOND_TYPE C29 C26 1
132 | BOND_TYPE H13 C27 1
133 | BOND_TYPE C28 C6 1
134 | BOND_TYPE H12 C27 1
135 | BOND_TYPE C9 H29 1
136 | BOND_TYPE C9 C8 1
137 | BOND_TYPE C9 H30 1
138 | BOND_TYPE N2 FE1 1
139 | BOND_TYPE C27 H11 1
140 | BOND_TYPE C27 C25 1
141 | BOND_TYPE C8 H3 1
142 | BOND_TYPE C8 C7 1
143 | BOND_TYPE C26 C25 1
144 | BOND_TYPE C26 N3 2
145 | BOND_TYPE C6 C7 1
146 | BOND_TYPE C6 N1 2
147 | BOND_TYPE H33 C35 1
148 | BOND_TYPE C25 C22 2
149 | BOND_TYPE C7 C11 2
150 | BOND_TYPE C35 H32 1
151 | BOND_TYPE C35 O6 1
152 | BOND_TYPE H31 O5 1
153 | BOND_TYPE N3 FE1 1
154 | BOND_TYPE N3 C21 1
155 | BOND_TYPE N1 FE1 1
156 | BOND_TYPE N1 C12 1
157 | BOND_TYPE FE1 O5 1
158 | BOND_TYPE FE1 N4 1
159 | BOND_TYPE C22 C21 1
160 | BOND_TYPE C22 C23 1
161 | BOND_TYPE C11 C12 1
162 | BOND_TYPE C11 C13 1
163 | BOND_TYPE O6 C36 1
164 | BOND_TYPE H1 C23 1
165 | BOND_TYPE C21 C30 2
166 | BOND_TYPE C12 C31 2
167 | BOND_TYPE H19 C13 1
168 | BOND_TYPE C23 C24 2
169 | BOND_TYPE C13 H18 1
170 | BOND_TYPE C13 H17 1
171 | BOND_TYPE H35 C38 1
172 | BOND_TYPE N4 C19 4
173 | BOND_TYPE N4 C14 4
174 | BOND_TYPE C36 C38 4
175 | BOND_TYPE C36 C37 4
176 | BOND_TYPE C30 C19 1
177 | BOND_TYPE C30 H24 1
178 | BOND_TYPE C31 C14 1
179 | BOND_TYPE C31 H23 1
180 | BOND_TYPE C38 C40 4
181 | BOND_TYPE C24 H5 1
182 | BOND_TYPE C24 H4 1
183 | BOND_TYPE C19 C18 4
184 | BOND_TYPE C14 C15 4
185 | BOND_TYPE H34 C37 1
186 | BOND_TYPE C37 C39 4
187 | BOND_TYPE C40 H36 1
188 | BOND_TYPE C40 C41 4
189 | BOND_TYPE C18 C15 4
190 | BOND_TYPE C18 C20 1
191 | BOND_TYPE C15 C16 1
192 | BOND_TYPE C39 C41 4
193 | BOND_TYPE C39 H37 1
194 | BOND_TYPE C41 C42 1
195 | BOND_TYPE H14 C20 1
196 | BOND_TYPE H2 C16 1
197 | BOND_TYPE C16 C17 2
198 | BOND_TYPE C20 H15 1
199 | BOND_TYPE C20 H16 1
200 | BOND_TYPE H39 C44 1
201 | BOND_TYPE C42 C44 4
202 | BOND_TYPE C42 C43 4
203 | BOND_TYPE C17 H6 1
204 | BOND_TYPE C17 H7 1
205 | BOND_TYPE H38 C43 1
206 | BOND_TYPE C44 C46 4
207 | BOND_TYPE C43 C45 4
208 | BOND_TYPE C46 H41 1
209 | BOND_TYPE C46 C47 4
210 | BOND_TYPE C45 C47 4
211 | BOND_TYPE C45 H40 1
212 | BOND_TYPE C47 H42 1
213 | BOND_TYPE O5 C35 1
214 | CHI 1 C3 C4 C5 O1
215 | CHI 2 C2 C3 C4 C5
216 | CHI 3 C32 C2 C3 C4
217 | CHI 4 C8 C9 C10 O2
218 | CHI 5 C7 C8 C9 C10
219 | CHI 6 C6 C7 C8 C9
220 | CHI 7 O5 C35 O6 C36
221 | CHI 8 N2 FE1 O5 C35
222 | CHI 9 C25 C22 C23 C24
223 | CHI 10 C35 O6 C36 C38
224 | CHI 11 C18 C15 C16 C17
225 | CHI 12 C40 C41 C42 C44
226 | CHI 13 FE1 O5 C35 O6
227 | NBR_ATOM O5
228 | NBR_RADIUS 13.456399
229 | ICOOR_INTERNAL FE1 0.000000 0.000000 0.000000 FE1 N2 C33
230 | ICOOR_INTERNAL N2 0.000000 180.000000 2.018878 FE1 N2 C33
231 | ICOOR_INTERNAL C33 0.000001 53.581195 1.373135 N2 FE1 C33
232 | ICOOR_INTERNAL C32 -178.502193 69.602248 1.452331 C33 N2 FE1
233 | ICOOR_INTERNAL C34 -179.986284 55.631927 1.496810 C32 C33 N2
234 | ICOOR_INTERNAL H8 179.523543 68.748802 1.092786 C34 C32 C33
235 | ICOOR_INTERNAL H9 -120.214586 68.550017 1.096877 C34 C32 H8
236 | ICOOR_INTERNAL H10 -119.584492 68.563454 1.096894 C34 C32 H9
237 | ICOOR_INTERNAL C2 -179.832410 73.531587 1.369725 C32 C33 C34
238 | ICOOR_INTERNAL C3 179.828563 50.915427 1.499732 C2 C32 C33
239 | ICOOR_INTERNAL C4 -102.596835 67.625659 1.538258 C3 C2 C32
240 | ICOOR_INTERNAL C5 146.999694 65.222093 1.554685 C4 C3 C2
241 | ICOOR_INTERNAL O1 -31.997497 64.516933 1.262209 C5 C4 C3
242 | ICOOR_INTERNAL O3 -178.654391 62.681761 1.262040 C5 C4 O1
243 | ICOOR_INTERNAL H27 -122.780968 70.057389 1.096230 C4 C3 C5
244 | ICOOR_INTERNAL H28 -115.836979 70.552845 1.100306 C4 C3 H27
245 | ICOOR_INTERNAL H21 119.769291 68.760642 1.092767 C3 C2 C4
246 | ICOOR_INTERNAL H25 120.234202 68.569298 1.096805 C3 C2 H21
247 | ICOOR_INTERNAL C1 -179.655565 73.427377 1.452820 C2 C32 C3
248 | ICOOR_INTERNAL C28 178.719920 55.957567 1.384487 C1 C2 C32
249 | ICOOR_INTERNAL C6 -178.930708 55.033211 1.384065 C28 C1 C2
250 | ICOOR_INTERNAL C7 179.988903 55.835882 1.453199 C6 C28 C1
251 | ICOOR_INTERNAL C8 -0.860763 55.699372 1.501415 C7 C6 C28
252 | ICOOR_INTERNAL C9 -78.998795 65.749088 1.546648 C8 C7 C6
253 | ICOOR_INTERNAL C10 141.999037 67.347387 1.551252 C9 C8 C7
254 | ICOOR_INTERNAL O2 -33.999719 62.282349 1.259720 C10 C9 C8
255 | ICOOR_INTERNAL O4 -179.957769 65.001979 1.266190 C10 C9 O2
256 | ICOOR_INTERNAL H29 -118.434948 72.078025 1.099244 C9 C8 C10
257 | ICOOR_INTERNAL H30 -117.376528 69.709170 1.095638 C9 C8 H29
258 | ICOOR_INTERNAL H26 120.034261 68.570555 1.096912 C8 C7 C9
259 | ICOOR_INTERNAL H3 120.192030 68.728840 1.092802 C8 C7 H26
260 | ICOOR_INTERNAL C11 179.868008 73.441608 1.369284 C7 C6 C8
261 | ICOOR_INTERNAL C12 -0.154467 73.482580 1.452883 C11 C7 C6
262 | ICOOR_INTERNAL N1 0.435801 69.675550 1.374007 C12 C11 C7
263 | ICOOR_INTERNAL C31 -178.693261 55.893570 1.380984 C12 C11 N1
264 | ICOOR_INTERNAL C14 178.357907 54.599034 1.386888 C31 C12 C11
265 | ICOOR_INTERNAL N4 0.520628 54.286519 1.372941 C14 C31 C12
266 | ICOOR_INTERNAL C19 -179.055731 73.706335 1.377697 N4 C14 C31
267 | ICOOR_INTERNAL C30 -178.505888 54.779402 1.384031 C19 N4 C14
268 | ICOOR_INTERNAL C21 2.654589 54.793775 1.382403 C30 C19 N4
269 | ICOOR_INTERNAL N3 -4.661808 55.020478 1.378742 C21 C30 C19
270 | ICOOR_INTERNAL C26 -175.324645 73.700213 1.380305 N3 C21 C30
271 | ICOOR_INTERNAL C29 177.683167 53.999984 1.385889 C26 N3 C21
272 | ICOOR_INTERNAL H20 -179.979397 62.804618 1.083332 C29 C26 N3
273 | ICOOR_INTERNAL C25 -178.928272 69.621569 1.440461 C26 N3 C29
274 | ICOOR_INTERNAL C27 -178.514185 55.444074 1.496833 C25 C26 N3
275 | ICOOR_INTERNAL H13 -57.561069 68.564267 1.096437 C27 C25 C26
276 | ICOOR_INTERNAL H12 119.456698 68.786471 1.096955 C27 C25 H13
277 | ICOOR_INTERNAL H11 119.928154 68.567917 1.092300 C27 C25 H12
278 | ICOOR_INTERNAL C22 178.673006 73.118881 1.379818 C25 C26 C27
279 | ICOOR_INTERNAL C23 -179.979763 54.615808 1.459849 C22 C25 C26
280 | ICOOR_INTERNAL C24 150.023835 51.869958 1.342020 C23 C22 C25
281 | ICOOR_INTERNAL H5 -179.640676 59.615916 1.085871 C24 C23 C22
282 | ICOOR_INTERNAL H4 178.171547 56.914633 1.083754 C24 C23 H5
283 | ICOOR_INTERNAL H1 -177.989675 65.539017 1.089079 C23 C22 C24
284 | ICOOR_INTERNAL H24 -179.987889 62.579510 1.079747 C30 C19 C21
285 | ICOOR_INTERNAL C18 178.982124 69.399441 1.443148 C19 N4 C30
286 | ICOOR_INTERNAL C15 -0.483648 73.586246 1.381379 C18 C19 N4
287 | ICOOR_INTERNAL C16 -177.712163 50.665146 1.457704 C15 C18 C19
288 | ICOOR_INTERNAL C17 -28.973816 53.788201 1.342234 C16 C15 C18
289 | ICOOR_INTERNAL H6 -2.582383 57.411776 1.085027 C17 C16 C15
290 | ICOOR_INTERNAL H7 -178.671386 59.329067 1.085731 C17 C16 H6
291 | ICOOR_INTERNAL H2 179.942344 64.071680 1.088635 C16 C15 C17
292 | ICOOR_INTERNAL C20 -176.831231 54.601971 1.495128 C18 C19 C15
293 | ICOOR_INTERNAL H14 27.956740 68.323585 1.093567 C20 C18 C19
294 | ICOOR_INTERNAL H15 -120.526429 68.078446 1.097641 C20 C18 H14
295 | ICOOR_INTERNAL H16 -119.213017 69.806158 1.094147 C20 C18 H15
296 | ICOOR_INTERNAL H23 -178.630689 62.934282 1.082653 C31 C12 C14
297 | ICOOR_INTERNAL C13 -179.788688 50.851531 1.496813 C11 C7 C12
298 | ICOOR_INTERNAL H19 -0.846953 68.746995 1.092826 C13 C11 C7
299 | ICOOR_INTERNAL H18 120.198842 68.537795 1.096911 C13 C11 H19
300 | ICOOR_INTERNAL H17 119.584023 68.569414 1.096832 C13 C11 H18
301 | ICOOR_INTERNAL H22 179.655343 62.492735 1.083429 C28 C1 C6
302 | ICOOR_INTERNAL O5 93.551083 88.797704 1.766351 FE1 N2 C33
303 | ICOOR_INTERNAL C35 -88.167337 58.378822 2.508687 O5 FE1 N2
304 | ICOOR_INTERNAL O6 -57.034305 64.528007 1.383909 C35 O5 FE1
305 | ICOOR_INTERNAL C36 -55.550936 60.118970 1.362558 O6 C35 O5
306 | ICOOR_INTERNAL C38 17.111617 56.522838 1.401330 C36 O6 C35
307 | ICOOR_INTERNAL C40 175.458107 60.992428 1.388896 C38 C36 O6
308 | ICOOR_INTERNAL C41 1.079143 60.518840 1.395052 C40 C38 C36
309 | ICOOR_INTERNAL C39 0.432097 58.242537 1.398985 C41 C40 C38
310 | ICOOR_INTERNAL C37 -0.958172 61.160057 1.384293 C39 C41 C40
311 | ICOOR_INTERNAL H34 -179.126725 58.776514 1.083687 C37 C39 C41
312 | ICOOR_INTERNAL H37 -179.384445 60.046487 1.082011 C39 C41 C37
313 | ICOOR_INTERNAL C42 -179.992532 60.881265 1.540038 C41 C40 C39
314 | ICOOR_INTERNAL C44 -45.000184 59.059228 1.405006 C42 C41 C40
315 | ICOOR_INTERNAL C46 -179.996457 59.039759 1.393954 C44 C42 C41
316 | ICOOR_INTERNAL C47 -0.001829 59.754353 1.396227 C46 C44 C42
317 | ICOOR_INTERNAL C45 0.000386 60.546632 1.395743 C47 C46 C44
318 | ICOOR_INTERNAL C43 0.003095 59.739786 1.393927 C45 C47 C46
319 | ICOOR_INTERNAL H38 179.998065 60.342795 1.085673 C43 C45 C47
320 | ICOOR_INTERNAL H40 179.993256 59.919618 1.086355 C45 C47 C43
321 | ICOOR_INTERNAL H42 -179.994438 59.724947 1.086054 C47 C46 C45
322 | ICOOR_INTERNAL H41 -179.993764 60.344214 1.086348 C46 C44 C47
323 | ICOOR_INTERNAL H39 179.999701 60.615603 1.085631 C44 C42 C46
324 | ICOOR_INTERNAL H36 -178.439340 59.339446 1.081631 C40 C38 C41
325 | ICOOR_INTERNAL H35 -174.807835 59.206472 1.082225 C38 C36 C40
326 | ICOOR_INTERNAL H33 122.944578 70.550409 1.088203 C35 O5 O6
327 | ICOOR_INTERNAL H32 118.437744 85.616119 1.094307 C35 O5 H33
328 | ICOOR_INTERNAL H31 1.092413 66.031827 1.245030 O5 FE1 C35
329 | PDB_ROTAMERS HBA_conformers_unique.pdb
330 |
--------------------------------------------------------------------------------
/invrotzyme.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Fri Apr 15 12:48:51 2022
5 |
6 | @author: ikalvet
7 | """
8 | import argparse
9 | import pyrosetta as pyr
10 | import pyrosetta.rosetta
11 | import pyrosetta.distributed.io
12 | import sys, os
13 | import itertools
14 | import functools
15 | import operator
16 | import time
17 | import numpy as np
18 | import pandas as pd
19 | import multiprocessing
20 | import random
21 | import scipy.spatial
22 | script_dir = os.path.dirname(os.path.realpath(__file__))
23 | sys.path.append(script_dir)
24 | sys.path.append(script_dir+'/utils/')
25 | import protocol
26 | import utils
27 | import dunbrack_rotlib
28 | import align_pdbs
29 |
30 |
31 |
32 |
33 | def process_rotamer_set_queue(q, early_stop, prefix, bad_rotamers, rotamers, cst_io, cst_atoms, motifs, results_found):
34 | while True:
35 | i_ids = q.get()
36 | if i_ids is None:
37 | return
38 |
39 | i = i_ids[0]
40 | ids = i_ids[1]
41 | # Grabbing a combination of inverse rotamers based on the provided
42 | # per-cst inverse rotamer ids.
43 | c = [rotamers[n][i] for n, i in enumerate(ids)]
44 |
45 | if any([rot_id in bad_rotamers[j] for j, rot_id in enumerate(ids)]):
46 | # print(f"Bad rotamer in set {i}")
47 | continue
48 |
49 | # TODO: implement symmetry here
50 | # Take the list "c" and apply some symmetric transform to the residues there
51 | # Then the rest of the code should take care of it appropriately
52 |
53 | pose = pyrosetta.rosetta.core.pose.Pose()
54 | bad_rotamer = False
55 | catres_resnos = {n: 0 for n,r in enumerate(c) if not isinstance(r, pyrosetta.rosetta.core.pose.Pose) and r.is_ligand()}
56 | ligands = [r for r in c if not isinstance(r, pyrosetta.rosetta.core.pose.Pose) and r.is_ligand()]
57 | for j, res in enumerate(c):
58 | if args.debug:
59 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue):
60 | print(i, j, res.name())
61 | elif isinstance(res, pyrosetta.rosetta.core.pose.Pose):
62 | print(i, j, res.pdb_info().name())
63 |
64 | if not isinstance(res, pyrosetta.rosetta.core.pose.Pose) and res.is_ligand(): # ligand
65 | continue
66 |
67 | # If we have already seen that it's a bad rotamer then let's just skip it
68 | if ids[j] in bad_rotamers[j]:
69 | if args.debug: print(f"{j}, previously seen as a bad rotamer")
70 | bad_rotamer = True
71 | break
72 |
73 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue):
74 | _res_pose = pyrosetta.rosetta.core.pose.Pose()
75 | _res_pose.append_residue_by_jump(res, 0)
76 | if res.is_protein():
77 | _res_pose = protocol.extend_SS(pose=_res_pose, ref_seqpos=1,
78 | secstruct=args.secstruct_per_cst[j], AAA=AAA,
79 | nres_Nterm=args.N_len_per_cst[j],
80 | nres_Cterm=args.C_len_per_cst[j])
81 | _res_pose.fold_tree().clear()
82 | _res_pose.fold_tree().add_edge(1, _res_pose.size(), -1) # This will avoid FoldTree reordering error showing up
83 | catres_resno = args.N_len_per_cst[j]+1
84 |
85 | elif isinstance(res, pyrosetta.rosetta.core.pose.Pose):
86 | _res_pose = res.clone()
87 | catres_resno = motifs[j]["resno"]
88 |
89 | # Figuring out information about which CST atoms are used for this residue
90 | catres_cst_atoms = protocol.identify_cst_atoms_for_res(res, j, catres_resno, _res_pose, cst_atoms[j], motifs, ligands)
91 |
92 |
93 | # Adding ligand to the extended chain and checking for clashes
94 | for ligand in ligands:
95 | # _res_pose.append_residue_by_jump(ligand, 1) # this doesn't turn ligand into new chain
96 | _res_pose.append_residue_by_jump(ligand, catres_resno,
97 | jump_anchor_atom=_res_pose.residue(catres_resno).atom_name(_res_pose.residue(catres_resno).nbr_atom()),
98 | jump_root_atom=ligand.atom_name(ligand.nbr_atom()),
99 | start_new_chain=True)
100 |
101 | if protocol.check_clash(_res_pose, catres_resnos=[catres_resno]+[r.seqpos() for r in _res_pose.residues if r.is_ligand()], cst_atoms=catres_cst_atoms, tip_atom=args.tip_atom, debug=args.debug) is True:
102 | if args.debug: print(f"{j}, clash after extension")
103 | # Only adding the residude object to the bad residues
104 | # The motif pose will never be dumped
105 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue):
106 | if ids[j] not in bad_rotamers[j]:
107 | bad_rotamers[j].append(ids[j])
108 | elif isinstance(res, pyrosetta.rosetta.core.pose.Pose):
109 | if args.debug: print("MOTIF POSE SEEMS TO GIVE CLASH!!!! PLEASE INVESTIGATE!!!")
110 | bad_rotamer = True
111 |
112 | # Giving up if all rotamers are bad
113 | if len(set(bad_rotamers[j])) == len(rotamers[j]):
114 | print(f"All rotamers for CST {j} are bad...")
115 | break
116 |
117 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue):
118 | catres_resnos[j] = pose.size() + args.N_len_per_cst[j]+1
119 | else:
120 | catres_resnos[j] = motifs[j]["resno"]
121 |
122 | pyrosetta.rosetta.core.pose.append_subpose_to_pose(pose, _res_pose, 1, _res_pose.size()-len(ligands), new_chain=True)
123 |
124 | # Finished individual evaluation of residues
125 | # Now putting the whole thing together
126 | if bad_rotamer is True:
127 | if args.debug: print(f"{j}, bad rotamer")
128 | continue
129 |
130 | # Adding ligand as the last residue
131 | for _n,res in enumerate(c):
132 | if isinstance(res, pyrosetta.rosetta.core.pose.Pose):
133 | continue
134 | if res.is_ligand():
135 | lig_pose = pyrosetta.rosetta.core.pose.Pose()
136 | lig_pose.append_residue_by_jump(res, 0)
137 | pyrosetta.rosetta.core.pose.append_subpose_to_pose(pose, lig_pose, 1, 1, new_chain=True)
138 | catres_resnos[_n] = pose.size()
139 |
140 | # Checking for clashes
141 | # Ignoring clashes between catalytic residues and the ligand
142 | ignore_clash_respairs = []
143 | for j in catres_resnos:
144 | if isinstance(c[j], pyrosetta.rosetta.core.conformation.Residue):
145 | assert pose.residue(catres_resnos[j]).name3() == c[j].name3(), f"cst {j}: resno {catres_resnos[j]}, {c[j].name3()} != {pose.residue(catres_resnos[j]).name3()}"
146 | if j == 0:
147 | continue
148 | if args.debug: print(f"clashcheck exclude cst atoms, cst {j}, resno {catres_resnos[j]}, name {pose.residue(catres_resnos[j]).name()}")
149 | ignore_clash_respairs.append((catres_resnos[0], catres_resnos[j]))
150 |
151 | clash = protocol.check_clash(pose, catres_resnos=catres_resnos.values(), ignore_respairs=ignore_clash_respairs, tip_atom=args.tip_atom, debug=args.debug)
152 | if clash is True:
153 | if args.debug: print(f"{i}, clash in the final assembly")
154 | continue
155 | if args.debug: print(j, pose.sequence())
156 |
157 | # TODO: Need to implement checking whether the pose actually respects the CST's
158 | # This is an issue when the ligand has any chi sampling enabled, and another residue is matched downstream of that.
159 | # Some combinations of rotamers are not meant to work together
160 | ## I think this is now managed in the REMARK 666 generation stage
161 |
162 | pose_name = args.prefix
163 | for res in c:
164 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue):
165 | if res.is_protein():
166 | pose_name += res.name1() + "_"
167 | else:
168 | pose_name += res.name3() + "_"
169 | elif isinstance(res, pyrosetta.rosetta.core.pose.Pose):
170 | pose_name += os.path.basename(res.pdb_info().name()).replace(".pdb", "") + "_"
171 | pose_name += f"{prefix}_{i}{args.suffix}.pdb"
172 | if os.path.exists(pose_name):
173 | print(f"Found existing file with name {pose_name}")
174 | pose_name.replace(".pdb", "a.pdb")
175 |
176 |
177 | remarks = protocol.create_remark_lines(pose, catres_resnos, cst_io)
178 |
179 | if len(remarks) != len(catres_resnos) - 1:
180 | if args.debug: print(f"{i}: Could not build all REMARK 666 lines")
181 | continue
182 |
183 | print(f"Found good rotamer: {pose_name.replace('.pdb', '')}")
184 |
185 | pdbstr = pyrosetta.distributed.io.to_pdbstring(pose).split("\n")
186 |
187 | pdbstr_new = []
188 | for l in pdbstr:
189 | pdbstr_new.append(l)
190 | if "HEADER" in l:
191 | for rmrk in remarks:
192 | pdbstr_new.append(rmrk)
193 | with open(pose_name, "w") as file:
194 | file.write("\n".join(pdbstr_new))
195 |
196 | results_found.append(ids)
197 | if args.max_outputs is not None and len(results_found) > args.max_outputs:
198 | early_stop.value = True
199 | print(f"Reached the output limit of {args.max_outputs}")
200 |
201 |
202 |
203 |
def parallelize_mp(iterables, rotset, prefix, cst_io, cst_atoms, motifs, results_found):
    """Feed rotamer-id combinations to a pool of worker processes.

    A bounded queue is filled with ``(index, combination)`` tuples taken from
    ``iterables``. Every pool process is started with
    ``process_rotamer_set_queue`` as its initializer and receives the queue,
    the shared early-stop flag and the shared bookkeeping containers through
    ``initargs``. After the combinations, one ``None`` per process is queued
    as an end marker.

    Reads the module-level ``args`` (``args.nproc``).

    Args:
        iterables: iterable of rotamer-id combinations; ``len()`` is taken of
            the first one to set up one bad-rotamer list per position.
        rotset: rotamer set handed unchanged to the workers.
        prefix: label of this rotamer set; used in printed messages and
            handed to the workers.
        cst_io, cst_atoms, motifs: handed unchanged to the workers.
        results_found: shared list of results, or None to create a new
            manager-backed list.

    Returns:
        The (possibly newly created) shared ``results_found`` list.
    """
    work_queue = multiprocessing.Queue(maxsize=args.nproc)  # holds the pending combinations

    t_start = time.time()
    manager = multiprocessing.Manager()
    bad_rotamers = manager.dict()
    early_stop = multiprocessing.Value("b", False)

    if results_found is None:
        results_found = manager.list()

    print(f"Starting to generate inverse rotamer assemblies using {args.nproc} parallel processes.")
    worker_args = (work_queue, early_stop, prefix, bad_rotamers, rotset,
                   cst_io, cst_atoms, motifs, results_found)
    pool = multiprocessing.Pool(processes=args.nproc,
                                initializer=process_rotamer_set_queue,
                                initargs=worker_args)

    for index, combination in enumerate(iterables):
        if index == 0:
            # One shared list of rejected rotamer ids per position in a combination
            for position in range(len(combination)):
                bad_rotamers[position] = manager.list()
        # The flag is stored as a signed char, so True is held as 1
        if early_stop.value == 1:
            work_queue.put(None)
            break
        work_queue.put((index, combination))

    # One end marker per process
    for _ in range(args.nproc):
        work_queue.put(None)

    # Shut down the queue first, then the pool
    work_queue.close()
    work_queue.join_thread()
    pool.close()
    pool.join()

    print(f"Bad rotamers from set {prefix}:")
    for position in bad_rotamers:
        unique_bad = list(set(bad_rotamers[position]))
        print(f" CST {position}: {unique_bad}")

    t_end = time.time()
    print(f"Processing all the rotamers in set {prefix} took {(t_end - t_start):.2f} seconds")
    return results_found
247 |
248 |
249 |
250 |
def main(args):
    """Build inverse-rotamer assemblies for every rotamer set derived from a CST file.

    The function, in order:
      1. normalises the output suffix and initialises PyRosetta (with any
         ``.params`` files passed through ``-extra_res_fa``);
      2. loads the Dunbrack rotamer library and parses the CST file;
      3. lets ``protocol.parse_arguments`` post-process ``args``;
      4. collects, per CST block and residue type, the acceptable rotamers;
      5. generates inverse rotamers with ``TheozymeInvrotTree``;
      6. for each rotamer set: prunes, optionally aligns external motifs,
         filters, subsamples, randomly picks, and finally hands every
         combination of rotamer ids to ``parallelize_mp``.

    Args:
        args: argparse.Namespace as produced by the parser in the
            ``__main__`` block of this file.

    Returns:
        None. Progress is printed to stdout.

    Raises:
        AssertionError: if the CST file does not exist or
            ``--keep_his_tautomer`` contains a value other than HIS / HIS_D.
    """
    if args.suffix != "":
        args.suffix = f"_{args.suffix}"

    # args.prefix is used verbatim; the original re-assigned it to an
    # f-string of itself, which was a no-op and has been dropped.

    assert os.path.exists(args.cstfile), f"CST file not found: {args.cstfile}"
    extra_res_fa = ""
    if args.params is not None:
        params = [p for p in args.params if ".params" in p]
        extra_res_fa = "-extra_res_fa " + ' '.join(params)

    # --- Setting up PyRosetta ---
    # pyr.init(f"{extra_res_fa} -run:preserve_header -output_virtual true")
    pyr.init(f"{extra_res_fa} -run:preserve_header")

    # Loading the backbone-dependent Dunbrack rotamer library into a dataframe
    dunbrack_database = os.path.dirname(pyr.__file__) + "/database/rotamer/bbdep02.May.sortlib-correct.12.2010"
    rotlib = dunbrack_rotlib.load_rotamer_df(dunbrack_database)

    global AAA  # making it global so that functions downstream can see it
    AAA = pyr.pose_from_sequence("AAA")

    ###### CST PARSING ########
    # Parsing the CST file
    # NOTE(review): addcst_mover is never used below; kept in case the
    # constructor has side effects inside Rosetta - confirm and remove.
    addcst_mover = pyrosetta.rosetta.protocols.enzdes.AddOrRemoveMatchCsts()
    chem_manager = pyrosetta.rosetta.core.chemical.ChemicalManager.get_instance()
    residue_type_set = chem_manager.residue_type_set("fa_standard")
    cst_io = pyrosetta.rosetta.protocols.toolbox.match_enzdes_util.EnzConstraintIO(residue_type_set)
    cst_io.read_enzyme_cstfile(args.cstfile)

    # Figuring out which residue atoms are used for each cst
    # Using the MCFI (MatcherConstraintFileInfo) object for that
    cst_atoms = protocol.get_cst_atoms(cst_io)

    # Storing information about which residues are matched for each CST block
    # (CST blocks are numbered from 1)
    restypes = {
        n: [restype.name3() for restype in cst_io.mcfi_list(n).upstream_restypes()]
        for n in range(1, cst_io.mcfi_lists_size() + 1)
    }

    ### PROCESS ARGUMENTS A BIT FURTHER ###
    # NOTE(review): parallelize_mp reads the module-level `args`, so this
    # presumably relies on parse_arguments returning the same namespace
    # object it was given - confirm.
    args = protocol.parse_arguments(args, restypes)

    #### PARSING HIS TAUTOMER RESTRICTIONS #####
    keep_his_tautomer_per_cst = None
    if args.keep_his_tautomer is not None:
        # Input format: "cst_no:HIS,cst_no:HIS_D,..."
        keep_his_tautomer_per_cst = {int(x.split(":")[0]): x.split(":")[1] for x in args.keep_his_tautomer.split(",")}
        assert all(val in ["HIS", "HIS_D"] for val in keep_his_tautomer_per_cst.values()), "Invalid input for --keep_his_tautomer"

    ### ROTAMER SUBSAMPLING ####
    chi_subsampling_levels = protocol.parse_rotamer_subsampling(args, cst_atoms)

    ### Putting together a dictionary listing good rotamers for each residue in each CST
    restype_good_rotamers = {}
    for n, names in restypes.items():
        use_only_best_rotamer = n in args.use_best_rotamer_cstids
        good_for_cst = restype_good_rotamers[n] = {}
        for restyp in names:
            if restyp not in utils.N_chis:
                continue
            # Skip residue types already handled for this CST block. The
            # original compared against the outer dict (keyed by CST number),
            # so the check never matched and duplicates were recomputed.
            if restyp in good_for_cst:
                continue
            good_for_cst[restyp] = dunbrack_rotlib.find_good_rotamers(rotlib, restyp, args.dunbrack_prob_per_cst[n],
                                                                      args.secstruct_per_cst[n],
                                                                      keep_only_best=use_only_best_rotamer)

    ### PARSING EXTERNAL MOTIFS ####
    # TODO: make external motifs usable with other CST id's, not just the 1st one
    motifs = None
    if args.motif_for_cst is not None:
        motifs = protocol.parse_motif_input(args.motif_for_cst, cst_atoms, restypes)

    ### GETTING INVERSE ROTAMERS ####
    ### This is where half of the work gets done ###
    invrot_tree = pyrosetta.rosetta.protocols.toolbox.match_enzdes_util.TheozymeInvrotTree(cst_io)
    invrot_tree.generate_targets_and_inverse_rotamers()
    all_inverse_rotamers_per_cst = invrot_tree.collect_all_inverse_rotamers()

    ## There is a way to get inverse rotamers from cst_io
    ## need to investigate this, because this allows keeping the sub-cst information
    #
    # target_ats = pyrosetta.rosetta.utility.vector1_unsigned_long()
    # invrot_ats = pyrosetta.rosetta.utility.vector1_unsigned_long()
    #
    # _mcfi.inverse_rotamers_against_residue(target_conf=lig, invrot_restype=_mcfi.allowed_restypes(_mcfi.upstream_res())[1],
    #     target_ats=target_ats, invrot_ats=invrot_ats, flip_exgs_upstream_downstream_samples=False, backbone_interaction=False)

    # NOTE(review): purpose of this pause is not evident from the code - confirm.
    time.sleep(1)

    print(f"{len(all_inverse_rotamers_per_cst)} rotamer sets to process")

    results_found = None
    for xx, rotset in enumerate(all_inverse_rotamers_per_cst):
        print(f"Non-redundant rotamer set {xx+1}")
        for cst_block, invrots in enumerate(rotset.invrots()):
            print(f"CST {cst_block}: {len(invrots)} inverse rotamers.")

        # Listify the inverse rotamer dataset
        rotset_sub = [list(invrots) for invrots in rotset.invrots()]

        # Pruning all other inverse rotamers based on proton-chis.
        # Removing duplicate rotamers where the only difference is in the value of the proton_chi
        for rotset_id in range(len(rotset_sub)):
            first = rotset_sub[rotset_id][0]
            if isinstance(first, pyrosetta.rosetta.core.pose.Pose) or first.is_ligand():
                continue
            _n_before = len(rotset_sub[rotset_id])
            rotset_sub[rotset_id] = protocol.prune_residue_rotamers(rotset_sub[rotset_id])
            if len(rotset_sub[rotset_id]) != _n_before:
                print(f"CST {rotset_id}: {len(rotset_sub[rotset_id])} inverse rotamers after pruning for proton-chi")

        # Loading any external motifs, if provided and aligning them to the appropriate CST atoms
        if args.motif_for_cst is not None:
            for cstno in motifs:
                # TODO: implement for not-first CST's (or CST's with additional sampling from CST file),
                # Picking rotamers with unique subsampling defined in CST
                to_align_rotamers = protocol.find_unique_rotamers_for_motif([r if i==cstno else [] for i, r in enumerate(rotset_sub)], motifs)
                rotset_sub[cstno] = [align_pdbs.align_pose_to_residue(rotamer, motifs[cstno]["pose"],
                                                                      {"atoms1": motifs[cstno]["atoms"],
                                                                       "atoms2": [(motifs[cstno]["resno"], a) for a in motifs[cstno]["atoms"]]}) for rotamer in to_align_rotamers[cstno]]

        # Pruning inverse rotamers based on Dunbrack probabilites
        rotset_sub = protocol.preselect_inverse_rotamers(rotset_sub, restype_good_rotamers, keep_his_tautomer_per_cst)
        if rotset_sub is None:
            continue

        # Culling ligand rotamers based on RMSD cutoff
        if args.prune_ligand_rotamers != 0.0:
            for rotset_id in range(len(rotset_sub)):
                if isinstance(rotset_sub[rotset_id][0], pyrosetta.rosetta.core.pose.Pose):
                    continue
                if rotset_sub[rotset_id][0].is_ligand():
                    rotset_sub[rotset_id] = protocol.prune_ligand_rotamers(rotset_sub[rotset_id], args.prune_ligand_rotamers, args.nproc)

        # Performing rotamer subsampling (expanding CHI's), only if any level is non-zero
        if any(level != 0 for levels in chi_subsampling_levels.values() for level in levels.values()):
            rotset_sub = protocol.subsample_rotamers(rotset_sub, chi_subsampling_levels, restype_good_rotamers, cst_atoms)

        # Picking random rotamers if requested
        if args.frac_random_rotamers_per_cst is not None or args.max_random_rotamers_per_cst is not None:
            print("Picking a random subset of inverse rotamers")
            rotset_sub = protocol.pick_random_rotamers_set(rotset_sub, max_random_rotamers_per_cst=args.max_random_rotamers_per_cst,
                                                           frac_random_rotamers_per_cst=args.frac_random_rotamers_per_cst)

        for cst_block, invrots in enumerate(rotset_sub):
            print(f"CST {cst_block}: {len(invrots)} inverse rotamers after filtering.")

        # Combinations are expressed as tuples of indices into rotset_sub
        rotset_ids = [list(range(len(x))) for x in rotset_sub]
        rotamer_id_combinations = itertools.product(*rotset_ids)

        # Processing this subset of rotamers
        print(f"{functools.reduce(operator.mul, map(len, rotset_ids), 1)} inverse rotamer combinations to process in this set.")
        results_found = parallelize_mp(iterables=rotamer_id_combinations, rotset=rotset_sub, prefix=xx+1, cst_io=cst_io, cst_atoms=cst_atoms, motifs=motifs, results_found=results_found)
427 |
428 |
if __name__ == "__main__":
    # Command-line entry point: build the parser, resolve the number of
    # worker processes, then run main(). `args` stays a module-level name
    # because parallelize_mp reads it as a global.
    parser = argparse.ArgumentParser()
    parser.add_argument("--cstfile", type=str, required=True, help="CST file used for matching. Keep sampling to minimum to avoid combinatorial explosion.")
    parser.add_argument("--params", nargs="+", required=False, help="params files used by ligands and residues")
    parser.add_argument("--keep_his_tautomer", type=str, help="Per cst, should a specific HIS tautomer (HIS or HIS_D) be used. Keeps only one the requested HIS tautomers. Format: 'cst_no:HIS/HIS_D,..'")
    parser.add_argument("--dunbrack_prob", type=float, default=0.85, help="Cumulative Dunbrack probability of used rotamers for any residue\n."
                        "As used by the -packing:dunbrack_prob_... flag in Rosetta.")
    parser.add_argument("--dunbrack_prob_per_cst", type=float, nargs="+", help="Cumulative Dunbrack probability of used rotamers for each CST residue.")
    parser.add_argument("--N_len", type=int, default=4, help="Number of residues added to the stub N-term")
    parser.add_argument("--C_len", type=int, default=5, help="Number of residues added to the stub C-term")
    parser.add_argument("--N_len_per_cst", type=int, nargs="+", help="Number of residues added to the stub N-term, per CST")
    parser.add_argument("--C_len_per_cst", type=int, nargs="+", help="Number of residues added to the stub C-term, per CST")
    parser.add_argument("--prune_ligand_rotamers", type=float, default=0.0, help="Prunes the set of used ligand rotamers based on clashcheck, AND rmsd similarity cutoff.")
    parser.add_argument("--max_random_rotamers", type=int, help="Number of random rotamers picked for each residue for the sampling. Reasonable number would be below 20 for quick sampling.")
    parser.add_argument("--max_random_rotamers_per_cst", nargs="+", type=int, help="Number of random rotamers picked for each CST block for the sampling. First value is for the ligand.")
    parser.add_argument("--frac_random_rotamers", type=float, help="Fraction of rotamers that are randomly picked for each residue for the sampling.")
    parser.add_argument("--frac_random_rotamers_per_cst", nargs="+", type=float, help="Fraction of rotamers that are randomly picked for each CST block for the sampling. First value is for the ligand.")
    parser.add_argument("--secstruct", type=str, default="H", choices=["E", "H"], help="What secondary structure stub should be generated for each residue.")
    parser.add_argument("--secstruct_per_cst", nargs="+", type=str, help="Per CST, what secondary structure stub should be generated for each residue.")
    parser.add_argument("--motif_for_cst", type=str, nargs="+", help="Per CST, an external motif that should be used, instead of inverse rotamers. Only works for the first CST right now. Format: cst_no:resno_in_motif:filepath ...")
    parser.add_argument("--use_best_rotamer_cstids", nargs="+", type=int, default=[], help="CST ID's that should only use the best rotamer from each secondary structure bin. Numbering starts from 1.")
    parser.add_argument("--extra_chi", type=str, help="Enables extra CHI sampling on a given level for all CST's. Input format: chino:level,chino2:level2")
    parser.add_argument("--extra_chi_per_cst", nargs="+", help=f"Enables extra CHI sampling on a given level for specific CST's. Input format: CSTNO1-chino:level,chino2:level2 CSTNO2-chino:level,chino2:level2\nSampling levels:\n{protocol.calculate_samplings.__doc__}")
    parser.add_argument("--suffix", type=str, default= "", help="Suffix to be added to the end of output files")
    parser.add_argument("--prefix", type=str, default= "", help="Prefix to be added to the beginning of output files")
    parser.add_argument("--tip_atom", action="store_true", default=False, help="Inverse rotamers will be pre-selected based on whether the tip atoms are placed geometrically differently. Rotamer diversity is ignored.")
    parser.add_argument("--nproc", type=int, help="Number of CPU cores used.")
    parser.add_argument("--max_outputs", type=int, help="Maximum number of output structures that will be produced.")
    parser.add_argument("--debug", action="store_true", default=False, help="Debug mode. Will print out more output at each step. Will run in single-core mode.")

    args = parser.parse_args()

    # Worker count precedence: SLURM allocation, then --nproc, then the
    # detected CPU count; debug mode always forces a single process.
    if "SLURM_CPUS_ON_NODE" in os.environ:
        args.nproc = int(os.environ["SLURM_CPUS_ON_NODE"])
    if args.nproc is None:
        # os.cpu_count() returns None when the count cannot be determined;
        # fall back to one process so later range(args.nproc) calls work.
        args.nproc = os.cpu_count() or 1
    if args.debug:
        args.nproc = 1

    main(args)
469 |
470 |
--------------------------------------------------------------------------------
/utils/util.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 |
4 |
# Three-letter residue names; the list position is the residue's integer id.
num2aa = [
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS',
    'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
    'LEU', 'LYS', 'MET', 'PHE', 'PRO',
    'SER', 'THR', 'TRP', 'TYR', 'VAL',
]

# Three-letter name -> integer id
aa2num = {name: idx for idx, name in enumerate(num2aa)}

# One-letter alphabet in the same order as num2aa, plus a trailing "-" symbol
alpha_1 = list("ARNDCQEGHILKMFPSTWYV-")
# Integer id -> one-letter code, and the reverse lookup
aa_N_1 = dict(enumerate(alpha_1))
aa_1_N = {letter: idx for idx, letter in enumerate(alpha_1)}

# One-letter <-> three-letter lookups; zip stops at the shorter list,
# so the trailing "-" has no three-letter counterpart.
aa123 = dict(zip(alpha_1, num2aa))
aa321 = dict(zip(num2aa, alpha_1))
20 |
def N_to_AA(x):
    """Convert integer-encoded sequences into one-letter strings.

    Args:
        x: array-like of integer ids; a 1-D input is treated as a single
            sequence, otherwise each row is one sequence.

    Returns:
        list of str, one per sequence. Ids missing from ``aa_N_1`` are
        rendered as "-".
    """
    encoded = np.array(x)
    if encoded.ndim == 1:
        # Promote a single sequence to a batch of one
        encoded = encoded[None]
    sequences = []
    for row in encoded:
        letters = [aa_N_1.get(idx, "-") for idx in row]
        sequences.append("".join(letters))
    return sequences
25 |
26 |
def alphabet_mapping(seq_list, alphabet_dict):
    """Encode each sequence token by token through a lookup table.

    Args:
        seq_list: a list of sequences, e.g. ['ABADSDAS', 'AABSDVDDV']
        alphabet_dict: mapping from token to its encoded value

    Returns:
        encoded: a list with one list of encoded values per input sequence

    Raises:
        KeyError: if a token is missing from ``alphabet_dict``.
    """
    encoded = []
    for seq in seq_list:
        encoded.append([alphabet_dict[token] for token in seq])
    return encoded
36 |
37 |
def alphabet_onehot_2_onehot(alphabet1, alphabet2):
    '''
    Build the matrix that re-orders a one-hot encoding between two alphabets.

    Args:
        alphabet1: List of amino acids in order (A characters)
        alphabet2: List of the same amino acids in a different order

    Returns:
        AxA numpy array whose entry [i, j] is 1 when the symbol at position j
        of alphabet1 sits at position i of alphabet2, and 0 elsewhere

    Raises:
        AssertionError: if the alphabets differ in length
        KeyError: if a symbol of alphabet1 is absent from alphabet2
    '''
    assert len(alphabet1) == len(alphabet2), 'The alphabets must be the same length'

    size = len(alphabet1)
    position_in_first = {symbol: idx for idx, symbol in enumerate(alphabet1)}
    position_in_second = {symbol: idx for idx, symbol in enumerate(alphabet2)}

    mapping = np.zeros((size, size))
    for symbol in alphabet1:
        row = position_in_second[symbol]
        col = position_in_first[symbol]
        mapping[row, col] = 1

    return mapping
61 |
62 |
# minimal sc atom representation (Nx8)
# One tuple per residue type, in the same order as num2aa (see the trailing
# residue comment on each row). Each tuple has 8 slots holding atom-name
# strings; unused slots are None.
# NOTE(review): the atom-name strings differ in length (e.g. " N " vs " CA ");
# PDB atom names are normally 4 characters wide - confirm these literals.
aa2short=[
    (" N "," CA "," C "," CB ", None, None, None, None), # ala
    (" N "," CA "," C "," CB "," CG "," CD "," NE "," CZ "), # arg
    (" N "," CA "," C "," CB "," CG "," OD1", None, None), # asn
    (" N "," CA "," C "," CB "," CG "," OD1", None, None), # asp
    (" N "," CA "," C "," CB "," SG ", None, None, None), # cys
    (" N "," CA "," C "," CB "," CG "," CD "," OE1", None), # gln
    (" N "," CA "," C "," CB "," CG "," CD "," OE1", None), # glu
    (" N "," CA "," C ", None, None, None, None, None), # gly
    (" N "," CA "," C "," CB "," CG "," ND1", None, None), # his
    (" N "," CA "," C "," CB "," CG1"," CD1", None, None), # ile
    (" N "," CA "," C "," CB "," CG "," CD1", None, None), # leu
    (" N "," CA "," C "," CB "," CG "," CD "," CE "," NZ "), # lys
    (" N "," CA "," C "," CB "," CG "," SD "," CE ", None), # met
    (" N "," CA "," C "," CB "," CG "," CD1", None, None), # phe
    (" N "," CA "," C "," CB "," CG "," CD ", None, None), # pro
    (" N "," CA "," C "," CB "," OG ", None, None, None), # ser
    (" N "," CA "," C "," CB "," OG1", None, None, None), # thr
    (" N "," CA "," C "," CB "," CG "," CD1", None, None), # trp
    (" N "," CA "," C "," CB "," CG "," CD1", None, None), # tyr
    (" N "," CA "," C "," CB "," CG1", None, None, None), # val
]
86 |
# full sc atom representation (Nx14)
# One tuple per residue type, in the same order as num2aa (see the trailing
# residue comment on each row). Each tuple has 14 slots: the backbone atoms
# N, CA, C, O first, then the side-chain heavy atoms; unused slots are None.
# NOTE(review): the atom-name strings differ in length (e.g. " N " vs " CA ");
# PDB atom names are normally 4 characters wide - confirm these literals.
aa2long=[
    (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # ala
    (" N "," CA "," C "," O "," CB "," CG "," CD "," NE "," CZ "," NH1"," NH2", None, None, None), # arg
    (" N "," CA "," C "," O "," CB "," CG "," OD1"," ND2", None, None, None, None, None, None), # asn
    (" N "," CA "," C "," O "," CB "," CG "," OD1"," OD2", None, None, None, None, None, None), # asp
    (" N "," CA "," C "," O "," CB "," SG ", None, None, None, None, None, None, None, None), # cys
    (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," NE2", None, None, None, None, None), # gln
    (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," OE2", None, None, None, None, None), # glu
    (" N "," CA "," C "," O ", None, None, None, None, None, None, None, None, None, None), # gly
    (" N "," CA "," C "," O "," CB "," CG "," ND1"," CD2"," CE1"," NE2", None, None, None, None), # his
    (" N "," CA "," C "," O "," CB "," CG1"," CG2"," CD1", None, None, None, None, None, None), # ile
    (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2", None, None, None, None, None, None), # leu
    (" N "," CA "," C "," O "," CB "," CG "," CD "," CE "," NZ ", None, None, None, None, None), # lys
    (" N "," CA "," C "," O "," CB "," CG "," SD "," CE ", None, None, None, None, None, None), # met
    (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ ", None, None, None), # phe
    (" N "," CA "," C "," O "," CB "," CG "," CD ", None, None, None, None, None, None, None), # pro
    (" N "," CA "," C "," O "," CB "," OG ", None, None, None, None, None, None, None, None), # ser
    (" N "," CA "," C "," O "," CB "," OG1"," CG2", None, None, None, None, None, None, None), # thr
    (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE2"," CE3"," NE1"," CZ2"," CZ3"," CH2"), # trp
    (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ "," OH ", None, None), # tyr
    (" N "," CA "," C "," O "," CB "," CG1"," CG2", None, None, None, None, None, None, None), # val
]
110 |
# build the "alternate" sc mapping
# Same layout as aa2long (one 14-slot tuple per residue type, None-padded).
# The rows differ from aa2long only for arg (NH1/NH2), asp (OD1/OD2),
# glu (OE1/OE2), leu (CD1/CD2), phe and tyr (CD1/CD2 and CE1/CE2) and
# val (CG1/CG2), where the listed atom pairs are swapped.
# NOTE(review): presumably these swaps cover symmetry-equivalent atom
# namings - confirm against the code that consumes this table.
aa2longalt=[
    (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # ala
    (" N "," CA "," C "," O "," CB "," CG "," CD "," NE "," CZ "," NH2"," NH1", None, None, None), # arg
    (" N "," CA "," C "," O "," CB "," CG "," OD1"," ND2", None, None, None, None, None, None), # asn
    (" N "," CA "," C "," O "," CB "," CG "," OD2"," OD1", None, None, None, None, None, None), # asp
    (" N "," CA "," C "," O "," CB "," SG ", None, None, None, None, None, None, None, None), # cys
    (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," NE2", None, None, None, None, None), # gln
    (" N "," CA "," C "," O "," CB "," CG "," CD "," OE2"," OE1", None, None, None, None, None), # glu
    (" N "," CA "," C "," O ", None, None, None, None, None, None, None, None, None, None), # gly
    (" N "," CA "," C "," O "," CB "," CG "," ND1"," CD2"," CE1"," NE2", None, None, None, None), # his
    (" N "," CA "," C "," O "," CB "," CG1"," CG2"," CD1", None, None, None, None, None, None), # ile
    (" N "," CA "," C "," O "," CB "," CG "," CD2"," CD1", None, None, None, None, None, None), # leu
    (" N "," CA "," C "," O "," CB "," CG "," CD "," CE "," NZ ", None, None, None, None, None), # lys
    (" N "," CA "," C "," O "," CB "," CG "," SD "," CE ", None, None, None, None, None, None), # met
    (" N "," CA "," C "," O "," CB "," CG "," CD2"," CD1"," CE2"," CE1"," CZ ", None, None, None), # phe
    (" N "," CA "," C "," O "," CB "," CG "," CD ", None, None, None, None, None, None, None), # pro
    (" N "," CA "," C "," O "," CB "," OG ", None, None, None, None, None, None, None, None), # ser
    (" N "," CA "," C "," O "," CB "," OG1"," CG2", None, None, None, None, None, None, None), # thr
    (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE2"," CE3"," NE1"," CZ2"," CZ3"," CH2"), # trp
    (" N "," CA "," C "," O "," CB "," CG "," CD2"," CD1"," CE2"," CE1"," CZ "," OH ", None, None), # tyr
    (" N "," CA "," C "," O "," CB "," CG2"," CG1", None, None, None, None, None, None, None), # val
]
134 |
# full sc & H atom representation (Nx24)
# One tuple per residue type, in the same order as num2aa (see the trailing
# residue comment on each row). Each tuple has 24 slots: heavy atoms first
# (backbone N, CA, C, O, then side chain), followed by hydrogens; unused
# slots are None. The pro row also lists an ' NV ' entry among the heavy atoms.
# NOTE(review): the trp row ends with 'HZ3' and 'HH2', which lack the leading
# space used by the neighbouring ' HZ2' entry - confirm these literals.
aa2longH = [
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' H ', ' HA ', '1HB ', '2HB ', '3HB ', None, None, None, None, None, None, None, None, None, None, None, None, None, None), # ala
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' NE ', ' CZ ', ' NH1', ' NH2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', ' HE ', '1HH1', '2HH1', '1HH2', '2HH2'), # arg
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' OD1', ' ND2', ' H ', ' HA ', '1HB ', '2HB ', '1HD2', '2HD2', None, None, None, None, None, None, None, None, None, None), # asn
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' OD1', ' OD2', ' H ', ' HA ', '1HB ', '2HB ', None, None, None, None, None, None, None, None, None, None, None, None), # asp
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' SG ', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # cys
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' OE1', ' NE2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE2', '2HE2', None, None, None, None, None, None, None), # gln
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' OE1', ' OE2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', None, None, None, None, None, None, None, None, None), # glu
    (' N ', ' CA ', ' C ', ' O ', ' H ', '1HA ', '2HA ', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None), # gly
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' ND1', ' CD2', ' CE1', ' NE2', ' H ', ' HA ', '1HB ', '2HB ', ' HD2', ' HE1', ' HE2', None, None, None, None, None, None, None), # his
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG1', ' CG2', ' CD1', ' H ', ' HA ', ' HB ', '1HG1', '2HG1', '1HG2', '2HG2', '3HG2', '1HD1', '2HD1', '3HD1', None, None, None, None, None), # ile
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', '1HD1', '2HD1', '3HD1', '1HD2', '2HD2', '3HD2', None, None, None, None, None), # leu
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' CE ', ' NZ ', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', '1HE ', '2HE ', '1HZ ', '2HZ ', '3HZ ', None, None), # lys
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' SD ', ' CE ', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE ', '2HE ', '3HE ', None, None, None, None, None, None, None), # met
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HZ ', None, None, None, None), # phe
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' NV ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', None, None, None, None, None, None, None, None, None), # pro
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' OG ', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # ser
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' OG1', ' CG2', ' H ', ' HA ', ' HB ', ' HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None, None, None), # thr
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' NE1', ' CE2', ' CE3', ' CZ2', ' CZ3', ' CH2', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HE1', ' HE3', ' HZ2', 'HZ3', 'HH2'), # trp
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' OH ', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HH ', None, None, None), # tyr
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG1', ' CG2', ' H ', ' HA ', ' HB ', '1HG1', '2HG1', '3HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None) # val
]
158 |
# N-terminal variant of the full sc & H atom table (Nx26).
# One tuple per residue type, in the same order as num2aa (see the trailing
# residue comment on each row). Heavy atoms come first; the amide hydrogen is
# replaced by the three N-terminal hydrogens '1H ', '2H ', '3H ' (the pro row
# lists ' NV ', 'CAV ', '1H ' and '2H ' instead). Unused slots are None.
# The ala row was two slots shorter than every other row; it is padded with
# None here so that all rows have the same width of 26.
# NOTE(review): the trp row ends with 'HZ3' and 'HH2', which lack the leading
# space used by the neighbouring ' HZ2' entry - confirm these literals.
aa2longH_Nterm = [
    (' N ', ' CA ', ' C ', ' O ', ' CB ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '3HB ', None, None, None, None, None, None, None, None, None, None, None, None, None, None), # ala
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' NE ', ' CZ ', ' NH1', ' NH2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', ' HE ', '1HH1', '2HH1', '1HH2', '2HH2'), # arg
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' OD1', ' ND2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HD2', '2HD2', None, None, None, None, None, None, None, None, None, None), # asn
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' OD1', ' OD2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', None, None, None, None, None, None, None, None, None, None, None, None), # asp
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' SG ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # cys
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' OE1', ' NE2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE2', '2HE2', None, None, None, None, None, None, None), # gln
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' OE1', ' OE2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', None, None, None, None, None, None, None, None, None), # glu
    (' N ', ' CA ', ' C ', ' O ', '1H ', '2H ', '3H ', '1HA ', '2HA ', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None), # gly
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' ND1', ' CD2', ' CE1', ' NE2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HD2', ' HE1', ' HE2', None, None, None, None, None, None, None), # his
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG1', ' CG2', ' CD1', '1H ', '2H ', '3H ', ' HA ', ' HB ', '1HG1', '2HG1', '1HG2', '2HG2', '3HG2', '1HD1', '2HD1', '3HD1', None, None, None, None, None), # ile
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HG ', '1HD1', '2HD1', '3HD1', '1HD2', '2HD2', '3HD2', None, None, None, None, None), # leu
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' CE ', ' NZ ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', '1HE ', '2HE ', '1HZ ', '2HZ ', '3HZ ', None, None), # lys
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' SD ', ' CE ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE ', '2HE ', '3HE ', None, None, None, None, None, None, None), # met
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HZ ', None, None, None, None), # phe
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' NV ', 'CAV ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', '1H ', '2H ', None, None, None, None, None, None, None, None), # pro
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' OG ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # ser
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' OG1', ' CG2', '1H ', '2H ', '3H ', ' HA ', ' HB ', ' HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None, None, None), # thr
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' NE1', ' CE2', ' CE3', ' CZ2', ' CZ3', ' CH2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HE1', ' HE3', ' HZ2', 'HZ3', 'HH2'), # trp
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' OH ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HH ', None, None, None), # tyr
    (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG1', ' CG2', '1H ', '2H ', '3H ', ' HA ', ' HB ', '1HG1', '2HG1', '3HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None) # val
]
181 |
# Atom names, hydrogens included, for each amino acid when it carries the extra
# C-terminal ' OXT' oxygen. One row per residue type in the order given by the
# trailing comments (ala ... val); unused trailing slots are None.
# The trp row used to end in 'HZ3', 'HH2' without the leading pad that every other
# hydrogen name in this table has (compare ' HZ2' right before them); they are now
# padded the same way so name lookups against this table behave consistently.
aa2longH_Cterm = [
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' H ', ' HA ', '1HB ', '2HB ', '3HB ', None, None, None, None, None, None, None, None, None, None, None, None, None, None), # ala
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' NE ', ' CZ ', ' NH1', ' NH2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', ' HE ', '1HH1', '2HH1', '1HH2', '2HH2'), # arg
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' OD1', ' ND2', ' H ', ' HA ', '1HB ', '2HB ', '1HD2', '2HD2', None, None, None, None, None, None, None, None, None, None), # asn
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' OD1', ' OD2', ' H ', ' HA ', '1HB ', '2HB ', None, None, None, None, None, None, None, None, None, None, None, None), # asp
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' SG ', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # cys
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' OE1', ' NE2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE2', '2HE2', None, None, None, None, None, None, None), # gln
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' OE1', ' OE2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', None, None, None, None, None, None, None, None, None), # glu
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' H ', '1HA ', '2HA ', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None), # gly
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' ND1', ' CD2', ' CE1', ' NE2', ' H ', ' HA ', '1HB ', '2HB ', ' HD2', ' HE1', ' HE2', None, None, None, None, None, None, None), # his
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG1', ' CG2', ' CD1', ' H ', ' HA ', ' HB ', '1HG1', '2HG1', '1HG2', '2HG2', '3HG2', '1HD1', '2HD1', '3HD1', None, None, None, None, None), # ile
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD1', ' CD2', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', '1HD1', '2HD1', '3HD1', '1HD2', '2HD2', '3HD2', None, None, None, None, None), # leu
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' CE ', ' NZ ', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', '1HE ', '2HE ', '1HZ ', '2HZ ', '3HZ ', None, None), # lys
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' SD ', ' CE ', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE ', '2HE ', '3HE ', None, None, None, None, None, None, None), # met
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HZ ', None, None, None, None), # phe
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' NV ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', None, None, None, None, None, None, None, None, None), # pro
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' OG ', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # ser
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' OG1', ' CG2', ' H ', ' HA ', ' HB ', ' HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None, None, None), # thr
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD1', ' CD2', ' NE1', ' CE2', ' CE3', ' CZ2', ' CZ3', ' CH2', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HE1', ' HE3', ' HZ2', ' HZ3', ' HH2'), # trp
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' OH ', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HH ', None, None, None), # tyr
    (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG1', ' CG2', ' H ', ' HA ', ' HB ', '1HG1', '2HG1', '3HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None) # val
]
204 |
205 |
# build "deterministic" atoms
# see notebook (se3_experiments.ipynb) for derivation
#
# One list per amino acid, in the order given by the trailing comments (ala ... val).
# Each entry is [atom_to_build, base_atom_1, base_atom_2, base_atom_3, [x, y, z]]:
# the short2long construction further down looks the three base atoms up in
# aa2short and stores the 3-vector next to their indices.
# NOTE(review): the 3-vector is presumably the built atom's position expressed in
# the local frame of the three base atoms -- confirm against the notebook.
aa2frames=[
    [], # ala
    [ # arg
        [' NH1', ' CZ ', ' NE ', ' CD ', [-0.7218378782272339, 1.0856682062149048, -0.006118079647421837]],
        [' NH2', ' CZ ', ' NE ', ' CD ', [-0.6158039569854736, -1.1400136947631836, 0.006467342376708984]]],
    [ # asn
        [' ND2', ' CG ', ' CB ', ' OD1', [-0.6304131746292114, -1.1431225538253784, 0.02364802360534668]]],
    [ # asp
        [' OD2', ' CG ', ' CB ', ' OD1', [-0.5972501039505005, -1.0955055952072144, 0.04530305415391922]]],
    [], # cys
    [ # gln
        [' NE2', ' CD ', ' CG ', ' OE1', [-0.6558755040168762, -1.1324536800384521, 0.026521772146224976]]],
    [ # glu
        [' OE2', ' CD ', ' CG ', ' OE1', [-0.5578438639640808, -1.1161314249038696, -0.015464287251234055]]],
    [], # gly
    [ # his
        [' CD2', ' CG ', ' CB ', ' ND1', [-0.7502505779266357, -1.1680538654327393, 0.0005368441343307495]],
        [' CE1', ' CG ', ' CB ', ' ND1', [-2.0262467861175537, 0.539483368396759, -0.004495501518249512]],
        [' NE2', ' CG ', ' CB ', ' ND1', [-2.0761325359344482, -0.8199722766876221, -0.0018703639507293701]]],
    [ # ile
        [' CG2', ' CB ', ' CA ', ' CG1', [-0.6059935688972473, -0.8108057379722595, 1.1861376762390137]]],
    [ # leu
        [' CD2', ' CG ', ' CB ', ' CD1', [-0.5942193269729614, -0.7693282961845398, -1.1914138793945312]]],
    [], # lys
    [], # met
    [ # phe
        [' CD2', ' CG ', ' CB ', ' CD1', [-0.7164441347122192, -1.197853446006775, 0.06416648626327515]],
        [' CE1', ' CG ', ' CB ', ' CD1', [-2.0785865783691406, 1.2366485595703125, 0.08100450038909912]],
        [' CE2', ' CG ', ' CB ', ' CD1', [-2.107091188430786, -1.178497076034546, 0.13524535298347473]],
        [' CZ ', ' CG ', ' CB ', ' CD1', [-2.786630630493164, 0.03873880207538605, 0.14633776247501373]]],
    [], # pro
    [], # ser
    [ # thr
        [' CG2', ' CB ', ' CA ', ' OG1', [-0.6842088103294373, -0.6709619164466858, 1.2105456590652466]]],
    [ # trp
        # NOTE(review): ' CE2' appears twice below with identical values; a lookup by
        # name via list.index() only ever reaches the first copy.
        [' CD2', ' CG ', ' CB ', ' CD1', [-0.8550368547439575, -1.0790592432022095, 0.09017711877822876]],
        [' NE1', ' CG ', ' CB ', ' CD1', [-2.1863200664520264, 0.8064242601394653, 0.08350661396980286]],
        [' CE2', ' CG ', ' CB ', ' CD1', [-2.1801204681396484, -0.5795643329620361, 0.14015203714370728]],
        [' CE3', ' CG ', ' CB ', ' CD1', [-0.605582594871521, -2.4733362197875977, 0.16200461983680725]],
        [' CE2', ' CG ', ' CB ', ' CD1', [-2.1801204681396484, -0.5795643329620361, 0.14015203714370728]],
        [' CZ2', ' CG ', ' CB ', ' CD1', [-3.2672977447509766, -1.473116159439087, 0.250858873128891]],
        [' CZ3', ' CG ', ' CB ', ' CD1', [-1.6969941854476929, -3.3360071182250977, 0.264143705368042]],
        [' CH2', ' CG ', ' CB ', ' CD1', [-3.009331703186035, -2.8451972007751465, 0.3059283494949341]]],
    [ # tyr
        [' CD2', ' CG ', ' CB ', ' CD1', [-0.69439297914505, -1.2123756408691406, -0.009198814630508423]],
        [' CE1', ' CG ', ' CB ', ' CD1', [-2.104464054107666, 1.1910505294799805, -0.014679580926895142]],
        [' CE2', ' CG ', ' CB ', ' CD1', [-2.0857787132263184, -1.2231677770614624, -0.024517983198165894]],
        [' CZ ', ' CG ', ' CB ', ' CD1', [-2.7897322177886963, -0.021470561623573303, -0.026979409158229828]],
        [' OH ', ' CG ', ' CB ', ' CD1', [-4.1559271812438965, -0.029129385948181152, -0.044720835983753204]]],
    [ # val
        [' CG2', ' CB ', ' CA ', ' CG1', [-0.6258467435836792, -0.7654698491096497, -1.1894742250442505]]],
]
260 |
# O from frame (C,N-1,CA)
# Fixed 3-vector that the short2long construction copies into short2long[..., 3:]
# for the backbone ' O ' atom, where it is paired with base-atom indices 2, 0, 1.
bb2oframe=[-0.5992066264152527, -1.0820008516311646, 0.0001476481556892395]
263 |
# Mapping from the reduced ("short") atom representation to the full ("long") one.
# short2long has shape (20, 14, 6); for residue type i and long-atom slot j:
#   [i, j, 0:3]  indices into aa2short[i] of the three atoms the slot is built from
#                (a first index < 0 means the slot holds no atom)
#   [i, j, 3:6]  xyz offset used to build the atom (all zeros ==> plain copy, no build)
short2long = np.zeros((20, 14, 6))
for i in range(20):
    i_s, i_l = aa2short[i], aa2long[i]
    for j, a in enumerate(i_l):
        # case 1: slot is empty for this residue type
        if a is None:
            short2long[i, j, 0] = -1
            continue

        # case 2: atom is itself one of the base atoms; the two companion indices
        # are the lowest two of (0, 1, 2) that differ from its own index
        if a in i_s:
            _base = i_s.index(a)
            _companions = [k for k in (0, 1, 2) if k != _base][:2]
            short2long[i, j, 0] = _base
            short2long[i, j, 1] = _companions[0]
            short2long[i, j, 2] = _companions[1]
            continue

        # case 3: atom is ' O '
        if a == " O ":
            short2long[i, j, 0] = 2
            short2long[i, j, 1] = 0  # Nprev (will pre-roll N as nothing else needs it)
            short2long[i, j, 2] = 1
            short2long[i, j, 3:] = np.array(bb2oframe)
            continue

        # case 4: atom has to be built from its entry in aa2frames
        i_f = aa2frames[i]
        names = [f[0] for f in i_f]
        idx = names.index(a)
        for _slot, _base_name in enumerate(i_f[idx][1:4]):
            short2long[i, j, _slot] = i_s.index(_base_name)
        short2long[i, j, 3:] = np.array(i_f[idx][4])
303 |
# Mapping from each slot of the full representation (aa2long) to the position of
# the same atom name in the "alternate" representation (aa2longalt).
# Empty slots (None) simply map onto themselves.
long2alt = np.zeros((20, 14))
for i in range(20):
    i_l, i_lalt = aa2long[i], aa2longalt[i]
    for j, a in enumerate(i_l):
        long2alt[i, j] = j if a is None else i_lalt.index(a)
313 |
314 |
--------------------------------------------------------------------------------
/protocol.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Sun Aug 25 23:14:34 2024
5 |
6 | @author: indrek
7 | """
8 | import pyrosetta as pyr
9 | import pyrosetta.rosetta
10 | import os, sys
11 | import random
12 | import numpy as np
13 | import itertools
14 | import multiprocessing
15 | import time
16 | import scipy.spatial
17 |
18 | script_dir = os.path.dirname(os.path.realpath(__file__))
19 | sys.path.append(script_dir)
20 | sys.path.append(script_dir+'/utils/')
21 | import utils
22 | import dunbrack_rotlib
23 | import align_pdbs
24 |
25 |
26 | """
27 | PARSING FUNCTIONS
28 | """
def parse_arguments(args, restypes):
    """
    Expands global command-line options into per-CST lists, modifying `args` in place.

    Every "*_per_cst" option that gets expanded here ends up as a list whose element 0
    belongs to the ligand slot (None for the Dunbrack / length / secondary structure
    options) followed by one element per entry of `restypes`.

    Arguments:
        args: namespace with the attributes read below (dunbrack_prob[_per_cst],
              max_random_rotamers[_per_cst], frac_random_rotamers[_per_cst],
              use_best_rotamer_cstids, N_len[_per_cst], C_len[_per_cst],
              secstruct[_per_cst])
        restypes: sized iterable with one entry per CST block
    Returns:
        the same `args` object
    Raises:
        AssertionError if a user-provided per-CST list has the wrong element type
        or length.
    """
    # Limiting Dunbrack library as requested.
    if args.dunbrack_prob_per_cst is None:
        args.dunbrack_prob_per_cst = [None] + [args.dunbrack_prob for r in restypes]
    else:
        assert all([isinstance(x, float) for x in args.dunbrack_prob_per_cst])
        args.dunbrack_prob_per_cst = [None] + args.dunbrack_prob_per_cst

    ######### IF REQUESTED... ############
    ### RANDOM ROTAMER SELECTION SETUP ###
    if args.max_random_rotamers_per_cst is not None:
        assert all([isinstance(x, int) for x in args.max_random_rotamers_per_cst])
        assert len(args.max_random_rotamers_per_cst) == len(restypes)+1, "Invalid number of per-cst max_random_rotamers_per_cst"

    if args.frac_random_rotamers_per_cst is not None:
        assert all([isinstance(x, float) for x in args.frac_random_rotamers_per_cst])
        assert len(args.frac_random_rotamers_per_cst) == len(restypes)+1, "Invalid number of per-cst frac_random_rotamers_per_cst"

    # The global options win over the per-CST ones when both are given
    if args.max_random_rotamers is not None:
        args.max_random_rotamers_per_cst = [args.max_random_rotamers] + [args.max_random_rotamers for r in restypes]

    if args.frac_random_rotamers is not None:
        args.frac_random_rotamers_per_cst = [args.frac_random_rotamers] + [args.frac_random_rotamers for r in restypes]

    # In case best rotamer is requested for a given CST id then set randomness to 1.0
    # Only possible when a fraction list exists and best-rotamer ids were given;
    # without this guard the loop raised TypeError when either of them was None.
    if args.frac_random_rotamers_per_cst is not None and args.use_best_rotamer_cstids:
        for i in range(len(args.frac_random_rotamers_per_cst)):
            if i in args.use_best_rotamer_cstids:
                args.frac_random_rotamers_per_cst[i] = 1.0

    #### PARSING SECONDARY STRUCTURE LENGTHS #####
    if args.N_len_per_cst is None:
        args.N_len_per_cst = [None] + [args.N_len for r in restypes]
    else:
        assert all([isinstance(x, int) for x in args.N_len_per_cst])
        args.N_len_per_cst = [None] + args.N_len_per_cst

    if args.C_len_per_cst is None:
        args.C_len_per_cst = [None] + [args.C_len for r in restypes]
    else:
        assert all([isinstance(x, int) for x in args.C_len_per_cst])
        args.C_len_per_cst = [None] + args.C_len_per_cst

    # Loading favored rotamers for each used residue type in each CST block
    # This allows different rotamer sets to be stored if same residue type should be
    # on different secondary structures in different CST blocks
    # TODO: could also consider enabling different probabilities for different CST's or AA's? <-- partially done
    if args.secstruct_per_cst is None:
        args.secstruct_per_cst = [None] + [args.secstruct for r in restypes]
    else:
        assert all([x in "EH-" for x in args.secstruct_per_cst])
        args.secstruct_per_cst = [None] + args.secstruct_per_cst
    return args
84 |
85 |
def parse_motif_input(motif_input, cst_atoms, restypes):
    """
    Parses external motif definitions given as "<cst_no>:<resno>:<filepath>" strings.

    Arguments:
        motif_input (iterable, str): motif definitions
        cst_atoms (dict): per-CST constraint atoms, as produced by get_cst_atoms()
        restypes: indexable by CST number; restypes[cst_no] holds the residue
                  names allowed for that CST
    Returns:
        dict {cst_no: {"resno": int, "pose": loaded pose, "fp": filepath,
                       "atoms": CST atom names of the motif residue}}
    Exits (sys.exit) if a motif is requested for any CST other than the first one,
    or if no CST atoms can be found for the motif residue.
    """
    motifs = {}
    for motif_txt in motif_input:
        # Splitting on the first two colons only, so that a filepath that contains
        # a colon itself (e.g. a drive letter) is kept in one piece
        fields = motif_txt.split(":", 2)
        motif_cst_no = int(fields[0])
        if motif_cst_no != 1:
            sys.exit("External motif not supported for not-first CST's right now.")
        motif_resno = int(fields[1])
        motif_fp = fields[2]
        motifs[motif_cst_no] = {"resno": motif_resno,
                                "pose": pyr.pose_from_file(motif_fp),
                                "fp": motif_fp,
                                "atoms": None}
        motif_resname = motifs[motif_cst_no]["pose"].residue(motif_resno).name3()
        assert motif_resname in restypes[motif_cst_no], f"{motif_resname} not found in {restypes}"

        # Finding the CST atoms for a given CST
        # Keys look like "<residue name>-<cst no>"; within one atom set the first key
        # matching the motif residue is used, and a later atom set overrides an earlier one.
        for sub_cst_block in cst_atoms[motif_cst_no]:
            for per_aa_cstset in sub_cst_block:
                for aa in per_aa_cstset:
                    if aa.split("-")[0] == motif_resname:
                        motifs[motif_cst_no]["atoms"] = per_aa_cstset[aa]
                        break
        if motifs[motif_cst_no]["atoms"] is None:
            print(cst_atoms)
            sys.exit("Unable to find correct motif atoms based on the corresponding CST definition")
    return motifs
111 |
112 |
def parse_rotamer_subsampling(args, cst_atoms):
    """
    Works out the chi subsampling level of chi1..chi4 for every CST block.

    Arguments:
        args: namespace providing `extra_chi` ("1:2,2:2,3:1,4:1") and
              `extra_chi_per_cst` (["CSTNO-1:2,2:2", ...]); `extra_chi_per_cst` is
              only looked at when `extra_chi` is None
        cst_atoms: iterable of CST numbers (e.g. the dict from get_cst_atoms())
    Returns:
        dict {cst_no: {chi_no: level}} with level 0 wherever nothing was requested
    Raises:
        AssertionError if a level falls outside 0..7
    """
    def _parse_levels(tokens):
        # ["1:2", "2:1"] --> {1: 2, 2: 1}
        levels = {}
        for token in tokens:
            chi_no = int(token.split(":")[0])
            level = int(token.split(":")[1])
            levels[chi_no] = level
        return levels

    levels_all_csts = {}
    levels_by_cst = {}
    if args.extra_chi is not None:
        # 1:2,2:2,3:1,4:1
        levels_all_csts = _parse_levels(args.extra_chi.split(","))
    elif args.extra_chi_per_cst is not None:
        # CSTNO-1:2,2:2,3:1,4:1 CSTNO2-1:1,2:1
        tokens_by_cst = {}
        for definition in args.extra_chi_per_cst:
            cst_id = int(definition.split("-")[0])
            tokens_by_cst[cst_id] = definition.split("-")[1].split(",")
        for cst_id, tokens in tokens_by_cst.items():
            levels_by_cst[cst_id] = _parse_levels(tokens)

    chi_subsampling_levels = {}
    for cstno in cst_atoms:
        chi_subsampling_levels[cstno] = {}
        this_cst = levels_by_cst.get(cstno, {})
        for chi_no in (1, 2, 3, 4):
            # A CST-specific request takes priority over the global one
            if chi_no in this_cst:
                level = this_cst[chi_no]
            elif chi_no in levels_all_csts:
                level = levels_all_csts[chi_no]
            else:
                level = 0
            chi_subsampling_levels[cstno][chi_no] = level
            assert 0 <= level <= 7, f"Invalid sampling level for cst {cstno}, chi {chi_no}: {level}"

    print("Using CHI sampling levels for CST's:")
    for cstno, levels in chi_subsampling_levels.items():
        print(f" CST {cstno} :: {levels}")

    return chi_subsampling_levels
143 |
144 |
def get_cst_atoms(cst_io):
    """
    Collects the names of the three template atoms of both constrained residues
    for every constraint definition held by `cst_io`.

    Arguments:
        cst_io: object exposing mcfi_lists_size() and mcfi_list(n)
    Returns:
        dict {n: [one list per mcfi of block n]}; each of those lists holds one dict
        per (downstream, upstream) residue type combination:
            {"<downstream name>-<cst no>": (atom1, atom2, atom3),
             "<upstream name>-<n>": (atom1, atom2, atom3)}
        The downstream CST number is 1 unless a "match" algorithm input with
        "SECONDARY_MATCH:" and "UPSTREAM_CST" names another one.
    """
    def _template_atom_names(mcfi, which_res, restype):
        # Names of template atoms 1..3 (first listed index each) in `restype`
        return tuple(restype.atom_name(mcfi.template_atom_inds(which_res, atom_no, restype)[1])
                     for atom_no in (1, 2, 3))

    cst_atoms = {}
    for n in range(1, cst_io.mcfi_lists_size()+1):
        per_mcfi = []
        cst_atoms[n] = per_mcfi
        for m in range(1, cst_io.mcfi_list(n).num_mcfis()+1):
            atom_sets = []
            per_mcfi.append(atom_sets)
            _mcfi = cst_io.mcfi_list(n).mcfi(m)

            # Figuring out if there is a particular downstream or upstream secondary match happening
            downstream_res_cst = 1  # stays 1 for a DOWNSTREAM match or when nothing is specified
            if "match" in _mcfi.algorithm_inputs():
                match_inputs = list(_mcfi.algorithm_inputs()["match"])
                if not any("DOWNSTREAM" in ai for ai in match_inputs):
                    # Upstream secondary match: the CST number is the third word of the input line
                    for ai in match_inputs:
                        if "SECONDARY_MATCH:" in ai and "UPSTREAM_CST" in ai:
                            downstream_res_cst = int(ai.split()[2])
                            break

            rt_combs = itertools.product(_mcfi.allowed_restypes(_mcfi.downstream_res()),
                                         _mcfi.allowed_restypes(_mcfi.upstream_res()))
            for ds_res, us_res in rt_combs:
                ais_ds = _template_atom_names(_mcfi, _mcfi.downstream_res(), ds_res)
                ais_us = _template_atom_names(_mcfi, _mcfi.upstream_res(), us_res)

                # Need to append CST numbers to residue names
                atom_sets.append({f"{ds_res.name()}-{downstream_res_cst}": ais_ds,
                                  f"{us_res.name()}-{n}": ais_us})

    return cst_atoms
179 |
180 |
181 |
182 | """
183 | ROTAMER-RELATED FUNCTIONS
184 | """
def preselect_inverse_rotamers(rotset, restype_good_rotamers, keep_his_tautomer_per_cst, tip_atom=False):
    """
    Filters each set of inverse rotamers down to the ones worth building on.

    tip_atom is False:
        keeps motif poses, ligands, ALA/GLY and every rotamer for which
        dunbrack_rotlib.find_bb_from_inverse_loc() returns at least one hit for its
        non-proton chi angles. For CSTs listed in `keep_his_tautomer_per_cst`, HIS
        rotamers whose full residue name differs from the requested one are dropped.
    otherwise:
        keeps motif-pose and ligand sets untouched, and from the other sets keeps a
        rotamer only if its CA and CB are both at least 0.2 away from those of
        every already kept rotamer with the same residue name.

    Arguments:
        rotset (list): one collection of inverse rotamers per CST (index 0 first)
        restype_good_rotamers: indexable as [cst_no][name3], handed to dunbrack_rotlib
        keep_his_tautomer_per_cst (dict or None): {cst_no: HIS residue name to keep}
        tip_atom (bool)
    Returns:
        list of lists parallel to `rotset`, or None if a non-empty set ends up with
        no compatible rotamers in the Dunbrack-based mode.
    """
    if tip_atom is False:
        print("Preselecting inverse rotamers based on Dunbrack probability")
        good_rotamers = [[] for x in rotset]
        for i, invrots in enumerate(rotset):
            if len(invrots) == 0:
                continue
            for res in invrots:
                if isinstance(res, pyrosetta.rosetta.core.pose.Pose):  # motif pose
                    good_rotamers[i].append(res)
                    continue
                if res.is_ligand():
                    good_rotamers[i].append(res)
                    continue
                if res.name3() == "HIS" and keep_his_tautomer_per_cst is not None and i in keep_his_tautomer_per_cst:
                    if res.name() != keep_his_tautomer_per_cst[i]:
                        continue
                # Need to exclude proton CHIs
                _chis = [res.chi(n+1) for n in range(res.nchi()) if "H" not in [res.atom_type(an).element() for an in res.chi_atoms(n+1)]]
                if res.name3() in ["ALA", "GLY"]:
                    good_rotamers[i].append(res)
                else:
                    rotlib_matches = dunbrack_rotlib.find_bb_from_inverse_loc(restype_good_rotamers[i][res.name3()], _chis)
                    if len(rotlib_matches) > 0:
                        good_rotamers[i].append(res)
            if len(good_rotamers[i]) == 0 and len(rotset[i]) != 0:
                print(f"Failed to find compatible rotamers for constraint {i}: {res.name()}")
                return None
    else:
        print("Preselecting inverse rotamers only based whether the tip atoms are different")
        good_rotamers = []
        for i, invrots in enumerate(rotset):
            # An empty set has no first element to inspect; it is passed through as an
            # empty list, the same way the Dunbrack-based branch treats it.
            if len(invrots) == 0:
                good_rotamers.append([])
                continue
            if isinstance(invrots[0], pyrosetta.rosetta.core.pose.Pose):
                good_rotamers.append(invrots)
                continue
            elif invrots[0].is_ligand():
                good_rotamers.append(invrots)
                continue
            good_rotamers.append([])
            for invrot in invrots:
                if len(good_rotamers[i]) == 0:
                    good_rotamers[i].append(invrot)
                    continue
                # One verdict per already kept rotamer of the same residue type
                is_unique = []
                for rot in good_rotamers[i]:
                    if rot.name() != invrot.name():
                        continue
                    if (rot.xyz("CA")-invrot.xyz("CA")).norm() < 0.2:
                        is_unique.append(False)
                        continue
                    if (rot.xyz("CB")-invrot.xyz("CB")).norm() < 0.2:
                        is_unique.append(False)
                        continue
                    is_unique.append(True)
                if all(is_unique):
                    good_rotamers[i].append(invrot)
    return good_rotamers
244 |
245 |
def find_unique_rotamers_for_motif(rotset, motifs):
    """
    Picks, for every set of inverse rotamers, the members that differ from each
    other in the placement of the motif atoms; these are the ones a motif gets
    aligned to.
    A rotamer counts as different when, against every rotamer kept so far, the
    distances between equally named motif atoms add up to more than 0.1.
    Arguments:
        rotset (list): one collection of inverse rotamers per CST
        motifs (dict): motifs[i]["atoms"] lists the atom names compared for set i
    Returns:
        list of lists of kept rotamers, parallel to rotset
    """
    print("Preselecting inverse rotamers for motif alignment, based on unique CST subsampling")
    unique_rotset = []

    for i, invrots in enumerate(rotset):
        if len(invrots) == 0:
            unique_rotset.append([])
            continue

        kept = []
        for candidate in invrots:
            if not kept:
                # The first rotamer is always taken
                kept.append(candidate)
                continue
            motif_atoms = motifs[i]["atoms"]
            summed_offsets = [sum([(candidate.xyz(a) - other.xyz(a)).norm() for a in motif_atoms])
                              for other in kept]
            if all([offset > 0.1 for offset in summed_offsets]):
                kept.append(candidate)

        print(f" CST {i}, {len(kept)}/{len(invrots)} after unique selection")
        unique_rotset.append(kept)
    return unique_rotset
272 |
273 |
def pick_random_rotamers(invrots, N_max=None, frac=None):
    """
    Returns a random subset of `invrots` as a new list.
    Arguments:
        invrots: collection of inverse rotamers (or motif poses)
        N_max (int): number of rotamers to draw; takes precedence over `frac`
        frac (float): fraction of the rotamers to draw
    Everything is returned, in the original order, when there are fewer than N_max
    rotamers (N_max mode), at most one rotamer (frac mode), or when the first
    element is a Pose. With neither N_max nor frac given, None is returned.
    """
    if N_max is not None:
        nothing_to_pick = len(invrots) < N_max
        n_picked = N_max
    elif frac is not None:
        nothing_to_pick = len(invrots) <= 1
        n_picked = None  # worked out below, only if sampling really happens
    else:
        return None

    # Motif poses are never subsampled
    if nothing_to_pick or isinstance(invrots[0], pyrosetta.rosetta.core.pose.Pose):
        return list(invrots)

    if n_picked is None:
        n_picked = int(round(frac*len(invrots), 0))
    return random.sample(list(invrots), n_picked)
289 |
290 |
def pick_random_rotamers_set(rotset, max_random_rotamers_per_cst=None, frac_random_rotamers_per_cst=None):
    """
    Randomly subsamples every set of inverse rotamers with pick_random_rotamers().
    Exactly one of the two per-CST lists has to be provided; the program exits
    with "Bad setup" otherwise.
    Arguments:
        rotset (list)
        max_random_rotamers_per_cst (list, int)
        frac_random_rotamers_per_cst (list, float)
    Returns:
        list with one subsampled list per entry of rotset
    """
    no_max = max_random_rotamers_per_cst is None
    no_frac = frac_random_rotamers_per_cst is None
    if no_max == no_frac:
        # Neither or both of the options were given
        sys.exit("Bad setup")

    # The option that was not given becomes a list of None's of matching length
    if no_max:
        max_random_rotamers_per_cst = [None] * len(frac_random_rotamers_per_cst)
    else:
        frac_random_rotamers_per_cst = [None] * len(max_random_rotamers_per_cst)

    assert len(rotset) == len(frac_random_rotamers_per_cst)
    assert len(rotset) == len(max_random_rotamers_per_cst)

    return [pick_random_rotamers(invrots,
                                 N_max=max_random_rotamers_per_cst[n],
                                 frac=frac_random_rotamers_per_cst[n])
            for n, invrots in enumerate(rotset)]
317 |
318 |
def subsample_rotamers(rotamers, subsample_levels, per_cst_rotlib, cst_atoms):
    """
    Expands every inverse rotamer into a set of rotamers with perturbed chi angles.

    For each non-ligand rotamer the chi standard deviations are taken from the
    matching Dunbrack library entries, every chi is sampled with
    calculate_samplings() at the level requested for that CST, and each combination
    of sampled chi values is applied to a clone that is then aligned back onto the
    original rotamer using the CST atoms.
    Arguments:
        rotamers (list): one collection of inverse rotamers per CST; index 0 is the
                         ligand set, which is copied through unchanged
        subsample_levels (dict): {cst_no: {chi_no: sampling level}}
        per_cst_rotlib: indexable as [cst_no][name3], handed to dunbrack_rotlib
        cst_atoms (dict): CST atom definitions, as produced by get_cst_atoms()
    Returns:
        list of lists of rotamers, parallel to `rotamers`
    """
    expanded_rotset = []
    for cst_block, invrots in enumerate(rotamers):
        expanded_rotset.append([])
        if cst_block == 0:  # Ligand
            expanded_rotset[0] = [r for r in invrots]
            continue
        for n, invrot in enumerate(invrots):
            if isinstance(invrot, pyrosetta.rosetta.core.pose.Pose):  # motif pose
                expanded_rotset[cst_block].append(invrot)
                continue
            # NOTE(review): all chi angles are passed here, proton chis included --
            # confirm that this is what find_bb_from_inverse_loc expects.
            _asd = dunbrack_rotlib.find_bb_from_inverse_loc(per_cst_rotlib[cst_block][invrot.name3()], list(invrot.chi()))
            if len(_asd) == 0:
                # Without library hits there are no stdevs to sample with; rotamer is kept as is
                print(f"CST {cst_block}: rotamer {n} found no hits from Dunbrack library!?")
                expanded_rotset[cst_block].append(invrot)
                continue
            # Right now taking STDEV just as an average of all found rotamers in desired secondary structure bins
            # NOTE(review): assumes _asd has a "std<N>" field for every chi of the residue -- TODO confirm
            stdevs = {chino+1: _asd[f"std{chino+1}"].mean() for chino in range(invrot.nchi())}

            # Expanding all chi's based on user request
            chi_samplings = {chino: calculate_samplings(invrot.chi(chino), stdevs[chino], subsample_levels[cst_block][chino]) for chino in stdevs}
            for chiset in itertools.product(*chi_samplings.values()):
                _rot = invrot.clone()
                for chino, _chi in enumerate(chiset):
                    _rot.set_chi(chino+1, _chi)

                # Need to realign coordinates
                # First let's find what are the CST atoms used
                # (the residue name is compared against the second key of each atom set)
                align_atoms = [[restype_block[f"{invrot.name()}-{cst_block}"] for restype_block in var_cst if invrot.name() == list(restype_block.keys())[1].split("-")[0]] for var_cst in cst_atoms[cst_block]]
                align_atoms = list(set([item for sublist in align_atoms for item in sublist]))
                if len(align_atoms) != 1:
                    # Only reported; the first atom set is used below regardless
                    # (an empty list would fail there with IndexError)
                    print(f"Bad choice for alignment atoms: {align_atoms}")
                __rot = align_pdbs.align_residue_to_residue(invrot, _rot, {"atoms1": align_atoms[0],
                                                                           "atoms2": align_atoms[0]})
                expanded_rotset[cst_block].append(__rot)
        print(f"Expanded CST-{cst_block} rotamers from {len(invrots)} to {len(expanded_rotset[cst_block])}")
    return expanded_rotset
356 |
357 |
358 |
def prune_ligand_rotamers(rotset, rmsd_cutoff=None, nproc=None):
    """
    Removes ligand rotamers with intramolecular clashes and, optionally, rotamers
    that are too similar to an already accepted one.

    Clash check (run in `nproc` worker processes): a rotamer is discarded when any
    pair of non-virtual, non-bonded atoms is closer than 2.1 (both heavy atoms) or
    1.7 (at least one non-heavy atom).
    Similarity check (only if rmsd_cutoff is not None / 0.0): the pairwise distances
    between non-hydrogen atoms of each surviving rotamer are compared with
    utils.rmsd() against those of every accepted rotamer; a rotamer is accepted
    only when no comparison falls below rmsd_cutoff.

    Arguments:
        rotset: indexable collection of ligand rotamers
        rmsd_cutoff (float): similarity cutoff; None or 0.0 disables the check
        nproc (int): number of worker processes; None uses multiprocessing.cpu_count()
    Returns:
        list of the rotamers that passed

    NOTE(review): `process` is a closure handed to the Pool as initializer, so this
    presumably relies on a fork-style process start method -- confirm before running
    on platforms that spawn.
    """
    print("Pruning ligand rotamers based on intramolecular clashes")

    # The default nproc=None used to crash at range(nproc) further down. The Pool
    # size and the number of end-of-work markers must match, so the count is
    # resolved once and used for both.
    if nproc is None:
        nproc = multiprocessing.cpu_count()

    # Clashcheck
    def process():
        # Worker loop: takes rotamer indices from the queue until it sees None
        while True:
            i = the_queue.get(block=True)
            if i is None:
                return
            res = rotset[i]
            nonbonded_distmat = []
            for p in itertools.combinations(range(1, res.natoms()+1), 2):
                if any([res.is_virtual(n) for n in p]):
                    continue
                # Skipping over bonded atoms
                if p[0] in res.bonded_neighbor(p[1]) or p[1] in res.bonded_neighbor(p[0]):
                    continue
                nonbonded_distmat.append((res.xyz(p[0]) - res.xyz(p[1])).norm())

                if all([res.atom_type(n).is_heavyatom() for n in p]):
                    cutoff = 2.1
                else:
                    cutoff = 1.7

                if nonbonded_distmat[-1] < cutoff:
                    # One clash is enough to reject the rotamer
                    good_rotamers[i] = False
                    break

    print(f"{len(rotset)} conformers to process")
    the_queue = multiprocessing.Queue()  # Queue stores the iterables

    start = time.time()
    manager = multiprocessing.Manager()
    good_rotamers = manager.dict()  # Need a special dictionary to store outputs from multiple processes

    for i, res in enumerate(rotset):
        the_queue.put(i)
        good_rotamers[i] = True

    pool = multiprocessing.Pool(processes=nproc,
                                initializer=process)

    # None to end each process
    for _i in range(nproc):
        the_queue.put(None)

    # Closing the queue and the pool
    the_queue.close()
    the_queue.join_thread()
    pool.close()
    pool.join()

    end = time.time()
    print(f"Found {len([i for i in good_rotamers.keys() if good_rotamers[i] is True])} good ligand rotamers.\n"
          f"Processing all the rotamers for clashes took {(end - start):.2f} seconds")

    ## RMSD
    if rmsd_cutoff in [None, 0.0]:
        return [rotset[i] for i in good_rotamers.keys() if good_rotamers[i] is True]

    unique_rotamers = {}
    DMs = {}
    for i in good_rotamers.keys():
        if good_rotamers[i] is False:
            continue
        res = rotset[i]

        # Condensed matrix of pairwise distances between the non-hydrogen atoms
        xyz = np.array([res.xyz(n+1) for n in range(res.natoms()) if res.atom_type(n+1).element() != "H"])
        DMs[i] = scipy.spatial.distance.pdist(xyz, 'euclidean')

        if len(unique_rotamers) == 0:
            unique_rotamers[i] = res
            continue
        rmsds = []
        for j, res_u in unique_rotamers.items():
            rmsds.append(utils.rmsd(DMs[i], DMs[j]))

            # One close match is enough, no need to compare against the rest
            if rmsds[-1] < rmsd_cutoff:
                break

        if min(rmsds) < rmsd_cutoff:
            continue
        else:
            unique_rotamers[i] = rotset[i]

    print(f"Found {len(unique_rotamers)}/{len(good_rotamers)} unique ligand rotamers based on RMSD cutoff {rmsd_cutoff}.")
    return [rot for i, rot in unique_rotamers.items()]
448 |
449 |
def prune_residue_rotamers(rotset):
    """
    Pruning based on proton chi similarity.
    Rotamers without proton chis (all chis counted in utils.N_chis, or defined by
    heavy atoms only for residue types not listed there) are always kept, and so
    is the very first rotamer. Any other rotamer is dropped when all of its
    non-hydrogen atoms lie within 0.02 of the same atoms of an already kept
    rotamer of the same residue type.
    Returns the kept rotamers as a list, in their original order.
    """
    def _count_heavy_chis(residue):
        # Number of chi angles that are defined without any hydrogen atom
        count = 0
        for chi_no in range(1, residue.nchi()+1):
            elements = [residue.atom_type(x).element() for x in residue.chi_atoms(chi_no)]
            if "H" not in elements:
                count += 1
        return count

    kept_rotamers = []
    for i, res in enumerate(rotset):
        if res.name3() in utils.N_chis:
            n_chis = utils.N_chis[res.name3()]
        else:
            n_chis = _count_heavy_chis(res)

        if res.nchi() == n_chis or i == 0:
            kept_rotamers.append(res)
            continue

        ads = []  # largest atom-atom distance between heavyatoms of RES and all parsed residues
        for res_u in kept_rotamers:
            if res.name3() != res_u.name3():
                continue
            ads.append(max([(res.xyz(n+1) - res_u.xyz(n+1)).norm() for n in range(res.natoms()) if res.atom_type(n+1).element() != "H"]))
            if ads[-1] < 0.02:
                break

        if len(ads) == 0 or min(ads) >= 0.02:
            kept_rotamers.append(res)

    return kept_rotamers
481 |
482 |
def calculate_samplings(chi_value, std, sampling_level):
    """
    Returns the chi values to sample around `chi_value`, in ascending order of
    the applied offset, for a given sampling level:
    0 Default original dihedral only; same as using no flag at all
    1 +/- one standard deviation (sd); 3 samples
    2 +/- 0.5 sd; 3 samples
    3 +/- 1 & 2 sd; 5 samples
    4 +/- 0.5 & 1 sd; 5 samples
    5 +/- 0.5, 1, 1.5 & 2 sd; 9 samples
    6 +/- 0.33, 0.67, 1 sd; 7 samples
    7 +/- 0.25, 0.5, 0.75, 1, 1.25 & 1.5 sd; 13 samples.

    Arguments:
        chi_value (float): original dihedral
        std (float): standard deviation of that dihedral
        sampling_level (int): 0..7
    Exits (sys.exit) on any other sampling level.
    """
    # Multiples of the standard deviation applied on either side of chi_value.
    # Level 6 used to return chi_value*std as its two outermost samples instead of
    # chi_value -/+ 1 sd.
    sd_multiples = {
        0: (),
        1: (1,),
        2: (0.5,),
        3: (1, 2),
        4: (0.5, 1),
        5: (0.5, 1, 1.5, 2),
        6: (0.333, 0.667, 1),
        7: (0.25, 0.5, 0.75, 1, 1.25, 1.5),
    }
    if sampling_level not in sd_multiples:
        sys.exit(f"Invalid sampling level: {sampling_level}")

    multiples = sd_multiples[sampling_level]
    below = [chi_value - m*std for m in reversed(multiples)]
    above = [chi_value + m*std for m in multiples]
    samples = below + [chi_value] + above
    return samples
519 |
520 |
521 | """
522 | Functions used during inverse rotamer assembly generation
523 | """
def identify_cst_atoms_for_res(res, cst_no, catres_resno, _res_pose, cst_atompair_sets, motifs, ligands):
    """
    Find the constraint (CST) atoms of a catalytic residue and of the ligand it is constrained to.

    The residue pairs in `cst_atompair_sets` are scanned in order and the first
    pair that contains both an entry for `res` and a usable ligand entry is used.

    Parameters
    ----------
    res : pyrosetta.rosetta.core.conformation.Residue or pyrosetta.rosetta.core.pose.Pose
        Catalytic residue, or a motif pose. For a pose, the residue at
        motifs[cst_no]['resno'] identifies the entry and motifs[cst_no]['atoms']
        is reported as its atoms.
    cst_no : int
        Number of the CST block; entries for `res` are keyed "<residue name>-<cst_no>".
    catres_resno : int
        Key under which the atoms of `res` are stored in the returned dict.
    _res_pose : pyrosetta.rosetta.core.pose.Pose
        Only its size() is used: ligand keys in the result are size() + ligand number.
    cst_atompair_sets : iterable of iterables of dict
        Each dict maps "<name>-<number>" keys to the atoms of that residue.
    motifs : dict
        Read only when `res` is a Pose (see above).
    ligands : list
        Ligand residues; matched by name3() and by their 1-based position in this list.

    Returns
    -------
    dict or None
        {catres_resno: atoms of `res`, ligand key: atoms of the ligand}, or None
        when no residue pair matches (including when `cst_atompair_sets` is empty).
    """
    ligand_names = [lig.name3() for lig in ligands]
    suffix = f"-{cst_no}"

    for subcst in cst_atompair_sets:
        for respair in subcst:
            # residue 1
            if isinstance(res, pyrosetta.rosetta.core.conformation.Residue) and f"{res.name()}-{cst_no}" in respair:
                this_res = {catres_resno: respair[f"{res.name()}-{cst_no}"]}
            elif isinstance(res, pyrosetta.rosetta.core.pose.Pose) and f"{res.residue(motifs[cst_no]['resno']).name()}-{cst_no}" in respair:
                this_res = {catres_resno: motifs[cst_no]["atoms"]}
            else:
                continue

            # residue 2 (that residue 1 is constrained to)
            if cst_no == 1:
                target = {_res_pose.size()+1: respair[ligands[0].name3() + suffix]}
            else:
                partners = [rn for rn in respair if suffix not in rn]
                # target must be a ligand
                if not any(rn.split("-")[0] in ligand_names for rn in partners):
                    continue
                if len(partners) != 1:
                    continue
                # The number after the dash selects the ligand by its 1-based position.
                # It is resolved once, so a match on a ligand other than the last one no
                # longer fails, and an out-of-range number skips this pair instead of
                # leaking a bare list into the result.
                ligand_no = int(partners[0].split("-")[1])
                if not 1 <= ligand_no <= len(ligands):
                    continue
                target = {_res_pose.size()+ligand_no: respair[partners[0]]}

            # No validation is done whether correct CST atoms are used for this particular residue
            # i.e. situation where a variable CST is used with different sets of atoms from the same residue
            catres_cst_atoms = dict(this_res)
            catres_cst_atoms.update(target)
            return catres_cst_atoms

    return None
572 |
573 |
def check_clash(pose, catres_resnos, cutoff=1.7, ignore_respairs=None, cst_atoms=None, tip_atom=False, debug=False):
    """
    Report whether any pair of residues in the pose has clashing atoms.

    Residue pairs are skipped when they are listed together in `ignore_respairs`,
    are on the same chain, are bonded to each other, have nbr_atoms more than
    10 angstrom apart, or when either residue is virtual.
    Virtual atoms and atoms on the per-residue ignore list are never compared.

    NOTE: the `cutoff` argument is not consulted. The distance threshold is
    1.8 angstrom when both atoms are heavyatoms and 1.5 angstrom otherwise.
    A contact is not a clash when the atom of the first (lower-numbered) residue
    is N or O and the atom of the second residue is H; the mirrored case is
    not exempted.

    Parameters
    ----------
    pose : pyrosetta.rosetta.core.pose.Pose
    catres_resnos : container of int
        Residue numbers whose backbone-ish atoms are ignored when `tip_atom` is True.
    cutoff : float, optional
        Accepted for interface compatibility; see the note above.
    ignore_respairs : iterable of containers, optional
        Residue number pairs that are never checked against each other.
    cst_atoms : dict, optional
        {resno1: (a1, a2, a3), resno2: (a1, a2, a3)}; these atoms are ignored.
    tip_atom : bool, optional
        If True, CA, CB, C, N, O and their attached hydrogens are ignored on
        non-ligand residues listed in `catres_resnos` (except GLY, PRO and ALA).
    debug : bool, optional
        If True, the first clashing atom pair is printed.

    Returns
    -------
    bool
        True as soon as one clash is found, otherwise False.
    """
    backbone_like = ("CA", "CB", "C", "N", "O")
    no_tip_residues = ("GLY", "PRO", "ALA")

    for resno_a, resno_b in itertools.combinations(range(1, pose.size()+1), 2):
        res_a = pose.residue(resno_a)
        res_b = pose.residue(resno_b)
        pair = (res_a, res_b)

        # Atom indices that are excluded from the comparison, per residue
        skipped = {res_a.seqpos(): [], res_b.seqpos(): []}
        if tip_atom is True:
            # Ignoring any of the backbone-ish atoms
            for rsd in pair:
                if rsd.is_ligand() or rsd.seqpos() not in catres_resnos:
                    continue
                if rsd.name3() in no_tip_residues:
                    continue
                for atom_name in backbone_like:
                    atom_idx = rsd.atom_index(atom_name)
                    skipped[rsd.seqpos()].append(atom_idx)
                    first_h = rsd.attached_H_begin(atom_idx)
                    if first_h == 0:
                        continue
                    skipped[rsd.seqpos()].extend(range(first_h, rsd.attached_H_end(atom_idx)+1))
        if cst_atoms is not None:
            for rsd in pair:
                if rsd.seqpos() in cst_atoms:
                    skipped[rsd.seqpos()].extend(rsd.atom_index(a.strip()) for a in cst_atoms[rsd.seqpos()])

        # Going through a bunch of conditions that would allow us to skip
        # checking clashes in a given pair of residues
        if ignore_respairs is not None and any(res_a.seqpos() in p and res_b.seqpos() in p for p in ignore_respairs):
            continue
        if res_a.chain() == res_b.chain():
            continue
        if res_a.seqpos() == res_b.seqpos():
            continue
        if res_a.is_bonded(res_b):
            continue
        if (res_a.nbr_atom_xyz() - res_b.nbr_atom_xyz()).norm() > 10.0:
            continue
        if res_a.is_virtual_residue() or res_b.is_virtual_residue():
            continue

        for atm_a in range(1, res_a.natoms()+1):
            if res_a.is_virtual(atm_a) or atm_a in skipped[res_a.seqpos()]:
                continue
            for atm_b in range(1, res_b.natoms()+1):
                if res_b.is_virtual(atm_b) or atm_b in skipped[res_b.seqpos()]:
                    continue

                both_heavy = res_a.atom_type(atm_a).is_heavyatom() and res_b.atom_type(atm_b).is_heavyatom()
                threshold = 1.8 if both_heavy else 1.5
                separation = (res_a.xyz(atm_a) - res_b.xyz(atm_b)).norm()
                if separation < threshold:
                    # H-bonds are not clashes
                    if res_a.atom_type(atm_a).element() in "NO" and res_b.atom_type(atm_b).element() == "H":
                        continue
                    if debug:
                        print(f"Clashing atoms: {res_a.name()}-{res_a.seqpos()}-{res_a.atom_name(atm_a)} -- {res_b.name()}-{res_b.seqpos()}-{res_b.atom_name(atm_b)}: {separation}")
                    return True
    return False
653 |
654 |
def adjust_bb(pose, resno, phi, psi):
    """
    Set the backbone torsions of residue `resno` of `pose` in place.

    phi and psi are set to the given values; omega is always set to 180.0.
    """
    fixed_omega = 180.0
    pose.set_phi(resno, phi)
    pose.set_psi(resno, psi)
    pose.set_omega(resno, fixed_omega)
659 |
660 |
def extend_SS(pose, ref_seqpos, secstruct, AAA, nres_Nterm=4, nres_Cterm=5):
    """
    Extends the stubs around a given residue in a pose by a number of residues on N-term and C-term side.
    The backbone torsions of the affected residues are taken from
    utils.idealized_SS_phi_psi[secstruct] (idealized Helix or Strand).

    Parameters
    ----------
    pose : pyrosetta.rosetta.core.pose.Pose
        Input pose. It is cloned first and all edits are applied to the clone.
    ref_seqpos : int
        Sequence position of the residue around which the new residues are added.
    secstruct : str
        "E" or "H". Used as the key into utils.idealized_SS_phi_psi.
    AAA : pyrosetta.rosetta.core.pose.Pose
        pose object with 3 alanines. Its residue 2 is the template for every added residue.
    nres_Nterm : int, optional
        How many residues are added to N terminus. The default is 4.
    nres_Cterm : int, optional
        How many residues are added to C terminus. The default is 5.

    Returns
    -------
    pose2 : pyrosetta.rosetta.core.pose.Pose
        The extended clone of `pose`.

    """
    # assert nres_Nterm >= 2, "Too short N-term extension"
    # assert nres_Cterm >= 2, "Too short C-term extension"
    pose2 = pose.clone()

    # C-terminal side: each new residue goes after position ref_seqpos+n, then the
    # torsions of the residue at ref_seqpos+n (the one just before it) are set.
    for n in range(nres_Cterm):
        pose2.append_polymer_residue_after_seqpos(AAA.residue(2), ref_seqpos+n, True)
        adjust_bb(pose2, ref_seqpos+n, phi=utils.idealized_SS_phi_psi[secstruct]["phi"][0], psi=utils.idealized_SS_phi_psi[secstruct]["psi"][0])

    if nres_Cterm > 0:
        # The loop above never touches the last appended residue; here the torsions
        # of the last residue of the pose are set.
        # NOTE(review): pose2.size() is the last residue of the whole pose - confirm this
        # is the appended residue when the pose contains further chains (e.g. ligands).
        adjust_bb(pose2, pose2.size(), phi=utils.idealized_SS_phi_psi[secstruct]["phi"][0], psi=utils.idealized_SS_phi_psi[secstruct]["psi"][0])
    else:
        # If no C-term stub included then adding temporarily one
        # (it is removed again at the end of this function)
        pose2.append_polymer_residue_after_seqpos(AAA.residue(2), ref_seqpos, True)

    # N-terminal side: every new residue is inserted before position ref_seqpos,
    # which shifts the previously inserted residue to ref_seqpos+1.
    for n in range(nres_Nterm):
        pose2.prepend_polymer_residue_before_seqpos(AAA.residue(2), ref_seqpos, True)
        if n == 0:
            # Building foldtree to have a center point at the reference residue
            # Done once, right after the first N-terminal residue has been added:
            # two edges run from position ref_seqpos+2 to the begin and the end of its chain.
            ft = pyrosetta.rosetta.core.kinematics.FoldTree()
            ft.add_edge(ref_seqpos+2, pose2.chain_begin(pose2.chain(ref_seqpos)), -1)
            ft.add_edge(ref_seqpos+2, pose2.chain_end(pose2.chain(ref_seqpos)), -1)
            for j in range(1, pose2.num_chains()+1):
                if j == pose2.chain(ref_seqpos):
                    continue
                else:  # adding foldtree edges for other chains
                    # the edge of the first residue of chain j is copied from the current fold tree
                    ft.add_edge(pose2.fold_tree().get_residue_edge(pose2.chain_begin(j)))
            pose2.fold_tree().clear()
            pose2.fold_tree(ft)
        # Torsions of the residue that now sits right after the newly inserted one
        adjust_bb(pose2, ref_seqpos+1, phi=utils.idealized_SS_phi_psi[secstruct]["phi"][0], psi=utils.idealized_SS_phi_psi[secstruct]["psi"][0])

    # Torsions of the residue at ref_seqpos itself (the last inserted residue when nres_Nterm > 0)
    adjust_bb(pose2, ref_seqpos, phi=utils.idealized_SS_phi_psi[secstruct]["phi"][0], psi=utils.idealized_SS_phi_psi[secstruct]["psi"][0])

    if nres_Cterm == 0:
        # Dropping the temporary C-term residue again.
        # NOTE(review): this deletes the last residue of the pose - confirm it is the
        # temporary residue when other chains follow the extended one.
        pose2.delete_residue_slow(pose2.size())

    return pose2
722 |
723 |
def create_remark_lines(pose, catalytic_residues, cst_io):
    """
    Build the "REMARK 666 MATCH TEMPLATE ... MATCH MOTIF ..." lines for a pose.

    For every catalytic residue the constraint blocks of its CST number are scanned
    to find the first variable block `m` whose sampled distance/angle/torsion values
    match the geometry measured in the pose. One remark line is created per
    residue for which such a block is found.

    Parameters
    ----------
    pose : pyrosetta.rosetta.core.pose.Pose
        Pose containing the catalytic residues and the ligand(s).
    catalytic_residues : dict
        {CST number j: residue number in `pose`}. The entry with j == 0 is skipped
        when it points to a ligand.
    cst_io : object
        Provides mcfi_list(j) for each CST number.
        Presumably the parsed Rosetta CST file - TODO confirm against the caller.

    Returns
    -------
    remarks : list of str
        The remark lines, in the iteration order of `catalytic_residues`.
    """
    ## Adding REMARK 666 lines to the PDB's
    ## This is actually quite arduous since we need to figure out which variable CST block a particular residue came from

    pdb_info = pyrosetta.rosetta.core.pose.PDBInfo(pose)  # can this be added to pose somehow?

    ligands = [r for r in pose.residues if r.is_ligand()]

    # The first three letters of each CST parameter name select how it is measured
    calculators = {"dis": utils.get_dist, "ang": utils.get_angle, "tor": utils.get_dihedral}
    remarks = []
    for j, resno in catalytic_residues.items():
        if pose.residue(resno).is_ligand() and j == 0:
            continue
        rmrk = None

        # m runs over the variable CST blocks of CST number j (1-based)
        for m in range(1, cst_io.mcfi_list(j).num_mcfis()+1):
            _mcfi = cst_io.mcfi_list(j).mcfi(m)

            # Which CST number the downstream residue of this block belongs to.
            # Stays 0 unless a "SECONDARY_MATCH: UPSTREAM_CST <n>" algorithm input names another one.
            downstream_res_cst = 0
            if _mcfi.algorithm_inputs().__contains__("match"):
                if any(["DOWNSTREAM" in ai for ai in _mcfi.algorithm_inputs()["match"]]):
                    downstream_res_cst = 0  # I think this is always 1, right?
                elif any(["UPSTREAM_CST" in ai for ai in _mcfi.algorithm_inputs()["match"]]):
                    for ai in _mcfi.algorithm_inputs()["match"]:
                        if "SECONDARY_MATCH:" in ai and "UPSTREAM_CST" in ai:
                            # third whitespace-separated token is the CST number
                            downstream_res_cst = int(ai.split()[2])
                            break
            # Residues in the final pose
            DS_RES = pose.residue(catalytic_residues[downstream_res_cst])
            US_RES = pose.residue(resno)

            good_cst_found = False
            # All (downstream, upstream) residue type combinations allowed by this block
            rt_combs = itertools.product(_mcfi.allowed_restypes(_mcfi.downstream_res()), _mcfi.allowed_restypes(_mcfi.upstream_res()))
            for (ds_res, us_res) in rt_combs:
                # Only the part of the residue name before the first ":" is compared
                if US_RES.name().split(":")[0] != us_res.name():  # skipping the wrong residue types
                    continue
                # Names of the three template atoms on each side.
                # presumably [1] picks the first entry of a 1-indexed container - TODO confirm
                ais_ds = [ds_res.atom_name(_mcfi.template_atom_inds(_mcfi.downstream_res(), ai, ds_res)[1]) for ai in range(1, 4)]
                ais_us = [us_res.atom_name(_mcfi.template_atom_inds(_mcfi.upstream_res(), ai, us_res)[1]) for ai in range(1, 4)]

                # Coordinates that define each of the six CST parameters
                cst_atomsets = {'dis_U1D1': [DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0])],
                                'ang_U1D2': [DS_RES.xyz(ais_ds[1]), DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0])],
                                'ang_U2D1': [DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0]), US_RES.xyz(ais_us[1])],
                                'tor_U1D3': [DS_RES.xyz(ais_ds[2]), DS_RES.xyz(ais_ds[1]), DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0])],
                                'tor_U2D2': [DS_RES.xyz(ais_ds[1]), DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0]), US_RES.xyz(ais_us[1])],
                                'tor_U3D1': [DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0]), US_RES.xyz(ais_us[1]), US_RES.xyz(ais_us[2])]}
                cst_atomsets = {k: np.array(v) for k,v in cst_atomsets.items()}

                # Measuring whether a particular respair geometrically matches the CST
                good_cst_found = False
                for cs in _mcfi.constraints():
                    passed_cst = []
                    for cst_par in cst_atomsets.keys():
                        cst_samples = getattr(cs, cst_par).create_sample_vector()
                        val = calculators[cst_par[:3]](*cst_atomsets[cst_par])
                        # negative measured values are shifted up by 360 (applied to every parameter kind)
                        if val < 0.0:
                            val = 360.0 + val
                        # is any of the sampled values very close to the measured value?
                        # tolerance: 0.1 for the distance, 1.0 for angles and torsions
                        if "dis" in cst_par:
                            passed_cst.append( any([abs(val-x) < 0.1 for x in cst_samples]) )
                        else:
                            passed_cst.append( any([abs(val-x) < 1.0 for x in cst_samples]) )
                    # all six parameters must match for this constraint to count
                    if all(passed_cst):
                        good_cst_found = True
                        break
                if good_cst_found:
                    break
            if good_cst_found:
                # if there's only one ligand then it will be stored as chain X residue 0
                if len(ligands) == 1 and DS_RES.name3() == ligands[0].name3():
                    rmrk = f"REMARK 666 MATCH TEMPLATE X {DS_RES.name3()}"\
                           f" 0 MATCH MOTIF {pdb_info.chain(resno)} "\
                           f"{US_RES.name3()} {resno:>4} {j} {m} "
                else:
                    rmrk = f"REMARK 666 MATCH TEMPLATE {pdb_info.chain(DS_RES.seqpos())} {DS_RES.name3()}"\
                           f" {DS_RES.seqpos():>4} MATCH MOTIF {pdb_info.chain(resno)} "\
                           f"{US_RES.name3()} {resno:>4} {j} {m} "
                remarks.append(rmrk)
                # the first matching block m wins; move on to the next catalytic residue
                break
    return remarks
803 |
804 |
--------------------------------------------------------------------------------