├── utils ├── __pycache__ │ ├── util.cpython-37.pyc │ ├── util.cpython-38.pyc │ ├── util.cpython-39.pyc │ ├── util.cpython-310.pyc │ ├── parsers.cpython-310.pyc │ ├── parsers.cpython-37.pyc │ ├── parsers.cpython-38.pyc │ ├── parsers.cpython-39.pyc │ ├── align_pdbs.cpython-310.pyc │ ├── align_pdbs.cpython-37.pyc │ ├── align_pdbs.cpython-38.pyc │ ├── align_pdbs.cpython-39.pyc │ ├── kinematics.cpython-310.pyc │ ├── kinematics.cpython-37.pyc │ ├── kinematics.cpython-38.pyc │ ├── kinematics.cpython-39.pyc │ ├── kabsch_align.cpython-37.pyc │ ├── dunbrack_rotlib.cpython-310.pyc │ ├── dunbrack_rotlib.cpython-37.pyc │ ├── dunbrack_rotlib.cpython-38.pyc │ └── dunbrack_rotlib.cpython-39.pyc ├── kabsch_align.py ├── dunbrack_rotlib.py ├── align_pdbs.py └── util.py ├── examples ├── P450 │ ├── command │ └── inputs │ │ ├── HBA_CYS_P450_nosample.cst │ │ ├── P450_motif.pdb │ │ └── HBA_unique.params └── Kemp_eliminase │ ├── command │ └── inputs │ ├── BIO.params │ └── BIO_His_ED_oxy_nosample.cst ├── utils.py ├── README.md ├── invrotzyme.py └── protocol.py /utils/__pycache__/util.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/util.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/util.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/util.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/util.cpython-310.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/util.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/parsers.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/parsers.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/parsers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/parsers.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/parsers.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/parsers.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/parsers.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/parsers.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/align_pdbs.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/align_pdbs.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/align_pdbs.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/align_pdbs.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/align_pdbs.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/align_pdbs.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/align_pdbs.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/align_pdbs.cpython-39.pyc -------------------------------------------------------------------------------- /utils/__pycache__/kinematics.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kinematics.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/kinematics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kinematics.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/kinematics.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kinematics.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/kinematics.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kinematics.cpython-39.pyc 
-------------------------------------------------------------------------------- /utils/__pycache__/kabsch_align.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/kabsch_align.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/dunbrack_rotlib.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/dunbrack_rotlib.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/dunbrack_rotlib.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/dunbrack_rotlib.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/dunbrack_rotlib.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/dunbrack_rotlib.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/dunbrack_rotlib.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikalvet/invrotzyme/HEAD/utils/__pycache__/dunbrack_rotlib.cpython-39.pyc -------------------------------------------------------------------------------- /examples/P450/command: -------------------------------------------------------------------------------- 1 | python ../../invrotzyme.py --cstfile inputs/HBA_CYS_P450_nosample.cst --params inputs/HBA_unique.params --motif_for_cst 1:3:inputs/P450_motif.pdb --frac_random_rotamers 0.1 --prefix outputs/ 2 | 
-------------------------------------------------------------------------------- /examples/Kemp_eliminase/command: -------------------------------------------------------------------------------- 1 | python ../../invrotzyme.py --cstfile inputs/BIO_His_ED_oxy_nosample.cst --params inputs/BIO.params --dunbrack_prob 0.6 --frac_random_rotamers_per_cst 0.5 0.5 0.5 0.5 --secstruct_per_cst H H E --prefix outputs/ --suffix HHE 2 | -------------------------------------------------------------------------------- /examples/P450/inputs/HBA_CYS_P450_nosample.cst: -------------------------------------------------------------------------------- 1 | # cst constraint descriptor for ferryl intermediate C-H abstraction TS from methoxybiphenyl 2 | # CYS coordinating to the Heme Fe based on P450 geometry 3 | # I. Kalvet, Baker lab, UW, ikalvet@uw.edu 4 | 5 | 6 | #block 1 for CYS coordinated to Fe 7 | 8 | CST::BEGIN 9 | 10 | TEMPLATE:: ATOM_MAP: 1 atom_name: FE1 N4 C19 11 | TEMPLATE:: ATOM_MAP: 1 residue3: HBA 12 | 13 | TEMPLATE:: ATOM_MAP: 2 atom_type: SH1 14 | TEMPLATE:: ATOM_MAP: 2 residue3: CYS 15 | 16 | CONSTRAINT:: distanceAB: 2.5 0.15 100. 1 0 17 | CONSTRAINT:: angle_A: 85.9 5.0 100.0 360. 0 18 | CONSTRAINT:: angle_B: 111.0 5.0 75.0 360. 0 19 | CONSTRAINT:: torsion_A: 84.5 5.0 75.0 360. 0 20 | CONSTRAINT:: torsion_AB: 108.0 15.0 0.0 360. 0 21 | CONSTRAINT:: torsion_B: 82.4 20.0 25.0 360. 
0 22 | 23 | ALGORITHM_INFO:: match 24 | MAX_DUNBRACK_ENERGY 5.0 25 | IGNORE_UPSTREAM_PROTON_CHI 26 | ALGORITHM_INFO::END 27 | 28 | CST::END 29 | 30 | 31 | -------------------------------------------------------------------------------- /examples/Kemp_eliminase/inputs/BIO.params: -------------------------------------------------------------------------------- 1 | NAME BIO 2 | IO_STRING BIO Z 3 | TYPE LIGAND 4 | AA UNK 5 | ATOM C1 aroC X -0.01 6 | ATOM C6 aroC X -0.03 7 | ATOM C5 aroC X 0.13 8 | ATOM N2 Npro X 0.06 9 | ATOM O3 ONH2 X -0.13 10 | ATOM O2 ONH2 X -0.13 11 | ATOM C4 aroC X -0.02 12 | ATOM C3 aroC X 0.04 13 | ATOM C7 aroC X 0.08 14 | ATOM N1 Nhis X -0.09 15 | ATOM O1 ONH2 X -1.05 16 | ATOM C2 aroC X 0.17 17 | ATOM H4 Haro X 0.09 18 | ATOM H2 Haro X 0.06 19 | ATOM H3 Haro X 0.06 20 | ATOM H1 Haro X 0.07 21 | BOND_TYPE O3 N2 2 22 | BOND_TYPE N2 O2 2 23 | BOND_TYPE N2 C5 1 24 | BOND_TYPE H2 C4 1 25 | BOND_TYPE C4 C5 4 26 | BOND_TYPE C4 C3 4 27 | BOND_TYPE C5 C6 4 28 | BOND_TYPE H4 C7 1 29 | BOND_TYPE C3 C7 4 30 | BOND_TYPE C3 C2 4 31 | BOND_TYPE C6 H3 1 32 | BOND_TYPE C6 C1 4 33 | BOND_TYPE C7 N1 4 34 | BOND_TYPE C2 C1 4 35 | BOND_TYPE C2 O1 4 36 | BOND_TYPE C1 H1 1 37 | BOND_TYPE N1 O1 4 38 | CHI 1 C6 C5 N2 O3 39 | NBR_ATOM C4 40 | NBR_RADIUS 4.083104 41 | ICOOR_INTERNAL C1 0.000000 0.000000 0.000000 C1 C6 C5 42 | ICOOR_INTERNAL C6 0.000000 180.000000 1.382716 C1 C6 C5 43 | ICOOR_INTERNAL C5 0.000000 59.182789 1.409222 C6 C1 C5 44 | ICOOR_INTERNAL N2 -179.998004 61.281665 1.447079 C5 C6 C1 45 | ICOOR_INTERNAL O3 -179.984056 61.433633 1.236799 N2 C5 C6 46 | ICOOR_INTERNAL O2 179.986424 61.557617 1.239696 N2 C5 O3 47 | ICOOR_INTERNAL C4 179.889661 57.795737 1.396329 C5 C6 N2 48 | ICOOR_INTERNAL C3 0.095219 63.144774 1.385375 C4 C5 C6 49 | ICOOR_INTERNAL C7 -179.747781 48.470187 1.449305 C3 C4 C5 50 | ICOOR_INTERNAL N1 179.814748 63.412331 1.251719 C7 C3 C4 51 | ICOOR_INTERNAL O1 0.121877 78.864331 1.794487 N1 C7 C3 52 | ICOOR_INTERNAL C2 -0.218402 78.350293 
# number of chis, excluding proton-chis
N_chis = {'ALA': 0, 'ARG': 4, 'TRP': 2, 'GLY': 0, 'ASP': 2, 'HIS': 2, 'GLU': 3,
          'GLN': 3, 'ASN': 2, 'LEU': 2, 'ILE': 2, 'THR': 1, 'VAL': 1, 'SER': 1,
          'MET': 3, 'CYS': 1, 'PRO': 3, 'LYS': 4, 'PHE': 2, 'TYR': 2, "CYX": 1}


# PHI and PSI values for ideal backbone, and tolerances for randomization
# Each entry is (ideal value, tolerance), in degrees.
idealized_SS_phi_psi = {"H": {"phi": (-57.0, 10.0), "psi": (-47.0, 10.0)},
                        "E": {"phi": (-140.0, 20.0), "psi": (130.0, 20.0)},
                        "-": {"phi": (-140.0, 20.0), "psi": (130.0, 20.0)}}


def get_dist(a, b):
    """
    Returns the Euclidean distance between points a and b.

    a, b (np.array) :: coordinates; they must support elementwise
                       subtraction (plain lists do not).
    """
    return np.linalg.norm(a-b)


def get_angle(a1, a2, a3):
    """
    Calculates the angle a1-a2-a3, i.e. the angle at vertex a2.

    a1, a2, a3 (array-like) :: X, Y and Z of each point
    Output is in degrees, rounded to one decimal place.
    NaN is returned when a1 or a3 coincides with a2 (zero-length arm).
    """
    a1 = np.array(a1)
    a2 = np.array(a2)
    a3 = np.array(a3)

    ba = a1 - a2
    bc = a3 - a2

    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    # Rounding can push the cosine of (anti)parallel vectors marginally
    # outside [-1, 1], where arccos yields NaN instead of 0 / 180 degrees.
    # NaN from a zero-length arm passes through clip unchanged.
    cosine_angle = np.clip(cosine_angle, -1.0, 1.0)
    angle = np.arccos(cosine_angle)

    return round(np.degrees(angle), 1)


def get_dihedral(a1, a2, a3, a4):
    """
    a1, a2, a3, a4 (array-like)
    Each has to contain 3 numbers corresponding to X, Y and Z of an atom.
    Solution by 'Praxeolitic' from Stackoverflow:
    https://stackoverflow.com/questions/20305272/dihedral-torsion-angle-from-four-points-in-cartesian-coordinates-in-python#
    1 sqrt, 1 cross product
    Calculates the dihedral/torsion between atoms a1, a2, a3 and a4
    Output is in degrees
    """
    # Accept lists/tuples as get_angle does; the inputs are never modified.
    a1, a2, a3, a4 = (np.asarray(p) for p in (a1, a2, a3, a4))

    b0 = a1 - a2
    b1 = a3 - a2
    b2 = a4 - a3

    # normalize b1 so that it does not influence magnitude of vector
    # rejections that come next
    # (out-of-place division: an in-place `/=` raises on integer arrays)
    b1 = b1 / np.linalg.norm(b1)

    # vector rejections
    # v = projection of b0 onto plane perpendicular to b1
    #   = b0 minus component that aligns with b1
    # w = projection of b2 onto plane perpendicular to b1
    #   = b2 minus component that aligns with b1
    v = b0 - np.dot(b0, b1)*b1
    w = b2 - np.dot(b2, b1)*b1

    # angle between v and w in a plane is the torsion angle
    # v and w may not be normalized but that's fine since tan is y/x
    x = np.dot(v, w)
    y = np.dot(np.cross(b1, v), w)
    return np.degrees(np.arctan2(y, x))


def rmsd(geom, target):
    """
    Root-mean-square deviation between two equally shaped coordinate arrays.

    The mean is taken over every array element, so for (N, 3) inputs this is
    the RMS deviation per coordinate (the per-atom RMSD divided by sqrt(3)).
    No superposition is performed.
    """
    return np.sqrt(((geom - target) ** 2).mean())
0 18 | CONSTRAINT:: angle_B: 114.7 5.0 75.0 360. 0 19 | CONSTRAINT:: torsion_A: 180.0 5.0 75.0 360. 0 20 | CONSTRAINT:: torsion_AB: 58.5 45.0 0.0 90. 0 21 | CONSTRAINT:: torsion_B: 180.0 5.0 25.0 360. 0 22 | 23 | CST::END 24 | 25 | ################## CST_2 ( GLU/ASP activating His ) ############### 26 | CST::BEGIN 27 | 28 | TEMPLATE:: ATOM_MAP: 1 atom_type: Ntrp 29 | TEMPLATE:: ATOM_MAP: 1 residue3: HIS 30 | 31 | TEMPLATE:: ATOM_MAP: 2 atom_type: OOC 32 | TEMPLATE:: ATOM_MAP: 2 residue1: ED 33 | 34 | 35 | CONSTRAINT:: distanceAB: 2.62 0.2 100. 1 0 36 | CONSTRAINT:: angle_A: 126.0 15.0 50.0 360. 0 37 | CONSTRAINT:: angle_B: 106.5 25.0 50.0 180. 0 38 | CONSTRAINT:: torsion_A: 0.0 25.0 50.0 180. 0 39 | CONSTRAINT:: torsion_AB: 90.0 10.0 0.0 180. 0 40 | CONSTRAINT:: torsion_B: 180.0 60.0 25.0 360. 0 41 | 42 | ALGORITHM_INFO:: match 43 | SECONDARY_MATCH: UPSTREAM_CST 1 44 | ALGORITHM_INFO::END 45 | 46 | CST::END 47 | 48 | 49 | 50 | ################## CST_3 ( oxyanion hole ) ############### 51 | ############ either SER/THR or TYR or ASN/GLN ########## 52 | VARIABLE_CST::BEGIN 53 | 54 | CST::BEGIN 55 | TEMPLATE:: ATOM_MAP: 1 atom_name: O1 N1 C7 56 | TEMPLATE:: ATOM_MAP: 1 residue3: BIO 57 | 58 | TEMPLATE:: ATOM_MAP: 2 atom_type: OH 59 | TEMPLATE:: ATOM_MAP: 2 residue1: ST 60 | 61 | CONSTRAINT:: distanceAB: 2.81 0.2 80.0 0 0 62 | CONSTRAINT:: angle_A: 150.0 5.0 10.0 360 0 63 | CONSTRAINT:: angle_B: 100.0 5.0 10.0 360 0 64 | CONSTRAINT:: torsion_A: 180.0 10.0 10.0 360 0 65 | CONSTRAINT:: torsion_AB: 71.0 10.0 10.0 90 0 66 | CONSTRAINT:: torsion_B: 180.0 10.0 10.0 120 0 67 | 68 | ALGORITHM_INFO:: match 69 | SECONDARY_MATCH: DOWNSTREAM 70 | ALGORITHM_INFO::END 71 | CST::END 72 | 73 | CST::BEGIN 74 | TEMPLATE:: ATOM_MAP: 1 atom_name: O1 N1 C7 75 | TEMPLATE:: ATOM_MAP: 1 residue3: BIO 76 | 77 | TEMPLATE:: ATOM_MAP: 2 atom_name: OH CZ CE2 78 | TEMPLATE:: ATOM_MAP: 2 residue3: TYR 79 | 80 | CONSTRAINT:: distanceAB: 2.81 0.2 80.0 0 0 81 | CONSTRAINT:: angle_A: 150.0 5.0 10.0 360 0 
# Gyu Rie Lee
# Borrowed kabsch code and modified slightly for superimposition
#
# Use the Kabsch algorithm to align van der Mers with mainchain atoms (or a
# given subset of coordinates): get the transformation from xyz1 and xyz2
# (could be N-CA-C of residues), then use it to align residue+functional group.
# xyz1 is the reference.
# IMPORTANT: the reference coordinates are copied inside the functions because
# the caller keeps reusing them between calls.


def np_kabsch(A, B):
    """
    Numpy version of kabsch algorithm. Superimposes B onto A

    Parameters:
        (A,B) np.array - shape (N,3) arrays of xyz crds of points

    Returns (in this order):
        rms - rmsd between centered A and rotated B; a small epsilon (1e-6)
              is added under the square root, so it is never exactly zero
        rB  - the centered and rotated B coordinates
        R   - rotation matrix to superimpose centered B onto centered A
              (applied as B @ R)

    Raises:
        ValueError - if A and B are not 2-D arrays of identical shape
    """
    A = np.copy(A)
    B = np.copy(B)
    if A.ndim != 2 or A.shape != B.shape:
        raise ValueError(f"np_kabsch needs two (N, ndim) arrays of equal shape, "
                         f"got {A.shape} and {B.shape}")
    n_points, ndim = A.shape

    # move to centroid
    A = A - np.mean(A, axis=0, keepdims=True)
    B = B - np.mean(B, axis=0, keepdims=True)

    # computation of the covariance matrix
    C = np.matmul(A.T, B)

    # compute optimal rotation matrix using SVD
    U, S, Vt = np.linalg.svd(C)

    # ensure right handed coordinate system
    d = np.eye(ndim)
    d[-1, -1] = np.sign(np.linalg.det(Vt.T@U.T))

    # construct rotation matrix
    R = Vt.T@d@U.T

    # get rotated coords
    rB = B@R

    # calculate rmsd: first sum down atoms, then sum down xyz
    eps = 1e-6
    diff = A - rB
    rms = np.sqrt(np.sum(diff*diff, axis=(-2, -1)) / n_points + eps)

    return rms, rB, R


def _kabsch_fit(xyz1_in, xyz2_in):
    """
    Shared Kabsch fit used by kabsch_align_coords and kabsch_rmsd.

    Works on float copies of both inputs, so the caller's arrays are left
    untouched. Returns (RMSD, U, COM1, COM2) where U is the rotation that
    maps centered xyz2 onto centered xyz1 (applied as xyz2 @ U).
    """
    xyz1 = np.array(xyz1_in, dtype=float)
    xyz2 = np.array(xyz2_in, dtype=float)
    # check dimensions
    assert len(xyz1) == len(xyz2)
    L = len(xyz1)
    assert L > 2

    # move both sets of points to their centers of mass (COM)
    COM1 = np.sum(xyz1, axis=0) / float(L)
    COM2 = np.sum(xyz2, axis=0) / float(L)
    xyz1 -= COM1
    xyz2 -= COM2

    # Initial residual, see Kabsch.
    E0 = np.sum(xyz1*xyz1) + np.sum(xyz2*xyz2)

    # SVD of the covariance matrix
    V, S, Wt = np.linalg.svd(np.dot(np.transpose(xyz2), xyz1))

    # check parity of the transformation
    # det(V) * det(Wt) is +/-1 only up to rounding, so test the sign; an exact
    # comparison with -1.0 misses reflections and yields an improper rotation.
    if np.linalg.det(V) * np.linalg.det(Wt) < 0.0:
        S[-1] = -S[-1]
        V[:, -1] = -V[:, -1]

    RMSD = E0 - (2.0 * sum(S))
    RMSD = np.sqrt(abs(RMSD / L))

    # U is simply V*Wt
    U = np.dot(V, Wt)

    return RMSD, U, COM1, COM2


def kabsch_align_coords(xyz1, xyz2_in, mobile_coord):
    """
    Fits xyz2_in onto the reference xyz1 and applies that transformation to
    mobile_coord.

    Parameters:
        xyz1 - reference points, at least 3; not modified
        xyz2_in - points to fit, same length as xyz1; not modified
        mobile_coord - coordinates to transform (typically a superset of
                       xyz2_in in the same frame)

    Returns:
        mobile_coord rotated and translated into the frame of xyz1
    """
    _, U, COM1, COM2 = _kabsch_fit(xyz1, xyz2_in)

    superimposed_coord = np.dot((np.asarray(mobile_coord) - COM2), U)
    superimposed_coord += COM1
    return superimposed_coord


def kabsch_rmsd(xyz1_in, xyz2_in):
    """
    Returns the RMSD between xyz1_in and xyz2_in after optimal superposition
    (proper rotation only, no reflection). Inputs are not modified.
    """
    RMSD, _, _, _ = _kabsch_fit(xyz1_in, xyz2_in)
    return RMSD
# Maps the comparison symbols accepted in filter specs to the rich-comparison
# method that is invoked on the DataFrame column.
comparisons = {'<=': '__le__',
               '<': '__lt__',
               '>': '__gt__',
               '>=': '__ge__',
               '=': '__eq__'}

# (min, max) phi/psi windows, in degrees, per secondary structure letter.
# "L" has no window defined.
chi_psi_SS = {"H": {"phi": (-72.0, -50.0),
                    "psi": (-50.0, -30.0)},
              "E": {"phi": (-161.0, -89.0),
                    "psi": (109.0, 151.0)},
              "L": {"phi": (),
                    "psi": ()},
              "-": {"phi": (-180.0, 180.0),
                    "psi": (-180.0, 180.0)}}


def load_rotamer_df(dunbrack_database):
    """
    Loads a whitespace-separated rotamer library into a DataFrame.
    Arguments:
        dunbrack_database (str or file-like) :: library with 17 columns per
            line: restype, phi, psi, N, r1-r4, prob, chi1-chi4, std1-std4
    Returns:
        pandas.DataFrame with the 17 columns plus chiN_min / chiN_max
        (chiN -/+ stdN) for N = 1..4
    """
    header = ["restype", "phi", "psi", "N", "r1", "r2", "r3", "r4", "prob",
              "chi1", "chi2", "chi3", "chi4", "std1", "std2", "std3", "std4"]
    # raw string: "\s" is an invalid escape sequence in a normal literal
    rotlib = pd.read_csv(dunbrack_database, sep=r"\s+", names=header)
    for n in range(1, 5):
        rotlib[f"chi{n}_min"] = rotlib[f"chi{n}"]-rotlib[f"std{n}"]
        rotlib[f"chi{n}_max"] = rotlib[f"chi{n}"]+rotlib[f"std{n}"]
    return rotlib


def filter_rotlib(scores, filters):
    """
    Applies column filters to a DataFrame and returns the surviving rows.
    Arguments:
        scores (pandas.DataFrame)
        filters (dict) :: {column: [value, sign]} or
                          {column: [[value, sign], [value, sign], ...]}
                          where sign is a key of `comparisons`.
                          Columns missing from `scores` and empty/None specs
                          are ignored.
    """
    filtered_scores = scores.copy()

    for column, spec in filters.items():
        # A None spec means "no filter"; previously this raised TypeError
        # before the per-filter None check could be reached.
        if not spec or column not in scores.keys():
            continue
        conditions = spec if isinstance(spec[0], list) else [spec]
        for fltr in conditions:
            if fltr is None:
                continue
            val = fltr[0]
            sign = comparisons[fltr[1]]
            filtered_scores =\
                filtered_scores.loc[getattr(filtered_scores[column], sign)(val)]
    return filtered_scores


def find_good_rotamers(rotlib, restype, cumulative_prob=1.0, secstruct=None, phi=None, psi=None, keep_only_best=False):
    """
    Arguments:
        rotlib (pandas.DataFrame)
        restype (str) :: name3 of an amino acid in the rotamer library
        cumulative_prob (float) :: cumulative probability up to which rotamers are returned
        secstruct (str, ('H', 'E', '-')) :: secondary structure type for which rotamers are searched.
        phi (tuple, (float, float)) :: min and max phi value for defining a subset of the library
        psi (tuple, (float, float)) :: min and max psi value for defining a subset of the library
        keep_only_best (bool) :: only the first listed rotamer of each phi/psi bin is returned
                                 (the highest probability one if the library is sorted by prob)
    Returns:
        pandas.DataFrame with the selected library rows (fresh 0..n-1 index);
        an empty DataFrame when nothing matches.
    """
    assert isinstance(phi, (tuple, type(None)))
    assert isinstance(psi, (tuple, type(None)))
    assert secstruct in ("H", "E", "-", None), "Not implemented for other secondary structures yet"
    # assert restype not in ["ALA", "GLY"], "No rotamer library for ALA and GLY"
    assert not all([x is None for x in [secstruct, phi]]), "Must provide either secstruct letter OR phi and psi values"
    assert not all([x is None for x in [secstruct, psi]]), "Must provide either secstruct letter OR phi and psi values"

    if secstruct is not None:
        phi_limits = chi_psi_SS[secstruct]["phi"]
        psi_limits = chi_psi_SS[secstruct]["psi"]
    elif phi is not None and psi is not None:
        phi_limits = phi
        psi_limits = psi
    else:
        print("Both phi and psi need to be defined")
        return None

    filters = {'restype': [restype, '='],
               'phi': [[phi_limits[0], '>='], [phi_limits[1], '<=']],
               'psi': [[psi_limits[0], '>='], [psi_limits[1], '<=']]}

    SS_rotlib = filter_rotlib(rotlib, filters)

    # Distinct (phi, psi) bins, read straight from the columns instead of
    # materialising every row with iterrows().
    phi_psi_bins = list(set(zip(SS_rotlib["phi"], SS_rotlib["psi"])))

    selected = []
    for bin_phi, bin_psi in phi_psi_bins:
        _df = SS_rotlib.loc[(SS_rotlib["phi"] == bin_phi) & (SS_rotlib["psi"] == bin_psi)]
        if keep_only_best is True:
            # iloc[[0]] keeps a one-row DataFrame; a bare iloc[0] is a Series,
            # which concat would stack as a column instead of a row.
            _df2 = _df.iloc[[0]]
        elif cumulative_prob == 1.0:
            _df2 = _df
        else:
            _df2 = _df.loc[_df.prob.cumsum() <= cumulative_prob]

            # Also adding the next most probable rotamer that would push the cumulative sum over the cutoff
            # This fixes the issue where no rotamers are returned when the cutoff is lower than the prob of the most likely rotamer
            n_kept = len(_df2)
            if n_kept == 0:
                _df2 = _df.iloc[[0]]
            elif n_kept < len(_df):
                _df2 = pd.concat([_df2, _df.iloc[[n_kept]]])
        selected.append(_df2)

    if not selected:
        return pd.DataFrame()
    # one concat at the end instead of growing the frame inside the loop
    return pd.concat(selected, ignore_index=True)


def find_bb_from_inverse(rotlib, chis):
    """
    Finds the library entries whose chi angles are compatible with `chis`.
    Arguments:
        rotlib (pandas.DataFrame) :: rotamer library with chiN / stdN columns
        chis (list) :: list of chi values; value i is compared to chi(i+1)
    Returns:
        pandas.DataFrame with the rows of rotlib where every given chi lies
        within chiN +/- stdN (all rows if `chis` is empty).
    """
    matches = pd.Series(True, index=rotlib.index, dtype=bool)
    for i, ch in enumerate(chis):
        center = rotlib[f"chi{i+1}"]
        spread = rotlib[f"std{i+1}"]
        matches &= (center - spread <= ch) & (ch <= center + spread)
    # Select rows directly; concatenating each matching row Series onto a
    # DataFrame stacked the fields vertically instead of appending a row.
    return rotlib.loc[matches.values]


def find_bb_from_inverse_loc(rotlib, chis):
    """
    Finds the library entries whose chiN_min..chiN_max windows contain `chis`.
    Arguments:
        rotlib (pandas.DataFrame) :: rotamer library with chiN_min / chiN_max
                                     columns. Preferably for a given amino acid.
        chis (list) :: list of chi values; value i is compared to chi(i+1)
    """
    assert isinstance(rotlib, pd.DataFrame)
    rl = rotlib.copy()
    for i, ch in enumerate(chis):
        rl = rl.loc[(rl[f"chi{i+1}_min"] <= ch) & (rl[f"chi{i+1}_max"] >= ch)]
    return rl
13.42 A O 26 | ATOM 26 CB CYS A 365 -0.499 -37.044 12.396 1.00 10.95 A C 27 | ATOM 27 SG CYS A 365 -1.632 -35.790 12.940 1.00 12.75 A S 28 | ATOM 28 N ALA A 366 -1.739 -38.680 15.149 1.00 12.58 A N 29 | ATOM 29 CA ALA A 366 -2.981 -39.272 15.628 1.00 14.03 A C 30 | ATOM 30 C ALA A 366 -4.183 -38.592 15.020 1.00 15.69 A C 31 | ATOM 31 O ALA A 366 -5.249 -39.210 14.915 1.00 15.05 A O 32 | ATOM 32 CB ALA A 366 -3.101 -39.141 17.134 1.00 13.26 A C 33 | ATOM 33 N GLY A 367 -4.073 -37.328 14.670 1.00 12.82 A N 34 | ATOM 34 CA GLY A 367 -5.151 -36.485 14.210 1.00 14.57 A C 35 | ATOM 35 C GLY A 367 -5.299 -36.322 12.702 1.00 13.52 A C 36 | ATOM 36 O GLY A 367 -5.966 -35.395 12.227 1.00 13.45 A O 37 | ATOM 37 N GLU A 368 -4.747 -37.251 11.929 1.00 14.10 A N 38 | ATOM 38 CA GLU A 368 -4.816 -37.140 10.474 1.00 12.64 A C 39 | ATOM 39 C GLU A 368 -6.252 -37.199 9.966 1.00 15.94 A C 40 | ATOM 40 O GLU A 368 -6.635 -36.418 9.083 1.00 15.41 A O 41 | ATOM 41 CB GLU A 368 -3.961 -38.215 9.828 1.00 15.10 A C 42 | ATOM 42 CG GLU A 368 -3.784 -38.032 8.359 1.00 15.16 A C 43 | ATOM 43 CD GLU A 368 -2.640 -38.795 7.750 1.00 15.22 A C 44 | ATOM 44 OE1 GLU A 368 -2.460 -39.970 8.159 1.00 15.91 A O 45 | ATOM 45 OE2 GLU A 368 -1.967 -38.239 6.860 1.00 16.51 A O1- 46 | ATOM 46 N TRP A 369 -7.044 -38.135 10.472 1.00 16.59 A N 47 | ATOM 47 CA TRP A 369 -8.454 -38.191 10.058 1.00 16.63 A C 48 | ATOM 48 C TRP A 369 -9.248 -36.984 10.533 1.00 15.46 A C 49 | ATOM 49 O TRP A 369 -10.033 -36.427 9.756 1.00 18.41 A O 50 | ATOM 50 CB TRP A 369 -9.036 -39.532 10.485 1.00 18.58 A C 51 | ATOM 51 CG TRP A 369 -8.425 -40.568 9.565 1.00 37.49 A C 52 | ATOM 52 CD1 TRP A 369 -7.501 -41.507 9.903 1.00 40.53 A C 53 | ATOM 53 CD2 TRP A 369 -8.593 -40.683 8.131 1.00 40.81 A C 54 | ATOM 54 CE2 TRP A 369 -7.773 -41.750 7.700 1.00 42.82 A C 55 | ATOM 55 CE3 TRP A 369 -9.366 -40.004 7.180 1.00 41.25 A C 56 | ATOM 56 NE1 TRP A 369 -7.152 -42.253 8.808 1.00 38.35 A N 57 | ATOM 57 CZ2 TRP A 369 -7.710 -42.161 6.367 1.00 
49.47 A C 58 | ATOM 58 CZ3 TRP A 369 -9.304 -40.417 5.854 1.00 46.90 A C 59 | ATOM 59 CH2 TRP A 369 -8.470 -41.477 5.461 1.00 41.19 A C 60 | ATOM 60 N VAL A 370 -8.981 -36.486 11.744 1.00 15.47 A N 61 | ATOM 61 CA VAL A 370 -9.591 -35.231 12.183 1.00 16.31 A C 62 | ATOM 62 C VAL A 370 -9.294 -34.108 11.199 1.00 16.02 A C 63 | ATOM 63 O VAL A 370 -10.169 -33.321 10.823 1.00 16.65 A O 64 | ATOM 64 CB VAL A 370 -9.137 -34.851 13.606 1.00 15.82 A C 65 | ATOM 65 CG1 VAL A 370 -9.382 -33.345 13.933 1.00 17.55 A C 66 | ATOM 66 CG2 VAL A 370 -9.801 -35.759 14.636 1.00 17.86 A C 67 | ATOM 67 N THR A 371 -8.020 -33.997 10.805 1.00 13.79 A N 68 | ATOM 68 CA THR A 371 -7.593 -32.922 9.932 1.00 13.46 A C 69 | ATOM 69 C THR A 371 -8.322 -32.993 8.592 1.00 12.99 A C 70 | ATOM 70 O THR A 371 -8.839 -31.976 8.099 1.00 13.80 A O 71 | ATOM 71 CB THR A 371 -6.089 -32.985 9.710 1.00 13.13 A C 72 | ATOM 72 CG2 THR A 371 -5.608 -31.874 8.840 1.00 14.33 A C 73 | ATOM 73 OG1 THR A 371 -5.358 -32.902 10.943 1.00 13.94 A O 74 | TER 75 | END 76 | -------------------------------------------------------------------------------- /utils/align_pdbs.py: -------------------------------------------------------------------------------- 1 | import os,sys 2 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) 3 | import kabsch_align 4 | import util 5 | import numpy as np 6 | import pyrosetta as pyr 7 | import pyrosetta.rosetta 8 | 9 | 10 | def find_atom_idx(atom, mapping): 11 | for i,A in enumerate(mapping): 12 | try: 13 | if A.strip() == atom: 14 | return i 15 | except AttributeError: 16 | print('This is atom ',A) 17 | 18 | raise KeyError(f'Could not find atom {atom} in mapping {mapping}') 19 | 20 | 21 | def align_pose_to_residue(ref_residue, mobile_pose, ref_atoms): 22 | xyz1, parsed1 = get_xyz_stack_residue(ref_residue, ref_atoms["atoms1"]) 23 | xyz2, parsed2 = get_xyz_stack_pose(mobile_pose, ref_atoms["atoms2"]) 24 | 25 | # run Kabsch to get rotation matrix for atoms and rmsd 26 | # aligns 
xyz2 onto xyz1 27 | rmsd, _, R = kabsch_align.np_kabsch(xyz1, xyz2) 28 | print('RMSD between atoms: ',rmsd) 29 | 30 | # (1) now translate both proteins such that centroid(xyz1/xyz2) is at origin 31 | # (2) rorate xyz2 onto xyz1 with R 32 | # (3) write pdbs into outdir 33 | 34 | def centroid(X): 35 | # return the mean X,Y,Z down the atoms 36 | return np.mean(X, axis=0, keepdims=True) 37 | 38 | # centroid of just the points being aligned 39 | centroid1 = centroid(xyz1) 40 | centroid2 = centroid(xyz2) 41 | 42 | # (1) 43 | #xyz_protein1 = np.copy(parsed1['xyz']) - centroid1 44 | xyz_protein2 = np.copy(parsed2) - centroid2 45 | 46 | # (2) 47 | xyz_protein2 = xyz_protein2 @ R 48 | 49 | # Translate protein 2 to where it aligns with original protein 1 50 | xyz_protein2 += centroid1 51 | 52 | out_pose = mobile_pose.clone() 53 | for resno, res_coords in enumerate(xyz_protein2): 54 | for i, ac in enumerate(res_coords): 55 | if np.isnan(ac[0]): 56 | break 57 | out_pose.residue(resno+1).set_xyz(i+1, pyrosetta.rosetta.numeric.xyzVector_double_t(*ac)) 58 | continue 59 | return out_pose 60 | 61 | 62 | def align_residue_to_residue(ref_residue, mobile_residue, ref_atoms): 63 | xyz1, parsed1 = get_xyz_stack_residue(ref_residue, ref_atoms["atoms1"]) 64 | xyz2, parsed2 = get_xyz_stack_residue(mobile_residue, ref_atoms["atoms2"]) 65 | 66 | # run Kabsch to get rotation matrix for atoms and rmsd 67 | # aligns xyz2 onto xyz1 68 | rmsd, _, R = kabsch_align.np_kabsch(xyz1, xyz2) 69 | if rmsd > 0.1: 70 | print('RMSD between atoms: ',rmsd) 71 | 72 | # (1) now translate both proteins such that centroid(xyz1/xyz2) is at origin 73 | # (2) rorate xyz2 onto xyz1 with R 74 | # (3) write pdbs into outdir 75 | 76 | def centroid(X): 77 | # return the mean X,Y,Z down the atoms 78 | return np.mean(X, axis=0, keepdims=True) 79 | 80 | # centroid of just the points being aligned 81 | centroid1 = centroid(xyz1) 82 | centroid2 = centroid(xyz2) 83 | 84 | # (1) 85 | #xyz_protein1 = np.copy(parsed1['xyz']) - 
centroid1 86 | xyz_protein2 = np.copy(parsed2) - centroid2 87 | 88 | # (2) 89 | xyz_protein2 = xyz_protein2 @ R 90 | 91 | # Translate protein 2 to where it aligns with original protein 1 92 | xyz_protein2 += centroid1 93 | 94 | out_residue = mobile_residue.clone() 95 | 96 | for i, ac in enumerate(xyz_protein2[0]): 97 | if np.isnan(ac[0]): 98 | break 99 | out_residue.set_xyz(i+1, pyrosetta.rosetta.numeric.xyzVector_double_t(*ac)) 100 | continue 101 | return out_residue 102 | 103 | 104 | def get_xyz_stack_residue(residue, atoms_list): 105 | """ 106 | Extracts the xyz crds corresponding to every atom in atoms_list 107 | atoms_list format: [(resno, atomname), (resno, atomname), ...] 108 | """ 109 | if residue.is_ligand() or residue.is_virtual_residue(): 110 | return None, None 111 | 112 | xyz_all = parse_residue_coords(residue) 113 | seq = [util.alpha_1.index(residue.name1())] 114 | xyz_out = [] 115 | 116 | # for each atom, get residue index and atom index 117 | # store crds 118 | for atom in atoms_list: 119 | # get index of residue and its Heavy atom mapping 120 | AA_int = seq[0] 121 | 122 | if residue.is_lower_terminus(): 123 | AA_long_map = util.aa2longH_Nterm[AA_int] 124 | elif residue.is_upper_terminus(): 125 | AA_long_map = util.aa2longH_Cterm[AA_int] 126 | else: 127 | AA_long_map = util.aa2longH[AA_int] 128 | 129 | # get index of atom in residue 130 | atom_idx0 = find_atom_idx(atom.strip(), AA_long_map) 131 | 132 | # crds of this atom 133 | xyz_atom = xyz_all[0, atom_idx0, :] 134 | 135 | xyz_out.append(xyz_atom) 136 | 137 | return np.array(xyz_out), xyz_all 138 | 139 | 140 | def get_xyz_stack_pose(pose, atoms_list): 141 | """ 142 | Extracts the xyz crds corresponding to every atom in atoms_list 143 | atoms_list format: [(resno, atomname), (resno, atomname), ...] 
144 | """ 145 | 146 | xyz_all = parse_pose_coords(pose) 147 | seq = [util.alpha_1.index(r.name1()) for r in pose.residues if not r.is_ligand() and not r.is_virtual_residue()] 148 | xyz_out = [] 149 | 150 | # for each atom, get residue index and atom index 151 | # store crds 152 | for (resn, atom) in atoms_list: 153 | # get index of residue and its Heavy atom mapping 154 | AA_int = seq[resn-1] 155 | if pose.residue(resn).is_lower_terminus(): 156 | AA_long_map = util.aa2longH_Nterm[AA_int] 157 | elif pose.residue(resn).is_upper_terminus(): 158 | AA_long_map = util.aa2longH_Cterm[AA_int] 159 | else: 160 | AA_long_map = util.aa2longH[AA_int] 161 | 162 | # get index of atom in residue 163 | atom_idx0 = find_atom_idx(atom.strip(), AA_long_map) 164 | 165 | # crds of this atom 166 | xyz_atom = xyz_all[resn-1, atom_idx0, :] 167 | 168 | xyz_out.append(xyz_atom) 169 | 170 | return np.array(xyz_out), xyz_all 171 | 172 | 173 | def parse_pose_coords(pose): 174 | res = [r.seqpos() for r in pose.residues if not r.is_ligand() and not r.is_virtual_residue()] 175 | xyz = np.full((len(res), 26, 3), np.nan, dtype=np.float32) 176 | for r in pose.residues: 177 | if r.is_ligand() or r.is_virtual_residue(): 178 | continue 179 | # rc = np.ndarray((res.natoms(), 3), dtype=np.float32) 180 | for n in range(r.natoms()): 181 | try: 182 | xyz[r.seqpos()-1][n] = r.xyz(n+1) 183 | except IndexError: 184 | print(r.name()) 185 | print(r.seqpos()) 186 | print(r.natoms()) 187 | sys.exit(1) 188 | return xyz 189 | 190 | 191 | def parse_residue_coords(residue): 192 | xyz = np.full((1, 26, 3), np.nan, dtype=np.float32) 193 | if residue.is_ligand() or residue.is_virtual_residue(): 194 | return None 195 | # rc = np.ndarray((res.natoms(), 3), dtype=np.float32) 196 | for n in range(residue.natoms()): 197 | xyz[0][n] = residue.xyz(n+1) 198 | return xyz 199 | 200 | 201 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # InvrotZyme 2 | 3 | Script for building inverse rotamer assemblies out of a Rosetta matcher/enzdes constraint file. 4 | 5 | This script will place sidechains according to the constraint file definitions, sample backbone positions, and optionally grow out extended backbone stubs (idealized helix or strand). 6 | This script will perform an exhaustive analysis of all allowed rotamers and CST samplings. 7 | 8 | You can also provide a motif PDB that will serve as a host for a particular constrained catalytic residue. That residue must exist in the PDB file, and only the rotamer will then be used for that residue. 9 | 10 | The purpose of this tool is to find combinations of inverse rotamers that can be placed (on small extended backbones) without clashes. The outputs of this script can subsequently be used as inputs for RFdiffusion All-Atom to create protein backbones that host these active sites. 11 | 12 | 13 | 14 | ## Examples 15 | 16 | A few usage examples are provided in `examples/` 17 | 18 | **Kemp eliminase example:** 19 | Places three catalytic residues around a benzisoxazole substrate. A HIS-GLU/ASP dyad on one side, and a SER/THR/TYR/GLN/ASN H-bond donor on the other side. 20 | `cd examples/Kemp_eliminase ; python ../../invrotzyme.py --cstfile inputs/BIO_His_ED_oxy_nosample.cst --params inputs/BIO.params --dunbrack_prob 0.6 --frac_random_rotamers_per_cst 0.5 0.5 0.5 0.5 --secstruct_per_cst H H E --prefix outputs/ --suffix HHE` 21 | 22 | 23 | **P450 example:** 24 | Places a custom Heme ligand in complex with a substrate against a CYS-containing motif from a cytochrome P450 enzyme. 
25 | `cd examples/P450 ; python ../../invrotzyme.py --cstfile inputs/HBA_CYS_P450_nosample.cst --params inputs/HBA_unique.params --motif_for_cst 1:3:inputs/P450_motif.pdb --frac_random_rotamers 0.1 --prefix outputs/` 26 | 27 | 28 | ## Usage 29 | 30 | First prepare a matcher/enzdes Constraint file according to the standard format outlined in Rosetta documentation:
31 | https://docs.rosettacommons.org/docs/latest/rosetta_basics/file_types/match-cstfile-format 32 | 33 | This script requires all six degrees of freedom to be defined, so you must provide distance, 2 angles, and 3 torsions for each interaction. 34 | 35 | You can then run the script using many of the options below, perhaps taking inspiration from the provided examples. 36 | 37 | ``` 38 | options: 39 | -h, --help show this help message and exit 40 | --cstfile CSTFILE CST file used for matching. Keep sampling to minimum to avoid combinatorial explosion. 41 | --params PARAMS [PARAMS ...] 42 | params files used by ligands and residues 43 | --keep_his_tautomer KEEP_HIS_TAUTOMER 44 | Per cst, should a specific HIS tautomer (HIS or HIS_D) be used. Keeps only one the requested HIS tautomers. Format: 'cst_no:HIS/HIS_D,..' 45 | --dunbrack_prob DUNBRACK_PROB 46 | Cumulative Dunbrack probability of used rotamers for any residue. As used by the -packing:dunbrack_prob_... flag in Rosetta. 47 | --dunbrack_prob_per_cst DUNBRACK_PROB_PER_CST [DUNBRACK_PROB_PER_CST ...] 48 | Cumulative Dunbrack probability of used rotamers for each CST residue. 49 | --N_len N_LEN Number of residues added to the stub N-term 50 | --C_len C_LEN Number of residues added to the stub C-term 51 | --N_len_per_cst N_LEN_PER_CST [N_LEN_PER_CST ...] 52 | Number of residues added to the stub N-term, per CST 53 | --C_len_per_cst C_LEN_PER_CST [C_LEN_PER_CST ...] 54 | Number of residues added to the stub C-term, per CST 55 | --prune_ligand_rotamers PRUNE_LIGAND_ROTAMERS 56 | Prunes the set of used ligand rotamers based on clashcheck, AND rmsd similarity cutoff. 57 | --max_random_rotamers MAX_RANDOM_ROTAMERS 58 | Number of random rotamers picked for each residue for the sampling. Reasonable number would be below 20 for quick sampling. 59 | --max_random_rotamers_per_cst MAX_RANDOM_ROTAMERS_PER_CST [MAX_RANDOM_ROTAMERS_PER_CST ...] 60 | Number of random rotamers picked for each CST block for the sampling. 
First value is for the ligand. 61 | --frac_random_rotamers FRAC_RANDOM_ROTAMERS 62 | Fraction of rotamers that are randomly picked for each residue for the sampling. 63 | --frac_random_rotamers_per_cst FRAC_RANDOM_ROTAMERS_PER_CST [FRAC_RANDOM_ROTAMERS_PER_CST ...] 64 | Fraction of rotamers that are randomly picked for each CST block for the sampling. First value is for the ligand. 65 | --secstruct SECSTRUCT 66 | What secondary structure stub should be generated for each residue. 67 | --secstruct_per_cst SECSTRUCT_PER_CST [SECSTRUCT_PER_CST ...] 68 | Per CST, what secondary structure stub should be generated for reaach residue. 69 | --motif_for_cst MOTIF_FOR_CST [MOTIF_FOR_CST ...] 70 | Per CST, an external motif that should be used, instead of inverse rotamers. Only works for the first CST right now. 71 | Format: cst_no:resno_in_motif:filepath ... 72 | --use_best_rotamer_cstids USE_BEST_ROTAMER_CSTIDS [USE_BEST_ROTAMER_CSTIDS ...] 73 | CST ID's that should only use the best rotamer from each secondary structure bin. Numbering starts from 1. 74 | --extra_chi EXTRA_CHI 75 | Enables extra CHI sampling on a given level for all CST's. Input format: chino:level,chino2:level2 76 | --extra_chi_per_cst EXTRA_CHI_PER_CST [EXTRA_CHI_PER_CST ...] 77 | Enables extra CHI sampling on a given level for specific CST's. Input format: CSTNO1-chino:level,chino2:level2 CSTNO2-chino:level,chino2:level2 78 | Sampling levels: 79 | 0 Default original dihedral only; same as using no flag at all 80 | 1 +/- one standard deviation (sd); 3 samples 81 | 2 +/- 0.5 sd; 3 samples 82 | 3 +/- 1 & 2 sd; 5 samples 83 | 4 +/- 0.5 & 1 sd; 5 samples 84 | 5 +/- 0.5, 1, 1.5 & 2 sd; 9 samples 85 | 6 +/- 0.33, 0.67, 1 sd; 7 samples 86 | 7 +/- 0.25, 0.5, 0.75, 1, 1.25 & 1.5 sd; 13 samples. 
87 | --suffix SUFFIX Suffix to be added to the end of output PDB files 88 | --prefix PREFIX Prefix to be added to the beginning of output PDB files 89 | --tip_atom Inverse rotamers will be pre-selected based on whether the tip atoms are placed geometrically differently. Rotamer diversity is ignored. 90 | --debug Debug mode. Printing more stuff out and running single-threaded 91 | ``` 92 | 93 | The script runs by default on multiple CPU cores using python multiprocessing. When submitted as a Slurm job, it will adjust the number of cores based on the environment variable `SLURM_CPUS_ON_NODE`. 94 | 95 | 96 | ### Best practices 97 | 98 | Keep conformational sampling levels in the CST file to a minimum to avoid combinatorial explosion. Only sample torsions that are expected to lead to different valid assemblies.
99 | 100 | It's possible to limit the sampling by randomly picking rotamers for each residue, and limiting how the sidechain placements are sampled in the CST file.
101 | It's possible to control the length of the generated idealized backbone stub (from zero to ...).
102 | It's possible to control most of the parameters separately for each constraint block.
103 | By using the `--tip_atom` argument it is possible to skip the inverse rotamer clash analysis, and only output assemblies based on their unique placement of catalytic atoms. 104 | 105 | The output PDB files of this script will also contain the `REMARK 666 ...` lines which are required by the Rosetta enzdes constraint parser. As such, the outputs are suitable for building more complex enzyme design pipelines.
106 | For example, the published all-atom diffusion pipeline (https://github.com/ikalvet/heme_binder_diffusion) is directly compatible with the outputs of this script. 107 | 108 | 109 | ### Requirements 110 | 111 | Python packages that are required: 112 | ``` 113 | pyrosetta 114 | numpy 115 | pandas 116 | scipy 117 | ``` 118 | -------------------------------------------------------------------------------- /examples/P450/inputs/HBA_unique.params: -------------------------------------------------------------------------------- 1 | NAME HBA 2 | IO_STRING HBA Z 3 | TYPE LIGAND 4 | AA UNK 5 | ATOM FE1 Fe3p X 3.00 6 | ATOM N2 Npro X -0.37 7 | ATOM C33 aroC X -0.11 8 | ATOM C32 aroC X -0.11 9 | ATOM C34 CH3 X -0.27 10 | ATOM H8 Hapo X 0.10 11 | ATOM H9 Hapo X 0.10 12 | ATOM H10 Hapo X 0.10 13 | ATOM C2 aroC X -0.11 14 | ATOM C3 CH2 X -0.18 15 | ATOM C4 CH2 X -0.18 16 | ATOM C5 COO X 0.62 17 | ATOM O1 OOC X -0.76 18 | ATOM O3 OOC X -0.76 19 | ATOM H27 Hapo X 0.10 20 | ATOM H28 Hapo X 0.10 21 | ATOM H21 Hapo X 0.10 22 | ATOM H25 Hapo X 0.10 23 | ATOM C1 aroC X -0.11 24 | ATOM C28 aroC X -0.11 25 | ATOM C6 aroC X -0.11 26 | ATOM C7 aroC X -0.11 27 | ATOM C8 CH2 X -0.18 28 | ATOM C9 CH2 X -0.18 29 | ATOM C10 COO X 0.62 30 | ATOM O2 OOC X -0.76 31 | ATOM O4 OOC X -0.76 32 | ATOM H29 Hapo X 0.10 33 | ATOM H30 Hapo X 0.10 34 | ATOM H26 Hapo X 0.10 35 | ATOM H3 Hapo X 0.10 36 | ATOM C11 aroC X -0.11 37 | ATOM C12 aroC X -0.11 38 | ATOM N1 Npro X -0.37 39 | ATOM C31 aroC X -0.11 40 | ATOM C14 aroC X -0.11 41 | ATOM N4 Npro X -0.37 42 | ATOM C19 aroC X -0.11 43 | ATOM C30 aroC X -0.11 44 | ATOM C21 aroC X -0.11 45 | ATOM N3 Npro X -0.37 46 | ATOM C26 aroC X -0.11 47 | ATOM C29 aroC X -0.11 48 | ATOM H20 Haro X 0.12 49 | ATOM C25 aroC X -0.11 50 | ATOM C27 CH3 X -0.27 51 | ATOM H13 Hapo X 0.10 52 | ATOM H12 Hapo X 0.10 53 | ATOM H11 Hapo X 0.10 54 | ATOM C22 aroC X -0.11 55 | ATOM C23 aroC X -0.11 56 | ATOM C24 aroC X -0.11 57 | ATOM H5 Haro X 0.12 58 | ATOM H4 Haro X 0.12 59 | 
ATOM H1 Haro X 0.12 60 | ATOM H24 Haro X 0.12 61 | ATOM C18 aroC X -0.11 62 | ATOM C15 aroC X -0.11 63 | ATOM C16 aroC X -0.11 64 | ATOM C17 aroC X -0.11 65 | ATOM H6 Haro X 0.12 66 | ATOM H7 Haro X 0.12 67 | ATOM H2 Haro X 0.12 68 | ATOM C20 CH3 X -0.27 69 | ATOM H14 Hapo X 0.10 70 | ATOM H15 Hapo X 0.10 71 | ATOM H16 Hapo X 0.10 72 | ATOM H23 Haro X 0.12 73 | ATOM C13 CH3 X -0.27 74 | ATOM H19 Hapo X 0.10 75 | ATOM H18 Hapo X 0.10 76 | ATOM H17 Hapo X 0.10 77 | ATOM H22 Haro X 0.12 78 | ATOM O5 OH X -0.66 79 | ATOM C35 CH2 X -0.18 80 | ATOM O6 OH X -0.66 81 | ATOM C36 aroC X -0.11 82 | ATOM C38 aroC X -0.11 83 | ATOM C40 aroC X -0.11 84 | ATOM C41 aroC X -0.11 85 | ATOM C39 aroC X -0.11 86 | ATOM C37 aroC X -0.11 87 | ATOM H34 Haro X 0.12 88 | ATOM H37 Haro X 0.12 89 | ATOM C42 aroC X -0.11 90 | ATOM C44 aroC X -0.11 91 | ATOM C46 aroC X -0.11 92 | ATOM C47 aroC X -0.11 93 | ATOM C45 aroC X -0.11 94 | ATOM C43 aroC X -0.11 95 | ATOM H38 Haro X 0.12 96 | ATOM H40 Haro X 0.12 97 | ATOM H42 Haro X 0.12 98 | ATOM H41 Haro X 0.12 99 | ATOM H39 Haro X 0.12 100 | ATOM H36 Haro X 0.12 101 | ATOM H35 Haro X 0.12 102 | ATOM H33 Hapo X 0.10 103 | ATOM H32 Hapo X 0.10 104 | ATOM H31 Hpol X 0.43 105 | BOND_TYPE O1 C5 4 106 | BOND_TYPE O3 C5 4 107 | BOND_TYPE C5 C4 1 108 | BOND_TYPE H8 C34 1 109 | BOND_TYPE H21 C3 1 110 | BOND_TYPE C4 C3 1 111 | BOND_TYPE C4 H27 1 112 | BOND_TYPE C4 H28 1 113 | BOND_TYPE H9 C34 1 114 | BOND_TYPE C34 H10 1 115 | BOND_TYPE C34 C32 1 116 | BOND_TYPE C3 H25 1 117 | BOND_TYPE C3 C2 1 118 | BOND_TYPE O2 C10 4 119 | BOND_TYPE C32 C2 4 120 | BOND_TYPE C32 C33 4 121 | BOND_TYPE C2 C1 4 122 | BOND_TYPE O4 C10 4 123 | BOND_TYPE C10 C9 1 124 | BOND_TYPE H20 C29 1 125 | BOND_TYPE H22 C28 1 126 | BOND_TYPE C33 C29 2 127 | BOND_TYPE C33 N2 4 128 | BOND_TYPE C1 C28 2 129 | BOND_TYPE C1 N2 4 130 | BOND_TYPE H26 C8 1 131 | BOND_TYPE C29 C26 1 132 | BOND_TYPE H13 C27 1 133 | BOND_TYPE C28 C6 1 134 | BOND_TYPE H12 C27 1 135 | BOND_TYPE C9 H29 1 136 | BOND_TYPE C9 
C8 1 137 | BOND_TYPE C9 H30 1 138 | BOND_TYPE N2 FE1 1 139 | BOND_TYPE C27 H11 1 140 | BOND_TYPE C27 C25 1 141 | BOND_TYPE C8 H3 1 142 | BOND_TYPE C8 C7 1 143 | BOND_TYPE C26 C25 1 144 | BOND_TYPE C26 N3 2 145 | BOND_TYPE C6 C7 1 146 | BOND_TYPE C6 N1 2 147 | BOND_TYPE H33 C35 1 148 | BOND_TYPE C25 C22 2 149 | BOND_TYPE C7 C11 2 150 | BOND_TYPE C35 H32 1 151 | BOND_TYPE C35 O6 1 152 | BOND_TYPE H31 O5 1 153 | BOND_TYPE N3 FE1 1 154 | BOND_TYPE N3 C21 1 155 | BOND_TYPE N1 FE1 1 156 | BOND_TYPE N1 C12 1 157 | BOND_TYPE FE1 O5 1 158 | BOND_TYPE FE1 N4 1 159 | BOND_TYPE C22 C21 1 160 | BOND_TYPE C22 C23 1 161 | BOND_TYPE C11 C12 1 162 | BOND_TYPE C11 C13 1 163 | BOND_TYPE O6 C36 1 164 | BOND_TYPE H1 C23 1 165 | BOND_TYPE C21 C30 2 166 | BOND_TYPE C12 C31 2 167 | BOND_TYPE H19 C13 1 168 | BOND_TYPE C23 C24 2 169 | BOND_TYPE C13 H18 1 170 | BOND_TYPE C13 H17 1 171 | BOND_TYPE H35 C38 1 172 | BOND_TYPE N4 C19 4 173 | BOND_TYPE N4 C14 4 174 | BOND_TYPE C36 C38 4 175 | BOND_TYPE C36 C37 4 176 | BOND_TYPE C30 C19 1 177 | BOND_TYPE C30 H24 1 178 | BOND_TYPE C31 C14 1 179 | BOND_TYPE C31 H23 1 180 | BOND_TYPE C38 C40 4 181 | BOND_TYPE C24 H5 1 182 | BOND_TYPE C24 H4 1 183 | BOND_TYPE C19 C18 4 184 | BOND_TYPE C14 C15 4 185 | BOND_TYPE H34 C37 1 186 | BOND_TYPE C37 C39 4 187 | BOND_TYPE C40 H36 1 188 | BOND_TYPE C40 C41 4 189 | BOND_TYPE C18 C15 4 190 | BOND_TYPE C18 C20 1 191 | BOND_TYPE C15 C16 1 192 | BOND_TYPE C39 C41 4 193 | BOND_TYPE C39 H37 1 194 | BOND_TYPE C41 C42 1 195 | BOND_TYPE H14 C20 1 196 | BOND_TYPE H2 C16 1 197 | BOND_TYPE C16 C17 2 198 | BOND_TYPE C20 H15 1 199 | BOND_TYPE C20 H16 1 200 | BOND_TYPE H39 C44 1 201 | BOND_TYPE C42 C44 4 202 | BOND_TYPE C42 C43 4 203 | BOND_TYPE C17 H6 1 204 | BOND_TYPE C17 H7 1 205 | BOND_TYPE H38 C43 1 206 | BOND_TYPE C44 C46 4 207 | BOND_TYPE C43 C45 4 208 | BOND_TYPE C46 H41 1 209 | BOND_TYPE C46 C47 4 210 | BOND_TYPE C45 C47 4 211 | BOND_TYPE C45 H40 1 212 | BOND_TYPE C47 H42 1 213 | BOND_TYPE O5 C35 1 214 | CHI 1 C3 C4 C5 
O1 215 | CHI 2 C2 C3 C4 C5 216 | CHI 3 C32 C2 C3 C4 217 | CHI 4 C8 C9 C10 O2 218 | CHI 5 C7 C8 C9 C10 219 | CHI 6 C6 C7 C8 C9 220 | CHI 7 O5 C35 O6 C36 221 | CHI 8 N2 FE1 O5 C35 222 | CHI 9 C25 C22 C23 C24 223 | CHI 10 C35 O6 C36 C38 224 | CHI 11 C18 C15 C16 C17 225 | CHI 12 C40 C41 C42 C44 226 | CHI 13 FE1 O5 C35 O6 227 | NBR_ATOM O5 228 | NBR_RADIUS 13.456399 229 | ICOOR_INTERNAL FE1 0.000000 0.000000 0.000000 FE1 N2 C33 230 | ICOOR_INTERNAL N2 0.000000 180.000000 2.018878 FE1 N2 C33 231 | ICOOR_INTERNAL C33 0.000001 53.581195 1.373135 N2 FE1 C33 232 | ICOOR_INTERNAL C32 -178.502193 69.602248 1.452331 C33 N2 FE1 233 | ICOOR_INTERNAL C34 -179.986284 55.631927 1.496810 C32 C33 N2 234 | ICOOR_INTERNAL H8 179.523543 68.748802 1.092786 C34 C32 C33 235 | ICOOR_INTERNAL H9 -120.214586 68.550017 1.096877 C34 C32 H8 236 | ICOOR_INTERNAL H10 -119.584492 68.563454 1.096894 C34 C32 H9 237 | ICOOR_INTERNAL C2 -179.832410 73.531587 1.369725 C32 C33 C34 238 | ICOOR_INTERNAL C3 179.828563 50.915427 1.499732 C2 C32 C33 239 | ICOOR_INTERNAL C4 -102.596835 67.625659 1.538258 C3 C2 C32 240 | ICOOR_INTERNAL C5 146.999694 65.222093 1.554685 C4 C3 C2 241 | ICOOR_INTERNAL O1 -31.997497 64.516933 1.262209 C5 C4 C3 242 | ICOOR_INTERNAL O3 -178.654391 62.681761 1.262040 C5 C4 O1 243 | ICOOR_INTERNAL H27 -122.780968 70.057389 1.096230 C4 C3 C5 244 | ICOOR_INTERNAL H28 -115.836979 70.552845 1.100306 C4 C3 H27 245 | ICOOR_INTERNAL H21 119.769291 68.760642 1.092767 C3 C2 C4 246 | ICOOR_INTERNAL H25 120.234202 68.569298 1.096805 C3 C2 H21 247 | ICOOR_INTERNAL C1 -179.655565 73.427377 1.452820 C2 C32 C3 248 | ICOOR_INTERNAL C28 178.719920 55.957567 1.384487 C1 C2 C32 249 | ICOOR_INTERNAL C6 -178.930708 55.033211 1.384065 C28 C1 C2 250 | ICOOR_INTERNAL C7 179.988903 55.835882 1.453199 C6 C28 C1 251 | ICOOR_INTERNAL C8 -0.860763 55.699372 1.501415 C7 C6 C28 252 | ICOOR_INTERNAL C9 -78.998795 65.749088 1.546648 C8 C7 C6 253 | ICOOR_INTERNAL C10 141.999037 67.347387 1.551252 C9 C8 C7 254 | 
ICOOR_INTERNAL O2 -33.999719 62.282349 1.259720 C10 C9 C8 255 | ICOOR_INTERNAL O4 -179.957769 65.001979 1.266190 C10 C9 O2 256 | ICOOR_INTERNAL H29 -118.434948 72.078025 1.099244 C9 C8 C10 257 | ICOOR_INTERNAL H30 -117.376528 69.709170 1.095638 C9 C8 H29 258 | ICOOR_INTERNAL H26 120.034261 68.570555 1.096912 C8 C7 C9 259 | ICOOR_INTERNAL H3 120.192030 68.728840 1.092802 C8 C7 H26 260 | ICOOR_INTERNAL C11 179.868008 73.441608 1.369284 C7 C6 C8 261 | ICOOR_INTERNAL C12 -0.154467 73.482580 1.452883 C11 C7 C6 262 | ICOOR_INTERNAL N1 0.435801 69.675550 1.374007 C12 C11 C7 263 | ICOOR_INTERNAL C31 -178.693261 55.893570 1.380984 C12 C11 N1 264 | ICOOR_INTERNAL C14 178.357907 54.599034 1.386888 C31 C12 C11 265 | ICOOR_INTERNAL N4 0.520628 54.286519 1.372941 C14 C31 C12 266 | ICOOR_INTERNAL C19 -179.055731 73.706335 1.377697 N4 C14 C31 267 | ICOOR_INTERNAL C30 -178.505888 54.779402 1.384031 C19 N4 C14 268 | ICOOR_INTERNAL C21 2.654589 54.793775 1.382403 C30 C19 N4 269 | ICOOR_INTERNAL N3 -4.661808 55.020478 1.378742 C21 C30 C19 270 | ICOOR_INTERNAL C26 -175.324645 73.700213 1.380305 N3 C21 C30 271 | ICOOR_INTERNAL C29 177.683167 53.999984 1.385889 C26 N3 C21 272 | ICOOR_INTERNAL H20 -179.979397 62.804618 1.083332 C29 C26 N3 273 | ICOOR_INTERNAL C25 -178.928272 69.621569 1.440461 C26 N3 C29 274 | ICOOR_INTERNAL C27 -178.514185 55.444074 1.496833 C25 C26 N3 275 | ICOOR_INTERNAL H13 -57.561069 68.564267 1.096437 C27 C25 C26 276 | ICOOR_INTERNAL H12 119.456698 68.786471 1.096955 C27 C25 H13 277 | ICOOR_INTERNAL H11 119.928154 68.567917 1.092300 C27 C25 H12 278 | ICOOR_INTERNAL C22 178.673006 73.118881 1.379818 C25 C26 C27 279 | ICOOR_INTERNAL C23 -179.979763 54.615808 1.459849 C22 C25 C26 280 | ICOOR_INTERNAL C24 150.023835 51.869958 1.342020 C23 C22 C25 281 | ICOOR_INTERNAL H5 -179.640676 59.615916 1.085871 C24 C23 C22 282 | ICOOR_INTERNAL H4 178.171547 56.914633 1.083754 C24 C23 H5 283 | ICOOR_INTERNAL H1 -177.989675 65.539017 1.089079 C23 C22 C24 284 | ICOOR_INTERNAL H24 
-179.987889 62.579510 1.079747 C30 C19 C21 285 | ICOOR_INTERNAL C18 178.982124 69.399441 1.443148 C19 N4 C30 286 | ICOOR_INTERNAL C15 -0.483648 73.586246 1.381379 C18 C19 N4 287 | ICOOR_INTERNAL C16 -177.712163 50.665146 1.457704 C15 C18 C19 288 | ICOOR_INTERNAL C17 -28.973816 53.788201 1.342234 C16 C15 C18 289 | ICOOR_INTERNAL H6 -2.582383 57.411776 1.085027 C17 C16 C15 290 | ICOOR_INTERNAL H7 -178.671386 59.329067 1.085731 C17 C16 H6 291 | ICOOR_INTERNAL H2 179.942344 64.071680 1.088635 C16 C15 C17 292 | ICOOR_INTERNAL C20 -176.831231 54.601971 1.495128 C18 C19 C15 293 | ICOOR_INTERNAL H14 27.956740 68.323585 1.093567 C20 C18 C19 294 | ICOOR_INTERNAL H15 -120.526429 68.078446 1.097641 C20 C18 H14 295 | ICOOR_INTERNAL H16 -119.213017 69.806158 1.094147 C20 C18 H15 296 | ICOOR_INTERNAL H23 -178.630689 62.934282 1.082653 C31 C12 C14 297 | ICOOR_INTERNAL C13 -179.788688 50.851531 1.496813 C11 C7 C12 298 | ICOOR_INTERNAL H19 -0.846953 68.746995 1.092826 C13 C11 C7 299 | ICOOR_INTERNAL H18 120.198842 68.537795 1.096911 C13 C11 H19 300 | ICOOR_INTERNAL H17 119.584023 68.569414 1.096832 C13 C11 H18 301 | ICOOR_INTERNAL H22 179.655343 62.492735 1.083429 C28 C1 C6 302 | ICOOR_INTERNAL O5 93.551083 88.797704 1.766351 FE1 N2 C33 303 | ICOOR_INTERNAL C35 -88.167337 58.378822 2.508687 O5 FE1 N2 304 | ICOOR_INTERNAL O6 -57.034305 64.528007 1.383909 C35 O5 FE1 305 | ICOOR_INTERNAL C36 -55.550936 60.118970 1.362558 O6 C35 O5 306 | ICOOR_INTERNAL C38 17.111617 56.522838 1.401330 C36 O6 C35 307 | ICOOR_INTERNAL C40 175.458107 60.992428 1.388896 C38 C36 O6 308 | ICOOR_INTERNAL C41 1.079143 60.518840 1.395052 C40 C38 C36 309 | ICOOR_INTERNAL C39 0.432097 58.242537 1.398985 C41 C40 C38 310 | ICOOR_INTERNAL C37 -0.958172 61.160057 1.384293 C39 C41 C40 311 | ICOOR_INTERNAL H34 -179.126725 58.776514 1.083687 C37 C39 C41 312 | ICOOR_INTERNAL H37 -179.384445 60.046487 1.082011 C39 C41 C37 313 | ICOOR_INTERNAL C42 -179.992532 60.881265 1.540038 C41 C40 C39 314 | ICOOR_INTERNAL C44 
-45.000184 59.059228 1.405006 C42 C41 C40 315 | ICOOR_INTERNAL C46 -179.996457 59.039759 1.393954 C44 C42 C41 316 | ICOOR_INTERNAL C47 -0.001829 59.754353 1.396227 C46 C44 C42 317 | ICOOR_INTERNAL C45 0.000386 60.546632 1.395743 C47 C46 C44 318 | ICOOR_INTERNAL C43 0.003095 59.739786 1.393927 C45 C47 C46 319 | ICOOR_INTERNAL H38 179.998065 60.342795 1.085673 C43 C45 C47 320 | ICOOR_INTERNAL H40 179.993256 59.919618 1.086355 C45 C47 C43 321 | ICOOR_INTERNAL H42 -179.994438 59.724947 1.086054 C47 C46 C45 322 | ICOOR_INTERNAL H41 -179.993764 60.344214 1.086348 C46 C44 C47 323 | ICOOR_INTERNAL H39 179.999701 60.615603 1.085631 C44 C42 C46 324 | ICOOR_INTERNAL H36 -178.439340 59.339446 1.081631 C40 C38 C41 325 | ICOOR_INTERNAL H35 -174.807835 59.206472 1.082225 C38 C36 C40 326 | ICOOR_INTERNAL H33 122.944578 70.550409 1.088203 C35 O5 O6 327 | ICOOR_INTERNAL H32 118.437744 85.616119 1.094307 C35 O5 H33 328 | ICOOR_INTERNAL H31 1.092413 66.031827 1.245030 O5 FE1 C35 329 | PDB_ROTAMERS HBA_conformers_unique.pdb 330 | -------------------------------------------------------------------------------- /invrotzyme.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Apr 15 12:48:51 2022 5 | 6 | @author: ikalvet 7 | """ 8 | import argparse 9 | import pyrosetta as pyr 10 | import pyrosetta.rosetta 11 | import pyrosetta.distributed.io 12 | import sys, os 13 | import itertools 14 | import functools 15 | import operator 16 | import time 17 | import numpy as np 18 | import pandas as pd 19 | import multiprocessing 20 | import random 21 | import scipy.spatial 22 | script_dir = os.path.dirname(os.path.realpath(__file__)) 23 | sys.path.append(script_dir) 24 | sys.path.append(script_dir+'/utils/') 25 | import protocol 26 | import utils 27 | import dunbrack_rotlib 28 | import align_pdbs 29 | 30 | 31 | 32 | 33 | def process_rotamer_set_queue(q, early_stop, prefix, bad_rotamers, 
rotamers, cst_io, cst_atoms, motifs, results_found): 34 | while True: 35 | i_ids = q.get() 36 | if i_ids is None: 37 | return 38 | 39 | i = i_ids[0] 40 | ids = i_ids[1] 41 | # Grabbing a combination of inverse rotamers based on the provided 42 | # per-cst inverse rotamer ids. 43 | c = [rotamers[n][i] for n, i in enumerate(ids)] 44 | 45 | if any([rot_id in bad_rotamers[j] for j, rot_id in enumerate(ids)]): 46 | # print(f"Bad rotamer in set {i}") 47 | continue 48 | 49 | # TODO: implement symmetry here 50 | # Take the list "c" and apply some symmetric transform to the residues there 51 | # Then the rest of the code should take care of it appropriately 52 | 53 | pose = pyrosetta.rosetta.core.pose.Pose() 54 | bad_rotamer = False 55 | catres_resnos = {n: 0 for n,r in enumerate(c) if not isinstance(r, pyrosetta.rosetta.core.pose.Pose) and r.is_ligand()} 56 | ligands = [r for r in c if not isinstance(r, pyrosetta.rosetta.core.pose.Pose) and r.is_ligand()] 57 | for j, res in enumerate(c): 58 | if args.debug: 59 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue): 60 | print(i, j, res.name()) 61 | elif isinstance(res, pyrosetta.rosetta.core.pose.Pose): 62 | print(i, j, res.pdb_info().name()) 63 | 64 | if not isinstance(res, pyrosetta.rosetta.core.pose.Pose) and res.is_ligand(): # ligand 65 | continue 66 | 67 | # If we have already seen that it's a bad rotamer then let's just skip it 68 | if ids[j] in bad_rotamers[j]: 69 | if args.debug: print(f"{j}, previously seen as a bad rotamer") 70 | bad_rotamer = True 71 | break 72 | 73 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue): 74 | _res_pose = pyrosetta.rosetta.core.pose.Pose() 75 | _res_pose.append_residue_by_jump(res, 0) 76 | if res.is_protein(): 77 | _res_pose = protocol.extend_SS(pose=_res_pose, ref_seqpos=1, 78 | secstruct=args.secstruct_per_cst[j], AAA=AAA, 79 | nres_Nterm=args.N_len_per_cst[j], 80 | nres_Cterm=args.C_len_per_cst[j]) 81 | _res_pose.fold_tree().clear() 82 | 
_res_pose.fold_tree().add_edge(1, _res_pose.size(), -1) # This will avoid FoldTree reordering error showing up 83 | catres_resno = args.N_len_per_cst[j]+1 84 | 85 | elif isinstance(res, pyrosetta.rosetta.core.pose.Pose): 86 | _res_pose = res.clone() 87 | catres_resno = motifs[j]["resno"] 88 | 89 | # Figuring out information about which CST atoms are used for this residue 90 | catres_cst_atoms = protocol.identify_cst_atoms_for_res(res, j, catres_resno, _res_pose, cst_atoms[j], motifs, ligands) 91 | 92 | 93 | # Adding ligand to the extended chain and checking for clashes 94 | for ligand in ligands: 95 | # _res_pose.append_residue_by_jump(ligand, 1) # this doesn't turn ligand into new chain 96 | _res_pose.append_residue_by_jump(ligand, catres_resno, 97 | jump_anchor_atom=_res_pose.residue(catres_resno).atom_name(_res_pose.residue(catres_resno).nbr_atom()), 98 | jump_root_atom=ligand.atom_name(ligand.nbr_atom()), 99 | start_new_chain=True) 100 | 101 | if protocol.check_clash(_res_pose, catres_resnos=[catres_resno]+[r.seqpos() for r in _res_pose.residues if r.is_ligand()], cst_atoms=catres_cst_atoms, tip_atom=args.tip_atom, debug=args.debug) is True: 102 | if args.debug: print(f"{j}, clash after extension") 103 | # Only adding the residude object to the bad residues 104 | # The motif pose will never be dumped 105 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue): 106 | if ids[j] not in bad_rotamers[j]: 107 | bad_rotamers[j].append(ids[j]) 108 | elif isinstance(res, pyrosetta.rosetta.core.pose.Pose): 109 | if args.debug: print("MOTIF POSE SEEMS TO GIVE CLASH!!!! 
PLEASE INVESTIGATE!!!") 110 | bad_rotamer = True 111 | 112 | # Giving up if all rotamers are bad 113 | if len(set(bad_rotamers[j])) == len(rotamers[j]): 114 | print(f"All rotamers for CST {j} are bad...") 115 | break 116 | 117 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue): 118 | catres_resnos[j] = pose.size() + args.N_len_per_cst[j]+1 119 | else: 120 | catres_resnos[j] = motifs[j]["resno"] 121 | 122 | pyrosetta.rosetta.core.pose.append_subpose_to_pose(pose, _res_pose, 1, _res_pose.size()-len(ligands), new_chain=True) 123 | 124 | # Finished individual evaluation of residues 125 | # Now putting the whole thing together 126 | if bad_rotamer is True: 127 | if args.debug: print(f"{j}, bad rotamer") 128 | continue 129 | 130 | # Adding ligand as the last residue 131 | for _n,res in enumerate(c): 132 | if isinstance(res, pyrosetta.rosetta.core.pose.Pose): 133 | continue 134 | if res.is_ligand(): 135 | lig_pose = pyrosetta.rosetta.core.pose.Pose() 136 | lig_pose.append_residue_by_jump(res, 0) 137 | pyrosetta.rosetta.core.pose.append_subpose_to_pose(pose, lig_pose, 1, 1, new_chain=True) 138 | catres_resnos[_n] = pose.size() 139 | 140 | # Checking for clashes 141 | # Ignoring clashes between catalytic residues and the ligand 142 | ignore_clash_respairs = [] 143 | for j in catres_resnos: 144 | if isinstance(c[j], pyrosetta.rosetta.core.conformation.Residue): 145 | assert pose.residue(catres_resnos[j]).name3() == c[j].name3(), f"cst {j}: resno {catres_resnos[j]}, {c[j].name3()} != {pose.residue(catres_resnos[j]).name3()}" 146 | if j == 0: 147 | continue 148 | if args.debug: print(f"clashcheck exclude cst atoms, cst {j}, resno {catres_resnos[j]}, name {pose.residue(catres_resnos[j]).name()}") 149 | ignore_clash_respairs.append((catres_resnos[0], catres_resnos[j])) 150 | 151 | clash = protocol.check_clash(pose, catres_resnos=catres_resnos.values(), ignore_respairs=ignore_clash_respairs, tip_atom=args.tip_atom, debug=args.debug) 152 | if clash is True: 153 | if 
args.debug: print(f"{i}, clash in the final assembly") 154 | continue 155 | if args.debug: print(j, pose.sequence()) 156 | 157 | # TODO: Need to implement checking whether the pose actually respects the CST's 158 | # This is an issue when the ligand has any chi sampling enabled, and another residue is matched downstream of that. 159 | # Some combinations of rotamers are not meant to work together 160 | ## I think this is now managed in the REMARK 666 generation stage 161 | 162 | pose_name = args.prefix 163 | for res in c: 164 | if isinstance(res, pyrosetta.rosetta.core.conformation.Residue): 165 | if res.is_protein(): 166 | pose_name += res.name1() + "_" 167 | else: 168 | pose_name += res.name3() + "_" 169 | elif isinstance(res, pyrosetta.rosetta.core.pose.Pose): 170 | pose_name += os.path.basename(res.pdb_info().name()).replace(".pdb", "") + "_" 171 | pose_name += f"{prefix}_{i}{args.suffix}.pdb" 172 | if os.path.exists(pose_name): 173 | print(f"Found existing file with name {pose_name}") 174 | pose_name.replace(".pdb", "a.pdb") 175 | 176 | 177 | remarks = protocol.create_remark_lines(pose, catres_resnos, cst_io) 178 | 179 | if len(remarks) != len(catres_resnos) - 1: 180 | if args.debug: print(f"{i}: Could not build all REMARK 666 lines") 181 | continue 182 | 183 | print(f"Found good rotamer: {pose_name.replace('.pdb', '')}") 184 | 185 | pdbstr = pyrosetta.distributed.io.to_pdbstring(pose).split("\n") 186 | 187 | pdbstr_new = [] 188 | for l in pdbstr: 189 | pdbstr_new.append(l) 190 | if "HEADER" in l: 191 | for rmrk in remarks: 192 | pdbstr_new.append(rmrk) 193 | with open(pose_name, "w") as file: 194 | file.write("\n".join(pdbstr_new)) 195 | 196 | results_found.append(ids) 197 | if args.max_outputs is not None and len(results_found) > args.max_outputs: 198 | early_stop.value = True 199 | print(f"Reached the output limit of {args.max_outputs}") 200 | 201 | 202 | 203 | 204 | def parallelize_mp(iterables, rotset, prefix, cst_io, cst_atoms, motifs, results_found): 
def parallelize_mp(iterables, rotset, prefix, cst_io, cst_atoms, motifs, results_found):
    """
    Feed rotamer-index combinations to a pool of worker processes.

    Each worker is started with `process_rotamer_set_queue` as its pool
    initializer and receives the shared queue, the early-stop flag and the
    shared bookkeeping containers through `initargs`.
    Reads the module-level `args` (only `args.nproc` is used here).

    Arguments:
        iterables: iterable of rotamer-index combinations; each item is put
                   on the queue as `(index, combination)`
        rotset: per-CST lists of inverse rotamers, handed to the workers
        prefix: label of the rotamer set (used in log output, passed to workers)
        cst_io, cst_atoms, motifs: passed through to the workers unchanged
        results_found: shared list of results from earlier sets, or None to
                       create a fresh manager-backed list
    Returns:
        results_found: the shared (manager) list of results
    """
    # Bounded queue: the producer loop below blocks once `nproc` items are pending
    the_queue = multiprocessing.Queue(maxsize=args.nproc)

    start = time.time()
    manager = multiprocessing.Manager()
    bad_rotamers = manager.dict()
    early_stop = multiprocessing.Value("b", False)

    if results_found is None:
        results_found = manager.list()

    print(f"Starting to generate inverse rotamer assemblies using {args.nproc} parallel processes.")
    pool = multiprocessing.Pool(processes=args.nproc,
                                initializer=process_rotamer_set_queue,
                                initargs=(the_queue, early_stop, prefix, bad_rotamers, rotset, cst_io, cst_atoms, motifs, results_found, ))

    for i, c in enumerate(iterables):
        if i == 0:
            # One shared "bad rotamer" list per CST position, created before any work is queued
            for j in range(len(c)):
                bad_rotamers[j] = manager.list()
        if early_stop.value:
            # A worker raised the stop flag (output limit reached): stop feeding work
            the_queue.put(None)
            break
        the_queue.put((i, c))

    # None to end each process
    for _ in range(args.nproc):
        the_queue.put(None)

    # Closing the queue and the pool
    the_queue.close()
    the_queue.join_thread()
    pool.close()
    pool.join()

    print(f"Bad rotamers from set {prefix}:")
    for j in bad_rotamers:
        print(f" CST {j}: {list(set(bad_rotamers[j]))}")

    end = time.time()
    print(f"Processing all the rotamers in set {prefix} took {(end - start):.2f} seconds")
    return results_found


def main(args):
    """
    Build inverse-rotamer assemblies for the CST file given in `args`.

    Steps: initialise PyRosetta, parse the CST file, collect the allowed
    residue types and their good Dunbrack rotamers per CST block, generate
    the inverse rotamers, filter/subsample them, and hand every combination
    of rotamer indices to `parallelize_mp`.

    Arguments:
        args: argparse namespace produced by the command-line parser of this script
    """
    global AAA  # making it global so that functions downstream can see it

    if args.suffix != "":
        args.suffix = f"_{args.suffix}"
    # args.prefix is used as given; no separator is added to it

    assert os.path.exists(args.cstfile), f"CST file not found: {args.cstfile}"
    extra_res_fa = ""
    if args.params is not None:
        params = [p for p in args.params if ".params" in p]
        extra_res_fa = "-extra_res_fa " + ' '.join(params)

    ### Setting up PyRosetta ###
    # pyr.init(f"{extra_res_fa} -run:preserve_header -output_virtual true")
    pyr.init(f"{extra_res_fa} -run:preserve_header")

    # Loading the backbone-dependent Dunbrack rotamer library into a dataframe
    dunbrack_database = os.path.dirname(pyr.__file__) + "/database/rotamer/bbdep02.May.sortlib-correct.12.2010"
    rotlib = dunbrack_rotlib.load_rotamer_df(dunbrack_database)

    AAA = pyr.pose_from_sequence("AAA")

    ###### CST PARSING ########
    # Parsing the CST file
    # NOTE(review): addcst_mover is never used below; kept in case constructing it matters - confirm
    addcst_mover = pyrosetta.rosetta.protocols.enzdes.AddOrRemoveMatchCsts()
    chem_manager = pyrosetta.rosetta.core.chemical.ChemicalManager.get_instance()
    residue_type_set = chem_manager.residue_type_set("fa_standard")
    cst_io = pyrosetta.rosetta.protocols.toolbox.match_enzdes_util.EnzConstraintIO(residue_type_set)
    cst_io.read_enzyme_cstfile(args.cstfile)

    # Figuring out which residue atoms are used for each cst
    # Using the MCFI (MatcherConstraintFileInfo) object for that
    cst_atoms = protocol.get_cst_atoms(cst_io)

    # Storing information about which residues are matched for each CST block (1-based ids)
    restypes = {}
    for n in range(1, cst_io.mcfi_lists_size()+1):
        restypes[n] = [restype.name3() for restype in cst_io.mcfi_list(n).upstream_restypes()]

    ### PROCESS ARGUMENTS A BIT FURTHER ###
    args = protocol.parse_arguments(args, restypes)

    #### PARSING HIS TAUTOMER RESTRICTIONS #####
    keep_his_tautomer_per_cst = None
    if args.keep_his_tautomer is not None:
        keep_his_tautomer_per_cst = {int(x.split(":")[0]): x.split(":")[1] for x in args.keep_his_tautomer.split(",")}
        assert all(val in ["HIS", "HIS_D"] for val in keep_his_tautomer_per_cst.values()), "Invalid input for --keep_his_tautomer"

    ### ROTAMER SUBSAMPLING ####
    chi_subsampling_levels = protocol.parse_rotamer_subsampling(args, cst_atoms)

    ### Putting together a dictionary listing good rotamers for each residue in each CST
    restype_good_rotamers = {}
    for n in restypes:
        restype_good_rotamers[n] = {}
        use_only_best_rotamer = n in args.use_best_rotamer_cstids
        for restyp in restypes[n]:
            if restyp not in utils.N_chis:
                continue
            # Look the residue type up in this CST's own dict. The outer dict is keyed
            # by CST id, so testing against it never matched and the same residue type
            # was recomputed for every duplicate entry.
            if restyp in restype_good_rotamers[n]:
                continue
            restype_good_rotamers[n][restyp] = dunbrack_rotlib.find_good_rotamers(rotlib, restyp, args.dunbrack_prob_per_cst[n],
                                                                                  args.secstruct_per_cst[n],
                                                                                  keep_only_best=use_only_best_rotamer)

    ### PARSING EXTERNAL MOTIFS ####
    # TODO: make external motifs usable with other CST id's, not just the 1st one
    motifs = None
    if args.motif_for_cst is not None:
        motifs = protocol.parse_motif_input(args.motif_for_cst, cst_atoms, restypes)

    ### GETTING INVERSE ROTAMERS ####
    ### This is where half of the work gets done ###
    invrot_tree = pyrosetta.rosetta.protocols.toolbox.match_enzdes_util.TheozymeInvrotTree(cst_io)
    invrot_tree.generate_targets_and_inverse_rotamers()
    all_inverse_rotamers_per_cst = invrot_tree.collect_all_inverse_rotamers()

    ## There is a way to get inverse rotamers from cst_io
    ## need to investigate this, because this allows keeping the sub-cst information
    # target_ats = pyrosetta.rosetta.utility.vector1_unsigned_long()
    # invrot_ats = pyrosetta.rosetta.utility.vector1_unsigned_long()
    #
    # _mcfi.inverse_rotamers_against_residue(target_conf=lig, invrot_restype=_mcfi.allowed_restypes(_mcfi.upstream_res())[1],
    #                                        target_ats=target_ats, invrot_ats=invrot_ats, flip_exgs_upstream_downstream_samples=False, backbone_interaction=False)

    time.sleep(1)

    print(f"{len(all_inverse_rotamers_per_cst)} rotamer sets to process")

    results_found = None
    for xx, rotset in enumerate(all_inverse_rotamers_per_cst):
        print(f"Non-redundant rotamer set {xx+1}")
        for cst_block, invrots in enumerate(rotset.invrots()):
            print(f"CST {cst_block}: {len(invrots)} inverse rotamers.")

        # Listify the inverse rotamer dataset
        rotset_sub = [list(invrots) for invrots in rotset.invrots()]

        # Pruning all other inverse rotamers based on proton-chis.
        # Removing duplicate rotamers where the only difference is in the value of the proton_chi
        for rotset_id, invrots in enumerate(rotset_sub):
            if not invrots:
                continue  # nothing to prune; also avoids indexing an empty list
            if isinstance(invrots[0], pyrosetta.rosetta.core.pose.Pose) or invrots[0].is_ligand():
                continue
            _n_before = len(invrots)
            rotset_sub[rotset_id] = protocol.prune_residue_rotamers(invrots)
            if len(rotset_sub[rotset_id]) != _n_before:
                print(f"CST {rotset_id}: {len(rotset_sub[rotset_id])} inverse rotamers after pruning for proton-chi")

        # Loading any external motifs, if provided and aligning them to the appropriate CST atoms
        if args.motif_for_cst is not None:
            for cstno in motifs:
                # TODO: implement for not-first CST's (or CST's with additional sampling from CST file),
                # Picking rotamers with unique subsampling defined in CST
                to_align_rotamers = protocol.find_unique_rotamers_for_motif([r if i == cstno else [] for i, r in enumerate(rotset_sub)], motifs)
                alignment_atoms = {"atoms1": motifs[cstno]["atoms"],
                                   "atoms2": [(motifs[cstno]["resno"], a) for a in motifs[cstno]["atoms"]]}
                rotset_sub[cstno] = [align_pdbs.align_pose_to_residue(rotamer, motifs[cstno]["pose"], alignment_atoms)
                                     for rotamer in to_align_rotamers[cstno]]

        # Pruning inverse rotamers based on Dunbrack probabilites
        rotset_sub = protocol.preselect_inverse_rotamers(rotset_sub, restype_good_rotamers, keep_his_tautomer_per_cst)
        if rotset_sub is None:
            continue

        # Culling ligand rotamers based on RMSD cutoff
        if args.prune_ligand_rotamers != 0.0:
            for rotset_id, invrots in enumerate(rotset_sub):
                if not invrots:
                    continue
                if isinstance(invrots[0], pyrosetta.rosetta.core.pose.Pose):
                    continue
                if invrots[0].is_ligand():
                    rotset_sub[rotset_id] = protocol.prune_ligand_rotamers(invrots, args.prune_ligand_rotamers, args.nproc)

        # Performing rotamer subsampling (expanding CHI's)
        if any(level != 0 for levels in chi_subsampling_levels.values() for level in levels.values()):
            rotset_sub = protocol.subsample_rotamers(rotset_sub, chi_subsampling_levels, restype_good_rotamers, cst_atoms)

        # Picking random rotamers if requested
        if args.frac_random_rotamers_per_cst is not None or args.max_random_rotamers_per_cst is not None:
            print("Picking a random subset of inverse rotamers")
            rotset_sub = protocol.pick_random_rotamers_set(rotset_sub, max_random_rotamers_per_cst=args.max_random_rotamers_per_cst,
                                                           frac_random_rotamers_per_cst=args.frac_random_rotamers_per_cst)

        for cst_block, invrots in enumerate(rotset_sub):
            print(f"CST {cst_block}: {len(invrots)} inverse rotamers after filtering.")

        # Every combination of one rotamer index per CST block (lazy iterator)
        rotset_ids = [list(range(len(x))) for x in rotset_sub]
        rotamer_id_combinations = itertools.product(*rotset_ids)

        # Processing this subset of rotamers
        print(f"{functools.reduce(operator.mul, map(len, rotset_ids), 1)} inverse rotamer combinations to process in this set.")
        results_found = parallelize_mp(iterables=rotamer_id_combinations, rotset=rotset_sub, prefix=xx+1, cst_io=cst_io, cst_atoms=cst_atoms, motifs=motifs, results_found=results_found)

        # Once the output limit is exceeded there is no point in starting a pool for
        # the remaining rotamer sets. Same `>` test as the workers use to raise early_stop.
        if args.max_outputs is not None and len(results_found) > args.max_outputs:
            break


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cstfile", type=str, required=True, help="CST file used for matching. Keep sampling to minimum to avoid combinatorial explosion.")
    parser.add_argument("--params", nargs="+", required=False, help="params files used by ligands and residues")
    parser.add_argument("--keep_his_tautomer", type=str, help="Per cst, should a specific HIS tautomer (HIS or HIS_D) be used. Keeps only one the requested HIS tautomers. Format: 'cst_no:HIS/HIS_D,..'")
    parser.add_argument("--dunbrack_prob", type=float, default=0.85, help="Cumulative Dunbrack probability of used rotamers for any residue\n."
                        "As used by the -packing:dunbrack_prob_... flag in Rosetta.")
    parser.add_argument("--dunbrack_prob_per_cst", type=float, nargs="+", help="Cumulative Dunbrack probability of used rotamers for each CST residue.")
    parser.add_argument("--N_len", type=int, default=4, help="Number of residues added to the stub N-term")
    parser.add_argument("--C_len", type=int, default=5, help="Number of residues added to the stub C-term")
    parser.add_argument("--N_len_per_cst", type=int, nargs="+", help="Number of residues added to the stub N-term, per CST")
    parser.add_argument("--C_len_per_cst", type=int, nargs="+", help="Number of residues added to the stub C-term, per CST")
    parser.add_argument("--prune_ligand_rotamers", type=float, default=0.0, help="Prunes the set of used ligand rotamers based on clashcheck, AND rmsd similarity cutoff.")
    parser.add_argument("--max_random_rotamers", type=int, help="Number of random rotamers picked for each residue for the sampling. Reasonable number would be below 20 for quick sampling.")
    parser.add_argument("--max_random_rotamers_per_cst", nargs="+", type=int, help="Number of random rotamers picked for each CST block for the sampling. First value is for the ligand.")
    parser.add_argument("--frac_random_rotamers", type=float, help="Fraction of rotamers that are randomly picked for each residue for the sampling.")
    parser.add_argument("--frac_random_rotamers_per_cst", nargs="+", type=float, help="Fraction of rotamers that are randomly picked for each CST block for the sampling. First value is for the ligand.")
    parser.add_argument("--secstruct", type=str, default="H", choices=["E", "H"], help="What secondary structure stub should be generated for each residue.")
    parser.add_argument("--secstruct_per_cst", nargs="+", type=str, help="Per CST, what secondary structure stub should be generated for each residue.")
    parser.add_argument("--motif_for_cst", type=str, nargs="+", help="Per CST, an external motif that should be used, instead of inverse rotamers. Only works for the first CST right now. Format: cst_no:resno_in_motif:filepath ...")
    parser.add_argument("--use_best_rotamer_cstids", nargs="+", type=int, default=[], help="CST ID's that should only use the best rotamer from each secondary structure bin. Numbering starts from 1.")
    parser.add_argument("--extra_chi", type=str, help="Enables extra CHI sampling on a given level for all CST's. Input format: chino:level,chino2:level2")
    parser.add_argument("--extra_chi_per_cst", nargs="+", help=f"Enables extra CHI sampling on a given level for specific CST's. Input format: CSTNO1-chino:level,chino2:level2 CSTNO2-chino:level,chino2:level2\nSampling levels:\n{protocol.calculate_samplings.__doc__}")
    parser.add_argument("--suffix", type=str, default= "", help="Suffix to be added to the end of output files")
    parser.add_argument("--prefix", type=str, default= "", help="Prefix to be added to the beginning of output files")
    parser.add_argument("--tip_atom", action="store_true", default=False, help="Inverse rotamers will be pre-selected based on whether the tip atoms are placed geometrically differently. Rotamer diversity is ignored.")
    parser.add_argument("--nproc", type=int, help="Number of CPU cores used.")
    parser.add_argument("--max_outputs", type=int, help="Maximum number of output structures that will be produced.")
    parser.add_argument("--debug", action="store_true", default=False, help="Debug mode. Will print out more output at each step. Will run in single-core mode.")

    args = parser.parse_args()

    # Process-count precedence: --debug forces 1; otherwise SLURM_CPUS_ON_NODE
    # (when set) overrides --nproc; otherwise --nproc; otherwise all local cores.
    if "SLURM_CPUS_ON_NODE" in os.environ:
        args.nproc = int(os.environ["SLURM_CPUS_ON_NODE"])
    if args.nproc is None:
        args.nproc = os.cpu_count()
    if args.debug is True:
        args.nproc = 1

    main(args)
# --------------------------------------------------------------------------------
# /utils/util.py:
# --------------------------------------------------------------------------------
import sys
import numpy as np


# Three-letter residue names; the list index is the residue's integer code
num2aa=[
    'ALA','ARG','ASN','ASP','CYS',
    'GLN','GLU','GLY','HIS','ILE',
    'LEU','LYS','MET','PHE','PRO',
    'SER','THR','TRP','TYR','VAL',
    ]

aa2num= {x:i for i,x in enumerate(num2aa)}

# One-letter alphabet in the same order as num2aa, plus "-" as a 21st symbol
alpha_1 = list("ARNDCQEGHILKMFPSTWYV-")
aa_N_1 = {n:a for n,a in enumerate(alpha_1)}
aa_1_N = {a:n for n,a in enumerate(alpha_1)}

# One-letter <-> three-letter lookups (zip stops at the 20 named residues, so "-" is not included)
aa123 = {aa1: aa3 for aa1, aa3 in zip(alpha_1, num2aa)}
aa321 = {aa3: aa1 for aa1, aa3 in zip(alpha_1, num2aa)}


def N_to_AA(x):
    """
    Convert integer-encoded sequences into one-letter sequence strings.

    Args:
        x: a single integer code, a 1-D sequence of codes, or a 2-D
           array-like with one sequence per row
    Returns:
        a list with one string per row; codes missing from aa_N_1 become "-"
    """
    # atleast_2d turns a scalar or a single sequence into a one-row batch
    # and leaves 2-D input untouched
    x = np.atleast_2d(np.array(x))
    return ["".join([aa_N_1.get(a, "-") for a in y]) for y in x]


def alphabet_mapping(seq_list, alphabet_dict):
    """
    Encode sequences token by token using a lookup table.

    Args:
        seq_list: a list of sequences ['ABADSDAS', 'AABSDVDDV']
        alphabet_dict: dict mapping each token to its encoded value
    Returns:
        encoded: a list of lists, one per input sequence, holding the
                 encoded value of every token
    Raises:
        KeyError: if a token is not present in alphabet_dict
    """
    return [[alphabet_dict[token] for token in seq] for seq in seq_list]


def alphabet_onehot_2_onehot(alphabet1, alphabet2):
    '''
    Build the matrix that re-orders a one-hot encoding between two alphabets.

    Args:
        alphabet1: List of amino acids in order (A characters)
        alphabet2: List of amino acids in different order

    Returns:
        AxA matrix to map one-hot encoding from alphabet1 to alphabet2;
        entry [i, j] is 1 when symbol j of alphabet1 is symbol i of alphabet2
    Raises:
        AssertionError: if the alphabets differ in length
        KeyError: if a symbol of alphabet1 is missing from alphabet2
    '''
    assert len(alphabet1) == len(alphabet2), 'The alphabets must be the same length'

    alpha1_2_int = {aa: i for i, aa in enumerate(alphabet1)}
    alpha2_2_int = {aa: i for i, aa in enumerate(alphabet2)}

    A = len(alphabet1)
    # named `mapping` so the builtin `map` is not shadowed
    mapping = np.zeros((A, A))

    for aa in alphabet1:
        j = alpha1_2_int[aa]
        i = alpha2_2_int[aa]
        mapping[i, j] = 1

    return mapping
= {aa: i for i, aa in enumerate(alphabet2)} 51 | 52 | A = len(alphabet1) 53 | map = np.zeros((A,A)) 54 | 55 | for aa in alphabet1: 56 | j = alpha1_2_int[aa] 57 | i = alpha2_2_int[aa] 58 | map[i, j] = 1 59 | 60 | return map 61 | 62 | 63 | # minimal sc atom representation (Nx8) 64 | aa2short=[ 65 | (" N "," CA "," C "," CB ", None, None, None, None), # ala 66 | (" N "," CA "," C "," CB "," CG "," CD "," NE "," CZ "), # arg 67 | (" N "," CA "," C "," CB "," CG "," OD1", None, None), # asn 68 | (" N "," CA "," C "," CB "," CG "," OD1", None, None), # asp 69 | (" N "," CA "," C "," CB "," SG ", None, None, None), # cys 70 | (" N "," CA "," C "," CB "," CG "," CD "," OE1", None), # gln 71 | (" N "," CA "," C "," CB "," CG "," CD "," OE1", None), # glu 72 | (" N "," CA "," C ", None, None, None, None, None), # gly 73 | (" N "," CA "," C "," CB "," CG "," ND1", None, None), # his 74 | (" N "," CA "," C "," CB "," CG1"," CD1", None, None), # ile 75 | (" N "," CA "," C "," CB "," CG "," CD1", None, None), # leu 76 | (" N "," CA "," C "," CB "," CG "," CD "," CE "," NZ "), # lys 77 | (" N "," CA "," C "," CB "," CG "," SD "," CE ", None), # met 78 | (" N "," CA "," C "," CB "," CG "," CD1", None, None), # phe 79 | (" N "," CA "," C "," CB "," CG "," CD ", None, None), # pro 80 | (" N "," CA "," C "," CB "," OG ", None, None, None), # ser 81 | (" N "," CA "," C "," CB "," OG1", None, None, None), # thr 82 | (" N "," CA "," C "," CB "," CG "," CD1", None, None), # trp 83 | (" N "," CA "," C "," CB "," CG "," CD1", None, None), # tyr 84 | (" N "," CA "," C "," CB "," CG1", None, None, None), # val 85 | ] 86 | 87 | # full sc atom representation (Nx14) 88 | aa2long=[ 89 | (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # ala 90 | (" N "," CA "," C "," O "," CB "," CG "," CD "," NE "," CZ "," NH1"," NH2", None, None, None), # arg 91 | (" N "," CA "," C "," O "," CB "," CG "," OD1"," ND2", None, None, None, None, None, None), # asn 92 | (" N 
"," CA "," C "," O "," CB "," CG "," OD1"," OD2", None, None, None, None, None, None), # asp 93 | (" N "," CA "," C "," O "," CB "," SG ", None, None, None, None, None, None, None, None), # cys 94 | (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," NE2", None, None, None, None, None), # gln 95 | (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," OE2", None, None, None, None, None), # glu 96 | (" N "," CA "," C "," O ", None, None, None, None, None, None, None, None, None, None), # gly 97 | (" N "," CA "," C "," O "," CB "," CG "," ND1"," CD2"," CE1"," NE2", None, None, None, None), # his 98 | (" N "," CA "," C "," O "," CB "," CG1"," CG2"," CD1", None, None, None, None, None, None), # ile 99 | (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2", None, None, None, None, None, None), # leu 100 | (" N "," CA "," C "," O "," CB "," CG "," CD "," CE "," NZ ", None, None, None, None, None), # lys 101 | (" N "," CA "," C "," O "," CB "," CG "," SD "," CE ", None, None, None, None, None, None), # met 102 | (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ ", None, None, None), # phe 103 | (" N "," CA "," C "," O "," CB "," CG "," CD ", None, None, None, None, None, None, None), # pro 104 | (" N "," CA "," C "," O "," CB "," OG ", None, None, None, None, None, None, None, None), # ser 105 | (" N "," CA "," C "," O "," CB "," OG1"," CG2", None, None, None, None, None, None, None), # thr 106 | (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE2"," CE3"," NE1"," CZ2"," CZ3"," CH2"), # trp 107 | (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE1"," CE2"," CZ "," OH ", None, None), # tyr 108 | (" N "," CA "," C "," O "," CB "," CG1"," CG2", None, None, None, None, None, None, None), # val 109 | ] 110 | 111 | # build the "alternate" sc mapping 112 | aa2longalt=[ 113 | (" N "," CA "," C "," O "," CB ", None, None, None, None, None, None, None, None, None), # ala 114 | (" N "," CA "," C "," O "," CB "," CG "," CD "," NE "," CZ 
"," NH2"," NH1", None, None, None), # arg 115 | (" N "," CA "," C "," O "," CB "," CG "," OD1"," ND2", None, None, None, None, None, None), # asn 116 | (" N "," CA "," C "," O "," CB "," CG "," OD2"," OD1", None, None, None, None, None, None), # asp 117 | (" N "," CA "," C "," O "," CB "," SG ", None, None, None, None, None, None, None, None), # cys 118 | (" N "," CA "," C "," O "," CB "," CG "," CD "," OE1"," NE2", None, None, None, None, None), # gln 119 | (" N "," CA "," C "," O "," CB "," CG "," CD "," OE2"," OE1", None, None, None, None, None), # glu 120 | (" N "," CA "," C "," O ", None, None, None, None, None, None, None, None, None, None), # gly 121 | (" N "," CA "," C "," O "," CB "," CG "," ND1"," CD2"," CE1"," NE2", None, None, None, None), # his 122 | (" N "," CA "," C "," O "," CB "," CG1"," CG2"," CD1", None, None, None, None, None, None), # ile 123 | (" N "," CA "," C "," O "," CB "," CG "," CD2"," CD1", None, None, None, None, None, None), # leu 124 | (" N "," CA "," C "," O "," CB "," CG "," CD "," CE "," NZ ", None, None, None, None, None), # lys 125 | (" N "," CA "," C "," O "," CB "," CG "," SD "," CE ", None, None, None, None, None, None), # met 126 | (" N "," CA "," C "," O "," CB "," CG "," CD2"," CD1"," CE2"," CE1"," CZ ", None, None, None), # phe 127 | (" N "," CA "," C "," O "," CB "," CG "," CD ", None, None, None, None, None, None, None), # pro 128 | (" N "," CA "," C "," O "," CB "," OG ", None, None, None, None, None, None, None, None), # ser 129 | (" N "," CA "," C "," O "," CB "," OG1"," CG2", None, None, None, None, None, None, None), # thr 130 | (" N "," CA "," C "," O "," CB "," CG "," CD1"," CD2"," CE2"," CE3"," NE1"," CZ2"," CZ3"," CH2"), # trp 131 | (" N "," CA "," C "," O "," CB "," CG "," CD2"," CD1"," CE2"," CE1"," CZ "," OH ", None, None), # tyr 132 | (" N "," CA "," C "," O "," CB "," CG2"," CG1", None, None, None, None, None, None, None), # val 133 | ] 134 | 135 | # full sc & H atom representation (Nx22) 136 | aa2longH = 
[ 137 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' H ', ' HA ', '1HB ', '2HB ', '3HB ', None, None, None, None, None, None, None, None, None, None, None, None, None, None), # ala 138 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' NE ', ' CZ ', ' NH1', ' NH2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', ' HE ', '1HH1', '2HH1', '1HH2', '2HH2'), # arg 139 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' OD1', ' ND2', ' H ', ' HA ', '1HB ', '2HB ', '1HD2', '2HD2', None, None, None, None, None, None, None, None, None, None), # asn 140 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' OD1', ' OD2', ' H ', ' HA ', '1HB ', '2HB ', None, None, None, None, None, None, None, None, None, None, None, None), # asp 141 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' SG ', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # cys 142 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' OE1', ' NE2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE2', '2HE2', None, None, None, None, None, None, None), # gln 143 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' OE1', ' OE2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', None, None, None, None, None, None, None, None, None), # glu 144 | (' N ', ' CA ', ' C ', ' O ', ' H ', '1HA ', '2HA ', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None), # gly 145 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' ND1', ' CD2', ' CE1', ' NE2', ' H ', ' HA ', '1HB ', '2HB ', ' HD2', ' HE1', ' HE2', None, None, None, None, None, None, None), # his 146 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG1', ' CG2', ' CD1', ' H ', ' HA ', ' HB ', '1HG1', '2HG1', '1HG2', '2HG2', '3HG2', '1HD1', '2HD1', '3HD1', None, None, None, None, None), # ile 147 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', '1HD1', '2HD1', '3HD1', '1HD2', '2HD2', '3HD2', None, 
None, None, None, None), # leu 148 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' CE ', ' NZ ', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', '1HE ', '2HE ', '1HZ ', '2HZ ', '3HZ ', None, None), # lys 149 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' SD ', ' CE ', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE ', '2HE ', '3HE ', None, None, None, None, None, None, None), # met 150 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HZ ', None, None, None, None), # phe 151 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' NV ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', None, None, None, None, None, None, None, None, None), # pro 152 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' OG ', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # ser 153 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' OG1', ' CG2', ' H ', ' HA ', ' HB ', ' HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None, None, None), # thr 154 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' NE1', ' CE2', ' CE3', ' CZ2', ' CZ3', ' CH2', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HE1', ' HE3', ' HZ2', 'HZ3', 'HH2'), # trp 155 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' OH ', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HH ', None, None, None), # tyr 156 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG1', ' CG2', ' H ', ' HA ', ' HB ', '1HG1', '2HG1', '3HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None) # val 157 | ] 158 | 159 | aa2longH_Nterm = [ 160 | (' N ', ' CA ', ' C ', ' O ', ' CB ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '3HB ', None, None, None, None, None, None, None, None, None, None, None, None), # ala 161 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG 
', ' CD ', ' NE ', ' CZ ', ' NH1', ' NH2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', ' HE ', '1HH1', '2HH1', '1HH2', '2HH2'), # arg 162 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' OD1', ' ND2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HD2', '2HD2', None, None, None, None, None, None, None, None, None, None), # asn 163 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' OD1', ' OD2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', None, None, None, None, None, None, None, None, None, None, None, None), # asp 164 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' SG ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # cys 165 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' OE1', ' NE2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE2', '2HE2', None, None, None, None, None, None, None), # gln 166 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' OE1', ' OE2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', None, None, None, None, None, None, None, None, None), # glu 167 | (' N ', ' CA ', ' C ', ' O ', '1H ', '2H ', '3H ', '1HA ', '2HA ', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None), # gly 168 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' ND1', ' CD2', ' CE1', ' NE2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HD2', ' HE1', ' HE2', None, None, None, None, None, None, None), # his 169 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG1', ' CG2', ' CD1', '1H ', '2H ', '3H ', ' HA ', ' HB ', '1HG1', '2HG1', '1HG2', '2HG2', '3HG2', '1HD1', '2HD1', '3HD1', None, None, None, None, None), # ile 170 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HG ', '1HD1', '2HD1', '3HD1', '1HD2', '2HD2', '3HD2', None, None, None, None, None), # leu 171 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD 
', ' CE ', ' NZ ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', '1HE ', '2HE ', '1HZ ', '2HZ ', '3HZ ', None, None), # lys 172 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' SD ', ' CE ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE ', '2HE ', '3HE ', None, None, None, None, None, None, None), # met 173 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HZ ', None, None, None, None), # phe 174 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD ', ' NV ', 'CAV ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', '1H ', '2H ', None, None, None, None, None, None, None, None), # pro 175 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' OG ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # ser 176 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' OG1', ' CG2', '1H ', '2H ', '3H ', ' HA ', ' HB ', ' HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None, None, None), # thr 177 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' NE1', ' CE2', ' CE3', ' CZ2', ' CZ3', ' CH2', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HE1', ' HE3', ' HZ2', 'HZ3', 'HH2'), # trp 178 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' OH ', '1H ', '2H ', '3H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HH ', None, None, None), # tyr 179 | (' N ', ' CA ', ' C ', ' O ', ' CB ', ' CG1', ' CG2', '1H ', '2H ', '3H ', ' HA ', ' HB ', '1HG1', '2HG1', '3HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None) # val 180 | ] 181 | 182 | aa2longH_Cterm = [ 183 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' H ', ' HA ', '1HB ', '2HB ', '3HB ', None, None, None, None, None, None, None, None, None, None, None, None, None, None), # ala 184 
| (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' NE ', ' CZ ', ' NH1', ' NH2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', ' HE ', '1HH1', '2HH1', '1HH2', '2HH2'), # arg 185 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' OD1', ' ND2', ' H ', ' HA ', '1HB ', '2HB ', '1HD2', '2HD2', None, None, None, None, None, None, None, None, None, None), # asn 186 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' OD1', ' OD2', ' H ', ' HA ', '1HB ', '2HB ', None, None, None, None, None, None, None, None, None, None, None, None), # asp 187 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' SG ', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # cys 188 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' OE1', ' NE2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE2', '2HE2', None, None, None, None, None, None, None), # gln 189 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' OE1', ' OE2', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', None, None, None, None, None, None, None, None, None), # glu 190 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' H ', '1HA ', '2HA ', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None), # gly 191 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' ND1', ' CD2', ' CE1', ' NE2', ' H ', ' HA ', '1HB ', '2HB ', ' HD2', ' HE1', ' HE2', None, None, None, None, None, None, None), # his 192 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG1', ' CG2', ' CD1', ' H ', ' HA ', ' HB ', '1HG1', '2HG1', '1HG2', '2HG2', '3HG2', '1HD1', '2HD1', '3HD1', None, None, None, None, None), # ile 193 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD1', ' CD2', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', '1HD1', '2HD1', '3HD1', '1HD2', '2HD2', '3HD2', None, None, None, None, None), # leu 194 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' CE 
', ' NZ ', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', '1HE ', '2HE ', '1HZ ', '2HZ ', '3HZ ', None, None), # lys 195 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' SD ', ' CE ', ' H ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HE ', '2HE ', '3HE ', None, None, None, None, None, None, None), # met 196 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HZ ', None, None, None, None), # phe 197 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD ', ' NV ', ' HA ', '1HB ', '2HB ', '1HG ', '2HG ', '1HD ', '2HD ', None, None, None, None, None, None, None, None, None), # pro 198 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' OG ', ' H ', ' HA ', '1HB ', '2HB ', ' HG ', None, None, None, None, None, None, None, None, None, None, None, None, None), # ser 199 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' OG1', ' CG2', ' H ', ' HA ', ' HB ', ' HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None, None, None), # thr 200 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD1', ' CD2', ' NE1', ' CE2', ' CE3', ' CZ2', ' CZ3', ' CH2', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HE1', ' HE3', ' HZ2', 'HZ3', 'HH2'), # trp 201 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG ', ' CD1', ' CD2', ' CE1', ' CE2', ' CZ ', ' OH ', ' H ', ' HA ', '1HB ', '2HB ', ' HD1', ' HD2', ' HE1', ' HE2', ' HH ', None, None, None), # tyr 202 | (' N ', ' CA ', ' C ', ' O ', ' OXT', ' CB ', ' CG1', ' CG2', ' H ', ' HA ', ' HB ', '1HG1', '2HG1', '3HG1', '1HG2', '2HG2', '3HG2', None, None, None, None, None, None, None, None) # val 203 | ] 204 | 205 | 206 | # build "deterministic" atoms 207 | # see notebook (se3_experiments.ipynb for derivation) 208 | aa2frames=[ 209 | [], # ala 210 | [ # arg 211 | [' NH1', ' CZ ', ' NE ', ' CD ', [-0.7218378782272339, 1.0856682062149048, -0.006118079647421837]], 212 | [' NH2', ' CZ ', ' 
NE ', ' CD ', [-0.6158039569854736, -1.1400136947631836, 0.006467342376708984]]], 213 | [ # asn 214 | [' ND2', ' CG ', ' CB ', ' OD1', [-0.6304131746292114, -1.1431225538253784, 0.02364802360534668]]], 215 | [ # asp 216 | [' OD2', ' CG ', ' CB ', ' OD1', [-0.5972501039505005, -1.0955055952072144, 0.04530305415391922]]], 217 | [], # cys 218 | [ # gln 219 | [' NE2', ' CD ', ' CG ', ' OE1', [-0.6558755040168762, -1.1324536800384521, 0.026521772146224976]]], 220 | [ # glu 221 | [' OE2', ' CD ', ' CG ', ' OE1', [-0.5578438639640808, -1.1161314249038696, -0.015464287251234055]]], 222 | [], # gly 223 | [ # his 224 | [' CD2', ' CG ', ' CB ', ' ND1', [-0.7502505779266357, -1.1680538654327393, 0.0005368441343307495]], 225 | [' CE1', ' CG ', ' CB ', ' ND1', [-2.0262467861175537, 0.539483368396759, -0.004495501518249512]], 226 | [' NE2', ' CG ', ' CB ', ' ND1', [-2.0761325359344482, -0.8199722766876221, -0.0018703639507293701]]], 227 | [ # ile 228 | [' CG2', ' CB ', ' CA ', ' CG1', [-0.6059935688972473, -0.8108057379722595, 1.1861376762390137]]], 229 | [ # leu 230 | [' CD2', ' CG ', ' CB ', ' CD1', [-0.5942193269729614, -0.7693282961845398, -1.1914138793945312]]], 231 | [], # lys 232 | [], # met 233 | [ # phe 234 | [' CD2', ' CG ', ' CB ', ' CD1', [-0.7164441347122192, -1.197853446006775, 0.06416648626327515]], 235 | [' CE1', ' CG ', ' CB ', ' CD1', [-2.0785865783691406, 1.2366485595703125, 0.08100450038909912]], 236 | [' CE2', ' CG ', ' CB ', ' CD1', [-2.107091188430786, -1.178497076034546, 0.13524535298347473]], 237 | [' CZ ', ' CG ', ' CB ', ' CD1', [-2.786630630493164, 0.03873880207538605, 0.14633776247501373]]], 238 | [], # pro 239 | [], # ser 240 | [ # thr 241 | [' CG2', ' CB ', ' CA ', ' OG1', [-0.6842088103294373, -0.6709619164466858, 1.2105456590652466]]], 242 | [ # trp 243 | [' CD2', ' CG ', ' CB ', ' CD1', [-0.8550368547439575, -1.0790592432022095, 0.09017711877822876]], 244 | [' NE1', ' CG ', ' CB ', ' CD1', [-2.1863200664520264, 0.8064242601394653, 
0.08350661396980286]], 245 | [' CE2', ' CG ', ' CB ', ' CD1', [-2.1801204681396484, -0.5795643329620361, 0.14015203714370728]], 246 | [' CE3', ' CG ', ' CB ', ' CD1', [-0.605582594871521, -2.4733362197875977, 0.16200461983680725]], 247 | [' CE2', ' CG ', ' CB ', ' CD1', [-2.1801204681396484, -0.5795643329620361, 0.14015203714370728]], 248 | [' CZ2', ' CG ', ' CB ', ' CD1', [-3.2672977447509766, -1.473116159439087, 0.250858873128891]], 249 | [' CZ3', ' CG ', ' CB ', ' CD1', [-1.6969941854476929, -3.3360071182250977, 0.264143705368042]], 250 | [' CH2', ' CG ', ' CB ', ' CD1', [-3.009331703186035, -2.8451972007751465, 0.3059283494949341]]], 251 | [ # tyr 252 | [' CD2', ' CG ', ' CB ', ' CD1', [-0.69439297914505, -1.2123756408691406, -0.009198814630508423]], 253 | [' CE1', ' CG ', ' CB ', ' CD1', [-2.104464054107666, 1.1910505294799805, -0.014679580926895142]], 254 | [' CE2', ' CG ', ' CB ', ' CD1', [-2.0857787132263184, -1.2231677770614624, -0.024517983198165894]], 255 | [' CZ ', ' CG ', ' CB ', ' CD1', [-2.7897322177886963, -0.021470561623573303, -0.026979409158229828]], 256 | [' OH ', ' CG ', ' CB ', ' CD1', [-4.1559271812438965, -0.029129385948181152, -0.044720835983753204]]], 257 | [ # val 258 | [' CG2', ' CB ', ' CA ', ' CG1', [-0.6258467435836792, -0.7654698491096497, -1.1894742250442505]]], 259 | ] 260 | 261 | # O from frame (C,N-1,CA) 262 | bb2oframe=[-0.5992066264152527, -1.0820008516311646, 0.0001476481556892395] 263 | 264 | # build the mapping from indices in reduced representation to 265 | # indices in the full representation 266 | # N x 14 x 6 = 267 | # base-idx < 0 ==> no atom 268 | # xyz = 0 ==> no mapping 269 | short2long = np.zeros((20,14,6)) 270 | for i in range(20): 271 | i_s, i_l = aa2short[i],aa2long[i] 272 | for j,a in enumerate(i_l): 273 | # case 1: if no atom defined, blank 274 | if (a is None): 275 | short2long[i,j,0] = -1 276 | # case 2: atom is a base atom 277 | elif (a in i_s): 278 | short2long[i,j,0] = i_s.index(a) 279 | if 
"""
PARSING FUNCTIONS
"""
def parse_arguments(args, restypes):
    """
    Expand the global command-line settings into per-CST lists (modifies ``args`` in place).

    For the Dunbrack probability, N/C-terminal stub lengths and secondary structure
    a ``None`` placeholder is put at index 0 and one value per entry of ``restypes``
    follows. When a per-CST list was supplied by the user it is validated and used,
    otherwise the global value is repeated.

    The random-rotamer lists are different: a user-supplied per-CST list must
    already contain ``len(restypes)+1`` values (index 0 included), and the global
    ``max_random_rotamers`` / ``frac_random_rotamers`` value, when given, replaces
    the per-CST list with that value repeated for every slot.

    Arguments:
        args: argparse-like namespace holding the attributes read below
        restypes: sized iterable with one entry per CST block
    Returns:
        the same ``args`` object
    Raises:
        AssertionError when a user-supplied per-CST list has a wrong type or length
    """
    n_slots = len(restypes) + 1

    def _per_cst(user_values, global_value, is_valid):
        # [None, v1, v2, ...]: validated user list, or the global value for each CST
        if user_values is None:
            return [None] + [global_value for _ in restypes]
        assert all([is_valid(x) for x in user_values])
        return [None] + user_values

    # Limiting Dunbrack library as requested.
    args.dunbrack_prob_per_cst = _per_cst(args.dunbrack_prob_per_cst, args.dunbrack_prob,
                                          lambda x: isinstance(x, float))

    ######### IF REQUESTED... ############
    ### RANDOM ROTAMER SELECTION SETUP ###
    if args.max_random_rotamers_per_cst is not None:
        assert all([isinstance(x, int) for x in args.max_random_rotamers_per_cst])
        assert len(args.max_random_rotamers_per_cst) == n_slots, "Invalid number of per-cst max_random_rotamers_per_cst"

    if args.frac_random_rotamers_per_cst is not None:
        assert all([isinstance(x, float) for x in args.frac_random_rotamers_per_cst])
        assert len(args.frac_random_rotamers_per_cst) == n_slots, "Invalid number of per-cst frac_random_rotamers_per_cst"

    if args.max_random_rotamers is not None:
        args.max_random_rotamers_per_cst = [args.max_random_rotamers for _ in range(n_slots)]

    if args.frac_random_rotamers is not None:
        args.frac_random_rotamers_per_cst = [args.frac_random_rotamers for _ in range(n_slots)]

    # In case best rotamer is requested for a given CST id then set randomness to 1.0.
    # The fraction list only exists when fraction-based sampling was requested, and
    # the list of CST ids may be unset, so both are checked before iterating.
    if args.frac_random_rotamers_per_cst is not None and args.use_best_rotamer_cstids:
        for cst_id in range(len(args.frac_random_rotamers_per_cst)):
            if cst_id in args.use_best_rotamer_cstids:
                args.frac_random_rotamers_per_cst[cst_id] = 1.0

    #### PARSING SECONDARY STRUCTURE LENGTHS #####
    args.N_len_per_cst = _per_cst(args.N_len_per_cst, args.N_len, lambda x: isinstance(x, int))
    args.C_len_per_cst = _per_cst(args.C_len_per_cst, args.C_len, lambda x: isinstance(x, int))

    # Loading favored rotamers for each used residue type in each CST block
    # This allows different rotamer sets to be stored if same residue type should be
    # on different secondary structures in different CST blocks
    # TODO: could also consider enabling different probabilities for different CST's or AA's? <-- partially done
    args.secstruct_per_cst = _per_cst(args.secstruct_per_cst, args.secstruct, lambda x: x in "EH-")
    return args


def parse_motif_input(motif_input, cst_atoms, restypes):
    """
    Load external motif poses given as ``"<cst_no>:<resno>:<file path>"`` strings.

    Only CST number 1 is accepted; anything else terminates the program.

    Arguments:
        motif_input: iterable of motif definition strings
        cst_atoms: per-CST constraint atom names, in the layout built by get_cst_atoms()
        restypes: container indexed by CST number giving the allowed residue names
    Returns:
        dict {cst_no: {"resno": int, "pose": pose, "fp": str, "atoms": CST atom names}}
    """
    motifs = {}
    for motif_txt in motif_input:
        # maxsplit=2 keeps a file path that itself contains ':' in one piece
        fields = motif_txt.split(":", 2)
        motif_cst_no = int(fields[0])
        if motif_cst_no != 1:
            sys.exit("External motif not supported for not-first CST's right now.")
        motif_resno = int(fields[1])
        motif_fp = fields[2]
        motifs[motif_cst_no] = {"resno": motif_resno,
                                "pose": pyr.pose_from_file(motif_fp),
                                "fp": motif_fp,
                                "atoms": None}
        motif_resname = motifs[motif_cst_no]["pose"].residue(motif_resno).name3()
        assert motif_resname in restypes[motif_cst_no], f"{motif_resname} not found in {restypes}"

        # Finding the CST atoms for a given CST.
        # Keys look like "<residue name>-<cst number>"; the last matching entry wins.
        for sub_cst_block in cst_atoms[motif_cst_no]:
            for per_aa_cstset in sub_cst_block:
                matching_keys = [aa for aa in per_aa_cstset.keys() if aa.split("-")[0] == motif_resname]
                if matching_keys:
                    motifs[motif_cst_no]["atoms"] = per_aa_cstset[matching_keys[0]]
        if motifs[motif_cst_no]["atoms"] is None:
            print(cst_atoms)
            sys.exit("Unable to find correct motif atoms based on the corresponding CST definition")
    return motifs
def get_cst_atoms(cst_io):
    """
    Collect the constraint atom names for every CST block found in ``cst_io``.

    Returns a dict keyed by CST number (starting from 1). Each value is a list
    with one entry per variable constraint (mcfi) of that block; each entry is
    a list of dicts, one per allowed (downstream, upstream) residue type pair:
        {"<downstream name>-<downstream cst no>": (atom1, atom2, atom3),
         "<upstream name>-<cst no>": (atom1, atom2, atom3)}
    """
    def _template_atom_names(mcfi, template_res, restype):
        # names of the three template atoms of this residue type
        return tuple(restype.atom_name(mcfi.template_atom_inds(template_res, k, restype)[1])
                     for k in range(1, 4))

    cst_atoms = {}
    for cst_no in range(1, cst_io.mcfi_lists_size() + 1):
        cst_atoms[cst_no] = []
        for mcfi_no in range(1, cst_io.mcfi_list(cst_no).num_mcfis() + 1):
            mcfi = cst_io.mcfi_list(cst_no).mcfi(mcfi_no)
            per_pair = []
            cst_atoms[cst_no].append(per_pair)

            # The partner residue belongs to CST 1 unless a secondary match
            # against an upstream CST names another block. A DOWNSTREAM
            # secondary match always keeps the partner at 1.
            partner_cst = 1
            algo_inputs = mcfi.algorithm_inputs()
            if "match" in algo_inputs:
                match_lines = algo_inputs["match"]
                if not any("DOWNSTREAM" in line for line in match_lines):
                    for line in match_lines:
                        if "SECONDARY_MATCH:" in line and "UPSTREAM_CST" in line:
                            partner_cst = int(line.split()[2])
                            break

            ds_types = mcfi.allowed_restypes(mcfi.downstream_res())
            us_types = mcfi.allowed_restypes(mcfi.upstream_res())
            for ds_res, us_res in itertools.product(ds_types, us_types):
                ds_names = _template_atom_names(mcfi, mcfi.downstream_res(), ds_res)
                us_names = _template_atom_names(mcfi, mcfi.upstream_res(), us_res)

                # Need to append CST numbers to residue names
                per_pair.append({f"{ds_res.name()}-{partner_cst}": ds_names,
                                 f"{us_res.name()}-{cst_no}": us_names})

    return cst_atoms
range(res.nchi()) if "H" not in [res.atom_type(an).element() for an in res.chi_atoms(n+1)]] 206 | if res.name3() in ["ALA", "GLY"]: 207 | good_rotamers[i].append(res) 208 | else: 209 | rotlib_matches = dunbrack_rotlib.find_bb_from_inverse_loc(restype_good_rotamers[i][res.name3()], _chis) 210 | if len(rotlib_matches) > 0: 211 | good_rotamers[i].append(res) 212 | if len(good_rotamers[i]) == 0 and len(rotset[i]) != 0: 213 | print(f"Failed to find compatible rotamers for constraint {i}: {res.name()}") 214 | return None 215 | else: 216 | print("Preselecting inverse rotamers only based whether the tip atoms are different") 217 | good_rotamers = [] 218 | for i, invrots in enumerate(rotset): 219 | if isinstance(invrots[0], pyrosetta.rosetta.core.pose.Pose): 220 | good_rotamers.append(invrots) 221 | continue 222 | elif invrots[0].is_ligand(): 223 | good_rotamers.append(invrots) 224 | continue 225 | good_rotamers.append([]) 226 | for invrot in invrots: 227 | if len(good_rotamers[i]) == 0: 228 | good_rotamers[i].append(invrot) 229 | continue 230 | is_unique = [] 231 | for rot in good_rotamers[i]: 232 | if rot.name() != invrot.name(): 233 | continue 234 | if (rot.xyz("CA")-invrot.xyz("CA")).norm() < 0.2: 235 | is_unique.append(False) 236 | continue 237 | if (rot.xyz("CB")-invrot.xyz("CB")).norm() < 0.2: 238 | is_unique.append(False) 239 | continue 240 | is_unique.append(True) 241 | if all(is_unique): 242 | good_rotamers[i].append(invrot) 243 | return good_rotamers 244 | 245 | 246 | def find_unique_rotamers_for_motif(rotset, motifs): 247 | """ 248 | Identifies different rotamers from the inverse rotamer set that can be used for aligning the motif to. 249 | Difference is calculated based on the geometric distance between the motif atoms of inverse rotamers. 
def pick_random_rotamers(invrots, N_max=None, frac=None):
    """
    Return a random subset of one list of inverse rotamers.

    ``N_max`` takes precedence over ``frac`` when both are given.
    Everything is returned (as a new list, original order) when
      * the input is empty,
      * neither ``N_max`` nor ``frac`` is given,
      * ``N_max`` is larger than the number of rotamers,
      * ``frac`` is used and there is at most one rotamer,
      * the first item is a Pose (motif poses are never subsampled).
    Otherwise ``N_max`` items, or ``round(frac * len(invrots))`` items, are
    drawn with random.sample. The fraction-based count is capped at the
    number of available rotamers, so ``frac`` > 1 no longer raises.

    Arguments:
        invrots: sequence of rotamers (or poses)
        N_max (int): maximum number of rotamers to keep
        frac (float): fraction of rotamers to keep
    """
    pool = [r for r in invrots]
    if len(pool) == 0:
        # nothing to sample from; also avoids indexing pool[0] below
        return pool

    if N_max is not None:
        n_pick = N_max
        keep_all = len(pool) < N_max
    elif frac is not None:
        n_pick = int(round(frac * len(pool), 0))
        keep_all = len(pool) <= 1
    else:
        return pool

    if keep_all or isinstance(pool[0], pyrosetta.rosetta.core.pose.Pose):
        return pool
    return random.sample(pool, min(n_pick, len(pool)))


def pick_random_rotamers_set(rotset, max_random_rotamers_per_cst=None, frac_random_rotamers_per_cst=None):
    """
    Selects a subset of inverse rotamers for each set of inverse rotamers.
    Exactly one of the two per-CST lists must be given, and it needs one value
    per entry of ``rotset``; otherwise the program exits / an assertion fails.
    Arguments:
        rotset (list)
        max_random_rotamers_per_cst (list, int)
        frac_random_rotamers_per_cst (list, float)
    """
    have_max = max_random_rotamers_per_cst is not None
    have_frac = frac_random_rotamers_per_cst is not None
    if have_max == have_frac:
        # neither or both were provided
        sys.exit("Bad setup")

    if have_frac:
        max_random_rotamers_per_cst = [None for _ in frac_random_rotamers_per_cst]
    else:
        frac_random_rotamers_per_cst = [None for _ in max_random_rotamers_per_cst]

    assert len(rotset) == len(frac_random_rotamers_per_cst)
    assert len(rotset) == len(max_random_rotamers_per_cst)

    return [pick_random_rotamers(invrots, N_max=n_max, frac=fraction)
            for invrots, n_max, fraction in zip(rotset, max_random_rotamers_per_cst, frac_random_rotamers_per_cst)]
def prune_ligand_rotamers(rotset, rmsd_cutoff=None, nproc=None):
    """
    Remove ligand rotamers with intramolecular clashes and, optionally, near-duplicates.

    Step 1 (clash check, run in ``nproc`` worker processes): a rotamer is
    rejected when any pair of non-virtual, non-bonded atoms is closer than
    2.1 A (both heavy atoms) or 1.7 A (otherwise).
    Step 2 (only when ``rmsd_cutoff`` is not None/0.0): the surviving rotamers
    are compared through utils.rmsd() of their heavy-atom pairwise distance
    vectors; a rotamer is kept only if it differs from every rotamer kept so
    far by at least ``rmsd_cutoff``.

    Arguments:
        rotset: indexable collection of ligand residues
        rmsd_cutoff (float): threshold for the uniqueness filter, None/0.0 disables it
        nproc (int): number of worker processes; None uses os.cpu_count()
    Returns:
        list of the rotamers that passed
    """
    print("Pruning ligand rotamers based on intramolecular clashes")
    if nproc is None:
        # The number of stop signals queued below must match the number of
        # workers, so the worker count has to be a concrete integer.
        nproc = os.cpu_count() or 1

    # Clashcheck
    # NOTE(review): the workers reach `rotset`, `the_queue` and `good_rotamers`
    # through this closure, which presumably relies on processes being forked
    # - confirm before running with a different multiprocessing start method.
    def process():
        while True:
            i = the_queue.get(block=True)
            if i is None:
                return
            res = rotset[i]
            for atm1, atm2 in itertools.combinations(range(1, res.natoms()+1), 2):
                if res.is_virtual(atm1) or res.is_virtual(atm2):
                    continue
                # Skipping over bonded atoms
                if atm1 in res.bonded_neighbor(atm2) or atm2 in res.bonded_neighbor(atm1):
                    continue

                if res.atom_type(atm1).is_heavyatom() and res.atom_type(atm2).is_heavyatom():
                    cutoff = 2.1
                else:
                    cutoff = 1.7

                if (res.xyz(atm1) - res.xyz(atm2)).norm() < cutoff:
                    good_rotamers[i] = False
                    break

    print(f"{len(rotset)} conformers to process")
    the_queue = multiprocessing.Queue()  # Queue stores the iterables

    start = time.time()
    manager = multiprocessing.Manager()
    good_rotamers = manager.dict()  # Need a special dictionary to store outputs from multiple processes

    for i, res in enumerate(rotset):
        the_queue.put(i)
        good_rotamers[i] = True

    # Each worker runs `process` as its initializer and drains the queue
    pool = multiprocessing.Pool(processes=nproc,
                                initializer=process)

    # None to end each process
    for _i in range(nproc):
        the_queue.put(None)

    # Closing the queue and the pool
    the_queue.close()
    the_queue.join_thread()
    pool.close()
    pool.join()

    end = time.time()
    passed = [i for i in good_rotamers.keys() if good_rotamers[i] is True]
    print(f"Found {len(passed)} good ligand rotamers.\n"
          f"Processing all the rotamers for clashes took {(end - start):.2f} seconds")

    ## RMSD
    if rmsd_cutoff in [None, 0.0]:
        return [rotset[i] for i in passed]

    unique_rotamers = {}
    DMs = {}
    for i in passed:
        res = rotset[i]

        # internal heavy-atom distances describe the conformation independent of placement
        xyz = np.array([res.xyz(n+1) for n in range(res.natoms()) if res.atom_type(n+1).element() != "H"])
        DMs[i] = scipy.spatial.distance.pdist(xyz, 'euclidean')

        # stops at the first already-kept rotamer that is too similar
        if any(utils.rmsd(DMs[i], DMs[j]) < rmsd_cutoff for j in unique_rotamers):
            continue
        unique_rotamers[i] = res

    print(f"Found {len(unique_rotamers)}/{len(good_rotamers)} unique ligand rotamers based on RMSD cutoff {rmsd_cutoff}.")
    return [rot for i, rot in unique_rotamers.items()]
def calculate_samplings(chi_value, std, sampling_level):
    """
    Return the chi angle values to sample around ``chi_value``, in ascending order.

    Sampling levels (sd = ``std``):
    0 Default original dihedral only; same as using no flag at all
    1 +/- one standard deviation (sd); 3 samples
    2 +/- 0.5 sd; 3 samples
    3 +/- 1 & 2 sd; 5 samples
    4 +/- 0.5 & 1 sd; 5 samples
    5 +/- 0.5, 1, 1.5 & 2 sd; 9 samples
    6 +/- 0.33, 0.67, 1 sd; 7 samples
    7 +/- 0.25, 0.5, 0.75, 1, 1.25 & 1.5 sd; 13 samples.

    Arguments:
        chi_value (float): the original dihedral
        std (float): standard deviation of that dihedral
        sampling_level (int): 0-7, see above
    Returns:
        list of floats. Any other sampling level terminates the program.
    """
    # positive sd multipliers for each level, smallest first
    sd_multipliers = {
        0: (),
        1: (1.0,),
        2: (0.5,),
        3: (1.0, 2.0),
        4: (0.5, 1.0),
        5: (0.5, 1.0, 1.5, 2.0),
        6: (0.333, 0.667, 1.0),
        7: (0.25, 0.5, 0.75, 1.0, 1.25, 1.5),
    }
    if sampling_level not in sd_multipliers:
        sys.exit(f"Invalid sampling level: {sampling_level}")

    multipliers = sd_multipliers[sampling_level]
    # Offsets are subtracted/added; the outermost points of level 6 are
    # chi -/+ 1 sd like in every other level (not chi multiplied by sd).
    below = [chi_value - m*std for m in reversed(multipliers)]
    above = [chi_value + m*std for m in multipliers]
    return below + [chi_value] + above
def check_clash(pose, catres_resnos, cutoff=1.7, ignore_respairs=None, cst_atoms=None, tip_atom=False, debug=False):
    """
    Checks for clashes between residue atoms.
    Only considers residue pairs that are on different chains, not bonded to
    each other, not virtual, and have nbr_atom within 10 angstrom of each other.

    Clash distance is 1.8 angstrom when both atoms are heavy atoms and
    1.5 angstrom otherwise. The ``cutoff`` argument is accepted for backward
    compatibility but is NOT used for the decision.
    A contact is not a clash when the atom of the first (lower-numbered)
    residue is N or O and the atom of the second residue is H.

    Arguments:
        pose: pose whose residues are compared pairwise
        catres_resnos: residue numbers treated as catalytic residues (used with tip_atom)
        cutoff (float): unused, see above
        ignore_respairs: iterable of residue number containers; a pair of
                         residues found together in one of them is skipped
        cst_atoms: {resno1: (a1, a2, a3), resno2: (a1, a2, a3)}
                   atoms that are excluded from the clash check
        tip_atom (bool): also exclude CA, CB, C, N, O and their hydrogens of
                         non-ligand catalytic residues (except GLY, PRO, ALA)
        debug (bool): print the clashing atom pair
    Returns:
        True as soon as one clash is found, False otherwise
    """
    heavy_pair_cutoff = 1.8
    other_pair_cutoff = 1.5

    def _ignored_atoms(res):
        # atom indices of `res` that never count as clashing
        ignored = []
        if tip_atom is True and not res.is_ligand() and res.seqpos() in catres_resnos:
            # Ignoring any of the backbone-ish atoms
            if res.name3() not in ["GLY", "PRO", "ALA"]:
                for name in ["CA", "CB", "C", "N", "O"]:
                    atomno = res.atom_index(name)
                    ignored.append(atomno)
                    if res.attached_H_begin(atomno) == 0:
                        continue
                    for _n in range(res.attached_H_begin(atomno), res.attached_H_end(atomno)+1):
                        ignored.append(_n)
        # NOTE(review): constraint atoms are skipped regardless of tip_atom here - confirm
        if cst_atoms is not None and res.seqpos() in cst_atoms.keys():
            for a in cst_atoms[res.seqpos()]:
                ignored.append(res.atom_index(a.strip()))
        return ignored

    for resno1, resno2 in itertools.combinations(range(1, pose.size()+1), 2):
        res1 = pose.residue(resno1)
        res2 = pose.residue(resno2)

        # Going through a bunch of conditions that would allow us to skip
        # checking clashes in a given pair of residues. These are done before
        # any per-atom bookkeeping because most pairs are rejected here.
        if ignore_respairs is not None:
            if any([res1.seqpos() in p and res2.seqpos() in p for p in ignore_respairs]):
                continue
        if res1.chain() == res2.chain():
            continue
        if res1.seqpos() == res2.seqpos():
            continue
        if res1.is_bonded(res2):
            continue
        if (res1.nbr_atom_xyz() - res2.nbr_atom_xyz()).norm() > 10.0:
            continue
        if res1.is_virtual_residue() or res2.is_virtual_residue():
            continue

        skip1 = _ignored_atoms(res1)
        skip2 = _ignored_atoms(res2)

        for atm1 in range(1, res1.natoms()+1):
            if res1.is_virtual(atm1) or atm1 in skip1:
                continue
            for atm2 in range(1, res2.natoms()+1):
                if res2.is_virtual(atm2) or atm2 in skip2:
                    continue

                if res1.atom_type(atm1).is_heavyatom() and res2.atom_type(atm2).is_heavyatom():
                    limit = heavy_pair_cutoff
                else:
                    limit = other_pair_cutoff
                _dist = (res1.xyz(atm1) - res2.xyz(atm2)).norm()
                if _dist >= limit:
                    continue
                if res1.atom_type(atm1).element() in "NO" and res2.atom_type(atm2).element() == "H":  # H-bonds are not clashes
                    continue
                if debug: print(f"Clashing atoms: {res1.name()}-{res1.seqpos()}-{res1.atom_name(atm1)} -- {res2.name()}-{res2.seqpos()}-{res2.atom_name(atm2)}: {_dist}")
                return True
    return False


def adjust_bb(pose, resno, phi, psi):
    """
    Set the backbone torsions of residue ``resno``: phi and psi as given, omega to 180 degrees.
    """
    pose.set_phi(resno, phi)
    pose.set_psi(resno, psi)
    pose.set_omega(resno, 180.0)
685 | 686 | """ 687 | # assert nres_Nterm >= 2, "Too short N-term extension" 688 | # assert nres_Cterm >= 2, "Too short C-term extension" 689 | pose2 = pose.clone() 690 | for n in range(nres_Cterm): 691 | pose2.append_polymer_residue_after_seqpos(AAA.residue(2), ref_seqpos+n, True) 692 | adjust_bb(pose2, ref_seqpos+n, phi=utils.idealized_SS_phi_psi[secstruct]["phi"][0], psi=utils.idealized_SS_phi_psi[secstruct]["psi"][0]) 693 | 694 | if nres_Cterm > 0: 695 | adjust_bb(pose2, pose2.size(), phi=utils.idealized_SS_phi_psi[secstruct]["phi"][0], psi=utils.idealized_SS_phi_psi[secstruct]["psi"][0]) 696 | else: 697 | # If no C-term stub included then adding temporarily one 698 | pose2.append_polymer_residue_after_seqpos(AAA.residue(2), ref_seqpos, True) 699 | 700 | for n in range(nres_Nterm): 701 | pose2.prepend_polymer_residue_before_seqpos(AAA.residue(2), ref_seqpos, True) 702 | if n == 0: 703 | # Building foldtree to have a center point at the reference residue 704 | ft = pyrosetta.rosetta.core.kinematics.FoldTree() 705 | ft.add_edge(ref_seqpos+2, pose2.chain_begin(pose2.chain(ref_seqpos)), -1) 706 | ft.add_edge(ref_seqpos+2, pose2.chain_end(pose2.chain(ref_seqpos)), -1) 707 | for j in range(1, pose2.num_chains()+1): 708 | if j == pose2.chain(ref_seqpos): 709 | continue 710 | else: # adding foldtree edges for other chains 711 | ft.add_edge(pose2.fold_tree().get_residue_edge(pose2.chain_begin(j))) 712 | pose2.fold_tree().clear() 713 | pose2.fold_tree(ft) 714 | adjust_bb(pose2, ref_seqpos+1, phi=utils.idealized_SS_phi_psi[secstruct]["phi"][0], psi=utils.idealized_SS_phi_psi[secstruct]["psi"][0]) 715 | 716 | adjust_bb(pose2, ref_seqpos, phi=utils.idealized_SS_phi_psi[secstruct]["phi"][0], psi=utils.idealized_SS_phi_psi[secstruct]["psi"][0]) 717 | 718 | if nres_Cterm == 0: 719 | pose2.delete_residue_slow(pose2.size()) 720 | 721 | return pose2 722 | 723 | 724 | def create_remark_lines(pose, catalytic_residues, cst_io): 725 | ## Adding REMARK 666 lines to the PDB's 726 | ## 
This is actually quite arduous since we need to figure out which variable CST block a particular residue came from 727 | 728 | pdb_info = pyrosetta.rosetta.core.pose.PDBInfo(pose) # can this be added to pose somehow? 729 | 730 | ligands = [r for r in pose.residues if r.is_ligand()] 731 | 732 | calculators = {"dis": utils.get_dist, "ang": utils.get_angle, "tor": utils.get_dihedral} 733 | remarks = [] 734 | for j, resno in catalytic_residues.items(): 735 | if pose.residue(resno).is_ligand() and j == 0: 736 | continue 737 | rmrk = None 738 | 739 | for m in range(1, cst_io.mcfi_list(j).num_mcfis()+1): 740 | _mcfi = cst_io.mcfi_list(j).mcfi(m) 741 | 742 | downstream_res_cst = 0 743 | if _mcfi.algorithm_inputs().__contains__("match"): 744 | if any(["DOWNSTREAM" in ai for ai in _mcfi.algorithm_inputs()["match"]]): 745 | downstream_res_cst = 0 # I think this is always 1, right? 746 | elif any(["UPSTREAM_CST" in ai for ai in _mcfi.algorithm_inputs()["match"]]): 747 | for ai in _mcfi.algorithm_inputs()["match"]: 748 | if "SECONDARY_MATCH:" in ai and "UPSTREAM_CST" in ai: 749 | downstream_res_cst = int(ai.split()[2]) 750 | break 751 | # Residues in the final pose 752 | DS_RES = pose.residue(catalytic_residues[downstream_res_cst]) 753 | US_RES = pose.residue(resno) 754 | 755 | good_cst_found = False 756 | rt_combs = itertools.product(_mcfi.allowed_restypes(_mcfi.downstream_res()), _mcfi.allowed_restypes(_mcfi.upstream_res())) 757 | for (ds_res, us_res) in rt_combs: 758 | if US_RES.name().split(":")[0] != us_res.name(): # skipping the wrong residue types 759 | continue 760 | ais_ds = [ds_res.atom_name(_mcfi.template_atom_inds(_mcfi.downstream_res(), ai, ds_res)[1]) for ai in range(1, 4)] 761 | ais_us = [us_res.atom_name(_mcfi.template_atom_inds(_mcfi.upstream_res(), ai, us_res)[1]) for ai in range(1, 4)] 762 | 763 | cst_atomsets = {'dis_U1D1': [DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0])], 764 | 'ang_U1D2': [DS_RES.xyz(ais_ds[1]), DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0])], 
765 | 'ang_U2D1': [DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0]), US_RES.xyz(ais_us[1])], 766 | 'tor_U1D3': [DS_RES.xyz(ais_ds[2]), DS_RES.xyz(ais_ds[1]), DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0])], 767 | 'tor_U2D2': [DS_RES.xyz(ais_ds[1]), DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0]), US_RES.xyz(ais_us[1])], 768 | 'tor_U3D1': [DS_RES.xyz(ais_ds[0]), US_RES.xyz(ais_us[0]), US_RES.xyz(ais_us[1]), US_RES.xyz(ais_us[2])]} 769 | cst_atomsets = {k: np.array(v) for k,v in cst_atomsets.items()} 770 | 771 | # Measuring whether a particular respair geometrically matches the CST 772 | good_cst_found = False 773 | for cs in _mcfi.constraints(): 774 | passed_cst = [] 775 | for cst_par in cst_atomsets.keys(): 776 | cst_samples = getattr(cs, cst_par).create_sample_vector() 777 | val = calculators[cst_par[:3]](*cst_atomsets[cst_par]) 778 | if val < 0.0: 779 | val = 360.0 + val 780 | # is any of the sampled values very close to the measured value? 781 | if "dis" in cst_par: 782 | passed_cst.append( any([abs(val-x) < 0.1 for x in cst_samples]) ) 783 | else: 784 | passed_cst.append( any([abs(val-x) < 1.0 for x in cst_samples]) ) 785 | if all(passed_cst): 786 | good_cst_found = True 787 | break 788 | if good_cst_found: 789 | break 790 | if good_cst_found: 791 | # if there's only one ligand then it will be stored as chain X residue 0 792 | if len(ligands) == 1 and DS_RES.name3() == ligands[0].name3(): 793 | rmrk = f"REMARK 666 MATCH TEMPLATE X {DS_RES.name3()}"\ 794 | f" 0 MATCH MOTIF {pdb_info.chain(resno)} "\ 795 | f"{US_RES.name3()} {resno:>4} {j} {m} " 796 | else: 797 | rmrk = f"REMARK 666 MATCH TEMPLATE {pdb_info.chain(DS_RES.seqpos())} {DS_RES.name3()}"\ 798 | f" {DS_RES.seqpos():>4} MATCH MOTIF {pdb_info.chain(resno)} "\ 799 | f"{US_RES.name3()} {resno:>4} {j} {m} " 800 | remarks.append(rmrk) 801 | break 802 | return remarks 803 | 804 | --------------------------------------------------------------------------------