├── .vscode └── settings.json ├── 0_pdb2fasta.py ├── 1_rama_single_structure.py ├── 2_rama_md_heatmap.csh ├── 2x_rama_md_heatmap_gen.py ├── 3_rama_extract_popul.py ├── Examples ├── 1_single_struct │ ├── 1_run_single_structure_rama.sh │ ├── 3anr.pdb.bz2 │ └── 3anr.rama_plot.png ├── 2_md_heatmap │ ├── 2_run_md_heatmap.sh │ ├── fgf21-wt-s2.fasta │ ├── fgf21-wt-s2.prod_gamd.nc │ ├── fgf21-wt-s2.prot.prmtop │ ├── fgf21-wt.A208.rama.out.bz2 │ ├── fgf21-wt.A208.rama_histo.png │ ├── fgf21-wt.P205.rama_histo.png │ ├── fgf21-wt.S204.rama_histo.png │ ├── templ_metrics.wt.traj │ └── tmp.fgf21-wt.traj └── 3_extract_popul │ ├── 3_run_extract_population.sh │ ├── fgf21-wt.A208.rama.cluster.list │ ├── fgf21-wt.A208.rama.cluster.txt │ ├── fgf21-wt.A208.rama_histo.png │ └── fgf21-wt.A208.rama_histo.popul.png ├── LICENSE ├── README.md ├── __pycache__ ├── aa_residue.cpython-36.pyc └── aa_residue.cpython-37.pyc ├── aa_residue.py ├── combine_column.py ├── pyrama_data ├── pref_general.data.bz2 ├── pref_glycine.data.bz2 ├── pref_preproline.data.bz2 └── pref_proline.data.bz2 └── x_old └── 3_rama_extract_popul.py-v2 /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "/Users/pmung/miniconda2/envs/cdpy3/bin/python" 3 | } -------------------------------------------------------------------------------- /0_pdb2fasta.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ########################################################################## 4 | ## 5 | ## Peter M.U. Ung @ MSSM 6 | ## 7 | ## 18.06.19 v2.0 add chain information 8 | ## 9 | ## Take in a PDB structure and generate FASTA sequence from it 10 | ## Can only handle standard amino acids 11 | ## 12 | ########################################################################## 13 | 14 | import sys,glob 15 | from Bio.PDB.PDBParser import PDBParser 16 | from Bio.PDB.Polypeptide import PPBuilder 17 | 18 | msg = """ 19 | > {0}\n [PDB filename: .pdb] [Name of PDB] [FASTA Output Name] 20 | 21 | Note: \033[31mCapping Groups (ACE/NME)\033[0m and \033[31mNonstandard amino acids\033[0m are not recognized 22 | need to \033[35mmanually add 'X'\033[0m to generated correctly spaced FASTA 23 | """.format(sys.argv[0]) 24 | if len(sys.argv) != 4: sys.exit(msg) 25 | 26 | def FASTA_Gen(pdb_file, pdb_id, output_file): 27 | 28 | m =PDBParser(PERMISSIVE=1).get_structure(pdb_id, pdb_file) 29 | 30 | w = open(output_file, 'w') 31 | for chain in m.get_chains(): 32 | residues = list(chain.get_residues()) 33 | chain_id = chain.get_id() 34 | 35 | s = '' 36 | peptides = PPBuilder().build_peptides(chain, aa_only=False) 37 | for peptide in peptides: 38 | s = s + peptide.get_sequence() + '\n\n' 39 | 40 | w.write('>{0}_{1}|{2}-{3}\n'.format( 41 | pdb_id, chain_id, 42 | residues[0].get_id()[1], residues[-1].get_id()[1] ) ) 43 | w.write(str(s)) 44 | 45 | w.close() 46 | 47 | ############################################## 48 | if __name__ == "__main__": 49 | FASTA_Gen(sys.argv[1], sys.argv[2], sys.argv[3]) 50 | -------------------------------------------------------------------------------- /1_rama_single_structure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import scipy 3 | ############################################################################# 4 | ## 5 | ## Peter MU Ung @ MSSM / Yale 6 | ## 7 | ## v2.0 19.12.03 - generate figure with Seaborn/Matplotlib directly 8 | ## 9 | ## use to convert dihedral angle of residues of a PDB structure 2D 10 | ## Ramachandran plot 11 | ## 12 | ## 13 | ############################################################################ 14 | 15 | import sys,os,gzip,bz2,re 16 | import aa_residue 17 | import numpy as np 18 | import pandas as pd 19 | import matplotlib as mpl 20 | import matplotlib.pyplot as plt 21 | 22 | from Bio.PDB.PDBParser import PDBParser 23 | from Bio.PDB.Polypeptide import PPBuilder 24 | p = PDBParser(PERMISSIVE=1,QUIET=True) 25 | 26 | from argparse import ArgumentParser 27 | from aa_residue import AA, UnnaturalAA, CheckUnnaturalAA 28 | 29 | font = {'family': 'Arial'} 30 | mpl.rc('font', **font) 31 | 32 | msg = ''' > {0} 33 | -in [ PDB file for Ramachandran density plot ] 34 | -img [ Output image name, extension as format (e.g. .png,svg,eps,ps,pdf) ]\n 35 | optional: 36 | -ref [ Directory to Density data for reference Ramachandran distribution ] 37 | -dpi [ Figure DPI resolution (def: 300) ]\n 38 | '''.format(sys.argv[0]) 39 | #if len(sys.argv) < 3 or len(sys.argv) > 4: sys.exit(msg) 40 | 41 | ref_df = pd.DataFrame( 42 | { 'Pro':{'file': 'pref_proline.data.bz2', 'bounds': [0,0.002, 0.02,1], 43 | 'cmap': mpl.colors.ListedColormap(['#FFFFFF','#D0FFC5','#7EE589'])}, 44 | 'Gly':{'file': 'pref_glycine.data.bz2', 'bounds': [0,0.002, 0.02,1], 45 | 'cmap': mpl.colors.ListedColormap(['#FFFFFF','#FFE8C5','#FED479'])}, 46 | 'Gen':{'file': 'pref_general.data.bz2', 'bounds': [0,0.0005,0.02,1], 47 | 'cmap': mpl.colors.ListedColormap(['#FFFFFF','#B3E8FF','#26C3FF'])}, 48 | 'PreP':{'file':'pref_preproline.data.bz2', 'bounds': [0,0.002, 0.02,1], 49 | 'cmap':mpl.colors.ListedColormap(['#FFFFFF','#FFD7DA','#F3ABB0'])} 50 | } ) 51 | 52 | ############################################################################ 53 | 54 | def main( ): 55 | 56 | args = UserInput() 57 | if args.dpi is None: # figure DPI 58 | args.dpi = 300 59 | else: args.dpi = int(args.dpi) 60 | 61 | # if reference rama density is available, generate reference figure settings 62 | ref_dict = {} 63 | for key in ref_df: 64 | ref_dict[key] = RefRamaData( args.ref_dir, ref_df[key] ) 65 | 66 | # extract input residue dihedral angles and generate figure settings 67 | res_dict = InputRamaData( args.in_file ) 68 | 69 | # generate figure 70 | GenerateImage( res_dict, ref_dict, args.img_name, args.dpi ) 71 | 72 | 73 | ############################################################################ 74 | ############################################################################ 75 | ## extract input residue dihedral angles 76 | def InputRamaData( pdb_file ): 77 | 78 | Res_Dih = [] 79 | ## extract sequence and dihedral angle data from PDB structure 80 | pdb_id = pdb_file.split('.pdb')[0].split('/')[-1] 81 | 82 | # use file_handle to deal with zipped PDB 83 | # going into each chain in model(s), then each of broken pieces in each 84 | # chain, get peptide and info 85 | # take non-standard AA and other ligands as well. Non-standard AA will be 86 | # coverted to standard AA. Water and ligands will not get phi/psi values 87 | # so will be dropped later at the dataframe level 88 | for model in p.get_structure(pdb_id, file_handle(pdb_file) ): 89 | for chain in model: 90 | chain_id = chain.get_id() 91 | polypeptides = PPBuilder().build_peptides(chain, aa_only=False) 92 | 93 | for poly in polypeptides: 94 | phi_psi = poly.get_phi_psi_list() 95 | 96 | for res_idx, residue in enumerate(poly): 97 | resname = residue.get_resname() 98 | resid = residue.get_id()[1] 99 | phi, psi = phi_psi[res_idx] 100 | 101 | # convert 3-char AA to 1-char, check if AA is non-standard AA 102 | if CheckUnnaturalAA(resname): 103 | resname = UnnaturalAA(resname) 104 | one_resname = AA(resname) 105 | 106 | Res_Dih.append([one_resname, resid, chain_id, phi, psi]) 107 | 108 | 109 | # build dataframe of dihedral angle data, drop those without phi/psi 110 | pdb_df = pd.DataFrame(Res_Dih, 111 | columns=['resname','resid','chain','PHI','PSI']).dropna() 112 | 113 | # convert angle from radian to degree 114 | pdb_df['phi'] = radian2deg(pdb_df.PHI.to_numpy()) 115 | pdb_df['psi'] = radian2deg(pdb_df.PSI.to_numpy()) 116 | 117 | # select subsets of residues: Pro, Gly, PrePro, General 118 | pro_df = pdb_df[pdb_df.resname == 'P'] 119 | gly_df = pdb_df[pdb_df.resname == 'G'] 120 | pp_df = pdb_df.loc[(pro_df.index-1)] # pre-Proline residues 121 | pp_df = pp_df[pp_df.resname != 'P'] 122 | not_x = list(pro_df.index) + list(gly_df.index) + list(pp_df.index) 123 | gen_df = pdb_df.drop(not_x) # general residues 124 | 125 | res_dict = {'Pro': pro_df, 'Gly': gly_df, 'PreP': pp_df, 'Gen': gen_df} 126 | 127 | return res_dict 128 | 129 | 130 | ############################################################################ 131 | # if reference rama density is available, generate settings 132 | def RefRamaData( ref_dir, ref_inf ): 133 | 134 | rama_data = pd.read_csv(ref_dir+'/'+ref_inf.file, delimiter=' ', comment='#') 135 | rama_ref = rama_data.pivot(index='phi',columns='psi',values='density') 136 | 137 | # data is transpose to get correct orientation 138 | ref_obj = ImageData( histo2d=rama_ref.transpose() ) 139 | 140 | # extent is different from res_obj.extent 141 | # color is (white, light color, dark color) at specific contour level 142 | ref_obj.extent = (-181,181,-181,181) 143 | ref_obj.colors = ref_inf.cmap 144 | ref_obj.norm = mpl.colors.BoundaryNorm(ref_inf.bounds, ref_obj.colors.N) 145 | 146 | return ref_obj 147 | 148 | 149 | ############################################################################ 150 | ## Generate Ramachandran heat map 151 | def GenerateImage( res_dict, ref_dict, img_name, dpi ): 152 | 153 | for idx, (key, ref_obj) in enumerate(sorted(ref_dict.items(), key=lambda x:x[0].lower())): 154 | 155 | plt.figure(2, figsize=(8,8)) 156 | plt.subplot(2,2, idx+1) 157 | plt.title(key, fontsize=16) 158 | 159 | # reference dihedral angle density as background 160 | plt.imshow( ref_obj.histo2d[::-1], cmap=ref_obj.colors, 161 | norm=ref_obj.norm, extent=ref_obj.extent ) 162 | 163 | # plt.grid(linestyle='--') 164 | 165 | # PDB AA backbone dihedral angle distribution 166 | plt.scatter( res_dict[key].phi, res_dict[key].psi, 167 | alpha=0.67, s=8 ) 168 | 169 | ## add additional items 170 | plt.xlim([-180,180]) 171 | plt.ylim([-180,180]) 172 | plt.plot([-180, 180], [0, 0], color="black", linewidth=1) 173 | plt.plot([0, 0], [-180, 180], color="black", linewidth=1) 174 | plt.xticks(np.arange(-180,210, step=60), fontsize=14) 175 | plt.yticks(np.arange(-180,210, step=60), fontsize=14) 176 | 177 | plt.xlabel(r'Phi $\phi$', fontsize=16) 178 | plt.ylabel(r'Psi $\psi$', fontsize=16) 179 | 180 | plt.tight_layout() 181 | plt.savefig(img_name, bbox_inches=0, dpi=dpi) 182 | 183 | 184 | ############################################################################ 185 | 186 | class ImageData(object): 187 | def __init__( self, histo2d='', **kwargs): 188 | self.histo2d = histo2d 189 | 190 | # convert radian into degree 191 | def radian2deg(rad): 192 | return np.rad2deg(rad) 193 | 194 | # to handle zipped files for Biopython 195 | def file_handle(file_name): 196 | if re.search(r'.gz$', file_name): 197 | handle = gzip.open(file_name, 'rt') 198 | elif re.search(r'.bz2$', file_name): 199 | handle = bz2.open(file_name, 'rt') 200 | else: 201 | handle = open(file_name) 202 | 203 | return handle 204 | 205 | ########################################################################## 206 | def UserInput(): 207 | p = ArgumentParser(description='Command Line Arguments') 208 | 209 | p.add_argument('-in', dest='in_file', required=True, 210 | help='PDB structure (zipped okay)') 211 | p.add_argument('-img', dest='img_name', required=True, 212 | help='Output image filename, extension as format (e.g. .png,svg,eps,ps,pdf)') 213 | 214 | p.add_argument('-ref', dest='ref_dir', required=False, 215 | help='Directory of Density data for reference Ramachandran distribution') 216 | p.add_argument('-dpi', dest='dpi', required=False, 217 | help='Figure DPI (def: 300)') 218 | 219 | return p.parse_args() 220 | 221 | 222 | ########################################################################## 223 | if __name__ == '__main__': 224 | main() 225 | -------------------------------------------------------------------------------- /2_rama_md_heatmap.csh: -------------------------------------------------------------------------------- 1 | #!/bin/csh 2 | 3 | ########################################################################## 4 | ## 5 | ## Peter MU Ung @ UMich COPharm 6 | ## v1.0 -- 09.01.22 7 | ## 8 | ## Peter MU Ung @ MSSM / Yale 9 | ## v2.0 -- 19.12.03 generalize process up to generating figures 10 | ## 11 | ## To generate PTRAJ input files and run PTRAJ to generate separate 12 | ## Phi and Psi angle files for generating Ramachandran plot. 13 | ## 14 | ## Use "phi_psi_dih_combine.csh" to combine the separate phi/psi files 15 | ## Phi+Psi file will be used for "rama_png_gen.pl" to generate Rama plot 16 | ## in PNG format. 17 | ## 18 | ## Use with: 19 | ## -) cpptraj (use GitHub V4.14.12+ for extra functions) 20 | ## -) combine_column.py 21 | ## -) rama_md_heatmap_gen.py 22 | ## -) ecoDnaK.fasta (only 1 line of the sequence) 23 | ## 24 | ## Note: Torsion angle calculation will always start with i+1 and end i-1 25 | ## because the first residue will not have Phi and last residue will 26 | ## not have Psi angle 27 | ## 28 | ## 29 | ########################################################################## 30 | 31 | if ($#argv != 11) then 32 | echo '' 33 | echo ' > x.csh ' 34 | echo ' [ Starting residue for phi/psi in PTRAJ (prmtop) ]' 35 | echo ' [ Ending residue for phi/psi in PTRAJ (prmtop) ]' 36 | echo ' [ fasta_file (one line of fasta seq only) ] HIE/HID/HIP need to be in HIS' 37 | echo ' [ Actual starting residue number ]' 38 | echo ' [ Output prefix ]' 39 | echo ' [ Output image extension, (e.g. png,svg,eps,ps,pdf) ]' 40 | echo ' [ Template cpptraj input with "parm" and all "trajin" ]' 41 | echo ' [ Run cpptraj? : 0|1 ]' 42 | echo ' [ Generate figure with AA Dihedral density reference? : 0|1 ]' 43 | echo '' 44 | echo ' [ Path to this script directory ]' 45 | echo ' [ Path to Cpptraj ]' 46 | echo '' 47 | exit 48 | endif 49 | 50 | set startResid = $argv[1] # Starting residue for phi/psi in PTRAJ 51 | set endResid = $argv[2] # Ending residue for phi/psi in PTRAJ 52 | set fasta_file = $argv[3] 53 | set fastaStart = $argv[4] 54 | 55 | set out_pref = $argv[5] # Folder name, and file prefix 56 | set img_ext = $argv[6] # Image format. png,svg,jpg,eps,ps,pdf 57 | 58 | set templ_traj = $argv[7] # Template PTRAJ input file 59 | set run_traj = $argv[8] # run dihedral data generation 60 | set ref_rama = $argv[9] # use reference AA dihedral density map 61 | 62 | #set scptdir = '/home/pmung/Dropbox/9_scripts/3_program/plotting/RAMAplot' 63 | #set ctraj = '/home/software/ctraj/bin/cpptraj' 64 | set scptdir = $argv[10] 65 | set ctraj = $argv[11] 66 | 67 | ########################################################################## 68 | 69 | 70 | ## split fasta into array of characters, ignore header of fasta 71 | set fasta = (`cat ./$fasta_file | grep -v '>' | tr -d '\n' | grep -o .`) 72 | 73 | cp ./$templ_traj tmp.$out_pref.traj 74 | 75 | ## Write out all individual temp traj input files, then run CPPTRAJ 76 | @ i = $startResid 77 | @ a = $fastaStart 78 | while ($i <= $endResid) # Generate tmp input for Dihedral angle calc. 79 | @ j = $i - 1 80 | @ k = $i + 1 81 | 82 | ## Phi = C1-N2-CA2-C2 83 | echo dihedral $i\_phi :$j@C :$i@N :$i@CA :$i@C \ 84 | out $out_pref.${fasta[$i]}$a.phi.out >> tmp.$out_pref.traj 85 | wait 86 | 87 | ## Psi = N2-CA2-C2-N3 88 | echo dihedral $i\_psi :$i@N :$i@CA :$i@C :$k@N \ 89 | out $out_pref.${fasta[$i]}$a.psi.out >> tmp.$out_pref.traj 90 | wait 91 | 92 | @ i++ 93 | @ a++ 94 | end 95 | 96 | ## generate phi-psi data 97 | if ($run_traj == 1) then 98 | $ctraj -i tmp.$out_pref.traj 99 | wait 100 | endif 101 | 102 | 103 | ######################################################################## 104 | 105 | 106 | ## combine phi/psi files into one, then convert to figures with scripts 107 | @ i = $startResid 108 | @ a = $fastaStart 109 | while ($i <= $endResid) 110 | 111 | ## Generate phi-psi data 112 | echo " ## Generate combined phi-psi files: $out_pref.${fasta[$i]}$a" 113 | if (! -e $out_pref.${fasta[$i]}$a.rama.out.bz2) then 114 | $scptdir/combine_column.py \ 115 | -in1 $out_pref.${fasta[$i]}$a.phi.out \ 116 | -in2 $out_pref.${fasta[$i]}$a.psi.out \ 117 | -cols 2 \ 118 | > $out_pref.${fasta[$i]}$a.rama.out 119 | wait 120 | echo $out_pref.${fasta[$i]}$a.rama.out 121 | 122 | if ( ! -s $out_pref.${fasta[$i]}$a.rama.out ) then 123 | bzip2 -f $out_pref.${fasta[$i]}$a.rama.out 124 | wait 125 | else 126 | # rm $out_pref.${fasta[$i]}$a.rama.out 127 | endif 128 | 129 | rm $out_pref.${fasta[$i]}$a.phi.out $out_pref.${fasta[$i]}$a.psi.out 130 | endif 131 | 132 | 133 | ## Set up parameters for Ramachandran density heatmap, 134 | ## use different reference Ramachandran densities (Pro, Gly, all others) 135 | ## according the the residue to be plotted 136 | echo '' 137 | echo " ## Generate Ramachandran plot: $out_pref.${fasta[$i]}$a" 138 | set t_step = '-t_step 1' 139 | set c_step = '-c_step 1' 140 | 141 | @ j = $i + 1 142 | 143 | if ($ref_rama == 0) then 144 | set ref = '' 145 | set res = '' 146 | else if (${fasta[$i]} == 'P') then 147 | set ref = "-ref $scptdir/pyrama_data/pref_proline.data.bz2" 148 | set res = '-res Pro' 149 | else if (${fasta[$i]} == 'G') then 150 | set ref = "-ref $scptdir/pyrama_data/pref_glycine.data.bz2" 151 | set res = '-res Gly' 152 | else if (${fasta[$j]} == 'P') then 153 | echo " * Pre-proline: ${fasta[$i]}$a" 154 | set ref = "-ref $scptdir/pyrama_data/pref_preproline.data.bz2" 155 | set res = '-res PreP' 156 | else 157 | set ref = "-ref $scptdir/pyrama_data/pref_general.data.bz2" 158 | set res = '-res Gen' 159 | set t_step = '' 160 | set c_step = '' 161 | endif 162 | 163 | ## Generate dihedral density heatmap 164 | $scptdir/2x_rama_md_heatmap_gen.py \ 165 | -in $out_pref.${fasta[$i]}$a.rama.out.bz2 \ 166 | -img $out_pref.${fasta[$i]}$a.rama_histo.$img_ext \ 167 | $ref $res $t_step $c_step & 168 | 169 | @ i++ 170 | @ a++ 171 | end 172 | 173 | 174 | -------------------------------------------------------------------------------- /2x_rama_md_heatmap_gen.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | ############################################################################# 4 | ## 5 | ## Peter MU Ung @ MSSM / Yale 6 | ## 7 | ## v2.0 19.12.03 - generate figure with Seaborn/Matplotlib directly 8 | ## 9 | ## use to convert dihedral angle of a residue over time (snapshots) to 10 | ## HISTOGRAM format and generate figure from here directly 11 | ## 12 | ## Required input file format (for each frame):