├── cadee ├── __init__.py ├── ana │ ├── __init__.py │ ├── cat_cadee_dbs.py │ └── export_to_csv.py ├── dyn │ ├── __init__.py │ ├── LICENSE │ ├── submit.sh │ └── mpi.py ├── prep │ ├── __init__.py │ ├── LICENSE │ ├── config.py │ ├── mergepdb.py │ ├── create_template_based_simpack.py │ ├── clash.py │ ├── test_tools.py │ ├── fep.py │ ├── test_genseqs.py │ ├── alascan.py │ └── genseqs.py ├── tools │ ├── __init__.py │ ├── cadee_delete_tempfiles.sh │ ├── lossy_repack.sh │ ├── tools.py │ ├── pcadee.sh │ ├── srunq.sh │ └── repair_simpack.sh ├── qscripts │ ├── __init__.py │ ├── lib │ │ ├── __init__.py │ │ ├── asd.py │ │ ├── random.txt │ │ ├── qscripts.cfg.default │ │ ├── plotdata.py │ │ ├── common.py │ │ └── OrderedDict.py │ ├── .gitignore │ ├── README.md │ ├── template_examples │ │ ├── run_feps_q.sh │ │ ├── genrelax_10ns.proc │ │ ├── genfeps_10ps-f.proc │ │ └── genrelax_minim.proc │ ├── LICENSE.txt │ ├── q_setprot.py │ ├── q_pdbindex.py │ ├── qscripts_config.py │ ├── q_rescale.py │ ├── q_dynplot.sh │ └── q_dyntemps.py ├── executables │ ├── __init__.py │ ├── q │ │ ├── __init__.py │ │ ├── readme │ │ └── testq.sh │ └── exe.py ├── version.py ├── lib │ └── simpack_template_12ns_100ps_8000ps_4160ps.tar.bz2 └── cadee ├── simpack_templates ├── simpack_template_0.1ns_30ps_5ps_65ps.tar.bz2 ├── simpack_template_0.05ns_15ps_2.5ps_32.5ps.tar.bz2 ├── simpack_template_1.6ns_100ps_1000ps_520ps.tar.bz2 ├── simpack_template_12ns_100ps_8000ps_4160ps.tar.bz2 ├── simpack_template_20ns_100ps_16000ps_4160ps.tar.bz2 └── readme.md ├── example ├── seder.sh ├── wt.qpinp ├── libraries │ ├── dh2.qlib │ └── ligand.qlib └── examples.sh ├── LICENSE ├── doc ├── example_installation_uppmax_rackham.md └── example_installation_hpc2n_abisko.md ├── rename_q5_to_q6.txt ├── workflow.md ├── setup.py ├── readme.md └── INSTALL.md /cadee/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/cadee/ana/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cadee/dyn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cadee/prep/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cadee/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cadee/qscripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cadee/executables/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cadee/executables/q/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cadee/qscripts/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cadee/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.9" 2 | -------------------------------------------------------------------------------- /cadee/qscripts/.gitignore: -------------------------------------------------------------------------------- 1 | *pyc 2 | tmp-run/ 3 | qscripts.cfg 4 | *swp 5 | 
-------------------------------------------------------------------------------- /cadee/executables/q/readme: -------------------------------------------------------------------------------- 1 | please copy your q-executables into this directory 2 | for Q6: make sure all your executables are lowercase only. (Qfep6, Qdyn6, Qprep6) 3 | -------------------------------------------------------------------------------- /cadee/lib/simpack_template_12ns_100ps_8000ps_4160ps.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamerlinlab/cadee/HEAD/cadee/lib/simpack_template_12ns_100ps_8000ps_4160ps.tar.bz2 -------------------------------------------------------------------------------- /simpack_templates/simpack_template_0.1ns_30ps_5ps_65ps.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamerlinlab/cadee/HEAD/simpack_templates/simpack_template_0.1ns_30ps_5ps_65ps.tar.bz2 -------------------------------------------------------------------------------- /simpack_templates/simpack_template_0.05ns_15ps_2.5ps_32.5ps.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamerlinlab/cadee/HEAD/simpack_templates/simpack_template_0.05ns_15ps_2.5ps_32.5ps.tar.bz2 -------------------------------------------------------------------------------- /simpack_templates/simpack_template_1.6ns_100ps_1000ps_520ps.tar.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kamerlinlab/cadee/HEAD/simpack_templates/simpack_template_1.6ns_100ps_1000ps_520ps.tar.bz2 -------------------------------------------------------------------------------- /simpack_templates/simpack_template_12ns_100ps_8000ps_4160ps.tar.bz2: -------------------------------------------------------------------------------- 
# Scratch script: load a Q library file and a Q parameter file through
# q_topology and dump both objects to stdout.
# NOTE(review): both paths are absolute paths on the original author's
# machine; presumably this file is a leftover debugging aid - confirm before
# relying on it.
import q_topology as q

a = q.Qlib()
a.read_lib("/home/mpurg/Desktop/Projects/Q_AMBER/1-alpha_version/testing_LJ/ass/q/qamber14.lib")
# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the former `print a` statement is a SyntaxError on Python 3.
print(a)
b = q.Qprm()
b.read_prm("/home/mpurg/Desktop/Projects/Q_AMBER/1-alpha_version/testing_LJ/ass/q/qamber14.prm")
print(b)
#!/usr/bin/env bash
# Remove temporary files left behind in the current directory:
# compiled Python files (recursively), rawmut.pdb, mutant.???, qprep.???,
# proper.??? and proper.fasta. The libmut and ala_scan folders are only
# deleted after an explicit 'y' answer.
find . -name "*.pyc" -exec rm {} \;
rm -f rawmut.pdb
rm -f mutant.???
rm -f qprep.???
rm -f proper.???
rm -f proper.fasta

if [ -e 'libmut' ]
then
    echo -n "Delete libmut folder (y/N)? >"
    read -r ans
    # "$ans" must be quoted: an empty answer (just Enter) or an answer with
    # spaces would otherwise make '[' fail with a syntax error.
    [ "$ans" = 'y' ] && rm -r libmut && echo "deleted!"
fi

if [ -e 'ala_scan' ]
then
    echo -n "Delete ala_scan folder (y/N)? >"
    read -r ans
    [ "$ans" = 'y' ] && rm -r ala_scan && echo "deleted!"
fi
#!/bin/bash
# Run every Q input file (*.inp) of the current directory in natural
# (version-sorted) order with the parallel Qdyn binary, one step at a time.
# After a successful step its restart file is copied to <step>.re.rest;
# the first failing step aborts the run with exit status 1.
CORES=4

QBIN=$(which Qdyn6p)
if [ -z "${QBIN}" ]
then
    # Without this check mpirun would try to execute the .inp file itself.
    echo "Qdyn6p not found in PATH" >&2
    exit 1
fi

OK="(\033[0;32m   OK   \033[0m)"
FAILED="(\033[0;31m FAILED \033[0m)"

# Strip only the trailing '.inp' suffix. The former pattern 's/.inp//' had an
# unescaped, unanchored dot and therefore also cut e.g. '_inp' out of the
# middle of a file name.
steps=( $(ls -1v -- *.inp | sed 's/\.inp$//') )

for step in "${steps[@]}"
do
    echo "Running step ${step}"
    if mpirun --report-bindings -n "${CORES}" "${QBIN}" "${step}.inp" > "${step}.log"
    then
        echo -e "$OK"
        cp "${step}.re" "${step}.re.rest"
    else
        echo -e "$FAILED"
        echo "Check output (${step}.log) for more info."
        exit 1
    fi
done
#!/usr/bin/env bash
#SBATCH -A snic2015-16-12 # project name
#SBATCH -n 6 # number of cores that shall be used in parallel
#SBATCH --time=0-01:00:00 # wallclock time limit in days - hours : minutes : seconds
#SBATCH --overcommit # needed for efficient resource usage.

# This is a demo SLURM script on how to run ensemble.py, assuming CADEE was installed to $HOME/cadee/
#
# It sources init.sh from the CADEE ensemble folder, returns to the
# submission directory and then launches ensemble.py on the tarchives folder.
# NOTE(review): 7 processes are started on a 6-task allocation; presumably
# this relies on --overcommit - confirm.

# TODO: check path
# Load the environment defined in init.sh, then go back to where we came from.
cd ~/cadee/ensemble
source init.sh
cd -


# TODO: check paths
mpirun -n 7 python ~/cadee/ensemble/ensemble.py ~/inputs_tarchives/tarchives --alpha 229 --hij 60

# TODO: check paths
# alternative:
# NOTE(review): as written, this srun line runs in addition to the mpirun
# line above; if it is only meant as an alternative launcher, one of the two
# should be commented out - confirm.
srun -n 7 python ~/cadee/ensemble/ensemble.py ~/inputs_tarchives/tarchives --alpha 229 --hij 60
#!/usr/bin/env bash

# This script compresses simpacks lossily: every *.tar in the current
# directory is unpacked into a scratch folder below /dev/shm, all dcd files
# are deleted, md5 hashes of the remaining files are written to hashes.md5
# and the tarball is repacked and moved over the original.

# Author: Beat Amrein, beat.amrein@gmail.com
# This script is part of CADEE.



echo "WARNING: This will *lossy* compress tarballs in $PWD"
echo 'press ctrl+c to abort within 5 secs'

sleep 5


set -e

wd=$PWD
tmpdir=/dev/shm/tmp/$$

# Never leave the scratch folder behind, even when 'set -e' aborts mid-loop.
trap 'rm -rf "$tmpdir"' EXIT

# Iterate over the glob directly instead of parsing 'ls', so that file names
# containing whitespace survive.
for fil in *.tar
do
    # An unmatched glob stays literal ('*.tar'); skip it.
    [ -e "$fil" ] || continue

    echo "unpack $fil"
    cd "$wd"
    mkdir -p "$tmpdir"
    cd "$tmpdir"
    tar xf "$wd/$fil"
    echo compress
    rm -f *dcd
    echo hash
    md5sum * > hashes.md5
    echo repack
    tar cf "$fil" *
    # Best effort: a failing mv is deliberately ignored (the original tarball
    # then simply stays in place).
    set +e
    mv "$fil" "$wd/$fil" || true
    set -e
    echo clean
    rm -r "$tmpdir"
done
#!/bin/bash

# Quick sanity check of the Q executables in the current directory.
# Each binary is fed an empty line and its combined stdout/stderr is searched
# for a text the real program is expected to print; a wrongly named or
# missing tool therefore shows up as "invalid".

# probe NAME TEXT: succeed when ./NAME, given an empty line, prints TEXT.
probe() {
    echo "" | "./$1" 2>&1 | grep -q "$2"
}

# require NAME TEXT: like probe, but report and abort the script on failure.
require() {
    if ! probe "$1" "$2"
    then
        echo "$1 is invalid"
        exit 1
    fi
}

require Qcalc6 'qcalc'
require Qfep6 '# Qfep'
require Qprep6 'Qprep>'
# NOTE(review): the expected text names Qdyn5 although the binary is Qdyn6;
# presumably Q6 still prints this banner - confirm.
require Qdyn6 "ABNORMAL TERMINATION of Qdyn5"

# The parallel binary is optional: its absence is reported but is not fatal.
if probe Qdyn6p "MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD"
then
    echo Qcalc6, Qfep6, Qprep6, Qdyn6 and Qdyn6p are valid.
else
    echo "Qdyn6p is invalid"
    echo Qcalc6, Qfep6, Qprep6 and Qdyn6 are valid.
fi
try:
    from mpi4py import MPI
except ImportError:
    print('mpi4py not found')

# Rank 0 is the root whether or not MPI is available.
root = 0
try:
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()
    mpi = True
except NameError:
    # The name MPI is unbound because the import above failed:
    # fall back to placeholder values and flag MPI as unavailable.
    comm, rank, size, mpi = 0, 0, 0, False
    print('MPI disabled')


class Tags(object):
    """MPI tags, numbered consecutively from 1 (DONE) to 8 (SHUTDOWN)."""
    (DONE,
     INPUTS,
     LOG,
     IO_TICKET,
     IO_REQUEST,
     IO_FINISHED,
     RESULTS,
     SHUTDOWN) = range(1, 9)


def get_info():
    """Return a one-line summary of the module-level MPI state.

    :return: text listing whether MPI is enabled plus this process' rank
             and the communicator size
    :type return: str
    """
    pieces = ["MPI Info: ",
              " enabled: ", str(mpi),
              " rank: ", str(rank),
              " size: ", str(size)]
    return "".join(pieces)
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CADEE: Computer-Aided Directed Evolution of Enzymes. 2 | 3 | Copyright (c) 2017, Beat Anton Amrein (beat.amrein@gmail.com) 4 | and 5 | Shina Caroline Lynn Kamerlin (kamerlin@icm.uu.se) 6 | 7 | This program is free software; you can redistribute it and/or modify it under 8 | the terms of the GNU General Public License as published by the Free Software 9 | Foundation; version 2 of the License. 10 | 11 | This program is distributed in the hope that it will be useful, but WITHOUT 12 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License along with 16 | this program; if not, write to the Free Software Foundation, Inc., 51 Franklin 17 | Street, Fifth Floor, Boston, MA 02110-1301, USA. 18 | 19 | 20 | 21 | * * * * * 22 | 23 | 24 | The development of CADEE is mainly funded by academic research grants. To help 25 | us fund development, we humbly ask that you cite the CADEE paper: 26 | 27 | * CADEE: Computer-Aided Directed Evolution of Enzymes 28 | Amrein BA, Steffen-Munsberg F, Szeler I, Purg M, Kulkarni Y & Kamerlin SCL. 
29 | IUCrJ 4, 50-64 (2017) 30 | DOI: 10.1107/S2052252516018017 31 | -------------------------------------------------------------------------------- /doc/example_installation_uppmax_rackham.md: -------------------------------------------------------------------------------- 1 | # Installing CADEE on the Rackham Cluster 2 | 3 | The intel, mpi and Open Babel modules are loaded: 4 | 5 | ``` 6 | module add intel/17.4 intelmpi/17.4 openbabel 7 | ``` 8 | Next Q is cloned and compiled and the executables added to $PATH 9 | ``` 10 | git clone https://github.com/qusers/qsource.git . 11 | git checkout development/beat # fix for 'make all' 12 | cd qsource/src/ 13 | make all COMP=ifort 14 | cd ../bin 15 | export PATH="$PWD:$PATH" 16 | ``` 17 | 18 | Next, install SCWRL4 to $HOME/.local/bin/ and check that Scwrl4 is in $PATH: 19 | ``` 20 | which Scwrl4 21 | > ~/.local/bin/Scwrl4 22 | ``` 23 | 24 | Only if the Scwrl4 executable exists can CADEE be installed: 25 | ``` 26 | cd $HOME 27 | cd Downloads 28 | git clone https://github.com/kamerlinlab/cadee cadee 29 | cd cadee 30 | python setup.py install --user 31 | ``` 32 | 33 | Let's check that cadee is in $PATH: 34 | ``` 35 | which cadee 36 | > ~/.local/bin/cadee 37 | ``` 38 | 39 | **Important**: You must load the intelmpi library to launch cadee. 40 | 41 | ``` 42 | module add intelmpi/17.4 43 | cadee --help 44 | 45 | or, when on a node: 46 | 47 | srun -n 1 $(which cadee) --help 48 | srun -n 1 $HOME/.local/bin/cadee --help 49 | ``` 50 | 51 | If you now see the following output, you have successfully installed CADEE: 52 | 53 | ``` 54 | Usage: 55 | cadee [ prep(p) | dyn(d) | ana(a) | tool(t) ] 56 | 57 | Multi Core Tasks: 58 | mpirun -n X cadee dyn 59 | mpiexec -n X cadee dyn 60 | X == Number of cores to use; 2+. 
61 | ``` -------------------------------------------------------------------------------- /rename_q5_to_q6.txt: -------------------------------------------------------------------------------- 1 | sed -i "s/qdyn5/Qdyn6/g" ./setup.py ./cadee/qscripts/template_examples/run_feps_q.sh ./cadee/qscripts/q_genrelax.py ./cadee/executables/q/readme ./cadee/executables/q/testq.sh ./cadee/tools/srunq.sh ./cadee/tools/pcadee.sh ./cadee/dyn/scan.py ./cadee/dyn/ensemble.py ./cadee/cadee ./simpack_templates/readme.md 2 | sed -i "s/qprep5/Qprep6/g" ./setup.py ./cadee/qscripts/template_examples/run_feps_q.sh ./cadee/qscripts/q_genrelax.py ./cadee/executables/q/readme ./cadee/executables/q/testq.sh ./cadee/tools/srunq.sh ./cadee/tools/pcadee.sh ./cadee/dyn/scan.py ./cadee/dyn/ensemble.py ./cadee/cadee ./simpack_templates/readme.md 3 | sed -i "s/qcalc5/Qcalc6/g" ./setup.py ./cadee/qscripts/template_examples/run_feps_q.sh ./cadee/qscripts/q_genrelax.py ./cadee/executables/q/readme ./cadee/executables/q/testq.sh ./cadee/tools/srunq.sh ./cadee/tools/pcadee.sh ./cadee/dyn/scan.py ./cadee/dyn/ensemble.py ./cadee/cadee ./simpack_templates/readme.md 4 | sed -i "s/qdum5/Qdum6/g" ./setup.py ./cadee/qscripts/template_examples/run_feps_q.sh ./cadee/qscripts/q_genrelax.py ./cadee/executables/q/readme ./cadee/executables/q/testq.sh ./cadee/tools/srunq.sh ./cadee/tools/pcadee.sh ./cadee/dyn/scan.py ./cadee/dyn/ensemble.py ./cadee/cadee ./simpack_templates/readme.md /readme.md 5 | sed -i "s/qfep5/Qfep6/g" ./setup.py ./cadee/qscripts/template_examples/run_feps_q.sh ./cadee/qscripts/q_genrelax.py ./cadee/executables/q/readme ./cadee/executables/q/testq.sh ./cadee/tools/srunq.sh ./cadee/tools/pcadee.sh ./cadee/dyn/scan.py ./cadee/dyn/ensemble.py ./cadee/cadee ./simpack_templates/readme.md 6 | 7 | sed -i "s/Q5/Q6/g" ./cadee/executables/q/readme ./INSTALL.md 8 | 9 | echo "Manual Edit of setup.py and readme.md" 10 | -------------------------------------------------------------------------------- 
def which(program, enforce_subdir=False):
    """Look up an executable and return its path.

    The following locations are tried in order; the first executable file
    found wins:

    1. the folder containing this module,
    2. the ``q/`` subfolder of that folder,
    3. the ``q/`` subfolder again, with the first letter of *program*
       upper-cased (e.g. ``qdyn6`` -> ``Qdyn6``),
    4. every entry of ``$PATH`` (skipped when *enforce_subdir* is true or
       ``$PATH`` is not set).

    :param program: name of executable to find
    :param enforce_subdir: only search for executables enclosed with this
                           package
    :type program: str
    :type enforce_subdir: bool
    :return: path to executable, or None if nothing was found (an empty
             *program* always yields None)
    :type return: str or None
    ::Note::
        inspired by:
        http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
    """
    # An empty name cannot match anything; it previously crashed with an
    # IndexError when the first letter was upper-cased.
    if not program:
        return None

    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    package_dir = os.path.dirname(os.path.realpath(__file__))
    q_dir = package_dir + '/q/'

    bundled = (
        # executable in the folder where this script is located
        os.path.join(package_dir, program),
        # q-executable in the subfolder
        os.path.join(q_dir, program),
        # special case for upper-case Qdyn etc...
        os.path.join(q_dir, program[0].upper() + program[1:]),
    )
    for exe_file in bundled:
        if is_exe(exe_file):
            return exe_file

    if enforce_subdir:
        return None

    # NOTE: a former branch that handled names containing a directory part
    # was unreachable (its condition was unconditionally reset to False) and
    # has been dropped; results are unchanged.
    search_path = os.environ.get("PATH")
    if search_path is None:
        return None

    for path in search_path.split(os.pathsep):
        exe_file = os.path.join(path.strip('"'), program)
        if is_exe(exe_file):
            return exe_file

    return None
def main(args, caller):
    """Dispatch a CADEE tool sub-command to its bundled shell script.

    Known sub-commands (matched case-insensitively):
    ``delete_tempfiles``/``dt``, ``lossy_repack``/``lr`` and
    ``repair_simpack``/``rs``. The matching script, located next to this
    module, is run with ``/bin/bash``.

    :param args: argument vector; ``args[0]`` is the program name and
                 ``args[1]`` the sub-command. Both are removed in place.
    :param caller: program name shown in the usage examples
    :type args: list
    :type caller: str

    Prints the usage text and exits via ``sys.exit`` when the sub-command is
    missing (status 1), unknown (status 1) or ``--help`` (status 0).
    """
    def tool_usage(exitcode=1):
        print()
        print()
        print('Available Tools:')
        print()
        print('      repair_simpack (rs):')
        print('                     Description: Unpack and Repack a simpack to fix errors.')
        print('                     Caution:     WILL OVERWRITE ORIGINAL SIMPACK!')
        print('                     Example:     {0} repair_simpack /full/path/to/simpacks/wt_0.tar'.format(caller))
        print()
        print('      lossy_repack (lr):')
        print('                     Description: Utility to repack simpacks lossy, deleting dcd files.')
        print('                                  Will apply to all simpacks in current working directory')
        print('                     Example:     cd /full/path/to/simpacks; {0} lossy_repack'.format(caller))
        print('')
        print('')
        sys.exit(exitcode)

    if len(args) < 2:
        # Previously called without the required exit code -> TypeError.
        tool_usage(1)

    subcmd = args[1].lower()
    # Drop program name and sub-command in place. The former
    # args.remove(subcmd) looked for the lower-cased text and raised a
    # ValueError whenever the user typed upper-case letters.
    del args[:2]

    scripts = {
        # The script shipped in this folder is cadee_delete_tempfiles.sh;
        # the former name 'delete_tempfiles.sh' does not exist.
        'delete_tempfiles': 'cadee_delete_tempfiles.sh',
        'dt': 'cadee_delete_tempfiles.sh',
        'lossy_repack': 'lossy_repack.sh',
        'lr': 'lossy_repack.sh',
        'repair_simpack': 'repair_simpack.sh',
        'rs': 'repair_simpack.sh',
    }

    if subcmd == '--help':
        tool_usage(0)
    if subcmd not in scripts:
        print("Unknown command:", subcmd)
        tool_usage(1)

    script = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          scripts[subcmd])

    # Local import: keeps the module-level import block untouched.
    import subprocess

    # An argument list (instead of one concatenated shell string) keeps
    # paths with spaces or shell metacharacters intact.
    # NOTE(review): the forwarded arguments are taken from sys.argv, not from
    # 'args'; this only equals the stripped list when the caller passed
    # sys.argv itself (as the __main__ entry point does) - confirm for other
    # callers.
    subprocess.call(['/bin/bash', script] + sys.argv)
39 | We hence must adjust the '~/.local' folder: 40 | 41 | ``` 42 | cd ~ 43 | mv ~/.local ~/pfs/.local 44 | ln -s ~/pfs/.local .local 45 | ``` 46 | 47 | Next, install SCWRL4 to $HOME/.local/bin/ and check that Scwrl4 is in $PATH: 48 | ``` 49 | which Scwrl4 50 | > ~/.local/bin/Scwrl4 51 | ``` 52 | 53 | Only if the Scwrl4 executable exists can CADEE be installed: 54 | ``` 55 | cd $HOME 56 | cd Downloads 57 | git clone https://github.com/kamerlinlab/cadee cadee 58 | cd cadee 59 | python setup.py install --user 60 | ``` 61 | 62 | Let's check that cadee is in $PATH: 63 | ``` 64 | which cadee 65 | > ~/.local/bin/cadee 66 | ``` 67 | 68 | **Important**: You must load the intelmpi library to launch cadee. 69 | 70 | ``` 71 | module add iimpi/2017b 72 | cadee --help 73 | mpiexec -n 1 $(which cadee) --help 74 | 75 | or, when on a node: 76 | 77 | srun -n 1 $HOME/.local/bin/cadee --help 78 | 79 | ``` 80 | 81 | If you now see the following output, you have successfully installed CADEE: 82 | 83 | ``` 84 | Usage: 85 | cadee [ prep(p) | dyn(d) | ana(a) | tool(t) ] 86 | 87 | Multi Core Tasks: 88 | mpirun -n X cadee dyn 89 | mpiexec -n X cadee dyn 90 | X == Number of cores to use; 2+. 91 | ``` -------------------------------------------------------------------------------- /cadee/ana/cat_cadee_dbs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | This program will concatenate two cadee.db files together 5 | 6 | Usage: python fuse_dbs.py cadee1.db cadee2.db [cadee3.db [...]] 7 | Will create a concat_cadee.db file, containing both [all] database data in one file. 8 | 9 | Author: {0} ({1}) 10 | 11 | This program is part of CADEE, the framework for 12 | Computer-Aided Directed Evolution of Enzymes. 
13 | """ 14 | 15 | from __future__ import print_function 16 | 17 | import sys 18 | import os 19 | import sqlite3 20 | from cadee.dyn.tools import SqlDB 21 | 22 | # TODO: Write module to read data from sqldatabase and display in browser 23 | 24 | __author__ = "Beat Amrein" 25 | __email__ = "beat.amrein@gmail.com" 26 | 27 | OUTFILE = 'concat_cadee.db' 28 | 29 | 30 | def usage(): 31 | """Print Usage and Exit.""" 32 | print ("Usage: ", os.path.basename(__file__), ' cadee1.db cadee2.db cadee3.db ...') 33 | print (" Will create a concat_cadee.db file, containing both [all] database data in one file.") 34 | sys.exit(1) 35 | 36 | 37 | def main(db_list): 38 | """ 39 | param: db_list is a list of cadee.db files, i.e. ['cadee1.db', 'cadee2.db', ...] 40 | output: creates file OUTFILE (default: concat_cadee.db) 41 | error: will quit if OUTFILE exists 42 | """ 43 | for dbfile in db_list: 44 | # connect and get values from DB 45 | conn = sqlite3.connect(dbfile) 46 | cursor = conn.cursor() 47 | # select only 'medium' runs 48 | try: 49 | cursor.execute("SELECT * FROM results") 50 | except sqlite3.DatabaseError as e: 51 | print("Error when accessing the database: '{}' ({})".format( 52 | dbfile, e)) 53 | sys.exit(1) 54 | 55 | results = cursor.fetchall() 56 | conn.close() 57 | 58 | db = SqlDB(OUTFILE, 10000) 59 | 60 | for res in results: 61 | res = list(res) 62 | res[1] = res[1].split('_')[0] 63 | db.add_row(res) 64 | 65 | db.flush() 66 | 67 | 68 | if __name__ == "__main__": 69 | if len(sys.argv) < 2: 70 | usage() 71 | 72 | if os.path.exists(OUTFILE): 73 | print('ERROR: Outputfile', OUTFILE, 'exists. 
Please remove the file and try again.') 74 | sys.exit(2) 75 | 76 | main(sys.argv) 77 | 78 | -------------------------------------------------------------------------------- /cadee/qscripts/template_examples/genrelax_10ns.proc: -------------------------------------------------------------------------------- 1 | # Example input for q_genrelax.py 2 | # 3 | # 100ps of equilibration, followed by 10x 1ns 4 | # 5 | # No velocity generation, could be used as a 6 | # continuation of the genrelax_minim.proc run. 7 | # 8 | 9 | ####################################################### 10 | {SCRIPT_VARS} 11 | ####################################################### 12 | Q_REGION $315.P1$ $315.H14$ 0.5 1 0 0 13 | 14 | ####################################################### 15 | {GENERAL} 16 | ####################################################### 17 | [MD] 18 | stepsize 1 19 | temperature 300 20 | bath_coupling 100 21 | separate_scaling on 22 | lrf on 23 | 24 | [cut-offs] 25 | solute_solute 10 26 | solute_solvent 10 27 | solvent_solvent 10 28 | q_atom 99 29 | 30 | [lambdas] 31 | 1.00 0.00 32 | 33 | [intervals] 34 | non_bond 30 35 | output 10000 36 | trajectory 10000 37 | temperature 1000 38 | 39 | [sequence_restraints] 40 | Q_REGION 41 | 42 | [distance_restraints] 43 | # distance restraints here 44 | 45 | 46 | [angle_restraints] 47 | # angle restraints here 48 | 49 | 50 | ####################################################### 51 | {STEPS} 52 | ####################################################### 53 | # 100ps of equilibration followed by 10x 1ns of MD 54 | 55 | [MD] 56 | steps 100000 57 | _____________________________________________________________ 58 | [MD] 59 | steps 1000000 60 | _____________________________________________________________ 61 | [MD] 62 | steps 1000000 63 | _____________________________________________________________ 64 | [MD] 65 | steps 1000000 66 | _____________________________________________________________ 67 | [MD] 68 | steps 1000000 69 | 
_____________________________________________________________ 70 | [MD] 71 | steps 1000000 72 | _____________________________________________________________ 73 | [MD] 74 | steps 1000000 75 | _____________________________________________________________ 76 | [MD] 77 | steps 1000000 78 | _____________________________________________________________ 79 | [MD] 80 | steps 1000000 81 | _____________________________________________________________ 82 | [MD] 83 | steps 1000000 84 | _____________________________________________________________ 85 | [MD] 86 | steps 1000000 87 | -------------------------------------------------------------------------------- /cadee/qscripts/template_examples/genfeps_10ps-f.proc: -------------------------------------------------------------------------------- 1 | # Example input for q_genfeps.py 2 | # 3 | # There are four sections in curly braces: 4 | # SCRIPT_VARS - contains user defined constants that will be replaced with their values if they appear in this input. Useful for seq.restr. Example: RESTRAINT1 5993 6003 5 | # try to make them unique and descriptive, not something like MD (that would suck for you) 6 | # GENERAL - qdyn5 parameters, used in all steps. 7 | # STEPS_EQUIL - qdyn5 parameters for each equilibration step, separated by "________________". These values override the ones defined in the GENERAL section. 8 | # FEP - qdyn5 parameters for FEP inputs. These values override the ones in the GENERAL section. 
9 | # 10 | # The files section is automatically generated for each step 11 | # Lambda values are generated as well 12 | # Restraints are overridden 13 | # Atom indices can be specified with placeholders $RESID.ATOMNAME$ 14 | # 15 | # 10ps@10K followed by 100ps@300K equilibration 16 | # 10ps per FEP frame 17 | 18 | ####################################################### 19 | {SCRIPT_VARS} 20 | ####################################################### 21 | Q_REGION $315.P1$ $315.H14$ 0.5 1 0 0 22 | 23 | ####################################################### 24 | {GENERAL} 25 | ####################################################### 26 | [MD] 27 | stepsize 1 28 | temperature 300 29 | bath_coupling 100 30 | separate_scaling on 31 | lrf on 32 | 33 | [cut-offs] 34 | solute_solute 10 35 | solute_solvent 10 36 | solvent_solvent 10 37 | q_atom 99 38 | 39 | [intervals] 40 | non_bond 30 41 | output 1000 42 | trajectory 1000 43 | temperature 1000 44 | energy 10 45 | 46 | [sequence_restraints] 47 | Q_REGION 48 | 49 | [distance_restraints] 50 | # distance restraints here 51 | 52 | [angle_restraints] 53 | # angle restraints here 54 | 55 | ####################################################### 56 | {STEPS_EQUIL} 57 | ####################################################### 58 | [MD] 59 | steps 10000 60 | temperature 10 61 | initial_temperature 10 62 | random_seed -1 # <1 will generate a random random_seed 63 | ______________________________________________________ 64 | [MD] 65 | steps 100000 66 | 67 | ####################################################### 68 | {FEP} 69 | ####################################################### 70 | [MD] 71 | steps 10000 72 | 73 | -------------------------------------------------------------------------------- /workflow.md: -------------------------------------------------------------------------------- 1 | CADEE 0.9.1: Workflow 2 | ===================== 3 | 4 | #### Description 5 | CADEE's workflow imitates directed evolution. 
Before one can start with directed evolution iterations, a parametrized EVB reaction has to be obtained. Once the reference reaction has been established, one iteration after the other can be applied. Usually, this can be kicked off with an alanine scan (1, 2), followed up with analysis (3). The analysis will aid the user to select hotspots and start another round of in-silico mutagenesis (4). 6 | 7 | 8 | ## Workflow 9 | #### Prerequisites: 10 | ##### i) PDB file generated with qprep 11 | ##### ii) FEP file for wild-type reaction 12 | ##### iii) Qprep-inputfile used to generate PDB file (see i) 13 | ##### iv) Path to the folder with the libraries used to generate the inputfiles. 14 | ##### v) Adjusting inputfiles: 15 | Next, the wildtype-reaction can be prepared: 16 | `cadee prep wt.pdb wt.fep qprep6.inp ./libraries` CADEE will provide information on how to adjust your qprep-input file. These changes have to be implemented to continue; CADEE needs absolute paths to your library, for example. 17 | 18 | ### 1. Prepare an alanine scan for 16 mutants: 19 | ` cadee prep wt.pdb wt.fep qprep6.inp ./libraries --alascan --nummuts 16` 20 | This command creates a folder (ala_scan) with subfolders containing the topology and fepfile: mutant.top, mutant.fep, qprep6.inp 21 | It will contain 16 subfolders, labelled according to the scheme 0XX_ALA, 1XX_ALA. 22 | 23 | 24 | #### 2: Submission of Jobs 25 | You might have to adjust the following instructions to fit the machine you are executing them on. 26 | ``` 27 | MPI_ENVIRONMENT -n N cadee dyn /path/to/simpacks 28 | ``` 29 | MPI_ENVIRONMENT and N need to be adjusted to your system, and depend on your compiler and other settings. If you use a modern laptop computer running Ubuntu 16.04 with the recommended defaults, `mpiexec.mpich -n 3` might be the right choice. 30 | 31 | 32 | #### 3: Analysis of Results 33 | You can then analyse the cadee.db to generate either *csv or *html files. 
34 | ``` 35 | cadee ana --help 36 | ``` 37 | 38 | #### 4: Next iteration: If you have SCWRL4 installed, you can also do arbitrary mutations, e.g. mutate residue 15 to glutamic acid: 39 | = requirements:SCWRL4 installed (see Installation Section) 40 | `cadee prep wt.pdb wt.fep qprep6.inp ./libraries --libmut 15:E` 41 | This creates a folder (libmut) with subfolders containing the topologies and fepfiles. 42 | 43 | or even a saturation on 15 44 | `cadee prep wt.pdb wt.fep qprep6.inp ./libraries --libmut 15:SATURATE` 45 | 46 | The libmut argument is very powerful; other options include: 47 | 48 | = SINGLE POINT MUTATION = 49 | --libmut 137:NEGATIVE (2AA) 50 | --libmut 137:UNCHARGED (4AA) 51 | --libmut 137:SHRINK (variable) 52 | --libmut 137:'CGP' (3AA) 53 | --libmut 137:'DEKCPG' (6AA) 54 | 55 | = COMBINATORIAL MULTI POINT MUTATION = 56 | --libmut 137:'DEKCPG' 138:'DEKCPG' (6AAx6AA=36AA) 57 | --libmut 137:'DEKCPG' 138:'DEKCPG' 139:SATURATE (6AAx6AAx20=720AA) 58 | 59 | #### Return to [readme.md](./readme.md) 60 | -------------------------------------------------------------------------------- /cadee/qscripts/q_setprot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 Miha Purg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 
17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | # 26 | # 27 | # 28 | # Takes as arguments: 29 | # - pdb structure 30 | # - file containing residue indexes that should be charged 31 | # (each in new line or space separated) 32 | # 33 | # Renames the ARG,GLU,ASP,LYS that should be neutral to ARN,GLH,ASH and LYN 34 | # and vice versa 35 | # 36 | 37 | import sys 38 | import os 39 | try: 40 | import argparse 41 | except ImportError: 42 | import lib.argparse as argparse 43 | from lib.common import backup_file 44 | 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument("pdb", help="PDB structure file") 47 | parser.add_argument("resids", help="Text file with space or newline separated indexes of residues that should be IONIZED. All others will be set to their neutral form.") 48 | parser.add_argument("outfn", help="Output filename") 49 | 50 | if len(sys.argv) == 1: 51 | parser.print_help() 52 | sys.exit(1) 53 | 54 | args = parser.parse_args() 55 | 56 | if not os.path.lexists(args.pdb): 57 | print "FATAL! File %s doesn't exist." 
% args.pdb 58 | sys.exit(1) 59 | 60 | chres = ( "ARG", "GLU", "ASP", "LYS" ) # charged names 61 | nres = ( "ARN", "GLH", "ASH", "LYN" ) # neutral names 62 | 63 | pdb_lines = open(args.pdb, 'r').readlines() 64 | charged_residues = open(args.resids, 'r').read().split() 65 | 66 | new_pdb = "" 67 | 68 | for line in pdb_lines: 69 | ri = line[22:26].strip() 70 | rn = line[17:20] 71 | new_line="" 72 | 73 | if ri in charged_residues: 74 | if rn in nres: 75 | new_rn = chres[ nres.index(rn) ] 76 | print "Changing: %s%s to %s%s" % (rn,ri, new_rn, ri) 77 | new_line=line.replace(rn, new_rn) 78 | else: 79 | new_line=line 80 | else: 81 | if rn in chres: 82 | new_rn = nres[ chres.index(rn) ] 83 | new_line=line.replace(rn, new_rn) 84 | else: 85 | new_line = line 86 | new_pdb += new_line 87 | 88 | 89 | backup = backup_file(args.outfn) 90 | if backup: 91 | print "Backed up '%s' to '%s'" % (args.outfn, backup) 92 | 93 | open(args.outfn, 'w').write(new_pdb) 94 | print "Wrote " + args.outfn 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /cadee/prep/config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ Configuration Module 4 | Author: {0} ({1}) 5 | 6 | This module is part of CADEE, the framework for 7 | Computer-Aided Directed Evolution of Enzymes. 
8 | """ 9 | 10 | 11 | from __future__ import print_function 12 | 13 | import logging 14 | 15 | __author__ = "Beat Amrein" 16 | __email__ = "beat.amrein@gmail.com" 17 | 18 | logger = logging.getLogger('prep.config') 19 | 20 | # ERROR/EXIT CODES 21 | ERR_USAGE = 1 22 | ERR_OUTPUTFOLDER_EXISTS = 2 23 | ERR_TOPO_GENERATION_WT = 3 24 | ERR_QPREP5_INEXISTENT = 4 25 | ERR_MKTOP_INEXISTENT = 5 26 | ERR_NO_BABEL = 6 27 | 28 | # CONSTANTS 29 | NLC = '\n' 30 | 31 | 32 | class SatLibs(object): 33 | """Saturation Libraries""" 34 | ALL = ['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K', 35 | 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'] 36 | NDT = ['F', 'L', 'I', 'V', 'Y', 'H', 'N', 'D', 'C', 'R', 'S', 'G'] 37 | SPECIAL = ['C', 'G', 'P'] 38 | HYDROPHOBIC = ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'] 39 | MINUS = ['D', 'E'] 40 | PLUS = ['R', 'H', 'K'] 41 | CHARGED = MINUS[:] 42 | CHARGED.extend(PLUS) 43 | NEUTRAL = ['S', 'T', 'N', 'Q'] 44 | POLAR = CHARGED[:] 45 | 46 | @staticmethod 47 | def get_lib(name): 48 | """ check if library with name exists, alternatively check 49 | if the provided library is fine 50 | 51 | @raises Exception if an AA code is used 2x, 52 | @raises Exception if an len(name) is <1, 53 | @raises Exception if invalid AA code is provided. 54 | 55 | """ 56 | # TODO: MAKE SURE THAT THE RESIDUES ARE ACTUALLY CHARGED CORRECTLY, 57 | # I.E. ASP => ASH etc... 
(check with propka) 58 | name = name.upper().strip() 59 | if name == 'ALL' or name == 'SATURATE': 60 | return SatLibs.ALL 61 | elif name == 'NDT': 62 | return SatLibs.NDT 63 | elif name == 'SPECIAL': 64 | return SatLibs.SPECIAL 65 | elif name == 'HYDROPHOBIC' or name == 'APOLAR': 66 | return SatLibs.HYDROPHOBIC 67 | elif name == 'CHARGED': 68 | return SatLibs.CHARGED 69 | elif name == 'CHARGED+' or name == 'POSITIVE': 70 | return SatLibs.PLUS 71 | elif name == 'CHARGED-' or name == 'NEGATIVE': 72 | return SatLibs.MINUS 73 | elif name == 'POLAR' or name == 'HYDROPHILE': 74 | return SatLibs.POLAR 75 | elif name == 'NEUTRAL' or name == 'UNCHARGED': 76 | return SatLibs.NEUTRAL 77 | else: 78 | customlib = [] 79 | for char in name: 80 | if char in SatLibs.ALL: 81 | if char in customlib: 82 | raise Exception('1-Letter Code used twice:', char) 83 | else: 84 | customlib.append(char) 85 | else: 86 | raise Exception('Invalid 1-Letter Code:', char) 87 | if len(customlib) < 1: 88 | raise Exception('Invalid: Empty Library', char) 89 | return customlib 90 | 91 | 92 | RESRENAME = [ 93 | ["HID", "HIS"], 94 | ["HIE", "HIS"], 95 | ["HIP", "HIS"], 96 | ["ASH", "ASP"], 97 | ["GLH", "GLU"], 98 | ["ARN", "ARG"], 99 | ["LYN", "LYS"], 100 | ["DNI", "ASP"], 101 | ["HNI", "HIS"] 102 | ] 103 | 104 | # residues compatible with scwrl4, only these residue will be scwrl-ed 105 | NATURAL_AA = ['ARG', 'HIS', 'LYS', 'ASP', 'GLU', 'SER', 'THR', 'ASN', 'GLN', 106 | 'CYS', 'GLY', 'PRO', 'ALA', 'VAL', 'ILE', 'LEU', 'MET', 'PHE', 107 | 'TYR', 'TRP'] 108 | 109 | # USER-DEFINED CONSTANTS 110 | SOLVENT_MOLECULES = ['WAT', 'HOH', 'H2O', 'T3P'] 111 | -------------------------------------------------------------------------------- /cadee/ana/export_to_csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | The script reads extracts a database produced by CADEE 5 | and exports the data in a CSV file for custom analysis. 
6 | 7 | usage: python extract_to_csv_medium.py cadee.pdb medium.csv 8 | this will create your medium.csv file (spreadsheet) 9 | 10 | Author: {0} ({1}) 11 | 12 | This program is part of CADEE, the framework for 13 | Computer-Aided Directed Evolution of Enzymes. 14 | 15 | """ 16 | 17 | 18 | from __future__ import print_function 19 | import sys 20 | import os 21 | import sqlite3 22 | 23 | __author__ = "Beat Amrein, Miha Purg" 24 | __email__ = "beat.amrein@gmail.com, miha.purg@gmail.com" 25 | 26 | RUN_TYPE="us" 27 | 28 | def main(args, what='barr_forw'): 29 | """ 30 | :param args: ['cadee.db', 'output.csv'] *list of filenames*: 31 | :param what: 'barr_forw' (default) or 'exo' *string*: 32 | :return: void 33 | """ 34 | 35 | if len(args) != 3: 36 | print("Invalid arguments", args) 37 | print() 38 | print("Usage: \n " + os.path.basename(__file__) + " cadee.db output.csv") 39 | sys.exit(1) 40 | 41 | db=args[1] 42 | if not os.path.lexists(db ): 43 | print("File %s does not exist!" % db) 44 | sys.exit(1) 45 | 46 | outcsv=args[2] 47 | 48 | if os.path.exists(outcsv): 49 | print("File %s exists. Please remove it and retry." % outcsv) 50 | sys.exit(2) 51 | 52 | if what == 'barr_forw': 53 | print('Exporting Barrier') 54 | elif what == 'exo': 55 | print('Exporting deltaG') 56 | else: 57 | print("Unknown Column %s. Please use either 'barr_forw' or 'exo'", what) 58 | sys.exit(3) 59 | 60 | # connect and get values from DB 61 | conn = sqlite3.connect(db) 62 | cursor = conn.cursor() 63 | try: 64 | cursor.execute("SELECT mutant,replik,name,? 
FROM results WHERE feptype=?", (what, RUN_TYPE)) 65 | except sqlite3.DatabaseError as e: 66 | print("Error accessing the database: '%s' (%s)" % (db, e)) 67 | sys.exit(1) 68 | 69 | results = cursor.fetchall() 70 | conn.close() 71 | 72 | data = {} 73 | names = set() # filenames (1100_eq, 1200_eq), set to make a list of unique values from all the mutants (in case some are missing) 74 | 75 | maxreplik = 0 76 | # get WT averages 77 | for res in results: 78 | mutant, replik, name, barr = res 79 | if replik > maxreplik: 80 | maxreplik = replik 81 | mutant = mutant.split('_')[0] 82 | if mutant not in data: 83 | data[mutant] = {} 84 | if replik not in data[mutant]: 85 | data[mutant][replik] = {} 86 | data[mutant][replik][name] = barr 87 | names.add(name) 88 | 89 | csv = [] 90 | muts = sorted(data.keys()) 91 | csv.append("mutant;replik;" + ";".join(muts)) 92 | for name in sorted(list(names)): 93 | 94 | for replik in range(maxreplik+1): 95 | values = [] 96 | for mut in muts: 97 | i = data[mut].get(replik, {}) 98 | if len(i) == 0: 99 | values.append("") 100 | print('WARNING: empty replik', mut, replik) 101 | else: 102 | j = i.get(name, "") 103 | if j == "": 104 | values.append("") 105 | print('info: empty name', mut, replik, name) 106 | else: 107 | values.append(str(j)) 108 | csv.append(name + ";" + str(replik) + ";" + ";".join(values)) 109 | 110 | open(outcsv, "w").write("\n".join(csv)) 111 | print("Success... Wrote %s..." % outcsv) 112 | 113 | if __name__ == "__main__": 114 | main(sys.argv[1:]) 115 | -------------------------------------------------------------------------------- /cadee/prep/mergepdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Merge PDB-Files 4 | 5 | Author: {0} ({1}) 6 | 7 | This program is part of CADEE, the framework for 8 | Computer-Aided Directed Evolution of Enzymes. 
9 | """ 10 | 11 | 12 | from __future__ import print_function 13 | 14 | import logging 15 | import sys 16 | import time 17 | 18 | import tools as tools 19 | from config import NLC, ERR_USAGE, NATURAL_AA 20 | 21 | __author__ = "Beat Amrein" 22 | __email__ = "beat.amrein@gmail.com" 23 | 24 | logger = logging.getLogger('prep.mergepdb') 25 | 26 | 27 | # not used, using babel instead 28 | def get_sequence(pdbfile): 29 | """Return Aminoacid Sequence as list""" 30 | seq = [] 31 | oresnum = -1 32 | for line in open(pdbfile, 'r'): 33 | if line[:4] != 'ATOM': 34 | continue 35 | resnum = int(line[22:26]) 36 | if resnum != oresnum: 37 | seq.append(line[17:20]) 38 | oresnum = resnum 39 | return seq 40 | 41 | 42 | def mergepdb(oldq, scwrlpdb, newfile, pos): 43 | """ 44 | INPUT: 45 | oldq: pdbfile 46 | scwrlpdb: pdbfile 47 | pos: list with positions that were mutated 48 | 49 | OUTPUT: merged oldq/scwrlpdb 50 | 51 | LIMITATIONS: 52 | ONLY WORKS WHEN NOT_RESIDUES ARE AT THE END OF THE PDBFILES. 53 | """ 54 | 55 | ores = {} 56 | postfix = [] 57 | for line in tools.read_pdbatoms(oldq): 58 | 59 | # if line[11:14].strip()=='H': 60 | # print('skip proton') 61 | # continue 62 | 63 | resi = line[22:27] 64 | # resi=int(resi) 65 | 66 | if int(resi)-1 in pos: # mutated residue 67 | resn = None 68 | else: 69 | resn = line[17:20] 70 | if resn not in NATURAL_AA: 71 | postfix.append(line[:-1]) 72 | 73 | if not line[12:15] == ' N ': 74 | continue 75 | coords = line[31:54] 76 | ores[coords] = resn 77 | 78 | resnumnam = None 79 | newfile = open(newfile, 'w') 80 | for line in tools.read_pdbatoms(scwrlpdb): 81 | 82 | # kill hydrogen: 83 | if line.split()[-1] == "H": 84 | continue 85 | 86 | if line[12:15] == ' N ': 87 | coords = line[31:54] 88 | resnumnam = line[17:27] 89 | resn = ores[coords] 90 | if resnumnam == line[17:27] and (resn is not None): 91 | line = line[:17] + resn + line[20:-1] 92 | else: 93 | pass 94 | print (line.rstrip(), file=newfile, end=NLC) 95 | 96 | for fix in postfix: 97 | print 
(fix.rstrip(), file=newfile, end=NLC) 98 | 99 | newfile.close() 100 | 101 | # os.system('vimdiff {0} {1} {2}'.format(oldq, scwrlpdb, newfile)) 102 | 103 | if __name__ == "__main__": 104 | # Parse Command Line 105 | def usage(): 106 | """Print Usage and Exit""" 107 | print('') 108 | print('Usage:') 109 | print(' ' + sys.argv[0] + ' oldq.pdb nowq.pdb output.pdb rs1 [ res2 ... ]') # NOPEP8 110 | print('') 111 | sys.exit(ERR_USAGE) 112 | 113 | def get_residues(): 114 | """Return Arg4+ as list of integers""" 115 | positions = [] 116 | for pos in sys.argv[4:]: 117 | try: 118 | positions.append(int(pos)) 119 | except ValueError: 120 | usage() 121 | except TypeError: 122 | usage() 123 | 124 | if len(sys.argv) < 5: 125 | usage() 126 | 127 | START = time.time() 128 | 129 | if len(sys.argv) == 3: 130 | mergepdb(sys.argv[1], sys.argv[2], None, get_residues()) 131 | else: 132 | mergepdb(sys.argv[1], sys.argv[2], 133 | open(sys.argv[3], 'w'), get_residues()) 134 | 135 | print('time', time.time()-START) 136 | -------------------------------------------------------------------------------- /cadee/prep/create_template_based_simpack.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | This module generates Q-input files. 4 | 5 | Author: {0} ({1}) 6 | 7 | This program is part of CADEE, the framework for 8 | Computer-Aided Directed Evolution of Enzymes. 
9 | """ 10 | 11 | 12 | from __future__ import print_function 13 | 14 | import logging 15 | import tarfile 16 | import os 17 | 18 | import tools 19 | 20 | __author__ = "Beat Amrein" 21 | __email__ = "beat.amrein@gmail.com" 22 | 23 | logger = logging.getLogger('prep.create_inputs') 24 | 25 | 26 | #TODO: Make fep/temperization settings user adjustable 27 | # fep settings 28 | fep = "0.5 1 0" # (seq restraints) 29 | 30 | # Temperization settings (seq restraints) 31 | dyn = [None]*9 32 | dyn[1] = ("200 1 0", "20 1 0") 33 | dyn[2] = ("200 1 0", "10 1 0") 34 | dyn[3] = ("20 1 0", "5 1 0") 35 | dyn[4] = ("20 1 0", "5 1 0") 36 | dyn[5] = ("20 1 0", "2 1 0") 37 | dyn[6] = ("5 1 0",) 38 | dyn[7] = ("0.5 1 0",) 39 | 40 | 41 | def _create_inputs(tartemplate): 42 | def rewrite_res_section(base, fil=None): 43 | # for line in inpfil: 44 | # if line.strip() == '[Sequence_Restraints]': 45 | # break 46 | # print(line, end='', file=fil) 47 | # print('[Sequence_Restraints]', file=fil) 48 | if "_dyn" in base and '08_dyn' not in base: 49 | idx = int(base[:2]) 50 | print(" {1} {2} {0}".format( 51 | dyn[idx][0], *solute_range), file=fil) 52 | if len(dyn[idx]) == 2: 53 | print(" {1} {2} {0}".format( 54 | dyn[idx][1], *solvent_range), file=fil) 55 | else: 56 | for feprange in reversed(fepranges): 57 | print(" {1} {2} {0}".format( 58 | fep, *feprange), file=fil) 59 | print('', file=fil) 60 | print('', file=fil) 61 | fil.close() 62 | 63 | pdbfil = 'mutant.pdb' 64 | fepfil = 'mutant.fep' 65 | 66 | if not os.path.isfile(pdbfil): 67 | errmsg = 'PDBFile {0} does not exist: {1}'.format(pdbfil, os.getcwd()) 68 | raise Exception(errmsg) 69 | 70 | if not os.path.isfile(fepfil): 71 | errmsg = 'FEPFile {0} does not exist: {1}'.format(fepfil, os.getcwd()) 72 | raise Exception(errmsg) 73 | 74 | # determine the ranges for solute, solvent, and fepatoms 75 | solute_range, solvent_range = tools.get_solute_and_solvent_ranges(pdbfil) 76 | indexes = tools.get_fep_atom_pdbindexes(fepfil) 77 | fepranges = 
tools.get_ranges(indexes) 78 | 79 | tartemplate.extractall() 80 | for inp in os.listdir('.'): 81 | if inp[-4:] == ".inp": 82 | if "_eq" in inp or "_fep" in inp or "_dyn" in inp: 83 | base = os.path.basename(inp) 84 | rewrite_res_section(base, open(inp, 'a')) 85 | 86 | 87 | def walk(folder, tartemplate): 88 | """ Walk trough subfolders of "folder" and create inputs based on "tartemplate" 89 | :param: folder string 90 | :param: tartemplate tarfile 91 | """ 92 | 93 | wd = os.getcwd() 94 | os.chdir(folder) 95 | for fol in os.listdir('.'): 96 | os.chdir(folder) 97 | logger.info('Creating input files for %s .', fol) 98 | try: 99 | os.chdir(fol) 100 | except: 101 | continue 102 | try: 103 | _create_inputs(tartemplate) 104 | except Exception as e: 105 | # TODO: Errorhandling 106 | logger.warning('Exception %s happened in %s .', e, fol) 107 | raise 108 | os.chdir(wd) 109 | 110 | 111 | def main(folder, templatetar=None): 112 | 113 | if templatetar is None: 114 | templatetar = os.path.join(os.path.dirname(os.path.realpath(__file__)), 115 | '../lib/simpack_template_12ns_100ps_8000ps_4160ps.tar.bz2') 116 | 117 | if not os.path.isfile(templatetar): 118 | raise Exception('Template-Tar does not exist:', templatetar) 119 | 120 | tartemplate = tarfile.open(templatetar, mode='r:bz2') 121 | 122 | walk(folder, tartemplate) 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /cadee/qscripts/q_pdbindex.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 Miha Purg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 
12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | # 26 | # 27 | # 28 | # changes the placeholders inside template files ("$593.CA$") to pdb indexes ("1123") 29 | # takes in three arguments: PDB (after qprep), file containing the placeholders (q_makefep.py generated FEP file, input templates), output filename 30 | # extra keyword that can be used instead of the placeholder is 'LAST.ID' (no explanation needed) 31 | 32 | import re 33 | from lib.common import backup_file 34 | 35 | PLACEHOLDER_RE = re.compile("\$\S+\.\S+\$") 36 | 37 | def convertToIndexes(inputstring, pdbfile, ignore_comments=True): 38 | atoms = {} 39 | 40 | with open(pdbfile, 'r') as f: 41 | for line in f.readlines(): 42 | if line.startswith("ATOM") or line.startswith("HETATM"): 43 | index = line[6:12].strip() 44 | pdb_id = line[22:26].strip() + "." 
+ line[12:17].strip() 45 | atoms[pdb_id] = index 46 | 47 | last_pdbid = pdb_id 48 | 49 | outputstring="" 50 | for line in inputstring.split("\n"): 51 | comment="" 52 | if ignore_comments and "#" in line: 53 | i = line.index("#") 54 | line,comment = line[:i], line[i:] 55 | 56 | c = findPlaceholders(line) 57 | for pid in c: 58 | pid = pid.strip("$") 59 | pid2 = pid.replace("LAST.ID", last_pdbid) 60 | try: 61 | padding = (len(pid2)+2 - len(atoms[pid2])) * " " 62 | except KeyError: 63 | raise KeyError("Atom '$%s$' does not exist in the pdb structure." % pid2) 64 | line = re.sub("\$" + pid + "\$", atoms[pid2] + padding, line) 65 | 66 | outputstring += line + comment + "\n" 67 | return outputstring 68 | 69 | def findPlaceholders(inputstring): 70 | return PLACEHOLDER_RE.findall(inputstring) 71 | 72 | 73 | if __name__ == "__main__": 74 | 75 | import sys 76 | import os 77 | try: 78 | import argparse 79 | except ImportError: 80 | import lib.argparse as argparse 81 | 82 | parser = argparse.ArgumentParser() 83 | parser.add_argument('pdb', help = 'pdb structure file (created with qprep)') 84 | parser.add_argument('inp', help = 'input/fep file containing the placeholders') 85 | parser.add_argument('out', help = 'output filename') 86 | 87 | if len(sys.argv) < 3: 88 | parser.print_help() 89 | sys.exit(1) 90 | 91 | args = parser.parse_args() 92 | 93 | for k,v in vars(args).iteritems(): 94 | if k in ['inp', 'pdb'] and not os.path.lexists(v): 95 | print "FATAL! File %s doesn't exist." % v 96 | sys.exit(1) 97 | 98 | inpstr = open(args.inp, "r").read() 99 | 100 | try: 101 | outstring = convertToIndexes(inpstr,args.pdb) 102 | except KeyError as e: 103 | print "FATAL! Exception raised: %s" % str(e) 104 | sys.exit(1) 105 | 106 | backup = backup_file(args.out) 107 | if backup: 108 | print "Backed up '%s' to '%s'" % (args.out, backup) 109 | open(args.out, 'w').write(outstring) 110 | print "Created file '%s'..." 
from __future__ import print_function

"""
CADEE: Computer-Aided Directed Enzyme Evolution

To install CADEE type: "python setup.py install"
 or for a single user "python setup.py install --user"

Author: {0} ({1})

This program is part of CADEE, the framework for
Computer-Aided Directed Evolution of Enzymes.

"""

import os
import shutil
import sys
from glob import glob

from setuptools import setup

__author__ = "Beat Amrein"
__email__ = "beat.amrein@gmail.com"

# Directory that contains this setup.py; all paths are resolved against it so
# the script does not depend on the current working directory.
HERE = os.path.dirname(os.path.abspath(__file__))

QEXES = ['Qdyn6', 'Qprep6', 'Qfep6', 'Qcalc6']
qexedir = os.path.join(HERE, 'cadee/executables/q/')


def installation_failed():
    """
    Abort the setup/installation process.
    """
    print('')
    print('CADEE installation has failed.')
    sys.exit(1)


def q_missing(exe):
    """
    Print message about the missing Q6-executable and call installation_failed.

    ``exe`` is the name of the executable that could not be found.
    """
    print()
    print('ERROR: Could not find {0}. Please install Q6.'.format(exe))
    print("")
    print(' Copy the binaries to {0}.'.format(qexedir))
    print(' -OR-')
    print(' Ensure the binaries are in $PATH.')
    print()
    print(' Q6 can be obtained free of charge from {0}.'.format('https://github.com/qusers/Q6'))
    installation_failed()


def _read_version():
    """Return ``__version__`` as defined in cadee/version.py."""
    namespace = {}
    with open(os.path.join(HERE, 'cadee', 'version.py')) as handle:
        exec(handle.read(), namespace)
    return namespace['__version__']


print('Welcome to CADEE Pre-Setup Check.')
print()

# Check the interpreter first: everything below (including the cadee import)
# is only expected to work on Python 2.7.
if not (sys.version_info[0] == 2 and sys.version_info[1] == 7):
    print('Need Python 2.7')
    installation_failed()

__version__ = _read_version()

import cadee.executables.exe as exe

# compatibility of python3 and python2
try:
    ask = raw_input
except NameError:
    ask = input

# There are many versions of executables named Qdyn6.
# CADEE should stick to one version, so include with the cadee installation.
for qexe in QEXES:
    # NOTE(review): the second argument presumably restricts the search to
    # CADEE's bundled executables directory -- confirm against exe.which.
    if not exe.which(qexe, True):

        print('Warning: Could not find {0} in {1}'.format(qexe, qexedir))
        print(' Searching in $PATH...')
        print()
        if exe.which(qexe):
            print(' Found {0} in {1}.'.format(qexe, exe.which(qexe)))
            print()
            print(' Will now copy {0} to {1}.'.format(qexe, qexedir))

            ans = ask(' Proceed (y/N)?').lower()

            if ans == 'y':
                print(' Proceed.')
                shutil.copy2(exe.which(qexe), qexedir)
                print('cped {0} to {1}.'.format(exe.which(qexe), qexedir))
            else:
                print(' Abort.')
                print(' Fatal: Cannot continue installation without {0}'.format(qexe))
                q_missing(qexe)
        else:
            print('Fatal: Could not find {0} in $PATH.'.format(qexe))
            q_missing(qexe)

if not exe.which('babel'):
    print('ERROR: Could not find babel. Please install Open Babel and ensure the binaries are in $PATH.')
    print(' Using Ubuntu try: sudo apt-get install openbabel')
    print(' for Homebrew try: brew install open-babel')
    installation_failed()

if not exe.which('Scwrl4'):
    print('ERROR: Scwrl4 is missing. Please install Scwrl4 and make sure the binary is in $PATH.')
    print()
    print(' Scwrl4 can be obtained from {0} (free for non-commercial use).'.format('http://dunbrack.fccc.edu/scwrl4/'))
    installation_failed()


setup(name='cadee',
      version=__version__,
      description='Computer Aided Directed Evolution of Enzymes',
      url='http://github.com/kamerlinlab/cadee',
      author='Beat Anton Amrein',
      author_email='beat.amrein@gmail.com',
      license='GPLv2',
      packages=['cadee', 'cadee.ana', 'cadee.dyn', 'cadee.prep', 'cadee.executables', 'cadee.qscripts', 'cadee.tools'],
      py_modules=['cadee'],
      # 'qscripts/README.md' was misspelled 'REAMDE.md', so the file was
      # silently left out of the installed package data.
      package_data={'cadee': ['lib/*', 'qscripts/lib/*', 'qscripts/README.md', 'qscripts/LICENSE.txt', 'executables/q/q*', 'tools/*', 'version.py']},
      install_requires=[
          'mpi4py==1.3.1',
          'numpy',
      ],
      scripts=[
          'cadee/cadee',
      ],
      zip_safe=False)
8 | # 9 | # The files section is automatically generated for each step 10 | # The restraints are overriden 11 | # Atom indices can be specified with placeholders $RESID.ATOMNAME$ 12 | # 13 | # Procedure: 14 | # Heating of solvent with solute heavy atoms restrained, 15 | # cooling of solvent, heating the system to 300K and releasing restraints. 16 | 17 | ####################################################### 18 | {SCRIPT_VARS} 19 | ####################################################### 20 | SOLUTE $1.N$ $315.H14$ 21 | 22 | ####################################################### 23 | {GENERAL} 24 | ####################################################### 25 | [MD] 26 | stepsize 1 27 | temperature 300 28 | bath_coupling 100 29 | separate_scaling on 30 | lrf on 31 | 32 | [cut-offs] 33 | solute_solute 10 34 | solute_solvent 10 35 | solvent_solvent 10 36 | q_atom 99 37 | 38 | [lambdas] 39 | 1.00 0.00 40 | 41 | [intervals] 42 | non_bond 30 43 | output 1000 44 | trajectory 1000 45 | temperature 1000 46 | 47 | [distance_restraints] 48 | # distance restraints here 49 | 50 | [angle_restraints] 51 | # angle restraints here 52 | 53 | 54 | ####################################################### 55 | {STEPS} 56 | ####################################################### 57 | 58 | [MD] 59 | steps 10000 60 | stepsize 0.00001 61 | temperature 0.01 62 | bath_coupling 0.0001 63 | random_seed -1 # <1 will generate a random random_seed 64 | initial_temperature 1 65 | shake_solvent off 66 | 67 | [sequence_restraints] 68 | SOLUTE 200 0 0 0 69 | _____________________________________________________________ 70 | 71 | [MD] 72 | steps 10000 73 | stepsize 0.0001 74 | temperature 0.01 75 | bath_coupling 0.001 76 | shake_solvent off 77 | 78 | [sequence_restraints] 79 | SOLUTE 200 0 0 0 80 | _____________________________________________________________ 81 | 82 | [MD] 83 | steps 10000 84 | stepsize 0.01 85 | temperature 0.01 86 | bath_coupling 0.1 87 | shake_solvent off 88 | 89 | 
[sequence_restraints] 90 | SOLUTE 200 0 0 0 91 | _____________________________________________________________ 92 | 93 | [MD] 94 | steps 10000 95 | stepsize 0.1 96 | temperature 1 97 | bath_coupling 1 98 | shake_solvent off 99 | 100 | [sequence_restraints] 101 | SOLUTE 200 0 0 0 102 | _____________________________________________________________ 103 | 104 | [MD] 105 | steps 10000 106 | temperature 100 107 | bath_coupling 10 108 | 109 | [sequence_restraints] 110 | SOLUTE 200 0 0 0 111 | _____________________________________________________________ 112 | 113 | [MD] 114 | steps 10000 115 | temperature 300 116 | 117 | [sequence_restraints] 118 | SOLUTE 200 0 0 0 119 | _____________________________________________________________ 120 | 121 | [MD] 122 | steps 10000 123 | temperature 5 124 | 125 | [sequence_restraints] 126 | SOLUTE 200 0 0 0 127 | _____________________________________________________________ 128 | 129 | [MD] 130 | steps 10000 131 | temperature 5 132 | 133 | [sequence_restraints] 134 | SOLUTE 20 0 0 0 135 | _____________________________________________________________ 136 | 137 | [MD] 138 | steps 10000 139 | temperature 100 140 | 141 | [sequence_restraints] 142 | SOLUTE 20 0 0 0 143 | _____________________________________________________________ 144 | 145 | [MD] 146 | steps 30000 147 | 148 | [sequence_restraints] 149 | SOLUTE 2 0 0 0 150 | -------------------------------------------------------------------------------- /cadee/prep/clash.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Module to locate Clashes in a pdbfile. 4 | 5 | Author: {0} ({1}) 6 | 7 | This program is part of CADEE, the framework for 8 | Computer-Aided Directed Evolution of Enzymes. 
from __future__ import print_function

import logging
import sys

__author__ = "Beat Amrein"
__email__ = "beat.amrein@gmail.com"

logger = logging.getLogger('prep.clash')

BACKBONE = ['H', 'N', 'O', 'C', 'CA']
IGNORE_BB = True
IGNORE_H = True

RADIUS = 2.5   # Default: 2.5 A.


def clash_score(distance):
    """
    Return the clash penalty for two atoms `distance` apart.

    The penalty is (RADIUS - distance)**6 for distances below RADIUS and
    0 for distances at or beyond RADIUS.
    """
    if distance >= RADIUS:
        return 0
    return (RADIUS - distance) ** 6


def clashscore_and_residues(resnums, pdblines):
    """
    Input: resnums: residue number (int or str) or list of residue numbers
    Input: pdblines: List of ATOM-records OR filename

    Output: (score, residues): summed clash score and the list of residues
            clashing with those indicated in resnums

    Side effect: appends the clashing residues and the score to the file
    'clashscore' in the current working directory.
    """
    if not isinstance(pdblines, list):
        # Same module the sibling functions use (was a bare `import tools`).
        import cadee.prep.tools as tools
        pdblines = tools.read_pdbatoms(pdblines)

    if isinstance(resnums, (int, str)):
        resnums = [int(resnums)]

    score = 0.
    clashes = set()
    # The context manager guarantees the log file is closed again; the
    # original left the handle open.
    with open('clashscore', 'a') as logfile:
        for resnum in resnums:
            incr_score, new_clashes = clashes_of_resnum(pdblines,
                                                        resnum, logfile=logfile)
            score += incr_score
            clashes.update(new_clashes)

        clashes = list(clashes)

        logfile.write(str(clashes))
        logfile.write(str(score))

    return score, clashes


def get_resnum_coords(resnum, pdblines):
    """
    input: resnum: residue number
           pdblines: list of ATOM entries
    return: (coords, names): coordinates and atom names of the atoms of
            residue `resnum`, skipping backbone atoms if IGNORE_BB and
            hydrogens if IGNORE_H.
    raises: whatever parsing a line raises, after logging the offending line
            and the whole structure.
    """
    import cadee.prep.tools as tools
    coords_a = []
    anames_a = []
    for line in pdblines:
        try:
            rnum = int(line[22:28])
            if IGNORE_BB and tools.is_backbone(line):
                continue
            if IGNORE_H and tools.is_hydrogen(line):
                continue
            if rnum == resnum:
                coords_a.append(tools.get_coords(line))
                anames_a.append(line[13:17].strip())
        except Exception as e:
            logger.fatal('Fatal: Exception in line %s', line)
            for pdbline in pdblines:
                logger.fatal(pdbline.strip())
            # `message` only exists on Python 2 exceptions.
            logger.fatal('Exception was: %s, %s, %s', e, e.args,
                         getattr(e, 'message', ''))
            # bare raise keeps the original traceback
            raise
    return coords_a, anames_a


def clashes_of_resnum(pdblines, resnum, logfile=None):
    """
    @param pdblines: list of pdbfile ATOM-records
    @param resnum: integer
    @param logfile: accepted for compatibility; not used here

    @return: score, residues
    """
    import cadee.prep.tools as tools

    coords_a, anames_a = get_resnum_coords(resnum, pdblines)
    clashing_resids = set()

    score = 0.
    for line in pdblines:
        if IGNORE_H and tools.is_hydrogen(line):
            continue
        # Parse the residue number once and use the same value for the
        # comparison and for the result. The original recorded
        # int(line[23:26]), which drops the leading digit of residue
        # numbers >= 1000.
        other_resnum = int(line[22:28])
        if other_resnum == resnum:
            continue
        coord_b = tools.get_coords(line)
        # zip keeps every coordinate paired with its own atom name, also when
        # two atoms share identical coordinates.
        for coord, aname in zip(coords_a, anames_a):
            dist = tools.euklid_dist(coord, coord_b)
            if dist < RADIUS:
                score += clash_score(dist)
                logger.debug('found clash: %s with resid: %s atom: %s dist: %s', line[:28], resnum, aname, round(dist, 3))  # NOPEP8
                clashing_resids.add(other_resnum)
    return score, list(clashing_resids)


def main(pdbfile, resnums):
    """Return clash-scores of resnums in pdbfile
    @param pdbfile: path to pdbfile
    @param resnums: int or list of ints
    """
    import cadee.prep.tools as tools
    print(clashscore_and_residues(resnums, tools.read_pdbatoms(pdbfile)))


if __name__ == "__main__":
    def usage():
        """ Print usage info and exit"""
        print("Usage:")
        print(sys.argv[0], ' pdbfile residue-number ')
        sys.exit()

    if len(sys.argv) != 3:
        usage()
    main(sys.argv[1], sys.argv[2])
14 | 4. EVB Analysis Files __(*.qana)__. 15 | 5. A readme file __(readme)__ (optional). 16 | 17 | Additionally, all prepared simpacks ('cadee prep') contain: 18 | 6. A Topology for Q __(mutant.top)__ and - not strictly required - a pdbfile __(mutant.pdb)__. 19 | 7. A FEP-file for Q __(mutant.fep)__ 20 | 21 | Once a simpack has been run ('cadee dyn'), the simpack contains output files: 22 | 8. Log Files generated by Qdyn6 __(0*.log)__, __(*eq.log)__ and __(*fep.log)__. 23 | *Note*: Logs are usually compressed and then end with __(*.log.gz)__. 24 | 9. Restart Files __(0*.re)__, __(*eq.re)__ and __(*fep.re)__. 25 | 10. Energy Files generated by Qdyn6 __(*fep.en)__ and also __(*eq.en)__. 26 | *Note*: Energy files may be compressed and then end with __(*.en.gz)__. 27 | 11. dcd files, containing the compressed trajectory, __(*.dcd)__. 28 | 29 | Often, additional files are included. To avoid interference with CADEE, it is absolutely __CRUCIAL to avoid filenames ending in *.inp__, 30 | because these could easily be mistaken for Q-input files. 31 | 32 | ##1 Temperization Input Files 33 | The user is advised not to add or remove files without adjusting *create_template_based_simpack.py*. 34 | 35 | CADEE expects 8 files, named this way: 36 | __01_dyn_seed.inp__: 37 | The first file to be run. Contains the 'seed' keyword. Very short timestep (0.01fs). 38 | __02_dyn_no_shake.inp__: 39 | Shake disabled. Short timestep (0.1fs). 40 | __03_dyn_warm_1.inp__: 41 | Warm to 1K. Normal timestep (1fs). 42 | __04_dyn_warm_150.inp__: 43 | Warming the system to 150K. 44 | __05_dyn_warm_300.inp__: 45 | The fifth file, warming the system to 300K. 46 | __06_dyn_rls_water.inp__: 47 | Removing the sequence restraint on the water. 48 | __07_dyn_rls_protein1.inp__: 49 | Lowering the sequence restraint on protein. 50 | __08_dyn_rls_protein2.inp__: 51 | Removing the sequence restraint on the protein. 
52 | 53 | 54 | ##2 Equilibration Input Files 55 | To avoid confusion, all files that are not part of the temperization start with a 4-digit number. In the default simpack, for example, 56 | the first equilibration is in the file __1000_eq.inp__. The second equilibration is named __1010_eq.inp__, the third __1020_eq.inp__, etc. 57 | 58 | ##3 FEP Input Files 59 | To run EVB simulations, FEP files are needed. In the default simpack, every 20th equilibration is followed by a medium sized FEP calculation, consisting of 26 windows. 60 | __1190_fep.inp__, __1200_fep.inp__, ..., __1450_fep.inp__ 61 | 62 | ##4 Q-Analysis Files 63 | To automatically map the EVB calculations, files ending in *.qana can be used. A simpack without *.qana files IS NOT MAPPED BY CADEE. 64 | A *.qana file contains the names of the energy files (without extension), __ordered by lambda__. 65 | An example file content of a qana could be: 66 | 1450_fep 1440_fep 1430_fep 1420_fep 1410_fep 1400_fep 1390_fep 1380_fep 1370_fep 1360_fep 1350_fep 1340_fep 1330_fep 1190_eq 1200_fep 1210_fep 1220_fep 1230_fep 1240_fep 1250_fep 1260_fep 1270_fep 1280_fep 1290_fep 1300_fep 1310_fep 1320_fep 67 | 68 | ##5 readme 69 | The readme is optional and should contain information for the user about the simpack. 70 | 71 | ##6 mutant.top / mutant.pdb 72 | The __topology__ is required to run any MD simulation. It will be added to the simpack by cadee prep. 73 | By default, the corresponding __pdb-file (mutant.pdb)__ is included, too. 74 | 75 | ##7 mutant.fep 76 | The __fepfile__ is required to compute EVB. It will be added to the simpack by cadee prep. 77 | 78 | 79 | ##8 Log Files 80 | Log files are generated by Qdyn6. Once a step has been computed successfully, they are usually compressed, ending with __(.log.gz)__. 81 | 82 | ##9 Restart Files 83 | Restart files contain coordinates and velocities and are used to continue the simulation from a given point. 
84 | 85 | ##10 Energy Files 86 | Energy files are generated by Qdyn6 __(*fep.en)__. *Note*: Energy files may be compressed and then end with __(.en.gz)__. 87 | 88 | ##11 DCD Files 89 | The dcd files contain the 'movie' of a simulation and can for example be viewed with VMD. To do so, the mutant.pdb is needed, too. 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | CADEE 0.9.1 2 | =========== 3 | 4 | the framework for **C**omputer-**A**ided **D**irected **E**nzyme **E**volution. 5 | 6 | #### Description 7 | CADEE is a framework for computational directed evolution of enzymes. The methodology is described _in detail_ in our publication [CADEE: Computer-Aided Directed Evolution of Enzymes](https://doi.org/10.1107/S2052252516018017). CADEE is particularly powerful for computational modeling of SN2-like reactions in an MM/EVB framework, but the approach can be used to model any enzymatic reactions. 8 | 9 | 10 | 11 | ## System Requirements 12 | - *nix like OS (we use GNU/Linux) 13 | - CPU: 1 or more core-weeks per mutant 14 | - Storage: 6-12 GB per mutant 15 | - Software: `sudo apt-get install gfortran openmpi-bin git openbabel mpich gcc python2.7 python-pip` 16 | - Python Version 2.7 17 | - Fortran Compiler (e.g. gfortran) 18 | - C Compiler (e.g. gcc) 19 | - OpenMPI Compiler & Launcher 20 | - Open Babel 21 | - git (optional) 22 | - pip (optional) 23 | - [SLURM](https://slurm.schedmd.com/) (optional, scheduler for super computers)* 24 | 25 | \* CADEE was written specifically to allow efficient usage of a cluster computer. It is not required to run CADEE on a cluster and for example preparation and analysis are best performed on a laptop computer. However, because enzymatic reactions have many degrees of freedom, the simulation of enzymatic reactions requires considerable conformational sampling to allow for meaningful results. 
Therefore, we developed and tested CADEE on cluster computers which were coincidentally running SLURM. CADEE should be compatible with other schedulers but this remains untested to this day. 26 | 27 | # Installation 28 | 29 | ### External Executables: 30 | Both CADEE and the MD engine that CADEE relies on, [**Q6**](https://doi.org/10.1016/j.softx.2017.12.001), are free and opensource software licensed under the GPL v2. However, CADEE requires __SCWRL4__ for some functionality, which is proprietary although licensed free to academic users (research purposes). 31 | 32 | For mutations other than wild-type testing and alanine-scans, CADEE requires SCWRL4. 33 | - Q: 34 | https://github.com/qusers/Q6 35 | Download and compile Q. Optionally install to $PATH. 36 | - SCWRL4: 37 | Obtain a License for SCWRL4: (Free for Non-Commercial use) 38 | http://dunbrack.fccc.edu/scwrl4/license/index.html 39 | Download and install, make sure executable is in $PATH. 40 | 41 | Note: If you are not interested in the --libmut functionality of CADEE, you may place a dummy 42 | "Scwrl4" executable/script into your $PATH. This will obviously break the --libmut option. 43 | - Open Babel: Open Babel is a free program that can be installed with the os. The command in the 'System Requirements' section will install Open Babel automatically. 44 | 45 | 46 | ### Download and Install CADEE: 47 | We recommend using git to install CADEE, so that future releases of CADEE are easily accessible. 48 | 49 | Here the suggested steps: 50 | 51 | 52 | 53 | Clone into CADEE: 54 | 55 | `git clone https://github.com/kamerlinlab/cadee cadee` 56 | 57 | Install CADEE: 58 | 59 | `python setup.py install --user` 60 | 61 | 62 | ## Workflow 63 | See the [workflow](./workflow.md) document. 64 | 65 | ## How to Cite this Work 66 | The development of CADEE is mainly funded by academic research grants. 
To help 67 | us fund development, we humbly ask that you cite the CADEE paper: 68 | 69 | * CADEE: Computer-Aided Directed Evolution of Enzymes 70 | Amrein BA, Steffen-Munsberg F, Szeler I, Purg M, Kulkarni Y & Kamerlin SCL. 71 | IUCrJ 4, 50-64 (2017) 72 | DOI: [https://doi.org/10.1107/S2052252516018017](https://doi.org/10.1107/S2052252516018017) 73 | 74 | ## Disclaimer 75 | 76 | As this software was made in part by people employed by Georgia Tech University we must also clarify: “The software is provided “as is.” Neither the Georgia Institute of Technology nor any of its units or its employees, nor the software developers of CADEE or any other person affiliated with the creation, implementation, and upkeep of the software’s code base, knowledge base, and servers (collectively, the “Entities”) shall be held liable for your use of the platform or any data that you enter. The Entities do not warrant or make any representations of any kind or nature with respect to the System, and the Entities do not assume or have any responsibility or liability for any claims, damages, or losses resulting from your use of the platform. None of the Entities shall have any liability to you for use charges related to any device that you use to access the platform or use and receive the platform, including, without limitation, charges for Internet data packages and Personal Computers. 
THE ENTITIES DISCLAIM ALL WARRANTIES WITH REGARD TO THE SERVICE,INCLUDING WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE TO THE FULLEST EXTENT ALLOWED BY LAW.” 77 | 78 | -------------------------------------------------------------------------------- /cadee/qscripts/lib/plotdata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 Miha Purg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
#
#
#
#


try:
    from collections import OrderedDict as ODict
except ImportError:
    # Fallback for interpreters without collections.OrderedDict. The original
    # `import lib.OrderedDict as ODict` bound the module itself, which is not
    # callable.
    # NOTE(review): assumes lib/OrderedDict.py defines a class named
    # OrderedDict -- confirm against that file.
    from lib.OrderedDict import OrderedDict as ODict
import json
import sys


class PlotDataJSONEncoder(json.JSONEncoder):
    """JSON encoder that serializes PlotData objects as tagged dictionaries."""

    def default(self, obj):
        """Return a JSON-serializable dict for PlotData, else defer to the base class."""
        if isinstance(obj, PlotData):
            return {"__type__": "PlotData",
                    "title": obj.title,
                    "plot_type": obj.plot_type,
                    "xlabel": obj.xlabel,
                    "ylabel": obj.ylabel,
                    "subplots": obj.subplots}
        else:
            return json.JSONEncoder.default(self, obj)


class PlotDataJSONDecoder(json.JSONDecoder):
    """JSON decoder that rebuilds PlotData objects written by PlotDataJSONEncoder."""

    def __init__(self):
        if sys.version_info < (2, 7):
            # object_pairs_hook is supported only in version 2.7
            # (written via sys.stdout so the module also parses on Python 3)
            sys.stdout.write("You need python 2.7 or later to run this script, sorry (it's json's fault)!\n")
            sys.exit(1)
        super(PlotDataJSONDecoder, self).__init__(object_pairs_hook=self.decode_plotdata)

    def decode_plotdata(self, d):
        """Turn a list of (key, value) pairs into an ordered dict or a PlotData.

        Objects tagged with "__type__": "PlotData" become PlotData instances;
        everything else is returned as an ordered dictionary.
        """
        d = ODict(d)
        if "__type__" not in d:
            return d
        t = d["__type__"]
        if t == "PlotData":
            pd = PlotData(d["title"], d["plot_type"], d["xlabel"], d["ylabel"])
            pd.subplots = d["subplots"]
            return pd
        else:
            return d


def _grace_label(label):
    """Return `label` as a native string suitable for str.format.

    None becomes the empty string (xlabel/ylabel default to None), Python 2
    unicode is encoded as UTF-8, and Python 3 bytes are decoded from UTF-8.
    """
    if label is None:
        return ""
    if isinstance(label, bytes):
        # Python 2 `str` is `bytes`: already a native string there.
        return label if str is bytes else label.decode("utf-8")
    if str is bytes:
        # Python 2 unicode object
        return label.encode("utf-8")
    return label


class PlotData(object):
    """Container for one plot: title, axis labels and an ordered set of subplots."""

    def __init__(self, title, plot_type="line",
                 xlabel=None, ylabel=None):
        """Create an empty plot.

        Raises ValueError if `plot_type` is not "line" or "bar".
        """
        self.title = title
        PLOT_TYPES = ["line", "bar"]
        if plot_type not in PLOT_TYPES:
            raise ValueError("'plot_type' %s not supported. Try one of these instead: %s" % (plot_type, ",".join(PLOT_TYPES)))
        self.plot_type = plot_type
        self.xlabel = xlabel
        self.ylabel = ylabel
        self.subplots = ODict()

    def add_subplot(self, label, xdata, ydata, yerror=None):
        """Add (or replace) the data set stored under `label`."""
        self.subplots[label] = {"xdata": xdata, "ydata": ydata, "yerror": yerror}

    def export_grace(self):
        """Return the plot as a string in Grace (xmgrace) text format.

        The set type gets a "dy" suffix when at least one subplot carries
        error values matching its x data in length; subplots without usable
        errors get an empty third column.
        """
        if self.plot_type == "line":
            typ = "xy"
        elif self.plot_type == "bar":
            typ = "bar"
        # creates this:
        # @s0 legend "rep_000"
        # @s1 legend "rep_001" ...
        set_config = ""
        for i, sp in enumerate(self.subplots.keys()):
            set_config += "@s%d legend \"%s\" \n" % (i, sp)  # add legends
            if typ == "bar":
                set_config += "@s%d line type 0 \n" % (i,)  # don't show the line in bar plots

        sets = ""
        has_errors = False
        for label, sp in self.subplots.items():
            if not sp["yerror"] or len(sp["yerror"]) != len(sp["xdata"]):
                yerror = ["" for x in sp["xdata"]]
            else:
                yerror = sp["yerror"]
                has_errors = True
            for x, y, dy in zip(sp["xdata"], sp["ydata"], yerror):
                sets += "%s %s %s\n" % (x, y, dy)
            sets += "&\n"
        # Append the suffix once; the original appended it for every subplot
        # with errors, yielding invalid types such as "xydydy".
        if has_errors:
            typ = typ + "dy"

        return """#
@type {typ}
@title "{title}"
@xaxis label "{xlabel}"
@yaxis label "{ylabel}"
{set_config}
{sets}

""".format(typ=typ, title=self.title, xlabel=_grace_label(self.xlabel), ylabel=_grace_label(self.ylabel), set_config=set_config, sets=sets)
9 | # 10 | # Author: Beat Amrein 11 | # Email: beat.amrein@gmail.com 12 | # Date: 22.Feb 2017 13 | # Version: 0.1 14 | # 15 | # Description: Iterate trough a folder with Simpacks, using Qdyn6p. 16 | # Make sure you first test and adjust the child-script (srunq.sh). 17 | # Also note, that the child-script is expected to be placed in the 18 | # same folder like this script (see DIR-var need to adjust this). 19 | # 20 | # Installation: Put this script and the child-script (srunq.sh) into your $PATH. 21 | # This script needs a SLURM enviroment to work correctly. 22 | # 23 | # 24 | # This script is part of CADEE. 25 | # If you use this script, please cite: 26 | # Amrein et al. (2017), CADEE: Computer-Aided Directed Evolution of Enzymes, JUCrJ, p50-64 27 | # https://doi.org/10.1107/S2052252516018017 28 | # 29 | # Usage: 30 | # pcadee.sh /folder/with/simpacks 31 | # 32 | # TODO: If too few simpacks, auto-adjust 33 | # (say CORES=4, but AVAIL=16 and only 2 Simpacks, increase CORES to 8) 34 | # TODO: Before exiting, check if new simpacks available and run those, too. 35 | # 36 | # 37 | # This script is adjusted for usage on SLURM resources. 38 | # (see also child-script srunq.sh) 39 | ################# 40 | # USER SETTINGS # 41 | ################# 42 | 43 | # If you are not running SLURM, you may adjust following lines yourself: 44 | # SLURM_NTASKS # number of cores to use 45 | # SLURM_NNODES # number of nodes to use (only 1 supported) 46 | 47 | SLURM_NTASKS=4 # number of cores to use 48 | SLURM_NNODES=1 # number of nodes to use (only 1 supported) 49 | 50 | export CORES=4 51 | export MACHINE_NAME="$(hostname)" 52 | export SCRATCH_FOLDER=/tmp 53 | export BACKUPINTERVAL=540 # DEFAULT: 540, 9 minutes 54 | 55 | 56 | # folder where srunq.sh is located: 57 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 58 | 59 | ######################### 60 | # CLUSTER CUSTOMIZATION # 61 | ######################### 62 | 63 | echo "This is $MACHINE_NAME. 
Loading Modules:" 64 | case "$MACHINE_NAME" in 65 | "rackham") 66 | ml intel/17.1 intelmpi/17.1 python/2.7.11 67 | export QPATH="/home/fabst747/qsource/bin" 68 | export EXE="mpiexec -n $CORES -bind-to none $QPATH/Qdyn6p" 69 | ;; 70 | 71 | 72 | "abisko") 73 | if [ $CORES -ne 6 ] 74 | then 75 | echo "WARNING: You run a computation with \$CORES=$CORES on abisko." 76 | echo "This is discouraged. You should run it with 6 CORES." 77 | exit 78 | fi 79 | module -v load pgi/14.3-0 80 | module -v load openmpi/pgi/1.8.1 81 | export EXE="srun -n $CORES Qdyn6p" 82 | ;; 83 | 84 | 85 | "") 86 | echo "This job is not running in SNIC environment." 87 | ;; 88 | 89 | 90 | *) 91 | echo "THIS CLUSTER IS UNKNOWN!" 92 | echo "I will not add modules" 93 | 94 | export EXE="mpiexec -n $CORES $(which Qdyn6p)" 95 | ;; 96 | 97 | 98 | esac 99 | 100 | echo "Adjusted Q PATH! Executables:" 101 | echo $(/bin/ls $QPATH) 102 | 103 | if [ -z "$EXE" ] 104 | then 105 | echo "FATAL:" 106 | echo " You must configure the pcadee script properly." 107 | echo " \$EXE is not defined." 108 | exit 1 109 | fi 110 | 111 | 112 | 113 | 114 | ############## 115 | # INITIALIZE # 116 | ############## 117 | 118 | # slurm? 119 | if [ -z $SLURM_NTASKS ] 120 | then 121 | echo "Fatal: Need SLURM environment. Stop." 122 | exit 1 123 | fi 124 | 125 | # more than 2 nodes assignned? 126 | if [ $SLURM_NNODES -ne 1 ] 127 | then 128 | echo "FATAL: User Error" 129 | echo " This script can distribute jobs to up to 1 nodes, you asked for $SLURM_NNODES ." 130 | echo " The scrpit stops now, so you do not waste compute time. Bye!." 131 | exit 1 132 | fi 133 | 134 | # simpack-folder existing? 135 | if [ -z $1 ] 136 | then 137 | echo "Fatal: Missing Argument: Folder with Simpacks. Stop." 
138 | echo "Usage:" 139 | echo " $0 /path/to/folder/with/simpacks/" 140 | exit 1 141 | fi 142 | 143 | 144 | export MAXTASK=$(($SLURM_NTASKS/$CORES)) 145 | 146 | SIMPACK_FOLDER=$(readlink -f "$1") 147 | 148 | ################ 149 | # PRINT CONFIG # 150 | ################ 151 | 152 | echo "Simpack Folder $SIMPACK_FOLDER" 153 | 154 | echo "Will use $CORES per simpack." 155 | echo "Will run at most $MAXTASK simpacks at one time." 156 | echo "This will use $(($CORES*$MAXTASK)) cores from $SLURM_NTASKS" 157 | 158 | echo "" 159 | echo "" 160 | echo " Will Distribute Jobs and Start Work in 1 Second" 161 | echo " ===============================================" 162 | echo "" 163 | 164 | sleep 1 165 | 166 | 167 | find $SIMPACK_FOLDER -name "*.tar" | xargs -i --max-procs=$MAXTASK bash -c "echo {}; $DIR/srunq.sh {}; echo {}; exit" 168 | 169 | echo "" 170 | echo "" 171 | echo "" 172 | echo "No Simpacks left. Terminating after $SECONDS." 173 | 174 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | CADEE 0.9 2 | ========= 3 | 4 | CADEE the framework of Computer-Aided Directed Enzyme Evolution. 5 | 6 | # Installation 7 | 8 | ##System Requirements 9 | - Multicore CPU 10 | - Storage: 6-12 GB per mutant 11 | - *nix like OS (GNU/Linux, MacOSX) 12 | - Python Version 2.7 13 | - Fortran Compiler, C Compiler, OpenMPI Compiler, MPI Launcher, git, Open Babel, pip 14 | ``` 15 | sudo apt-get install gfortran gcc openmpi-bin mpich git openbabel python-pip 16 | ``` 17 | 18 | ##External Executables: 19 | While CADEE itself is free software, it requires non-free components. 20 | The MD engine that CADEE is relying on is Q6 which is free and opensource software. 21 | 22 | For mutations other than wild-type testing and alanine-scans, CADEE requires SCWRL4, 23 | which is also free for Non-Commerical usage. 
24 | - Q: 25 | https://github.com/qusers/Q6 26 | Download and compile Q. Optionally install to $PATH. 27 | - SCWRL4: 28 | Obtain a License for SCWRL4: (Free for Non-Commercial use) 29 | http://dunbrack.fccc.edu/scwrl4/license/index.html 30 | Download and install, make sure executable is in $PATH. 31 | 32 | Note: If you are not interested in the --libmut functionality of CADEE, you may place a dummy 33 | "Scwrl4" executable/script into your $PATH. This will obviously break the --libmut option. 34 | 35 | ##Download and Install CADEE: 36 | We recommend using git to install CADEE, so that future releases of CADEE are easily accessible. 37 | 38 | Here the suggested steps: 39 | 40 | Download CADEE: 41 | ``` 42 | cd $HOME 43 | mkdir -p Downloads # create a folder 44 | cd Downloads 45 | git clone https://github.com/kamerlinlab/cadee cadee # clone CADEE 46 | ``` 47 | 48 | Next, install CADEE: 49 | ``` 50 | cd $HOME/Downloads/cadee # cd into cloned folder 51 | python setup.py install --user # install for current user *ONLY* 52 | ``` 53 | 54 | 55 | 56 | ## Workflow 57 | # 1: Generate Inputs (simpacks) 58 | 1. Prerequisites: 59 | i) PDB file generated with qprep5 60 | ii) FEP file for/of reference/wild-type 61 | iii) qprep5-inputfile, you used to generate PDB file in i) 62 | iv) path to the folder with the libraries you used to generate the inputfiles. 63 | 64 | 2. Then, check if CADEE accepts your inputfiles: 65 | $ ./cadee.py wt.pdb wt.fep qprep5.inp ./libraries 66 | Probably you were asked to adjust your qprep.inp file, implement the changes. 67 | (CADEE needs absolute paths to your library, for example.) 68 | 69 | 3. Prepare and alanine scan for 16 mutants: 70 | $ ./cadee.py wt.pdb wt.fep qprep5.inp ./libraries --alascan --nummuts 16 71 | This created a folder (ala_scan) with subfolders containing the topology and fepfile: mutant.top, mutant.fep, qprep5.inp 72 | It will contain 16 subfolders, labelled in scheme 0XX_ALA, 1XX_ALA. 73 | 74 | 4. 
If you have SCWRL4 installed, you can also do arbitrary mutations, e.g. mutate residue 15 to glutamic acid: 75 | = requirements: SCWRL4 installed (see Installation Section) 76 | $ ./cadee.py wt.pdb wt.fep qprep5.inp ./libraries --libmut 15:E 77 | This creates a folder (libmut) with subfolders containing the topologies and fepfiles 78 | 79 | or even a saturation on 15 80 | $ ./cadee.py wt.pdb wt.fep qprep5.inp ./libraries --libmut 15:SATURATE 81 | 82 | The libmut argument is very powerful; other options include: 83 | --libmut 137:SATURATE (20AA) 84 | --libmut 137:POLAR (9AA) 85 | --libmut 137:NEGATIVE (2AA) 86 | --libmut 137:UNCHARGED (4AA) 87 | --libmut 137:SHRINK (variable) 88 | --libmut 137:'CGP' (3AA) 89 | --libmut 137:'DEKCPG' (6AA) 90 | -OR- 91 | --libmut 137:'DEKCPG' 138:'DEKCPG' (6AAx6AA=36AA) 92 | --libmut 137:'DEKCPG' 138:'DEKCPG' 139:SATURATE (6AAx6AAx20=720AA) 93 | 94 | # 2: Submission of Jobs 95 | (You might have to adjust the following instructions to fit the machine you are executing them on.) 96 | Next you have to actually run the generated simpacks. 97 | See also ensemble/submit.sh for an example script. 98 | 99 | # 3: Analysis of results 100 | When your data is ready, you can then analyse the cadee.db to generate either *.csv or *.html files. 101 | For *.csv: 102 | analyse/extract_to_csv_medium.py 103 | analyse/extract_to_csv_us.py 104 | For *.html 105 | analyse/analyse.py 106 | 107 | Other tools: 108 | analyse/fuse_dbs.py 109 | 110 | # Troubleshooting 111 | 112 | ## Manually installing mpi4py 1.3.1 113 | - mpi4py 1.3.1 (CADEE will try to install mpi4py) 114 | Download: https://pypi.python.org/pypi/mpi4py/1.3.1 115 | ``` 116 | wget -O mpi4py-1.3.1.tar.gz https://pypi.python.org/packages/26/b4/1a9678ec113b5c654477354169131c88be3f65e665d7de7c5ef306f2f2a5/mpi4py-1.3.1.tar.gz 117 | ``` 118 | If your machine supports special compilers, load one (e.g. intel): 119 | ``` 120 | module add intel/16.0.3 121 | ``` 122 | and then install mpi4py, e.g. 
123 | ``` 124 | tar xf mpi4py-1.3.1.tar.gz 125 | cd mpi4py-1.3.1 126 | python setup.py install --user 127 | ``` 128 | -------------------------------------------------------------------------------- /cadee/qscripts/qscripts_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 Miha Purg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
25 | # 26 | # 27 | # 28 | 29 | import sys 30 | import os 31 | import ConfigParser 32 | 33 | 34 | CFG_VERSION=6 # this should change when the config file format changes or something, then old config files will not work 35 | # absolute config path 36 | _CFG_FILE=os.path.abspath(os.path.join(os.path.dirname(__file__), "qscripts.cfg")) 37 | _CFG_FILE_DEFAULT=os.path.abspath(os.path.join(os.path.dirname(__file__), "lib", "qscripts.cfg.default")) 38 | 39 | 40 | 41 | class _QScriptsConfig(object): 42 | def __init__(self, cfgfile): 43 | 44 | self.cfgfile = cfgfile 45 | if not os.path.lexists(self.cfgfile): 46 | print "Configuration file '%s' not found. Please run 'qscripts_config.py'." % self.cfgfile 47 | sys.exit(1) 48 | 49 | self.config=ConfigParser.SafeConfigParser() 50 | 51 | try: 52 | self.config.read(self.cfgfile) 53 | except ConfigParser.ParsingError: 54 | print "Configuration file '%s' could not be read. Fix it or remove and run 'qscripts_config.py'." % self.cfgfile 55 | sys.exit(1) 56 | 57 | # check version of config file 58 | version = self.config.getint("other", "cfgversion") 59 | if version != CFG_VERSION: 60 | print "Your configuration file '%s' is outdated. Please remove it and run 'qscripts_config.py'." % self.cfgfile 61 | sys.exit(1) 62 | 63 | def get(self, section, option): 64 | try: 65 | return self.config.get(section, option) 66 | except (ConfigParser.NoSectionError, ConfigParser.NoOptionError) as e: 67 | print "Somehow your configuration file '%s' got messed up. 
Please fix it or remove it and rerun 'qscripts_config.py'" % self.cfgfile 68 | print "Details: %s" % str(e) 69 | sys.exit(1) 70 | 71 | def set(self, section, option, value): 72 | self.config.set(section, option, value) 73 | 74 | 75 | 76 | def get_exec_path(name): # Look up the executable with cadee.executables.exe.which(name), retry with name+'5', then print and return the path found. 77 | #paths=[] 78 | #for path in os.environ["PATH"].split(os.pathsep): 79 | # path=path.strip('"') 80 | # if not os.path.lexists(path): 81 | # continue 82 | # for ex in os.listdir(path): 83 | # if name in ex: 84 | # ex = os.path.join(path,ex) 85 | # if not ex in paths: 86 | # paths.append(ex) 87 | # 88 | #if not paths: 89 | # print "No '%s' executable was found in your PATH. Please add the path to qfep to the config file manually." % name 90 | #else: 91 | # print "These '%s' executables were found in your PATH. Choose the correct one or write the path.\n" % name 92 | # for i,path in enumerate(paths): 93 | # print " [%d] %s" % (i, path) 94 | # path="" 95 | # while not path: 96 | # try: 97 | # inp=raw_input("? ") 98 | # i=int(inp) 99 | # path=paths[i] 100 | # except ValueError: 101 | # if inp: 102 | # path=inp 103 | # except IndexError: 104 | # pass 105 | # return path 106 | #return "" 107 | import cadee.executables.exe as exe 108 | path = exe.which(name) 109 | if path is None: 110 | path = exe.which(name+'5') 111 | if path is None: 112 | print('Binary not found: ', name) # NOTE(review): this module uses print statements and has no print_function import, so under Python 2 this prints a tuple rather than the two values. 113 | raise (Exception, 'Please install %s ', name) # NOTE(review): the object raised is a tuple, not Exception(message); the formatted message never reaches the exception. Presumably raise Exception('Please install %s' % name) was meant -- confirm. 114 | print(path) 115 | return path 116 | 117 | 118 | def main(): 119 | if os.path.lexists(_CFG_FILE): 120 | print "Configuration file '%s' exists. Please remove it before running this script." 
% _CFG_FILE 121 | sys.exit(1) 122 | 123 | QScriptsConfig = _QScriptsConfig(_CFG_FILE_DEFAULT) 124 | print "Creating a new configuration file...\n" 125 | 126 | print "Qfep executable:" 127 | qfep_path=get_exec_path("qfep") 128 | QScriptsConfig.set("qexec", "qfep", qfep_path) 129 | print "Qcalc executable (version which supports group contribution calculations):" 130 | qcalc_path=get_exec_path("qcalc") 131 | QScriptsConfig.set("qexec", "qcalc", qcalc_path) 132 | 133 | QScriptsConfig.config.write(open(_CFG_FILE, "w+")) 134 | print "\n\nThe following config file was created with some default values:\n%s" % _CFG_FILE 135 | 136 | 137 | if __name__ == "__main__" or not os.path.isfile(_CFG_FILE): 138 | try: 139 | main() 140 | except KeyboardInterrupt: 141 | print "\nCtrl-C detected. Quitting..." 142 | sys.exit(1) 143 | 144 | QScriptsConfig = _QScriptsConfig(_CFG_FILE) 145 | 146 | -------------------------------------------------------------------------------- /cadee/prep/test_tools.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | This are unittests for tools.py 4 | 5 | Author: {0} ({1}) 6 | 7 | This program is part of CADEE, the framework for 8 | Computer-Aided Directed Evolution of Enzymes. 
9 | """ 10 | 11 | 12 | from __future__ import print_function 13 | import unittest 14 | import tools as tools 15 | import os 16 | 17 | __author__ = "Beat Amrein" 18 | __email__ = "beat.amrein@gmail.com" 19 | 20 | class MyToolsTests(unittest.TestCase): 21 | waterpdb = """REMARK THIS IS A TEST WITH WATER ONLY 22 | ATOM 5341 O HOH 466 30.350 117.099 41.362 23 | ATOM 5342 H1 HOH 466 30.351 116.208 41.710 24 | ATOM 5343 H2 HOH 466 30.035 117.006 40.463 25 | ATOM 5344 O HOH 467 30.350 120.201 22.750 26 | ATOM 5345 H1 HOH 467 30.744 119.572 23.353 27 | ATOM 5346 H2 HOH 467 29.482 120.374 23.115 28 | ATOM 5347 O H2O 468 30.350 120.201 32.056 29 | ATOM 5348 H1 H2O 468 30.804 119.606 31.460 30 | ATOM 5349 H2 H2O 468 29.961 119.629 32.717 31 | ATOM 5350 O WAT 469 30.350 120.201 35.158 32 | ATOM 5351 H1 WAT 469 30.440 120.765 35.926 33 | ATOM 5352 H2 WAT 469 29.625 120.584 34.664 34 | ATOM 5353 O HOH 470 33.452 89.181 35.158 35 | ATOM 5354 H1 HOH 470 33.864 89.527 35.950 36 | """ 37 | 38 | def test_isint(self): 39 | self.assertFalse(tools.isint('a')) 40 | self.assertFalse(tools.isint('0.3')) 41 | self.assertTrue(tools.isint('3')) 42 | self.assertTrue(tools.isint('-1')) 43 | 44 | def test_isnum(self): 45 | self.assertTrue(tools.isnum('0.1')) 46 | self.assertTrue(tools.isnum('-0.1')) 47 | self.assertTrue(tools.isnum('0.0e10')) 48 | self.assertTrue(tools.isnum('5')) 49 | self.assertFalse(tools.isnum('inf')) 50 | self.assertFalse(tools.isnum('nan')) 51 | self.assertFalse(tools.isnum('ab')) 52 | 53 | def test_log_exists(self): 54 | import os 55 | fil = '.test.logfile.temporary' 56 | with open(fil, 'w') as f: 57 | f.write('\n') 58 | self.assertTrue(tools.bool_log_exists(fil)) 59 | os.remove(fil) 60 | self.assertFalse(tools.bool_log_exists(fil)) 61 | 62 | def test_pdb_water_only(self): 63 | import os 64 | fil = '.test.logfile.temporary.pdb' 65 | with open(fil, 'w') as f: 66 | f.write(MyToolsTests.waterpdb) 67 | self.assertTrue(tools.is_pdb_water_only(fil)) 68 | with open(fil, 'a') as 
f: 69 | f.write("""ATOM 5342 O ASP 467 30.350 117.099 41.362 70 | """) 71 | self.assertFalse(tools.is_pdb_water_only(fil)) 72 | os.remove(fil) 73 | 74 | def test_euklid_dist(self): 75 | dot0 = [0, 0, 0] 76 | dot1 = [0, 0, 1] 77 | dot2 = [0, 0, 2] 78 | self.assertEqual(tools.euklid_dist(dot0, dot1), 1) 79 | self.assertEqual(tools.euklid_dist(dot0, dot2), 2) 80 | self.assertEqual(tools.euklid_dist(dot1, dot2), 1) 81 | 82 | def test_get_atomnumber(self): 83 | fil = '.test.logfile.temporary.pdb' 84 | with open(fil, 'w') as f: 85 | f.write(MyToolsTests.waterpdb) 86 | self.assertEqual(tools.get_atomnumber(fil, 'O', 'HOH', 470), 5353) 87 | with open(fil, 'a') as f: 88 | f.write("""ATOM 5362 O ASP 467 30.350 117.099 41.362 89 | """) 90 | f.write("""HETATM 5363 O ASP 468 30.350 117.099 41.362 91 | """) 92 | f.write("""ATOM 5364 O ASP 469 30.350 117.099 41.362 93 | """) 94 | self.assertEqual(tools.get_atomnumber(fil, 'O', 'ASP', 467), 5362) 95 | self.assertEqual(tools.get_atomnumber(fil, 'O', 'ASP', 468), 5363) 96 | 97 | # INEXISTENT RECORD 98 | with self.assertRaisesRegexp(Exception, "Atom.*not found in.*"): 99 | tools.get_atomnumber(fil, 'O', 'HIS', 466) 100 | 101 | # NOT ATOM RECORD 102 | with open(fil, 'a') as f: 103 | f.write("""TER 5362 O ASP 666 104 | """) 105 | with self.assertRaisesRegexp(Exception, "Atom.*not found in.*"): 106 | tools.get_atomnumber(fil, 'O', 'ASP', 666) 107 | 108 | os.remove(fil) 109 | 110 | def test_rename_pdb_res(self): 111 | residue = ['ATOM 5341 O HOH 466 30.350 117.099 41.362', 112 | 'ATOM 5342 H1 HOH 466 30.351 116.208 41.710', 113 | 'ATOM 5343 H2 HOH 466 30.035 117.006 40.463'] 114 | renres = ['ATOM 5341 O H2O 466 30.350 117.099 41.362', 115 | 'ATOM 5342 H1 H2O 466 30.351 116.208 41.710', 116 | 'ATOM 5343 H2 H2O 466 30.035 117.006 40.463'] 117 | 118 | self.assertEqual(tools.rename_pdb_res(residue, 'H2O'), renres) 119 | 120 | with self.assertRaisesRegexp(Exception, 'PDB-Residue must be exactly 3 characters.'): # NOPEP8 121 | 
tools.rename_pdb_res(residue, 'WATER') 122 | 123 | def test_check_qprep_pdb(self): 124 | line1 = 'ATOM 5342 H1 HOH 466 30.351 116.208 41.710' 125 | line2 = 'HETATM 5343 H1 HOH 466 30.351 116.208 41.710' 126 | line3 = 'TER 5344 H1 HOH 466 30.351 116.208 41.710' 127 | line4 = 'GAP 5344 H1 HOH 466 30.351 116.208 41.710' 128 | self.assertTrue(tools.check_qprep_pdb(line1)) 129 | self.assertTrue(tools.check_qprep_pdb(line2)) 130 | self.assertFalse(tools.check_qprep_pdb(line3)) 131 | self.assertFalse(tools.check_qprep_pdb(line4)) 132 | 133 | line5 = 'ATOM 5342 H1 HOH 466 30.351 116.208 41.710 ' 134 | with self.assertRaisesRegexp(Exception, "Not a Qprep5-ed pdbfile.*"): 135 | tools.check_qprep_pdb(line5) 136 | line6 = 'HETATM 5343 H1 HOH 466 30.351 116.208 41.710 blabalba' # NOPEP8 137 | with self.assertRaisesRegexp(Exception, "Not a Qprep5-ed pdbfile.*"): 138 | tools.check_qprep_pdb(line6) 139 | 140 | unittest.main() # NOTE(review): indentation is not visible here; if this call is at module level it runs the tests on every import of the module -- confirm whether an `if __name__ == "__main__":` guard is wanted. 141 | -------------------------------------------------------------------------------- /cadee/cadee: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function # NOTE(review): this import comes before the string literal below, so that string is an ordinary expression statement and not the module docstring. 3 | 4 | """CADEE Command Line Interface. 5 | 6 | Use this script to prepare, run and analyze CADEE simulations. 7 | 8 | Author: {0} ({1}) 9 | 10 | This program is part of CADEE, the framework for 11 | Computer-Aided Directed Evolution of Enzymes. 
12 | """ 13 | 14 | __author__ = "Beat Amrein" 15 | __email__ = "beat.amrein@gmail.com" 16 | 17 | import sys 18 | import os 19 | import cadee.dyn.mpi as mpi 20 | if mpi.mpi: 21 | if mpi.rank == 0: 22 | rank0 = True 23 | else: 24 | rank0 = False 25 | else: 26 | rank0=True 27 | 28 | # checking if qscripts is configured, or configure it now: 29 | qscripts = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'qscripts') 30 | if not os.path.isfile(os.path.join(qscripts,'qscripts.cfg')): 31 | import cadee.qscripts.qscripts_config 32 | 33 | 34 | def greeter(): 35 | if rank0: 36 | import cadee.version as version 37 | print(""" 38 | 39 | ___ _ ___ ___ ___ 40 | / __| /_\ | \| __| __| Computer-Aided Directed 41 | | (__ / _ \| |) | _|| _| Enzyme Evolution 42 | \___/_/ \_\___/|___|___| 43 | v. {0} 44 | 45 | (C) Copyright 2017 Beat Anton Amrein & Shina Caroline Lynn Kamerlin 46 | """.format(version.__version__)) 47 | 48 | 49 | 50 | greeter() 51 | 52 | 53 | def usage(exitcode=1): 54 | if rank0: 55 | print() 56 | print() 57 | print('Usage:') 58 | print(' cadee [ prep(p) | dyn(d) | ana(a) | tool(t) ]') 59 | print() 60 | print(' Multi Core Tasks:') 61 | print(' mpirun -n X cadee dyn') 62 | print(' mpiexec -n X cadee dyn') 63 | print(' X == Number of cores to use; 2+.') 64 | sys.exit(exitcode) 65 | 66 | if len(sys.argv) < 2: 67 | usage() 68 | 69 | cmd = sys.argv[1].lower() 70 | sys.argv.remove(cmd) # NOTE(review): this removes the lowercased string; if the command was typed with capital letters it is not present in sys.argv and list.remove raises ValueError. Removing by position (index 1) would avoid that -- confirm intent. 71 | 72 | if cmd == 'dyn' or cmd == 'd': 73 | import cadee.dyn.mpi as mpi 74 | if mpi.size < 2: 75 | print() 76 | print() 77 | print('Error: X must be greater or equal 2') 78 | usage() 79 | 80 | import cadee.dyn.ensemble as ens 81 | ens.parse_args() 82 | 83 | elif cmd == 'dynp' or cmd == 'dp': 84 | print('') 85 | print('This information is for UNIX power users and admins.') 86 | print('++++++++++++++++++++++++++++++++++++++++++++++++++++') 87 | print('') 88 | print('') 89 | print('A parallel (pcadee) laucher script exists, to run cadee with Qdyn6*p*.') 90 | print() 91 | print('Note: 
This is always less efficient than running "dyn".') 92 | print() 93 | print('These tools assume a SNIC SLURM environment. And needs adjustments to neatly run on your machine.') 94 | print('') 95 | print('The scripts consist of a SLURM submission script $CADEE_DIR/cadee/tools/pcadee.sh') 96 | print('And a "task" script $CADEE_DIR/cadee/tools/srunq.sh') 97 | print() 98 | print(" To run a simulation, adjust pcadee.sh to your needs and make sure that srunq.sh is in your $PATH.") 99 | print() 100 | print(' Then run or submit pcadee (bash pcadee.sh or sbatch pcadee.sh).') 101 | usage() 102 | 103 | elif cmd == 'prep' or cmd == 'p': 104 | import cadee.prep.prep as prep 105 | prep.main() 106 | 107 | elif cmd == 'ana' or cmd == 'analyse' or cmd == 'analyze' or cmd == 'a': 108 | 109 | if len(sys.argv) < 2: 110 | usage() 111 | 112 | 113 | def ana_usage(): 114 | if rank0: 115 | print() 116 | print() 117 | print('Analysis Options:') 118 | print(' cadee ana [ cat | alanize | csv | csv_exo ]') 119 | print() 120 | print(' cat:') 121 | print(' Description: Utility to conCATenate two or more cadee.db files.') 122 | print(' Example: cadee ana cat cadee1.db cadee2.db ') 123 | print(' (Will create "concat_cadee.db")') 124 | print('') 125 | print(' alanize: ____ ___') 126 | print(' Description: ALANine scan analIZEer: Visual alanine scan analysis.') 127 | print(' Example: cadee ana alanize') 128 | print(' (Will create "index.html" file. 
Open with a browser.)') 129 | print('') 130 | print(' csv:') 131 | print(' Description: Export activation barrier to a csv file.') 132 | print(' Example: cadee ana csv cadee.db dGstar.csv') 133 | print(' (Will create "dGstar.csv" file)') 134 | print('') 135 | print(' csv_exo:') 136 | print(' Description: Export gibbs free energy a csv file.') 137 | print(' Example: cadee ana csv cadee.db ddG.csv') # NOTE(review): this is the csv_exo help entry but the example invokes 'csv'; presumably 'cadee ana csv_exo' was meant -- confirm. 138 | print(' (Will create "ddG.csv" file)') 139 | print('') 140 | 141 | # print(' cadee [ dyn(d) | dynp(dp) | prep(p) | analyse(a) | tool(t) ]') 142 | sys.exit(1) 143 | 144 | subcmd = sys.argv[1].lower() 145 | 146 | import os 147 | 148 | if len(sys.argv) < 2: # NOTE(review): sys.argv[1] was already read for subcmd above, so a too-short argv fails there first and this check cannot trigger. 149 | ana_usage() 150 | 151 | fullcmd = [cmd + ' ' + subcmd] 152 | fullcmd.extend(sys.argv[2:]) 153 | 154 | if subcmd == 'cat': 155 | import cadee.ana.cat_cadee_dbs as cat 156 | cat.main(sys.argv[2:]) 157 | elif subcmd == 'csv_exo': 158 | import cadee.ana.export_to_csv as csv 159 | csv.main(fullcmd, 'exo') 160 | elif subcmd == 'csv': 161 | import cadee.ana.export_to_csv as csv 162 | csv.main(fullcmd) 163 | elif subcmd == 'alanize': 164 | import cadee.ana.alanize as alanyze 165 | alanyze.main(sys.argv[2]) # NOTE(review): raises IndexError when no argument follows 'alanize', which is the form the help text shows ('cadee ana alanize') -- confirm whether the database path is required. 166 | else: 167 | ana_usage() 168 | 169 | 170 | 171 | elif cmd == 'tool' or cmd == 't': 172 | import cadee.tools.tools as tools 173 | tools.main(sys.argv, 'cadee tool') 174 | 175 | elif cmd == '--help': 176 | usage(0) 177 | 178 | else: 179 | print('Unknown command: "{0}".'.format(cmd)) 180 | usage() 181 | -------------------------------------------------------------------------------- /example/examples.sh: -------------------------------------------------------------------------------- 1 | i=1 2 | 3 | TESTS=1 4 | EDU=1 5 | SCRIPTS=1 6 | DELETE_FILES=0 7 | 8 | set -e 9 | 10 | export CADEE_DIR="$HOME/Downloads/cadee" 11 | 12 | if [[ $DELETE_FILES -eq 1 ]] 13 | then 14 | echo "Removing old files..." 
15 | /bin/rm -fr "$CADEE_DIR" 16 | /bin/rm -fr "$HOME/global/cadee_tutorial" 17 | /bin/rm -fr "$HOME/global/cadee_tutorial_wallclock" 18 | /bin/rm -fr "$HOME/global/pedagogical_example" 19 | fi 20 | 21 | function increment(){ 22 | echo -e "========================================================================== 23 | \n\n\n\n 24 | 25 | TIME: $SECONDS 26 | ___________________________________________________________________________ 27 | NEXT SNIPPET: $i 28 | ___________________________________________________________________________ 29 | \n\n\n\n\n 30 | ==========================================================================" 31 | let i+=1 32 | } 33 | 34 | if [ $TESTS -eq 1 ] 35 | then 36 | 37 | increment 38 | 39 | # Code Input Snippet (1) 40 | /bin/bash --version 41 | 42 | increment 43 | 44 | 45 | # Code Input Snippet (2) 46 | sudo apt-get install gfortran openmpi-bin git openbabel 47 | sudo apt-get install mpich gcc python2.7 python-pip 48 | 49 | increment 50 | 51 | # Code Input Snippet (3) 52 | cd $HOME 53 | mkdir -p Downloads 54 | cd $HOME/Downloads 55 | git clone https://github.com/kamerlinlab/cadee cadee 56 | cd $HOME/Downloads/cadee 57 | export CADEE_DIR="$PWD" 58 | 59 | 60 | git checkout 0.8.5 61 | increment 62 | 63 | # Code Input Snippet (4) 64 | python setup.py install --user 65 | 66 | 67 | 68 | increment 69 | 70 | # Code Input Snippet (5) 71 | cadee --help 72 | 73 | increment 74 | 75 | # Code Input Snippet (6) 76 | mkdir testing_example 77 | cd testing_example 78 | cp -r $CADEE_DIR/example/* . 79 | cadee prep wt.pdb wt.fep wt.qpinp ./libraries/ --template $CADEE_DIR/simpack_templates/simpack_template_0.05ns_15ps_2.5ps_32.5ps.tar.bz2 80 | 81 | 82 | increment 83 | 84 | # Code Input Snippet (7) 85 | set +e 86 | cadee prep wt.pdb wt.fep wt.new.qpinp ./libraries/ 87 | if [[ $? -eq 0 ]] 88 | then 89 | echo "ERROR with SNIPPET 7: Expected Non-Zero Exit-Code." 
90 | exit 91 | fi 92 | set -e 93 | 94 | 95 | increment 96 | 97 | # Code Input Snippet (8) 98 | mkdir -p $HOME/global/cadee_tutorial 99 | cp -r $CADEE_DIR/testing_example/* $HOME/global/cadee_tutorial 100 | # you may need to adjust mpirun 101 | mpirun.mpich -np 5 cadee dyn $HOME/global/cadee_tutorial/wt | tee cadee.log 102 | 103 | i=8 104 | 105 | increment 106 | 107 | # Code Input Snippet (9) 108 | mkdir -p $HOME/global/cadee_tutorial_wallclock 109 | cp -r $CADEE_DIR/testing_example/* $HOME/global/cadee_tutorial_wallclock 110 | $CADEE_DIR/cadee/tools/pcadee.sh $HOME/global/cadee_tutorial_wallclock/wt 111 | 112 | fi 113 | 114 | 115 | i=9 116 | if [ $EDU -eq 1 ] 117 | then 118 | 119 | increment 120 | 121 | 122 | # Code Input Snippet (10) 123 | cp -r $CADEE_DIR/example $CADEE_DIR/pedagogical_example 124 | cd $CADEE_DIR/pedagogical_example 125 | cadee prep wt.pdb wt.fep wt.qpinp libraries --alascan --nummuts 48 126 | 127 | 128 | 129 | increment 130 | 131 | # Code Input Snippet (11) 132 | cp -r $CADEE_DIR/pedagogical_example $HOME/global 133 | set +e 134 | mpirun.mpich -np 5 cadee dyn $HOME/global/pedagogical_example/ala_scan 135 | set -e 136 | 137 | increment 138 | 139 | # Code Input Snippet (12) 140 | mpirun.mpich -n 2 cadee dyn $PWD --hij 60.0 --alpha 229.0 --force 141 | 142 | 143 | increment 144 | 145 | # Code Input Snippet (13) 146 | cadee ana alanize cadee.db 147 | firefox index.html 148 | 149 | 150 | increment 151 | 152 | # Code Input Snippet (14) 153 | cadee ana csv cadee.db activation_barriers.csv #dG* 154 | cadee ana csv_exo cadee.db free_energy.csv #ddG 155 | /bin/ls 156 | 157 | increment 158 | 159 | # Code Input Snippet (15) 160 | set +e 161 | cadee ana cat cadee.db cadee.db1 162 | set -e 163 | /bin/ls *.db 164 | 165 | increment 166 | 167 | # Code Input Snippet (16) 168 | cd $CADEE_DIR/pedagogical_example 169 | cadee prep wt.pdb wt.fep wt.new.qpinp libraries --libmut 92:SATURATE --libmut 163:SATURATE --libmut 171:SATURATE 170 | 171 | increment 172 | 173 | # 
Code Input Snippet (17) 174 | mv libmut point_saturation 175 | cadee prep wt.pdb wt.fep wt.new.qpinp libraries --libmut 92:'AGH' 163:'SPHECA' 171:'WSRLD' 176 | 177 | fi 178 | 179 | 180 | i=17 181 | 182 | if [[ $SCRIPTS -eq 1 ]] 183 | then 184 | 185 | set +e 186 | 187 | increment 188 | 189 | # Code Input Snippet (18) 190 | ok=1 191 | msg="\n\n\n\n\n" 192 | python -c 'import cadee' 193 | if [[ $? -ne 0 ]] 194 | then 195 | msg="$msg\nUnable to load cadee module!" 196 | ok=0 197 | fi 198 | cadee > /dev/null 199 | if [[ $? -eq 127 ]] 200 | then 201 | msg="$msg\nUnable to locate 'cadee' in \$PATH." 202 | ok=0 203 | fi 204 | if [[ $ok -eq 1 ]] 205 | then 206 | msg="$msg\nCADEE is installed." 207 | else 208 | msg="$msg\nCADEE is *NOT* installed! 209 | DIAGNOSIS:" 210 | fi 211 | echo -e "\n\n$msg" 212 | 213 | increment 214 | 215 | # Code Input Snippet (19) 216 | # ONLY FOR DEBIAN/UBUNTU 217 | echo $PATH | grep "$HOME/.local/bin" || echo 'Please fix $PATH.' 218 | 219 | increment 220 | 221 | # Code Input Snippet (20) 222 | # Ubuntu / Debian ONLY 223 | if [ -d "$HOME/.local/bin" ] ; then 224 | ok=0 225 | echo $PATH | grep -q "$HOME/.local/bin:" && ok=1 226 | echo $PATH | grep -q ":$HOME/.local/bin" && ok=1 227 | if [ $ok -eq 0 ] 228 | then 229 | echo 'export PATH="$HOME/.local/bin:$PATH"' >> $HOME/.profile 230 | source $HOME/.profile 231 | echo 'Added $HOME/.local/bin to .profile.' 232 | else 233 | echo 'Stop: $HOME/.local/bin is already in your $PATH.' 234 | fi 235 | else 236 | echo 'Stop: $HOME/.local/bin is not a directory. ' 237 | fi 238 | 239 | increment 240 | 241 | # Code Input Snippet (20) 242 | # Ubuntu / Debian ONLY 243 | if [ -d "$HOME/.local/bin" ] ; then 244 | ok=0 245 | echo $PATH | grep -q "$HOME/.local/bin:" && ok=1 246 | echo $PATH | grep -q ":$HOME/.local/bin" && ok=1 247 | if [ $ok -eq 0 ] 248 | then 249 | echo 'export PATH="$HOME/.local/bin:$PATH"' >> $HOME/.profile 250 | source $HOME/.profile 251 | echo 'Added $HOME/.local/bin to .profile.' 
252 | else 253 | echo 'Stop: $HOME/.local/bin is already in your $PATH.' 254 | fi 255 | else 256 | echo 'Stop: $HOME/.local/bin is not a directory. ' 257 | fi 258 | 259 | increment 260 | 261 | # Code Input Snippet (21) 262 | cadee tool repair_simpack wt_0.tar 263 | 264 | fi 265 | -------------------------------------------------------------------------------- /cadee/prep/fep.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Module providing methods to create a new FEPfile 4 | 5 | Author: {0} ({1}) 6 | 7 | This module is part of CADEE, the framework for 8 | Computer-Aided Directed Evolution of Enzymes. 9 | """ 10 | 11 | 12 | from __future__ import print_function 13 | 14 | import logging 15 | import sys 16 | 17 | from tools import get_pdb_atom, check_qprep_pdb, read_pdbatoms 18 | 19 | __author__ = "Beat Amrein" 20 | __email__ = "beat.amrein@gmail.com" 21 | 22 | logger = logging.getLogger('prep.fep') 23 | 24 | # ENUMS (easy reading) 25 | ATOMS = 1 26 | BONDS = 2 27 | ANGLES = 3 28 | DIHEDRALS = 4 29 | IMPROPERS = 5 30 | 31 | 32 | def get_section(line): 33 | """Assign section by reading FEP-File line with '['""" 34 | if "[atoms]" in line.lower(): 35 | return ATOMS 36 | elif "[change_bonds]" in line.lower(): 37 | return BONDS 38 | elif "[change_angles]" in line.lower(): 39 | return ANGLES 40 | elif "[change_torsions]" in line.lower(): 41 | return DIHEDRALS 42 | elif "[change_impropers]" in line.lower(): 43 | return IMPROPERS 44 | else: 45 | return None 46 | 47 | 48 | def _rewrite_fep_section(wtlist, mutdict, line, output, section): 49 | """Rewrite FEPFILE line 50 | @param wtlist: list of atoms in wtpdb 51 | @param mutdict: dict of atoms in mutantpdb 52 | @param line: line of wtfep 53 | @param output: file or None, used to print output to 54 | @param section: enum (integer) 55 | """ 56 | # strip off comment in line 57 | if "!" 
in line[1:]: 58 | (line, comment) = line.split("!", 1) 59 | else: 60 | comment = "" 61 | 62 | parts = line.split() 63 | if section == ATOMS: 64 | try: 65 | parts[1] = mutdict[wtlist[int(parts[1])]] # map the wild-type atom number to its atom code, then to the mutant atom number 66 | except KeyError: # NOTE(review): a KeyError here comes from the mutdict lookup (atoms of the mutant pdb), yet the message below names the wild-type pdbfile; the KeyError/IndexError messages look swapped -- confirm. 67 | logger.error('FEPFile/PDBFile missmatch:') 68 | logger.error('Could not find %s in wild-type pdbfile!', 69 | wtlist[int(parts[1])]) 70 | raise 71 | except IndexError: # NOTE(review): wtlist is the list built from the wild-type pdb, so an out-of-range atom number concerns the wild-type file rather than the mutated one named below -- confirm. 72 | logger.error('FEPFile/PDBFile missmatch:') 73 | logger.error('Could not find atomnr %s in mutated pdbfile.', 74 | int(parts[1])) 75 | raise 76 | except ValueError: 77 | logger.error('FEPFile/PDBFile missmatch:') 78 | logger.error('An error happened, (%s is not a qatom-number), while reading FEPfile-line: %s', parts[1], line) # NOPEP8 79 | raise 80 | except Exception: 81 | logger.error('FEPFile/PDBFile missmatch:') 82 | logger.error('Could not look up FEP atoms: %s, %s, %s, %s', 83 | parts, 84 | parts[1], 85 | int(parts[1]), 86 | wtlist[int(parts[1])]) 87 | raise 88 | 89 | elif section == BONDS: 90 | parts[0] = mutdict[wtlist[int(parts[0])]] 91 | parts[1] = mutdict[wtlist[int(parts[1])]] 92 | 93 | elif section == ANGLES: 94 | parts[0] = mutdict[wtlist[int(parts[0])]] 95 | parts[1] = mutdict[wtlist[int(parts[1])]] 96 | parts[2] = mutdict[wtlist[int(parts[2])]] 97 | 98 | elif section == DIHEDRALS or section == IMPROPERS: 99 | parts[0] = mutdict[wtlist[int(parts[0])]] 100 | parts[1] = mutdict[wtlist[int(parts[1])]] 101 | parts[2] = mutdict[wtlist[int(parts[2])]] 102 | parts[3] = mutdict[wtlist[int(parts[3])]] 103 | 104 | for part in parts: 105 | print(str(part), end="\t", file=output) 106 | 107 | if comment != "": 108 | print("!", end="", file=output) 109 | 110 | print(comment, file=output) 111 | 112 | 113 | def create_fep(wtpdb, wtfep, mutpdb, outfep=None): 114 | """Create FEP file with wild-type pdb, wild-type fep, and mutant pdb. 115 | NOTE: Both wtpdb and mutpdb must be output of Qprep5. 116 | 117 | If outfep is None, the fepfile is printed to stdout. 
118 | """ 119 | 120 | wtlist = [0] # index 0 is a placeholder so that list positions equal the 1-based atom numbers verified in the loop below 121 | mutdict = {} 122 | 123 | # open output file 124 | if outfep is None: 125 | output = None # print(..., file=None) falls back to sys.stdout 126 | else: 127 | output = open(outfep, "w") # NOTE(review): this handle is not closed anywhere in the visible function body. 128 | 129 | # parse wt-pdb (the starting pdb) 130 | i = 0 131 | for line in read_pdbatoms(wtpdb): 132 | check_qprep_pdb(line) 133 | anum, code = get_pdb_atom(line) 134 | i += 1 135 | if i != anum: 136 | raise Exception("Out of sync: wt-pdb atom numbers are out of order.") # NOPEP8 137 | wtlist.append(code) 138 | 139 | # parse mutant pdb 140 | for line in read_pdbatoms(mutpdb): 141 | check_qprep_pdb(line) 142 | anum, code = get_pdb_atom(line) 143 | mutdict.update({code: anum}) 144 | 145 | # Parse and Rewrite the fep file. 146 | section = None 147 | for line in open(wtfep): # NOTE(review): the input file is opened without a with-block and is never closed explicitly. 148 | 149 | line = line[:-1] # strip off \n (NOTE(review): also drops a real character when the last line has no trailing newline) 150 | line = line.replace('#', '!') # unify comment marker: every '#' becomes '!' 151 | 152 | if "[" in line: 153 | section = get_section(line) 154 | 155 | if len(line.split()) < 2: 156 | print(line, file=output) 157 | continue 158 | 159 | if line[0] == "!": 160 | print(line, file=output) 161 | continue 162 | 163 | if section is None: 164 | print(line, file=output) 165 | else: 166 | try: 167 | _rewrite_fep_section(wtlist, mutdict, line, output, section) 168 | except KeyError as err: 169 | logger.error('Error creating the (mutated) fepfile for %s', mutpdb) # NOPEP8 170 | logger.error('CADEE was unable to find the atom named "%s"', err.message) # NOPEP8 -- NOTE(review): the .message attribute exists on Python 2 exceptions only; on Python 3 this line raises AttributeError. 171 | logger.error(' Please fix your fepfile and rerun.') # NOPEP8 172 | raise 173 | 174 | 175 | if __name__ == "__main__": 176 | # Parse Command Line 177 | 178 | def usage(): 179 | """Print Usage and Exit""" 180 | print('') 181 | print('Usage:') 182 | print(' '+sys.argv[0]+' qprep-wt.pdb wt.fep qprep-mutant.pdb [mutant.fep]') # NOPEP8 183 | print(' Optional:') 184 | print(' [mutant.fep] is optional. 
__author__ = "Beat Amrein"
__email__ = "beat.amrein@gmail.com"

logger = logging.getLogger('prep.genseqs')


class MyGenseqsTests(unittest.TestCase):
    """Unit tests for ``genseqs.genseq2``.

    The mutation spec passed to ``genseq2`` is a list of
    ``(resid, [one-letter codes])`` tuples.  The tests below expect residue
    ids to be 1-based (id 0 must raise ``ValueError``) and the wild-type
    sequence to be part of the returned list.
    """

    # The twenty one-letter amino-acid codes, used as a 20-residue wild type.
    ALL = ['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K',
           'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']

    def test_genseq2_empty(self):
        """no mutations requested: only the wild type is returned"""
        first = [MyGenseqsTests.ALL]
        second = genseqs.genseq2(MyGenseqsTests.ALL, [])
        self.assertEqual(first, second)

    def test_genseq2_mutate0(self):
        """try: mutate resid0 (DOES NOT EXIST)"""
        with self.assertRaises(ValueError):
            genseqs.genseq2(MyGenseqsTests.ALL, [(0, list('ARN'))])

    def test_genseq2_mutate1(self):
        """mutate resid1 to R"""
        mutant = MyGenseqsTests.ALL[:]
        mutant[0] = 'R'
        first = [MyGenseqsTests.ALL, mutant]
        second = genseqs.genseq2(MyGenseqsTests.ALL, [(1, ['R'])])
        self.assertListEqual(first, second)

    def test_genseq2_mutate2(self):
        """only single-letter amino acids are accepted"""
        with self.assertRaises(ValueError):
            genseqs.genseq2(MyGenseqsTests.ALL, [(1, ['RS'])])

    def test_genseq2_mutate3(self):
        """introduce 2 mutations on 1 resid"""
        a = MyGenseqsTests.ALL[:]
        b = a[:]
        a[0] = 'R'
        b[0] = 'S'
        first = [MyGenseqsTests.ALL, a, b]
        second = genseqs.genseq2(MyGenseqsTests.ALL, [(1, ['R', 'S'])])
        self.assertListEqual(first, second)

    def test_genseq2_mutate4(self):
        """introduce 1 mutation on each of 2 resids"""
        wt = MyGenseqsTests.ALL[0:3]
        a = wt[:]
        b = a[:]
        a[0] = 'T'
        b[1] = 'S'
        c = a[:]
        c[1] = 'S'
        first = [wt, b, a, c]
        second = genseqs.genseq2(wt, [(1, ['T']), (2, ['S'])])
        self.assertListEqual(first, second)

    def test_genseq2_mutate5(self):
        """introduce 2 mutations on each of 2 resids (order matters)"""
        self.maxDiff = None
        wt = list('ARN')
        first = [wt] + [list(seq) for seq in (
            'ASN', 'AVN', 'TRN', 'QRN', 'TSN', 'QSN', 'TVN', 'QVN')]
        second = genseqs.genseq2(wt, [(1, ['T', 'Q']), (2, ['S', 'V'])])

        self.assertListEqual(first, second)

    def test_genseq2_mutate6(self):
        """2 mutations on each of 2 resids, and 1 mutation on a third"""
        self.maxDiff = None
        wt = list('ARN')
        first = [wt] + [list(seq) for seq in (
            'ASN', 'AVN', 'TRN', 'QRN', 'TSN', 'QSN', 'TVN', 'QVN',
            'ARW', 'ASW', 'AVW', 'TRW', 'QRW', 'TSW', 'QSW', 'TVW', 'QVW')]

        second = genseqs.genseq2(wt, [(1, ['T', 'Q']),
                                      (2, ['S', 'V']),
                                      (3, ['W'])])

        # Only the set of sequences is checked here, not their order.
        self.assertListEqual(sorted(first), sorted(second))

    def test_genseq2_mutate7(self):
        """2 mutations on each of 3 resids"""
        self.maxDiff = None
        wt = list('ARN')
        first = [wt] + [list(seq) for seq in (
            'ASN', 'AVN', 'TRN', 'QRN', 'TSN', 'QSN', 'TVN', 'QVN',
            'ARW', 'ASW', 'AVW', 'TRW', 'QRW', 'TSW', 'QSW', 'TVW', 'QVW',
            'ARL', 'ASL', 'AVL', 'TRL', 'QRL', 'TSL', 'QSL', 'TVL', 'QVL')]

        second = genseqs.genseq2(wt, [(1, ['T', 'Q']),
                                      (2, ['S', 'V']),
                                      (3, ['W', 'L'])])

        # Only the set of sequences is checked here, not their order.
        self.assertListEqual(sorted(first), sorted(second))

    def test_genseq2_mutate8(self):
        """3 mutations on each of 3 resids of a 3-residue wild type"""
        wt = list('ARN')
        nummut = genseqs.genseq2(wt, [(1, ['T', 'Q', 'P']),
                                      (2, ['S', 'V', 'E']),
                                      (3, ['W', 'L', 'D'])])
        # (3 mutations + wild type) ** 3 positions
        self.assertEqual(len(nummut), 64)

    def test_genseq2_mutate9(self):
        """3 mutations on each of 3 resids of the 20-residue wild type"""
        wt = MyGenseqsTests.ALL
        nummut = genseqs.genseq2(wt, [(1, ['T', 'Q', 'P']),
                                      (9, ['S', 'V', 'E']),
                                      (20, ['W', 'L', 'D'])])
        self.assertEqual(len(nummut), 64)

    def test_genseq2_mutate10(self):
        """all 20 amino acids on each of 3 resids"""
        wt = MyGenseqsTests.ALL
        every_aa = MyGenseqsTests.ALL
        nummut = genseqs.genseq2(wt, [(1, every_aa),
                                      (9, every_aa),
                                      (20, every_aa)])
        self.assertEqual(len(nummut), 8000)

    def test_genseq2_mutate11(self):
        """10 amino acids on each of 4 resids, keeping duplicates"""
        wt = MyGenseqsTests.ALL
        ten = MyGenseqsTests.ALL[:10]
        nummut = genseqs.genseq2(wt, [(1, ten),
                                      (3, ten),
                                      (5, ten),
                                      (9, ten)], keepdupes=True)
        self.assertEqual(len(nummut), 11*11*11*11)


# Guarded so that importing this module (e.g. during test discovery) does not
# run the whole suite and terminate the interpreter via sys.exit().
if __name__ == "__main__":
    unittest.main()
# qlib file parser
def parseQlibFile(inp_file):
    """Parse a Q library file that holds a single residue entry.

    Everything after a ``#`` or ``!`` on a line is treated as a comment.
    Only the ``[atoms]`` and ``[charge_groups]`` sections are interpreted;
    ``[bonds]`` and ``[impropers]`` are accepted but skipped.

    Args:
        inp_file (string): path of the library file

    Returns:
        tuple ``(resid_name, atom_dict, "", cg_list)`` where ``atom_dict``
        maps atom name -> ``[index, ff type, charge]`` (charge as float),
        ``cg_list`` is a list of charge groups (each a list of atom names)
        and the third element is an empty-string placeholder.

    The function prints a message and calls ``sys.exit(1)`` when it meets an
    unsupported section, a charge group naming an undefined atom, an atom
    that belongs to no charge group, or a file without a ``{NAME}`` header.
    """

    section = ""
    resid_name = None

    atom_dict = {}
    cg_list = []
    cg_allatoms = set()   # to check for bad entries

    # Read everything up front so the handle is closed before parsing.
    with open(inp_file, 'r') as lib_file:
        lines = lib_file.readlines()

    # NOTE: print(...) with a single argument behaves the same under the
    # Python 2 print statement and the Python 3 print function.
    for raw_line in lines:
        entry = re.split(r"#|!", raw_line)[0].strip()
        if entry == "":   # ignore empty lines
            continue
        if entry[0] == "{":
            resid_name = entry.strip('{}')
            continue
        # if a new section begins, set the section variable
        if entry[0] == "[":
            section = entry.strip("[]")
            if section not in ("atoms", "bonds", "impropers", "charge_groups"):
                print("Unsupported section: %s\n" % section)
                sys.exit(1)
            continue
        if section == "atoms":
            atom = entry.split()
            # key=name, value=[ index, ff type, charge ]
            atom_dict[atom[1]] = [atom[0], atom[2], float(atom[3])]

        elif section == "charge_groups":
            cgrp = entry.split()
            for atom_name in cgrp:
                if atom_name not in atom_dict:
                    print("\nAtom %s is not defined in the [atoms] section. Fix it please." % (atom_name))
                    sys.exit(1)
            cg_list.append(cgrp)
            cg_allatoms.update(cgrp)

    for atom_name in atom_dict:
        if atom_name not in cg_allatoms:
            print("\nAtom %s is not defined in the [charge_groups] section. Fix it please." % (atom_name))
            sys.exit(1)

    if resid_name is None:
        # Previously this case died with an UnboundLocalError at the return.
        print("\nNo residue header ({NAME}) found in %s. Fix it please." % (inp_file))
        sys.exit(1)

    return resid_name, atom_dict, "", cg_list



# rescale the charge
def rescale(_oc_, _ocs_, _ocas_, net_crg):
    """Return the rescaled charge of one atom of a charge group.

    Args:
        _oc_: old charge of the atom
        _ocs_: sum of the old charges in the group
        _ocas_: sum of the absolute old charges in the group
        net_crg: target net charge of the group

    Returns:
        ``_oc_ - |_oc_| * (_ocs_ - net_crg) / _ocas_`` rounded to five
        decimals, or ``_oc_`` unchanged when ``_ocas_`` is zero.
    """

    # The excess charge of the group is distributed proportionally to the
    # absolute charge of each atom; the result is rounded to 5 decimals.
    try:
        new_crg = round(_oc_ - abs(_oc_) * (_ocs_ - net_crg) / _ocas_, 5)
        return new_crg

    except ZeroDivisionError:
        return _oc_
Goodbye!"; sys.exit() 129 | break 130 | 131 | # append new charge to atom_dict 132 | for atom in cgrp: 133 | atom_dict[atom].append( rescale( atom_dict[atom][2], sum(oc), sum(oca), target ) ) 134 | 135 | # construct lists of temporary charges and temporary absolute charges 136 | tc = [ round( atom_dict[atom][3], 5 ) for atom in cgrp ] 137 | tac = [ abs( round( atom_dict[atom][3], 5 ) ) for atom in cgrp ] 138 | 139 | # find largest absolute charge 140 | max_abs_index = int( tac.index( max(tac) ) ) 141 | mai = max_abs_index 142 | 143 | # correct the excess charge 144 | atom_dict[cgrp[mai]][3] -= ( sum(tc) - target ) 145 | 146 | nc = sum([ atom_dict[atom][3] for atom in cgrp ]) 147 | 148 | cgrp_nc_dict["_".join(cgrp)] = nc 149 | 150 | 151 | # modify the Qlib file 152 | 153 | section="" 154 | 155 | lib_lines = open(args.lib_file, 'r').readlines() 156 | new_lib = [] 157 | 158 | for line in lib_lines: 159 | line = line.strip() 160 | l = re.split("#|\!", line) 161 | try: 162 | comment = "# " + "#".join( l[1:] ).strip() 163 | except IndexError: 164 | comment = "" 165 | l = l[0].strip() 166 | 167 | if l == "": # ignore empty lines 168 | new_lib.append(line) 169 | continue 170 | if l[0] == "{": 171 | new_lib.append(line) 172 | new_lib.append("# Rounded and rescaled with q_rescale.py (v%s) (%s) " % (__version__, time.ctime() )) 173 | continue 174 | # if a new section begins, set the section variable 175 | if l[0] == "[": 176 | new_lib.append(line) 177 | section = l 178 | continue 179 | if section == "[atoms]": 180 | atom = l.split() 181 | index, atom_name, ff_type, old_charge = atom[0:4] 182 | new_charge = atom_dict[atom_name][3] 183 | 184 | new_line = "%5s %-5s %-15s %10.5f # %10s %s" % (index, atom_name, ff_type, new_charge, old_charge, comment) 185 | new_lib.append(new_line) 186 | 187 | elif section == "[charge_groups]": 188 | cgrp = l.split() 189 | new_line = "%s # net: %.1f" % (" ".join(cgrp), cgrp_nc_dict["_".join(cgrp)]) 190 | 191 | new_lib.append(new_line) 192 | else: 
193 | new_lib.append(line) 194 | 195 | 196 | backup = backup_file(args.lib_file) 197 | if backup: 198 | print "Backed up '%s' to '%s'" % (args.lib_file, backup) 199 | open(args.lib_file, 'w').write( "\n".join(new_lib) ) 200 | print "The library file was successfully modified...\n" 201 | 202 | 203 | 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /cadee/qscripts/lib/common.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | # 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 Miha Purg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
# Some common classes and functions

__version__ = "0.1.10"


def backup_file(filename):
    """
    Checks if a file exists, makes a backup (#filename.1#, #filename.2#...).
    Returns the new basename as a string or empty string if the file was not found.

    The backup is written next to the original; the first free index
    (starting at 1) is used.

    Args:
       filename (string):  name of file to backup

    """
    if not os.path.lexists(filename):
        return ""

    directory = os.path.dirname(filename)
    basename = os.path.basename(filename)
    # Start with the original name: it exists, so the loop always runs at
    # least once and the first candidate is "#<name>.1#".
    backup_filename = basename
    index = 1
    while os.path.lexists(os.path.join(directory, backup_filename)):
        backup_filename = "#%s.%d#" % (basename, index)
        index += 1
    shutil.copy2(filename, os.path.join(directory, backup_filename))
    return backup_filename



# no need for numpy to do these basic stats
class np(object):
    """Tiny pure-Python replacement for the few statistics helpers needed."""

    @staticmethod
    def mean(vals):
        """Return the arithmetic mean of vals, or NaN if vals is empty."""
        N = len(vals)
        if N == 0:
            return float('nan')
        return sum(vals) * 1.0 / N

    @staticmethod
    def std(vals, ddof=1):
        """Return the standard deviation of vals with divisor N - ddof.

        Returns NaN if vals is empty or if N - ddof is zero.
        """
        N = len(vals)
        if N == 0 or N-ddof == 0:
            return float('nan')
        mean = np.mean(vals)
        squared_deviations = ((x - mean)**2 for x in vals)
        return math.sqrt(sum(squared_deviations) / (N-ddof))

    @staticmethod
    def median(vals):
        """Return the median of vals, or NaN if vals is empty."""
        N = len(vals)
        if N == 0:
            return float('nan')
        vals = sorted(vals)
        # Floor division keeps the index an integer on Python 3 as well.
        half = N // 2
        if N % 2 == 0:   # even
            return np.mean((vals[half-1], vals[half]))
        else:            # odd
            return vals[half]





class DataContainer(object):
    """
    Contains a two dimensional array of values:

    [ [ row1_column1, row1_column2, row1_column3, ...],
      [ row2_column1, row2_column2, row2_column3, ...],
      ...                                               ]

    and column titles.

    Args:
       coltitles (list): column titles

    Example of usage:
    >>> dg_de = DataContainer( ['Energy_gap', 'dG'] )
    >>> dg_de.add_row( [-300.0, 10.0 ] )
    >>> rows = dg_de.get_rows( reversed(dg_de.get_column_titles()) )    # reversed rows
    >>> cols = dg_de.get_columns( columns=[0, 1] )
    """

    def __init__(self, coltitles):
        # A single title may be passed without wrapping it in a list.
        if not isinstance(coltitles, (list, tuple)):
            coltitles = [coltitles, ]
        self._column_titles = list(coltitles)
        # a list containing rows of values
        # (each row is a list with length = len(coltitles))
        self._rows = []
        self.comment = None


    def get_columns(self, columns=None):
        """
        Transposes the array and returns the columns instead of rows.

        Args:
           columns (list), optional: return only columns with these indices and/or titles

        Returns:
           list of columns (list of tuples)

        Raises:
           ValueError: if a requested title is not a column title
        """
        if not columns:
            columns = []
        col_inds = []
        for col in columns:
            if isinstance(col, int):
                col_inds.append(col)
            else:
                col_inds.append(self._column_titles.index(str(col)))
        # Materialise the transpose: on Python 3 zip() is an iterator and
        # cannot be indexed.
        cols = list(zip(*self._rows))
        if col_inds:
            return [cols[i] for i in col_inds]
        else:
            return cols


    def get_rows(self, columns=None):
        """
        Returns the rows.

        Args:
           columns (list), optional: return only columns with these indices and/or titles

        Returns:
           list of rows (the stored lists when no columns are given,
           otherwise a list of tuples restricted to the requested columns)
        """
        if columns:
            cols = self.get_columns(columns)
            return list(zip(*cols))
        else:
            return self._rows


    def get_column_titles(self):
        """
        Returns:
           list of column names (list)
        """

        return self._column_titles


    def add_row(self, row):
        """
        Args:
           row (list): a list of values

        Raises:
           ValueError: if number of elements in row is not equal to number of column titles
        """
        if len(row) != len(self._column_titles):
            raise ValueError("Number of elements is not equal to number of columns, in row:\n%s" % row)
        self._rows.append(list(row))


    def delete_rows(self):
        """
        Removes the rows.
        """
        self._rows = []

    def __str__(self):
        """Render the optional comment, the titles and all rows as text.

        Each column is at least 10 characters wide; floats are printed with
        two decimals.
        """
        if self.comment:
            s = "#" + self.comment + "\n"
        else:
            s = ""
        for name in self._column_titles:
            width = max(len(name), 10)
            s += " {name:{width}} ".format(name=name, width=width)
        for row in self._rows:
            s += "\n"
            for i, val in enumerate(row):
                try:
                    width = max(len(self._column_titles[i]), 10)
                except IndexError:
                    # row is longer than the title list
                    width = 20
                if isinstance(val, float):
                    s += " {val:{width}.2f} ".format(val=val, width=width)
                else:
                    s += " {val:{width}} ".format(val=str(val), width=width)
        return s
__author__ = "Beat Amrein"
__email__ = "beat.amrein@gmail.com"

logger = logging.getLogger('prep.alascan')

# CONSTANTS (easy reading)
# Atom names that are kept when a residue is turned into alanine.
ALA = ['N', 'H', 'CA', 'HA', 'CB', 'HB1', 'HB2', 'HB3', 'C', 'O']
MINALA = ['N', 'CA', 'CB', 'C', 'O']


def get_number_mutatable(wtpdb, wtfep, qprep5inp, mutate_radius=None,
                         center_xyz=None, immutable_resids=None):
    """Return the number of residues an alanine scan would mutate.

    Takes the same arguments as get_mutatable_residues() and returns the
    length of the residue list that function produces.
    """

    return len(get_mutatable_residues(wtpdb, wtfep, qprep5inp, mutate_radius,
                                      center_xyz, immutable_resids)[0])


def get_mutatable_residues(wtpdb, wtfep, qprep5inp, mutate_radius=None,
                           center_xyz=None, immutable_resids=None):
    """parse wt-pdb (the starting pdb) and return mutatable residues

    A residue is mutatable when its CA atom lies within mutate_radius of
    center_xyz, it is neither ALA nor GLY, and its residue number is not
    listed in immutable_resids or among the residues that
    tools.get_fep_resids() reports for wtfep.

    :param wtpdb: pdbfile of reference run
    :param wtfep: fepfile of reference run
    :param qprep5inp: qprep5-input of reference run; used to look up the
                      radius and/or center that were not supplied
    :param mutate_radius: cutoff radius; if None it is looked up in
                          qprep5inp and scaled by 0.85
    :param center_xyz: center of the cutoff sphere; if None it is looked up
                       in qprep5inp
    :param immutable_resids: residue numbers (int) that must not be mutated
    :return: (sorted list of (resname, resnum-as-str) tuples,
              dict whose keys are informational messages)
    :raises Exception: if the atom numbers in wtpdb are not consecutive
    """

    def round_xyz(xyz):
        """round xyz coordinates for printing"""
        ret = str(round(xyz[0], 3)) + " "
        ret += str(round(xyz[1], 3)) + " "
        ret += str(round(xyz[2], 3)) + " "
        return ret

    if immutable_resids is None:
        immutable_resids = ()

    msgs = {}

    if (mutate_radius is None) and (center_xyz is None):
        mutate_radius, center_xyz = qprep5.get_sphere_size_center(qprep5inp, wtpdb)  # NOPEP8
        mutate_radius *= 0.85
        msg = 'Looked up radius (scaled by 85%) and center_xyz in qprep5inp -> {0}, {1}'.format(mutate_radius, round_xyz(center_xyz))
    elif mutate_radius is None:
        mutate_radius = qprep5.get_sphere_size_center(qprep5inp, wtpdb)[0]  # NOPEP8
        mutate_radius *= 0.85
        msg = 'Look up radius (scaled by 85%) in qprep5inp -> {0}'.format(mutate_radius)
    elif center_xyz is None:
        center_xyz = qprep5.get_sphere_size_center(qprep5inp, wtpdb)[1]  # NOPEP8
        msg = 'Look up center_xyz in qprep5inp -> {0}'.format(round_xyz(center_xyz))
    else:
        # Both values were supplied by the caller: use them unchanged.
        # (This branch used to do `raise 'WTF'`, which is a TypeError.)
        msg = 'Using supplied radius and center_xyz -> {0}, {1}'.format(mutate_radius, round_xyz(center_xyz))

    msgs[msg] = 1

    immutable_fepresids = tools.get_fep_resids(wtpdb, wtfep)

    i = 0
    resids = []

    # TODO: Check if the backbone of the residue is within the cutoff region
    with open(wtpdb) as pdb:
        for line in pdb:
            if not tools.check_qprep_pdb(line):
                continue

            anum, aname, resname, resnum = tools.get_pdb_atom_info(line)
            i += 1
            if i != anum:
                raise Exception("Out of Sync")

            if resname.upper() in ('ALA', 'GLY'):
                continue

            msg = ''
            if int(resnum) in immutable_resids:
                msg = "Won't mutate residue, immutable: {0}".format(resnum)
                msgs[msg] = 1
            elif int(resnum) in immutable_fepresids:
                msg = "Won't mutate residue, contains FEPatoms: {0}".format(resnum)
                msgs[msg] = 1
            if msg:
                logger.debug("%s: %s|%s|%s|%s", msg, anum, aname, resname, resnum)
                continue

            # The position of a residue is judged by its CA atom only.
            if aname.upper().strip() == 'CA':
                logger.debug('Found CA %s', resnum)
                coords = tools.get_coords(line)
                distance_from_center = tools.euklid_dist(coords, center_xyz)  # NOPEP8
                if distance_from_center < mutate_radius:
                    resids.append((resname, str(resnum)))

    resids = sorted(set(resids))
    return resids, msgs


def main(wtpdb, wtfep, qprep5inp, outputfolder,
         mutate_radius=None, center_xyz=None, immutable_resids=None):
    """Prepare AlaScan Inputs.

    For every mutatable residue a folder <outputfolder>/<RES><num>ALA is
    created, a raw mutant pdb is written into it and
    qprep5.create_top_and_fep() is called on it.  Folders that cannot be
    created (os.makedirs raises OSError, e.g. they already exist) are
    skipped; folders for which create_top_and_fep raises KeyError are
    renamed to <folder>_fep_failed.

    :param wtpdb: pdbfile of reference run
    :param wtfep: fepfile of reference run
    :param qprep5inp: qprep5-input of reference run
    :param outputfolder: to store mutants in
    :param mutate_radius: radius_to_mutate
    :param center_xyz: center of simulation sphere
    :param immutable_resids: residue numbers (int) that must not be mutated
    :raises Exception: if qprep5inp does not exist
    """

    def write_raw_ala_mutant(raw_mut_name, resname2ala, resnum2ala, wtpdb):
        """write output pdb

        Copies wtpdb line by line; atoms of the selected residue are
        renamed to ALA and dropped unless their name is listed in ALA.
        """

        with open(raw_mut_name, 'w') as output, open(wtpdb) as pdb:
            for line in pdb:
                # Strip only the newline (line[:-1] would eat a real
                # character from a last line without trailing newline).
                line = line.rstrip('\n')
                if not tools.check_qprep_pdb(line):
                    output.write(line + '\n')
                    continue

                aname, resname, resnum = tools.get_pdb_atom_info(line)[1:4]

                if resname2ala == resname and resnum2ala == str(resnum):
                    # columns 18-20 of a pdb atom record hold the residue name
                    line = line[:17] + 'ALA' + line[20:]
                    if aname.strip() not in ALA:
                        continue    # remove this atom

                output.write(line + '\n')

    if not os.path.exists(qprep5inp):
        logger.warning('WARNING: inexistent qprep5-input: %s', qprep5inp)
        raise Exception('Qprep-input file does not exist')

    # do actual mutation

    # Forward the caller's immutable_resids (an empty list used to be
    # passed here unconditionally, silently ignoring the argument).
    mutatable, msgs = get_mutatable_residues(wtpdb, wtfep, qprep5inp,
                                             mutate_radius,
                                             center_xyz,
                                             immutable_resids=immutable_resids)
    for each in msgs:
        logger.info(each)

    for resid in mutatable:
        resname2ala, resnum2ala = resid

        if resname2ala.upper() == 'ALA':
            logger.warning('%s is ALA, skip', resid)
            continue
        elif resname2ala.upper() == 'GLY':
            logger.warning('%s is GLY, skip', resid)
            continue
        else:
            # previous 2 cases should not happen
            logger.info('Mutate %s to ALA', resid)

        # prepare output folder
        raw_mut_folder = outputfolder + '/' + resname2ala + resnum2ala + 'ALA'
        raw_mut_folder = os.path.abspath(raw_mut_folder)
        raw_mut_name = os.path.abspath(raw_mut_folder + '/raw_mutant.pdb')
        try:
            os.makedirs(raw_mut_folder)
        except OSError:
            logger.info("Skipping %s %s", raw_mut_folder, "exists!")
            continue

        write_raw_ala_mutant(raw_mut_name, resname2ala, resnum2ala, wtpdb)

        try:
            qprep5.create_top_and_fep(qprep5inp, raw_mut_folder,
                                      in_pdb=raw_mut_name, wtpdb=wtpdb,
                                      wtfep=wtfep)
        except KeyError:
            import shutil
            failedfolder = raw_mut_folder+'_fep_failed'
            # never overwrite the folder of an earlier failure
            while os.path.isdir(failedfolder):
                failedfolder += "1"
            shutil.move(raw_mut_folder, failedfolder)
            logger.warning('FEP-generator failed, moved to %s', failedfolder)
            continue


if __name__ == "__main__":
    # Parse Command Line

    def usage():
        """Print usage and exit"""
        print('')
        print('Usage:')
        print(' ' + sys.argv[0] + ' qprep-wt.pdb wt.fep qprep.inp')
        print('')
        sys.exit(1)

    if len(sys.argv) != 4:
        print(sys.argv)
        usage()
    else:
        # Parse Command Line
        main(os.path.abspath(sys.argv[1]),
             os.path.abspath(sys.argv[2]),
             os.path.abspath(sys.argv[3]),
             os.getcwd()+"/ala_scan")
#              This script can be launched multiple times,
#              to allow filling a compute node.
#              The script extracts the simpack and then iterates
#              the input files (for inp in *.inp).
#
# Installation: Put this script and the parent-script (pcadee.sh) into your $PATH.
#               This script needs a SNIC environment to work correctly (see SETTINGS).
#
#
# This script is part of CADEE.
# If you use this script, please cite:
#     Amrein et al. (2017), CADEE: Computer-Aided Directed Evolution of Enzymes, IUCrJ, p50-64
#     https://doi.org/10.1107/S2052252516018017
#
# Usage:
#     srunq.sh $SIMPACK
#
# Environment read by this script:
#     SCRATCH_FOLDER   existing folder; a per-process subfolder is created in it
#     EXE              command used to run one input file (may contain arguments)
#     BACKUPINTERVAL   seconds between two periodic backups
#     CORES            only printed in the config summary
#
# Exit codes:
#     0 all inputs done, 1 run/backup error or $EXE unset, 3 no $SCRATCH_FOLDER,
#     4 no argument, 5 argument is not a .tar, 6 too many arguments;
#     a failing 'tar xf' exits with tar's own exit code.
#
# TODO:
#     on-the-fly mapping.
#
# This script is adjusted for usage on SNIC resources.
#    (see also section SETTINGS and CLUSTER CUSTOMIZATION)
#
#

############
# SETTINGS #
############


export MYNAME=$(basename "$1")

# Print a message prefixed with the simpack name and the elapsed seconds.
function write {
    echo -e "$MYNAME @$SECONDS > $*"
}

# Prepare Temporary Directory
if [ -z "$SCRATCH_FOLDER" ]
then
    # WARNING:
    #    If you adjust this for non SNIC systems, make sure
    #    you give the absolute path, or adjust the else clause
    echo "FATAL: CONFIGURATION ERROR \$SCRATCH_FOLDER not defined in environment($0)."
    exit 3
else
    # $$ (the pid) keeps parallel instances on one node apart.
    TEMPDIR="$SCRATCH_FOLDER/$$"
    mkdir -p "$TEMPDIR"
    function cleanup {
        /bin/rm -rf "$TEMPDIR"
        echo "Cleanup Done."
    }
    trap cleanup EXIT
fi


##################
# INITIALIZATION #
##################

write "Start: $(date)"

# Evaluation of Input-Files
# no argument. stop.
if [ $# -eq 0 ]; then
    write "Fatal: No parameter suplied. Need a SIMPACK. Stop."
    write "Usage:"
    write " $0 /path/to/simpack.tar"
    exit 4

# one argument. is it file or a file mask?
elif [ $# -eq 1 ]; then
    SIMPACK=$(readlink -f "$1")
    if [ "${SIMPACK##*.}" == "tar" ]; then
        write "You supplied a SIMPACK: $SIMPACK"
    else
        write "Fatal: Simpacks have to have a .tar extension. Stop."
        exit 5
    fi
# more than one argument. not ok.
else
    write "Fatal: Currently only 1 SIMPACK can be supplied. Stop."
    write "Usage:"
    write " $0 /path/to/simpack.tar"
    exit 6
fi

# is SIMPACK variable assigned?
if [ -z "$SIMPACK" ]
then
    write "Fatal. Can not have happened error. Stop."
    exit 7
fi



if [ -z "$EXE" ]
then
    write "Fatal. \$EXE not assigned. Stop."
    exit 1
fi


##################
# UNPACK SIMPACK #
##################

write "Unpacking Simpack ($SIMPACK) to tmpdir ($TEMPDIR)."
# Never unpack or run in whatever directory we happened to start in.
cd "$TEMPDIR" || exit 1
tar xf "$SIMPACK"
tarexit=$?
if [ $tarexit -ne 0 ]
then
    write "Fatal: There was a problem unpacking the simpack! (exitcode $tarexit). Stop."
    exit $tarexit
fi

#############
# Functions #
#############


# Search for changed files and append them to the simpack.
# Without argument the backup only happens when more than $BACKUPINTERVAL
# seconds passed since the file 'timestamp' was touched; with any argument
# it always happens.
function backup {
    # if an argument is given, always backup.
    if [ -z "$1" ]
    then
        BACKUP=0
    else
        BACKUP=1
    fi

    if [ $BACKUP -eq 0 ]
    then

        if [ ! -f timestamp ]
        then
            touch timestamp
            BACKUP=1
        fi

        TIME_PASSED=$(( $(date +%s) - $(date +%s -r timestamp) ))

        # NOTE(review): $BACKUPINTERVAL is not set in this script; it is
        # presumably exported by the parent script - confirm. If it is
        # empty this test fails and no periodic backup is made.
        if [ "$TIME_PASSED" -gt "$BACKUPINTERVAL" ]
        then
            BACKUP=1
        fi
    fi

    if [ $BACKUP -ne 0 ]
    then
        tbs=$(date +%s)
        # NUL-separated names so that unusual file names survive xargs.
        find . -newer timestamp -print0 | xargs -0 tar --no-recursion --file="$SIMPACK" --append
        tarexit=$?
        if [ $tarexit -ne 0 ]
        then
            write "FATAL: (backup) tar exited with non-zero. Stop."
            exit 1
        fi
        touch timestamp
        write "Backup Complete, Duration: $(( $(date +%s) - tbs )), [ $(date) ]"
    else
        write "Backup Skipped."
    fi
}


# Set ERROR=1 if the logfile $1 contains a SUM line with three NaN values.
function isnan {
    # TEST FOR NaN
    NAN=$( grep "SUM.*NaN.*NaN.*NaN" "$1" | wc -l )
    if [ "$NAN" -ne 0 ]
    then
        write "FATAL: We've got a NaN! Stop."
        ERROR=1
    fi
    return 0
}


################
# Print Config #
################

write ""
write ""
write "###########"
write "# CONFIG: #"
write "###########"
write " bkp int: $BACKUPINTERVAL"
write " simpack: $SIMPACK"
write " cores: $CORES"
write " exe: $EXE"
# $EXE may hold a launcher plus arguments; the checksum is taken of its last word.
write " md5sum: $(md5sum $(echo $EXE | rev | cut -d' ' -f1 | rev))"
write " workdir: $PWD"
write ""
write ""
write ""


#############
# Work Loop #
#############

ERROR=0

touch timestamp

for inp in *.inp
do
    file=${inp%.inp}
    write "Working Directory; $(hostname):$(pwd)"
    write "Preparing $file ... "

    # check for logfile ...
    if [ -f "$file.log" ]
    then
        write "LogFile $file.log exists! "
        if [ "$(tail -n 10 "$file.log" | grep "terminated normally" | wc -l )" -eq 1 ]
        then
            write "Last run ended successfully. Skipping this run."
            continue
        else
            write "Will rerun, logfile is lacking 'terminated normally'."
        fi
    fi

    # check for gzipped log ...
    if [ -f "$file.log.gz" ]
    then
        write "LogFile $file.log.gz exists! "
        if [ "$(zcat "${file}.log.gz" | tail -n 10 | grep "terminated normally" | wc -l )" -eq 1 ]
        then
            write "Last run ended successfully. Skipping this run."
            continue
        else
            write "Will rerun, logfile is lacking 'terminated normally'."
        fi
    fi

    #RUN IT
    write "Running MD Simulation on $file.inp ..."

    string=$(grep "restart.*.rest.re" "${file}.inp")

    unset restraintname
    # NOTE(review): this section is intentionally left as it was.
    # $string is unquoted below, so whenever grep returns a line of two or
    # more words the test is a syntax error and the branch is skipped;
    # $restraintname then stays unset and the later /bin/rm never runs.
    # Quoting "$string" would activate the branch - and ${restraintname%.*}
    # only strips the final ".re", so "${restartname}.re" names the same
    # file as $restraintname. Confirm the intended behaviour before fixing.
    if [ ! -z $string ]
    then
        stringarray=($string)
        restraintname=${stringarray[-1]}
        restartname="${restraintname%.*}"
        if [ ! -f $restraintname ]
        then
            if [ -f ${restartname}.re ]
            then
                cp -v "${restartname}.re" "${restraintname}"
            else
                write "ERROR: Restartfile not found: ${file}.re"
                write "ERROR: Don't know what to do!"
                ERROR=1
                break
            fi
        fi
    fi

    # $EXE is deliberately unquoted: it may consist of a command and arguments.
    if $EXE "${file}.inp" > "${file}.log"
    then
        write "Finished: ${file}.log"
        # check if NAN?
        isnan "${file}.log"
        # TODO: on-the-fly mapping
        write "Zipping."
        gzip "${file}.log"
        [ -f "${file}.en" ] && gzip "${file}.en"
        [[ ! -z $restraintname ]] && [[ -f "$restraintname" ]] && /bin/rm "$restraintname"
        backup
    else
        ERROR=1
    fi

    if [ $ERROR -ne 0 ]
    then
        write "Critical: An error occured with ${file}.inp"
        write " Will now stop."
        break
    fi
done

# Always store the final state, also after an error.
backup 1

echo ""
echo ""
echo ""
echo ""
if [ $ERROR -eq 1 ]
then
    write "Abnormal Termination"
elif [ $ERROR -eq 0 ]
then
    write "All OK."
fi

write "End: $(date)"
write "Duration: $SECONDS s"

exit $ERROR
5 | 6 | # Author: Beat Amrein, beat.amrein@gmail.com 7 | # This script is part of CADEE. 8 | 9 | # Version 0.8.1 10 | 11 | 12 | if [ -d /scratch ] 13 | then 14 | echo "Using /scratch for temporary files." 15 | TEMP=/scratch 16 | else 17 | echo "Using /tmp for temporary files." 18 | TEMP=/tmp 19 | fi 20 | 21 | 22 | function usage(){ 23 | echo "Usage: 24 | " 25 | 26 | if [ $(echo $0 | grep python | wc -l) -eq 1 ] 27 | then 28 | echo " cadee tool repair_simpack /path/to/tararchive.tar [ [ --force ] || [ --checkene ] ]" 29 | else 30 | echo " $0 /path/to/tararchive.tar [ [ --force ] || [ --checkene ] ]" 31 | fi 32 | echo " 33 | 34 | This script will ... 35 | 36 | 1. Search duplicate logfiles. 37 | 2. Search duplicate energy files. 38 | 3. Search missing restartfiles. 39 | 4. Search damaged logfiles. 40 | 5. Search for logfiles lacking 'terminated normally'. 41 | 6. Search for gzipped logfiles lacking 'terminated normally'. 42 | 43 | ( --checkene ) 44 | => 7. Search for energy files with invalid sizes and delete them including log,re and dcd. 45 | 46 | => 8. Repack the tarball. (If problems have been found). 47 | 48 | Options (exclusive): 49 | --force will continue even if the tar-utility has a non-0 exitcode, and always repack 50 | --checkene will unpack, check the energyfile - sizes and repack. 51 | " 52 | exit 1 53 | } 54 | 55 | 56 | set -e 57 | 58 | found=0 59 | 60 | if [ -z $1 ] 61 | then 62 | echo "need a simpack (.tar) to fix (got noting)!" 63 | usage 64 | fi 65 | 66 | if [ ! -f $1 ] 67 | then 68 | echo "need a simpack (.tar) to fix (got $1)!" 69 | usage 70 | fi 71 | 72 | TMPFOLDER="/$TEMP/$$" 73 | 74 | if [ -d $TMPFOLDER ] 75 | then 76 | echo "TEMPORARY FOLDER EXISTS. $TMPFOLDER" 77 | echo "STOP. (You may try to remove it)." 
78 | exit 79 | fi 80 | 81 | simpack="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")" 82 | 83 | mkdir -p $TMPFOLDER 84 | cd $TMPFOLDER 85 | 86 | function finish { 87 | /bin/rm -rf $TMPFOLDER 88 | } 89 | 90 | trap finish INT TERM 91 | 92 | pwd 93 | 94 | set +e 95 | tar xf $simpack 96 | tarexit=$? 97 | set -e 98 | 99 | 100 | if [ $tarexit -ne 0 ] 101 | then 102 | echo "tar exit code: $tarexit" 103 | if [ "$2" == '--force' ] 104 | then 105 | echo Specified --force flag. 106 | echo Will now repack the tar archive. 107 | found=1 108 | else 109 | echo There was an error unpacking. 110 | echo use --force as second argument to force unpacking 111 | exit 2 112 | fi 113 | fi 114 | 115 | 116 | 117 | echo "1. Searching duplicate logfiles:" 118 | for file in $(/bin/ls *.log || true) 119 | do 120 | if [ -f "${file}.gz" ] 121 | then 122 | let found+=1 123 | set +e 124 | zcmp $file "${file}.gz" 125 | if [ $? -eq 0 ] 126 | then 127 | echo 'log is equal to log.gz. delete log' 128 | /bin/rm $file 129 | else 130 | echo 'log is not equal to log.gz!' 131 | zcat $file.gz | tail | grep -q "terminated normally" 132 | if [ $? -eq 0 ] 133 | then 134 | echo "$file.gz seems correct" 135 | echo "==> keeping $file.gz" 136 | /bin/rm $file 137 | else 138 | echo "$file.gz seems bad. keeping $file." 139 | echo "keeping $file" 140 | /bin/rm $file.gz 141 | fi 142 | fi 143 | set -e 144 | fi 145 | done 146 | 147 | echo "2. Searching duplicate energy files:" 148 | for file in $(/bin/ls *.en || true) 149 | do 150 | if [ -f "${file}.gz" ] 151 | then 152 | let found+=1 153 | size=$(cat $file | wc -c) 154 | set +e 155 | gize=$(zcat ${file}.gz | wc -c) 156 | [ $? -ne 0 ] && gize=0 157 | echo "Duplicate energy files: $file/.gz" 158 | if [ $size -gt $gize ] 159 | then 160 | echo "Uncompressed is bigger than compressed one! Removing compressed one!" 161 | /bin/rm "${file}.gz" 162 | gzip "${file}" 163 | else 164 | echo "Removing uncompressed file." 
165 | /bin/rm -v "${file}" 166 | fi 167 | fi 168 | set -e 169 | done 170 | 171 | 172 | echo "3. Searching missing restartfiles:" 173 | for file in $(/bin/ls *_dyn*.log.gz ||true) $(/bin/ls *_eq.log.gz||true) $(/bin/ls ????_fep.log.gz||true) 174 | do 175 | if [ ! -f "${file%.log.gz}.re" ] 176 | then 177 | echo "The logfile exists, but there is no restartfile. Removing outpuffiles of $file ..." 178 | /bin/rm -v $file 179 | /bin/rm -f -v "${file%.log.gz}.dcd" 180 | /bin/rm -f -v "${file%.log.gz}.en.gz" 181 | /bin/rm -f -v "${file%.log.gz}.log" 182 | let found+=1 183 | fi 184 | done 185 | 186 | echo "4. Searching damaged logfiles:" 187 | for file in $(/bin/ls *_dyn*.log.gz ||true) $(/bin/ls *_eq.log.gz||true) $(/bin/ls ????_fep.log.gz||true) 188 | do 189 | set +e 190 | zcat $file > /dev/null 191 | if [ "$?" -eq "1" ] 192 | then 193 | echo zcat error 194 | /bin/rm $file 195 | fn="${file%.*}" 196 | fn="${fn%.*}" 197 | /bin/rm $fn.re 198 | /bin/rm $fn.dcd 199 | /bin/rm $fn.en 200 | let found+=1 201 | fi 202 | done 203 | set -e 204 | 205 | echo "5. Search for logfiles lacking 'terminated normally':" 206 | for file in $(/bin/ls *_dyn*.log ||true) $(/bin/ls *_eq.log||true) $(/bin/ls ????_fep.log||true) 207 | do 208 | ok=1 209 | set +e 210 | (tail $file | grep -q "terminated normally") || ok=0 211 | set -e 212 | if [ $ok -eq 0 ] 213 | then 214 | echo 'Bad Logfile:' $file 'deleting ...' 215 | /bin/rm $file 216 | let found+=1 217 | fi 218 | done 219 | 220 | echo "6. Search for gzipped logfiles lacking 'terminated normally':" 221 | for file in $(/bin/ls *_dyn*.log.gz ||true) $(/bin/ls *_eq.log.gz||true) $(/bin/ls ????_fep.log.gz||true) 222 | do 223 | ok=1 224 | set +e 225 | (zcat $file | tail | grep -q "terminated normally") || ok=0 226 | set -e 227 | if [ $ok -eq 0 ] 228 | then 229 | echo 'Bad Logfile:' $file 'deleting ...' 230 | /bin/rm $file 231 | let found+=1 232 | fi 233 | done 234 | 235 | if [ ! -z $2 ] && [ "$2" == '--checkene' ] 236 | then 237 | echo "7. 
Search for energy files with invalid sizes:" 238 | echo " i) Unzipping energy files" 239 | gunzip -f *.en.gz || true 240 | echo " ii) collecting files sizes ..." 241 | /bin/ls -l *.en | awk '{ print $5}' | sort | uniq -c | sort -g 242 | 243 | numsizes=$(/bin/ls -l *.en | awk '{print $5}' | sort | uniq -c | sort -g | wc -l) 244 | 245 | if [ $numsizes -gt 1 ] 246 | then 247 | echo "Expected 1 files size, found $numsizes ." 248 | echo "There is a problem in this simpack ..." 249 | size1=$(/bin/ls -l *.en | awk '{print $5}' | sort | uniq -c | sort -g | awk '{ print $2}' | tail -n 1 ) 250 | 251 | echo " iii) Comparing to assigned sizes; Size=$size1." 252 | for fil in $(/bin/ls *.en) 253 | do 254 | size=$(cat $fil | wc -c) 255 | if [[ $size -ne $size1 ]] 256 | then 257 | let found+=1 258 | noext="${fil/.en/}" 259 | echo "Energy File Size for $fil is invalid ($size). Removing $noext/en/re/log/dcd ..." 260 | echo $noext 261 | /bin/rm -rfv "${noext}.dcd" 262 | /bin/rm -rfv "${noext}.log" 263 | /bin/rm -rfv "${noext}.log.gz" 264 | /bin/rm -rfv "${noext}.re" 265 | /bin/rm -rfv "$fil" 266 | fi 267 | done 268 | else 269 | echo "File sizes OK." 270 | fi 271 | echo " iv) ReZipping EnergyFiles ..." 272 | gzip *en 273 | fi 274 | 275 | echo -n "$simpack: " 276 | if [ $found -gt 0 ] 277 | then 278 | echo "Found $found issues! Overwriting simpack ..." 279 | if [ $found -gt 100 ] 280 | then 281 | echo "" 282 | echo "FOUND MANY PROBLEMS. SEE ABOVE." 283 | echo "" 284 | echo "THIS WILL OVERWRITE YOUR SIMPACK." 285 | echo 286 | echo "DO YOU HAVE A BACKUP?" 287 | echo 288 | select yn in "Yes" "No"; do 289 | case $yn in 290 | Yes ) 291 | break;; 292 | No ) 293 | echo "Stop."; 294 | exit;; 295 | esac 296 | done 297 | fi 298 | tar cf $simpack * 299 | echo "Repacking Done. Ready for resubmission." 300 | else 301 | echo "No Problems with this simpack. Awesome!" 
#!/usr/bin/env python

"""
Generate sequences from a pdbfile and modify those sequences.

Author: Beat Amrein (beat.amrein@gmail.com)

This module is part of CADEE, the framework for
Computer-Aided Directed Evolution of Enzymes.
"""


from __future__ import print_function

import itertools
import logging
import os
import sys
import time

try:
    import config
except ImportError:
    # Only the command line entry point reads ``config``; the sequence
    # generators below stay usable when it cannot be imported.
    config = None

__author__ = "Beat Amrein"
__email__ = "beat.amrein@gmail.com"

logger = logging.getLogger('prep.genseqs')

# ERROR/EXIT CODES
ERR_USAGE = 1
ERR_OUTPUTFOLDER_EXISTS = 2
ERR_TOPO_GENERATION_WT = 3
ERR_QPREP5_INEXISTENT = 4
ERR_MKTOP_INEXISTENT = 5
ERR_NO_BABEL = 6

# CONSTANTS
NLC = '\n'

# Upper bound on residues accepted by the command line interface; the number
# of generated sequences grows as len(library) ** number_of_residues.
_MAX_RESIDUES = 7


def genseq2(wtseq, mutations, keepdupes=False):
    """Generate a sequence library based on wtseq.

    @param wtseq: wild-type sequence, an iterable of 1-letter residues
    @param mutations: list of tuples, [(resid, library), ...]; resid is
                      1-based, library is a sized iterable of 1-letter
                      amino acids.  The list is not modified.
    @param keepdupes: when True, a sequence is appended even if an identical
                      one is already part of the result

    @returns: list of sequences (each a list of 1-letter residues); the
              first entry is the unmodified wtseq
    @raises ValueError: for a resid < 1 or a library entry that is not
                        exactly one character long
    """
    # Work on a private copy so the caller's list survives the call.
    mutations = list(mutations)

    estimate = 1
    for _, lib in mutations:
        estimate *= (len(lib) + 1)
    logger.info('will mutate wtseq %s and create about %s mutations',
                wtseq, estimate)

    sequences = [list(wtseq)]

    # Mutations are applied last-to-first; the order of the returned
    # sequences depends on it.
    for res, lib in reversed(mutations):
        res = int(res)
        if res < 1:
            raise ValueError('Impossible: resid < 1!', res)
        pos = res - 1
        for aa in lib:
            if len(aa) != 1:
                raise ValueError('Impossible 1-letter aminoacid',
                                 aa, 'in lib', lib)

        newseqs = sequences[:]
        # Hashable mirror of newseqs: O(1) duplicate detection.
        seen = set(tuple(seq) for seq in sequences)
        for seqo in sequences:
            for aa in lib:
                seqn = seqo[:]
                seqn[pos] = aa
                key = tuple(seqn)
                if keepdupes or key not in seen:
                    newseqs.append(seqn)
                    seen.add(key)
        sequences = newseqs

    return sequences


def combine(lib, pos):
    """Generate all combinations taking one entry from each library.

    @param lib: list of libraries; lib[i] holds the choices for pos[i]
    @param pos: positions to mutate (only the number of positions is used)

    @returns: list of combinations, each a list with one entry per position;
              the first library varies slowest
    @raises IndexError: if fewer libraries than positions are given
    """
    numseqs = 1
    for each in lib:
        numseqs *= len(each)
    logger.info('Generating %s %s', numseqs, 'sequeces. Please wait.')

    logger.info('Library %s, Positions %s', lib, pos)

    # An empty pos is treated like a single position.
    depth = max(1, len(pos))
    if len(lib) < depth:
        raise IndexError('need {0} libraries, got {1}'.format(depth,
                                                              len(lib)))

    return [list(combo) for combo in itertools.product(*lib[:depth])]


def gen_seqlib(sequence, pos, lib):
    """Generate sequences, mutating sequence at pos[x] to all entries of lib[x].

    If only one library is given (a plain string or a list of length 1),
    the same library is used for every position.  The lib argument itself
    is never modified.

    @param sequence: sequence to mutate (string)
    @param pos: list of 0-based positions in sequence
    @param lib: library string, or list with one library per position

    @returns: dict whose keys are the generated sequences (values are 1)
    @raises ValueError: if the number of libraries and positions differ
    """
    # is lib a string?
    if isinstance(lib, str):
        lib = [lib]
    else:
        lib = list(lib)

    # when only 1 library is given, reuse it
    if len(lib) == 1:
        lib = lib + [lib[0]] * (len(pos) - 1)

    if len(pos) != len(lib):
        msg = 'Bad Input: Dimensions of pos and lib must be equal: '
        msg += 'found: #pos: {0}, #lib {1}'.format(len(pos), len(lib))
        raise ValueError(msg)

    seqlib = combine(lib, pos)

    # insert combinations into sequence
    sequences_1d = {}
    for combo in seqlib:
        nfa = list(sequence)
        for posj, residue in zip(pos, combo):
            # keep the original character (and its case) when the residue
            # is unchanged
            if nfa[posj].upper() != residue.upper():
                nfa[posj] = residue
        sequences_1d[''.join(nfa)] = 1

    return sequences_1d


def _read_fasta(path):
    """Return the lower-cased sequence stored in the fasta file at path."""
    chunks = []
    with open(path) as handle:
        for line in handle:
            line = line.rstrip('\r\n')
            if not line or line[0] == '>':
                # blank line or fasta-comment, ignore line
                continue
            chunks.append(line.lower())
    return ''.join(chunks)


def get_fasta(wtpdb):
    """Return fasta code of wtpdb (lower case, header lines dropped)."""

    # preparations
    # NOTE(review): babel_pdb_for_scwrl presumably writes 'proper.fasta'
    # into the current working directory - confirm in pyscwrl.
    from pyscwrl import babel_pdb_for_scwrl

    babel_pdb_for_scwrl(wtpdb)

    # read fasta
    return _read_fasta('proper.fasta')


def get_sequences(wtpdb, resids, library):
    """Return the sequences for resids, created with library.

    @param wtpdb: path of the wild-type pdbfile
    @param resids: 1-based residue numbers to mutate
    @param library: library applied to every residue in resids

    @returns: dict keyed by sequence, as returned by gen_seqlib
    """
    print(wtpdb, resids)
    # Get the fasta sequence from pdbfile
    fasta = get_fasta(wtpdb)

    # position - ids start from 0 (not 1), so we have to convert
    posids = [int(resid) - 1 for resid in resids]

    # generate sequences:
    return gen_seqlib(fasta, posids, [library])


def _usage():
    """Print Usage and exit"""
    print('')
    print('Usage:')
    print(' ' + sys.argv[0] + ' qprep-wt.pdb res1 [ res2 ... ]')
    print('')
    sys.exit(ERR_USAGE)


def _get_resnumbers(args, lib):
    """Return residue-numbers as list-of-integers; exit on invalid input."""
    resids = []
    for resid in args:
        try:
            resids.append(int(resid))
        except ValueError:
            print('ValueError with ', resid, ' expected: Integer')
            _usage()
    if len(resids) > _MAX_RESIDUES:
        print('FATAL:')
        print('You ask me to mutate more than 7 residues at one time.')
        print('This is NOT IMPLEMENTED... ...probably a BAD IDEA :')
        print('This is a bad idea, because we grow with LIBRARY^{#RES}!')
        print('In your case ', len(lib), '^', len(resids), '=',
              len(lib)**len(resids), '!')
        _usage()
    return resids


def main():
    """Command line entry point: generate the sequences and report the time."""
    if config is None:
        raise ImportError('the "config" module is required to run genseqs '
                          'from the command line')

    # Parse Command Line
    lib = config.SatLibs.ALL

    start = time.time()

    if len(sys.argv) < 3:
        _usage()

    get_sequences(os.path.abspath(sys.argv[1]),
                  _get_resnumbers(sys.argv[2:], lib), lib)

    print('time', round(time.time() - start, 2), 's')


if __name__ == "__main__":
    main()
from Qdyn log file 5 | # 6 | 7 | 8 | logfile="$(pwd)/$1" 9 | dat_folder=dynplot 10 | if [[ -e "$dat_folder" ]]; then 11 | echo "Please remove the folder '$dat_folder' manually 12 | " 13 | exit 1 14 | fi 15 | mkdir $dat_folder 16 | cd $dat_folder 17 | 18 | # energy contributions 19 | # 20 | # Example output: 21 | # 22 | ## ======================= Energy summary at step 1500 ======================== 23 | ## el vdW bond angle torsion improper 24 | ## solute 0.00 0.00 13.31 9.56 4.26 0.70 25 | ## solvent -5623.35 806.80 0.00 0.00 0.00 0.00 26 | ## solute-solvent 0.00 0.00 27 | ## LRF -30.79 28 | ## Q-atom -224.52 -4.85 -73.72 5.66 0.00 0.00 29 | ## 30 | ## total fix slvnt_rad slvnt_pol shell solute 31 | ## restraints -218.28 0.00 -241.91 18.14 0.00 5.49 32 | ## 33 | ## total potential kinetic 34 | ## SUM -4261.11 -5335.21 1074.10 35 | # 36 | # Save all this data to: 37 | # E_solute.dat 38 | # E_solvent.dat 39 | # E_solute-solvent.dat 40 | # E_LRF.dat 41 | # EQ.dat 42 | # E_restraints.dat 43 | # E_total.dat 44 | 45 | grep "Energy summary at step" -A15 ${logfile} | awk '{ 46 | 47 | if ($2 == "Energy") step=$6; 48 | else if ($1 == "solute") print step, $2, $3, $4, $5, $6, $7 >> "E_solute.dat"; 49 | else if ($1 == "solvent") print step, $2, $3, $4, $5, $6, $7 >> "E_solvent.dat"; 50 | else if ($1 == "solute-solvent") print step, $2,$3 >> "E_solute-solvent.dat"; 51 | else if ($1 == "LRF") print step, $2 >> "E_LRF.dat"; 52 | else if ($1 == "Q-atom") print step, $2, $3, $4, $5, $6, $7 >> "EQ.dat"; 53 | else if ($1 == "restraints") print step, $2, $3, $4, $5, $6, $7 >> "E_restraints.dat"; 54 | else if ($1 == "SUM") print step,$2,$3,$4 >> "E_total.dat"; 55 | }' 56 | 57 | # Q Energy contributions in each state 58 | # 59 | # 60 | ## ======================= Q-atom energies at step 500 ======================== 61 | ## type st lambda el vdW bond angle torsion improper 62 | ## Q-Q 1 1.0000 -22.88 3.10 63 | ## Q-Q 2 0.0000 -70.69 26.38 64 | ## 65 | ## Q-wat 1 1.0000 -235.81 -3.69 66 | 
## Q-wat 2 0.0000 -84.07 -18.18 67 | ## 68 | ## Q-surr. 1 1.0000 -235.81 -3.69 69 | ## Q-surr. 2 0.0000 -84.07 -18.18 70 | ## 71 | ## Q-any 1 1.0000 -258.70 -0.59 -93.59 1.65 0.00 0.00 72 | ## Q-any 2 0.0000 -154.76 8.20 -3.42 45.02 0.00 0.00 73 | ## 74 | ## type st lambda total restraint 75 | ## Q-SUM 1 1.0000 -347.10 4.12 76 | ## Q-SUM 2 0.0000 -104.50 0.45 77 | # 78 | # Save all this data to: 79 | # EQ_qq_1.dat 80 | # EQ_qq_2.dat 81 | # EQ_qwat_1.dat 82 | # EQ_qwat_2.dat 83 | # EQ_qsurr_1.dat 84 | # EQ_qsurr_2.dat 85 | # EQ_bonded_1.dat 86 | # EQ_bonded_2.dat 87 | # EQ_total_1.dat 88 | # EQ_total_2.dat 89 | 90 | grep "Q-atom energies at step" -A17 ${logfile} | awk '{ 91 | 92 | if ($2 == "Q-atom") step=$6; 93 | else if ($1 == "Q-Q" && $2 == "1") print step, $3, $4, $5 >> "EQ_qq_1.dat"; 94 | else if ($1 == "Q-Q" && $2 == "2") print step, $3, $4, $5 >> "EQ_qq_2.dat"; 95 | else if ($1 == "Q-wat" && $2 == "1") print step, $3, $4, $5 >> "EQ_qwat_1.dat"; 96 | else if ($1 == "Q-wat" && $2 == "2") print step, $3, $4, $5 >> "EQ_qwat_2.dat"; 97 | else if ($1 == "Q-surr." && $2 == "1") print step, $3, $4, $5 >> "EQ_qsurr_1.dat"; 98 | else if ($1 == "Q-surr." 
&& $2 == "2") print step, $3, $4, $5 >> "EQ_qsurr_2.dat"; 99 | else if ($1 == "Q-any" && $2 == "1") print step, $3, $6, $7, $8, $9 >> "EQ_bonded_1.dat"; 100 | else if ($1 == "Q-any" && $2 == "2") print step, $3, $6, $7, $8, $9 >> "EQ_bonded_2.dat"; 101 | else if ($1 == "Q-SUM" && $2 == "1") print step, $3, $4, $5 >> "EQ_total_1.dat"; 102 | else if ($1 == "Q-SUM" && $2 == "2") print step, $3, $4, $5 >> "EQ_total_2.dat"; 103 | }' 104 | 105 | 106 | 107 | 108 | 109 | # MAKE GNUPLOT INPUTS 110 | 111 | echo ' 112 | set term wxt persist 113 | set key out 114 | 115 | set title "Solute energy contributions" 116 | plot "E_solute.dat" u :2 w lp ps 0.5 lw 0.5 title "El",\ 117 | "E_solute.dat" u :3 w lp ps 0.5 lw 0.5 title "vdW",\ 118 | "E_solute.dat" u :4 w lp ps 0.5 lw 0.5 title "Bond",\ 119 | "E_solute.dat" u :5 w lp ps 0.5 lw 0.5 title "Angle",\ 120 | "E_solute.dat" u :6 w lp ps 0.5 lw 0.5 title "Torsion",\ 121 | "E_solute.dat" u :7 w lp ps 0.5 lw 0.5 title "Improper" 122 | ' > E_solute.plot 123 | 124 | echo ' 125 | set term wxt persist 126 | set key out 127 | 128 | set title "Solvent energy contributions" 129 | plot "E_solvent.dat" u :2 w lp ps 0.5 lw 0.5 title "El",\ 130 | "E_solvent.dat" u :3 w lp ps 0.5 lw 0.5 title "vdW",\ 131 | "E_solvent.dat" u :4 w lp ps 0.5 lw 0.5 title "Bond",\ 132 | "E_solvent.dat" u :5 w lp ps 0.5 lw 0.5 title "Angle",\ 133 | "E_solvent.dat" u :6 w lp ps 0.5 lw 0.5 title "Torsion",\ 134 | "E_solvent.dat" u :7 w lp ps 0.5 lw 0.5 title "Improper" 135 | ' > E_solvent.plot 136 | 137 | echo ' 138 | set term wxt persist 139 | set key out 140 | 141 | set title "Solute-solvent energy contributions" 142 | plot "E_solute-solvent.dat" u :2 w lp ps 0.5 lw 0.5 title "El",\ 143 | "E_solute-solvent.dat" u :3 w lp ps 0.5 lw 0.5 title "vdW" 144 | 145 | ' > E_solute-solvent.plot 146 | 147 | echo ' 148 | set term wxt persist 149 | set key out 150 | 151 | set title "LRF energy contribution" 152 | plot "E_LRF.dat" u :2 w lp ps 0.5 lw 0.5 title "LRF El" 153 | 154 | ' 
> E_LRF.plot 155 | 156 | echo ' 157 | set term wxt persist 158 | set key out 159 | 160 | set title "Q energy contributions (both states)" 161 | plot "EQ.dat" u :2 w lp ps 0.5 lw 0.5 title "El",\ 162 | "EQ.dat" u :3 w lp ps 0.5 lw 0.5 title "vdW",\ 163 | "EQ.dat" u :4 w lp ps 0.5 lw 0.5 title "Bond",\ 164 | "EQ.dat" u :5 w lp ps 0.5 lw 0.5 title "Angle",\ 165 | "EQ.dat" u :6 w lp ps 0.5 lw 0.5 title "Torsion",\ 166 | "EQ.dat" u :7 w lp ps 0.5 lw 0.5 title "Improper" 167 | ' > EQ.plot 168 | 169 | 170 | 171 | echo ' 172 | set term wxt persist 173 | set key out 174 | 175 | set title "Restraints energy contributions" 176 | plot "E_restraints.dat" u :2 w lp ps 0.5 lw 0.5 title "Total",\ 177 | "E_restraints.dat" u :3 w lp ps 0.5 lw 0.5 title "Fix",\ 178 | "E_restraints.dat" u :4 w lp ps 0.5 lw 0.5 title "Solvent_rad",\ 179 | "E_restraints.dat" u :5 w lp ps 0.5 lw 0.5 title "Solvent_pol",\ 180 | "E_restraints.dat" u :6 w lp ps 0.5 lw 0.5 title "Shell",\ 181 | "E_restraints.dat" u :7 w lp ps 0.5 lw 0.5 title "Solute" 182 | ' > E_restraints.plot 183 | 184 | echo ' 185 | set term wxt persist 186 | set key out 187 | 188 | set title "SUM of energies" 189 | plot "E_total.dat" u :2 w lp ps 0.5 lw 0.5 title "Total",\ 190 | "E_total.dat" u :3 w lp ps 0.5 lw 0.5 title "Potential",\ 191 | "E_total.dat" u :4 w lp ps 0.5 lw 0.5 title "Kinetic" 192 | ' > E_total.plot 193 | 194 | echo ' 195 | set term wxt persist 196 | set key out 197 | 198 | set title "Q energies: Nonbonding interactions" 199 | plot "EQ_qq_1.dat" u :3 w lp ps 0.5 lw 0.5 title "(1) El_qq",\ 200 | "EQ_qq_2.dat" u :3 w lp ps 0.5 lw 0.5 title "(2) El_qq",\ 201 | "EQ_qq_1.dat" u :4 w lp ps 0.5 lw 0.5 title "(1) vdW_qq",\ 202 | "EQ_qq_2.dat" u :4 w lp ps 0.5 lw 0.5 title "(2) vdW_qq",\ 203 | "EQ_qwat_1.dat" u :3 w lp ps 0.5 lw 0.5 title "(1) El_qwat",\ 204 | "EQ_qwat_2.dat" u :3 w lp ps 0.5 lw 0.5 title "(2) El_qwat",\ 205 | "EQ_qwat_1.dat" u :4 w lp ps 0.5 lw 0.5 title "(1) vdW_qwat",\ 206 | "EQ_qwat_2.dat" u :4 w lp ps 0.5 
lw 0.5 title "(2) vdW_qwat",\ 207 | "EQ_qsurr_1.dat" u :3 w lp ps 0.5 lw 0.5 title "(1) El_qsurr",\ 208 | "EQ_qsurr_2.dat" u :3 w lp ps 0.5 lw 0.5 title "(2) El_qsurr",\ 209 | "EQ_qsurr_1.dat" u :4 w lp ps 0.5 lw 0.5 title "(1) vdW_qsurr",\ 210 | "EQ_qsurr_2.dat" u :4 w lp ps 0.5 lw 0.5 title "(2) vdW_qsurr" 211 | ' > EQ_nonbond.plot 212 | 213 | echo ' 214 | set term wxt persist 215 | set key out 216 | 217 | set title "Q energies: bonding" 218 | plot "EQ_bonded_1.dat" u :3 w lp ps 0.5 lw 0.5 title "(1) Bond",\ 219 | "EQ_bonded_1.dat" u :4 w lp ps 0.5 lw 0.5 title "(1) Angle",\ 220 | "EQ_bonded_1.dat" u :5 w lp ps 0.5 lw 0.5 title "(1) Torsion",\ 221 | "EQ_bonded_1.dat" u :6 w lp ps 0.5 lw 0.5 title "(1) Improper",\ 222 | "EQ_bonded_2.dat" u :3 w lp ps 0.5 lw 0.5 title "(2) Bond",\ 223 | "EQ_bonded_2.dat" u :4 w lp ps 0.5 lw 0.5 title "(2) Angle",\ 224 | "EQ_bonded_2.dat" u :5 w lp ps 0.5 lw 0.5 title "(2) Torsion",\ 225 | "EQ_bonded_2.dat" u :6 w lp ps 0.5 lw 0.5 title "(2) Improper" 226 | 227 | ' > EQ_bonding.plot 228 | 229 | echo ' 230 | set term wxt persist 231 | set key out 232 | 233 | set title "Q energies: Total" 234 | plot "EQ_total_1.dat" u :3 w lp ps 0.5 lw 0.5 title "State (1)",\ 235 | "EQ_total_2.dat" u :3 w lp ps 0.5 lw 0.5 title "State (2)" 236 | 237 | ' > EQ_total.plot 238 | 239 | 240 | gnuplot E_solute.plot 241 | gnuplot E_solvent.plot 242 | gnuplot E_solute-solvent.plot 243 | gnuplot E_LRF.plot 244 | gnuplot EQ.plot 245 | gnuplot E_restraints.plot 246 | gnuplot E_total.plot 247 | gnuplot EQ_nonbond.plot 248 | gnuplot EQ_bonding.plot 249 | gnuplot EQ_total.plot 250 | -------------------------------------------------------------------------------- /cadee/qscripts/lib/OrderedDict.py: -------------------------------------------------------------------------------- 1 | # Downloaded from: 2 | # http://code.activestate.com/recipes/576693/ 3 | # 4 | # Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. 
# Passes Python2.7's test suite and incorporates all the latest updates.

# get_ident lives in different modules depending on the interpreter; try the
# legacy names first and fall back to threading where they no longer exist.
try:
    from thread import get_ident as _get_ident
except ImportError:
    try:
        from dummy_thread import get_ident as _get_ident
    except ImportError:
        from threading import get_ident as _get_ident

# The view classes are only needed by viewkeys()/viewvalues()/viewitems().
try:
    from _abcoll import KeysView, ValuesView, ItemsView
except ImportError:
    try:
        from collections.abc import KeysView, ValuesView, ItemsView
    except ImportError:
        pass


class OrderedDict(dict):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as for regular dictionaries.

    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].

    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary.  Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            # Already initialised (__init__ called a second time): keep the
            # existing linked list.
            self.__root
        except AttributeError:
            self.__root = root = []                     # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        self.__update(*args, **kwds)

    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        link_prev[1] = link_next
        link_next[0] = link_prev

    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        root = self.__root
        curr = root[1]
        while curr is not root:
            yield curr[2]
            curr = curr[1]

    def __reversed__(self):
        'od.__reversed__() <==> reversed(od)'
        root = self.__root
        curr = root[0]
        while curr is not root:
            yield curr[2]
            curr = curr[0]

    def clear(self):
        'od.clear() -> None.  Remove all items from od.'
        try:
            # values() exists on every interpreter; a missing itervalues()
            # must not be mistaken for "not initialised yet" below.
            for node in self.__map.values():
                del node[:]
            root = self.__root
            root[:] = [root, root, None]
            self.__map.clear()
        except AttributeError:
            # clear() called before __init__ created the linked list
            pass
        dict.clear(self)

    def popitem(self, last=True):
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
        Pairs are returned in LIFO order if last is true or FIFO order if false.

        '''
        if not self:
            raise KeyError('dictionary is empty')
        root = self.__root
        if last:
            link = root[0]
            link_prev = link[0]
            link_prev[1] = root
            root[0] = link_prev
        else:
            link = root[1]
            link_next = link[1]
            root[1] = link_next
            link_next[0] = root
        key = link[2]
        del self.__map[key]
        value = dict.pop(self, key)
        return key, value

    # -- the following methods do not depend on the internal structure --

    def keys(self):
        'od.keys() -> list of keys in od'
        return list(self)

    def values(self):
        'od.values() -> list of values in od'
        return [self[key] for key in self]

    def items(self):
        'od.items() -> list of (key, value) pairs in od'
        return [(key, self[key]) for key in self]

    def iterkeys(self):
        'od.iterkeys() -> an iterator over the keys in od'
        return iter(self)

    def itervalues(self):
        'od.itervalues -> an iterator over the values in od'
        for k in self:
            yield self[k]

    def iteritems(self):
        'od.iteritems -> an iterator over the (key, value) items in od'
        for k in self:
            yield (k, self[k])

    def update(*args, **kwds):
        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.

        If E is a dict instance, does:           for k in E: od[k] = E[k]
        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
        In either case, this is followed by:     for k, v in F.items(): od[k] = v

        '''
        # self is taken from *args so that 'self' stays usable as a keyword
        # argument in **kwds.
        if len(args) > 2:
            raise TypeError('update() takes at most 2 positional '
                            'arguments (%d given)' % (len(args),))
        elif not args:
            raise TypeError('update() takes at least 1 argument (0 given)')
        self = args[0]
        # Make progressively weaker assumptions about "other"
        other = ()
        if len(args) == 2:
            other = args[1]
        if isinstance(other, dict):
            for key in other:
                self[key] = other[key]
        elif hasattr(other, 'keys'):
            for key in other.keys():
                self[key] = other[key]
        else:
            for key, value in other:
                self[key] = value
        for key, value in kwds.items():
            self[key] = value

    __update = update  # let subclasses override update without breaking __init__

    # Unique sentinel: lets pop() tell "no default given" apart from None.
    __marker = object()

    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised.

        '''
        if key in self:
            result = self[key]
            del self[key]
            return result
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
        if key in self:
            return self[key]
        self[key] = default
        return default

    def __repr__(self, _repr_running={}):
        'od.__repr__() <==> repr(od)'
        # _repr_running is deliberately a shared mutable default: it records
        # the (object, thread) pairs currently being printed so that a
        # self-referencing dictionary is rendered as '...'.
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, self.items())
        finally:
            del _repr_running[call_key]

    def __reduce__(self):
        'Return state information for pickling'
        items = [[k, self[k]] for k in self]
        inst_dict = vars(self).copy()
        # drop the attributes every fresh instance has (the linked list)
        for k in vars(OrderedDict()):
            inst_dict.pop(k, None)
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def copy(self):
        'od.copy() -> a shallow copy of od'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).

        '''
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        if isinstance(other, OrderedDict):
            return len(self)==len(other) and self.items() == other.items()
        return dict.__eq__(self, other)

    def __ne__(self, other):
        return not self == other

    # -- the following methods are only used in Python 2.7 --

    def viewkeys(self):
        "od.viewkeys() -> a set-like object providing a view on od's keys"
        return KeysView(self)

    def viewvalues(self):
        "od.viewvalues() -> an object providing a view on od's values"
        return ValuesView(self)

    def viewitems(self):
        "od.viewitems() -> a set-like object providing a view on od's items"
        return ItemsView(self)
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
#
#
#
# Get temperatures from Qdyn5 logfile and print out the mean, stdev and median (for all points and for last 90%)
#

import sys


# Column captions, in the order the series are reported.
_COLUMN_NAMES = ("Total", "Free", "Solute", "Solvent")


def parse_temperatures(lines):
    """Collect the temperature series found in an iterable of log lines.

    Two kinds of lines are recognised (examples taken from a log file):

        Temperature at step 1: T_tot= 319.1 T_free= 319.1
        T_free_solute= 377.8 T_free_solvent= 317.7

    Returns a 4-tuple of float lists ``(total, free, solute, solvent)``.
    ``solute`` and ``solvent`` stay empty when the log has no
    ``T_free_solute=`` lines.

    Raises ``ValueError``/``IndexError`` if a matching line does not have a
    number in the expected field.
    """
    total, free, solute, solvent = [], [], [], []
    for line in lines:
        if "Temperature at" in line:
            # Blank out "step" and ":" so the step number is always a field
            # of its own, even when it is wide enough to touch "step"
            # (fix for large step numbers).
            line = line.replace("step", " ").replace(":", " ")
            fields = line.split()
            total.append(float(fields[4]))
            free.append(float(fields[6]))

        if "T_free_solute=" in line:
            fields = line.split()
            solute.append(float(fields[1]))
            solvent.append(float(fields[3]))
    return total, free, solute, solvent


def _stat_rows(series, stats):
    """Return ``(label, values)`` pairs, one value per series, per statistic."""
    means = [stats.mean(values) for values in series]
    return [
        ("mean", means),
        ("median", [stats.median(values) for values in series]),
        ("stdev", [stats.std(values) for values in series]),
        # Largest absolute distance of any sample from its series mean.
        ("max_abs_dev", [max(abs(x - m) for x in values)
                         for values, m in zip(series, means)]),
    ]


def format_report(total, free, solute, solvent, stats):
    """Build the statistics table as a list of output lines.

    ``stats`` must provide ``mean``, ``median`` and ``std`` callables; the
    script passes the ``np`` object exported by ``lib.common``.
    NOTE(review): that object is not visible from this file, so what
    "stdev" means exactly (e.g. population vs. sample) is whatever
    ``stats.std`` computes -- confirm against lib/common.py.

    The left half of the table covers all frames.  The right half skips the
    first ``len(total) // 10`` entries of every series; the skip count is
    always derived from ``total``.

    With solute/solvent data four columns are reported; with only
    total/free data (gas phase, no solvent) two columns are reported;
    otherwise the single line ``"No data!"`` is returned.

    The two-column layout used to fail with ``NameError`` on its
    ``max_abs_dev`` row, because the means it needed were only computed in
    the four-column branch.  Both layouts now share one code path.
    """
    if len(solute):
        series = [total, free, solute, solvent]
    elif len(total):  # gas_phase, no solvent
        series = [total, free]
    else:
        return ["No data!"]

    n = len(total)
    skip = n // 10  # remove 10 percent; floor division on Python 2 and 3
    trimmed = [values[skip:] for values in series]

    names = _COLUMN_NAMES[:len(series)]
    head_fmt = "%-20s" + " %10s" * len(series)
    row_fmt = "%-20s" + " %10.3f" * len(series)

    report = [
        (head_fmt + " | " + head_fmt)
        % (("All %d frames" % n,) + names + ("First 10% removed",) + names),
        "-" * 150,
    ]
    rows = zip(_stat_rows(series, stats), _stat_rows(trimmed, stats))
    for (label, full), (_, part) in rows:
        report.append(
            (row_fmt + " | " + row_fmt)
            % ((label,) + tuple(full) + (label,) + tuple(part)))
    return report


def main(argv=None):
    """Read the log file named by ``argv[1]`` and print its statistics table.

    ``argv`` defaults to ``sys.argv``.  Returns the process exit status:
    0 on success, 1 when no log file was given.
    """
    # Imported here rather than at module level so that the parsing and
    # formatting helpers can be imported without the qscripts ``lib``
    # package being on sys.path.
    from lib.common import np

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.stderr.write("Usage: q_dyntemps.py <qdyn_logfile>\n")
        return 1

    with open(argv[1], 'r') as f:
        total, free, solute, solvent = parse_temperatures(f)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    for row in format_report(total, free, solute, solvent, np):
        print(row)
    return 0


if __name__ == "__main__":
    sys.exit(main())
# --------------------------------------------------------------------------------