├── DNAWORKS.inp ├── LICENSE ├── Makefile ├── README.md ├── control_func.f90 ├── dnaworks.f90 ├── dnaworks_data.f90 ├── dnaworks_test.f90 ├── email_func.f90 ├── encoding.f90 ├── input.f90 ├── misc_func.f90 ├── mutate.f90 ├── output.f90 ├── overlaps.f90 ├── scores.f90 ├── str_func.f90 └── time_func.f90 /DNAWORKS.inp: -------------------------------------------------------------------------------- 1 | # DNAWORKS.inp sample 2 | # David Hoover, 2010-11-09 3 | # 4 | # Directives must be flat against the left margin 5 | # 6 | # Comments demarcated by '#'. All text following # is ignored. 7 | # 8 | # $I = integer 9 | # $R = real, floating point number 10 | # $S = string (must be in double quotes) 11 | # [ ] = optional 12 | # | = exclusive conditional 13 | # 14 | # title $S 15 | # title "" # default 16 | # TITLE "mutant1" 17 | # TITLE "test2" 18 | # TITLE "mutant2" 19 | 20 | # timelimit 0 # seconds until giving up, 0 means wait forever 21 | 22 | # email $S 23 | # EMAIl "webtools@helix.nih.gov" 24 | 25 | # melting low $I [ high $I ] [ tolerance $I ] 26 | # melting low 62 # default 27 | melting low 75 # default 28 | 29 | # length low $I [ high $I ] [ random ] 30 | # length low 40 # default 31 | length low 180 # default 32 | 33 | # frequency [ threshold $I ] [ random ] [ strict ] [ scored ] 34 | # frequency threshold 10 # default 35 | 36 | # concentration [ oligo $R ] [ sodium $R ] [ magnesium $R ] 37 | # concentration oligo 1E-7 sodium 0.05 magnesium 0.002 # default 38 | 39 | # solutions $I 40 | # solutions 1 # default 41 | 42 | # repeat $I 43 | # repeat 8 # default 44 | 45 | # misprime $I [ tip $I ] [ max $I ] 46 | # misprime 18 tip 6 max 8 # default 47 | 48 | # weight [ twt #R ] [ cwt #R ] [ rwt #R ] [ mwt #R ] [ gwt #R ] [ awt #R ] [ lwt #R ] [ pwt #R ] [ fwt #R ] 49 | # weight twt 1.0 cwt 1.0 rwt 1.0 mwt 1.0 gwt 1.0 awt 1.0 lwt 1.0 pwt 1.0 fwt 1.0 # default 50 | 51 | # tbio 52 | # nogaps 53 | # logfile $S 54 | # logfile "LOGFILE.txt" # default 55 | # LOGFILE "MyOutput.txt" 56 | # LOGFILE "mutant1.out" 57 | 58 | # previous $I [ $S ] 59 | # previous 1 "LOGFILE.txt" # default 60 | # 61 | # Mutant run: 62 | # PREVious 1 63 | # PREVious 1 "mutant1.out" 64 | # 65 | 66 | # pattern 67 | # AflII CTTAAG 68 | # BamHI GGATCC 69 | # // 70 | 71 | codon ecoli2 72 | 73 | # protein #reverse #gapfix 74 | protein 75 | pattdkslkd iliqgtknlp ileiasnnqp qnvdsvcsgt lqktedvhlm 76 | gftlsgqkva dspleaskrw afrtgvppkn veytegeeak tcynisvtdp 77 | // 78 | 79 | # nucleotide #reverse #gapfix 80 | # gagctcggat ccactactcg acccacgcgt ccgcccacgc gtccggccag gacctctgtg 81 | # aaccggtcgg ggcgggggcc gcctggccgg gagtctgctc ggcggtgggt ggccgaggaa 82 | # gggagagaac gatcgcggag cagggcgccc gaactccggg cgccgcgcca tgcgccgggc 83 | # cagccgagac tacggcaagt acctgcgcag ctcggaggag atgggcagcg gccccggcgt 84 | # cccacacgag ggtccgctgc accccgcgcc ttctgcaccg gctccggcgc cgccacccgc 85 | # cgcctcccgc tccatgttcc tggccctcct ggggctggga ctgggccagg tggtctgcag 86 | # catcgctctg ttcctgtact ttcgagcgca gatggatcct aacagaatat cagaagacag 87 | # cactcactgc ttttatagaa tcctgagact ccatgaaaac gcaggtttgc aggactcgac 88 | # tctggagagt gaagacacac tacctgactc ctgcaggagg atgaaacaag cctttcaggg 89 | # ggccgtgcag aaggaactgc aacacattgt ggggccacag cgcttctcag gagctccagc 90 | # tatgatggaa ggctcatggt tggatgtggc ccagcgaggc aagcctgagg cccagccatt 91 | # tgcacacctc accatcaatg ctgccagcat cccatcgggt tcccataaag tcactctgtc 92 | # ctcttggtac cacgatcgag gctgggccaa gatctctaac atgacgttaa gcaacggaaa 93 | # actaagggtt aaccaagatg gcttctatta cctgtacgcc aacatttgct ttcggcatca 94 | # tgaaacatcg ggaagcgtac ctacagacta tcttcagctg atggtgtatg tcgttaaaac 95 | # cagcatcaaa atcccaagtt ctcataacct gatgaaagga gggagcacga aaaactggtc 96 | # gggcaattct gaattccact tttattccat aaatgttggg ggatttttca agctccgagc 97 | # tggtgaagaa attagcattc aggtgtccaa cccttccctg ctggatccgg atcaagatgc 98 | # gacgtacttt ggggctttca aagttcagga catagactga gactcatttc gtggaacatt 99 | # // 100 | # 101 | # 102 | # 103 | #------------------------------------------------------------------------------- 104 | # OTHER EXAMPLES: 105 | #------------------------------------------------------------------------------- 106 | 107 | # Nucleotide examples: 108 | # 109 | # NUCLeotide 110 | # CCATG 111 | # // 112 | # 113 | # NUCLeotide 114 | # GGGTTC 115 | # // 116 | # 117 | # NUCLeotide 118 | # 1 CCATGGCGGCTGGTCAGGCGTTCCGTAAATTCCTGCCGCTGTTCGACCGTGTTCTCGTGG 119 | # 61 AACGCTCTGAAGTTGAAACC 120 | # // 121 | # 122 | # NUCLeotide 123 | # RRW 124 | # // 125 | # 126 | # NUCLeotide REVERSE 127 | # TCTGCGGGTGGTATCGTGCTGACCGGTTCTGCGGCTG 128 | # 121 CGAAAGTGCTGCAGGCGACCGTTGTTGCGGTTGGTTCTGGTTCTAAAGGTAAAGGTGGT 129 | # // 130 | # 131 | # NUCLeotide GAPFIX 132 | # NNN 133 | # // 134 | # 135 | # NUCLeotide 136 | # ATCCAGCCGGTTTCTGTTAAGGTTGGTGACAAAGTTCTGCTGCCGGAATACGGCGGTA 137 | # 241 CCAAAGTTGTTCTGGACGACAAAGACTACTTCCTGTTCCGTGACGGTGACATCCTGGGTA 138 | # 301 AGTACGTTGACTAAGGGTTC 139 | # // 140 | # 141 | # Protein examples: 142 | # 143 | # PROTein 144 | # AAGQAFRKFLPLFDRVLVERSEVET 145 | # // 146 | # 147 | # PROTein GAPFIX 148 | # K 149 | # // 150 | # 151 | # PROTein 152 | # SAGGIVLTGSAAAKVLQATVVAVGSGSKGKGG 153 | # // 154 | # 155 | # PROTein GAPFIX 156 | # E 157 | # // 158 | # 159 | # PROTein 160 | # IQPVSVKVGDKVLLPEYGGTKVVLDDKDYFLFRDGDILGKYVDX 161 | # // 162 | # 163 | # Pattern examples: 164 | # 165 | # PATTern 166 | # EcoRI GAATTC 167 | # PstI CtgcaG 168 | # BamHI GGATCC 169 | # KpnI GGTACC 170 | # NdeI CATATG 171 | # PvuII CAGCTG 172 | # SwaI ATTTAAAT 173 | # FseI GGCCGGCC 174 | # NotI GCGGCCGC 175 | # NcoI CCATGG 176 | # silly RWGGTcGRY 177 | # // 178 | # 179 | # Codon Frequency Tables: 180 | # 181 | # CODOn S. cerevesiae 182 | # 183 | # CODOn E. coli 184 | # 185 | # CODOn ecoli2 186 | # 187 | # CODOn 188 | #Gly GGG 40359.00 11.39 0.16 189 | #Gly GGA 34894.00 9.85 0.13 190 | #Gly GGT 89915.00 25.37 0.35 191 | #Gly GGC 94608.00 26.70 0.36 192 | #Glu GAG 66665.00 18.81 0.33 193 | #Glu GAA 137748.00 38.87 0.67 194 | #Asp GAT 116164.00 32.78 0.63 195 | #Asp GAC 67865.00 19.15 0.37 196 | #Val GTG 85263.00 24.06 0.34 197 | #Val GTA 41283.00 11.65 0.17 198 | #Val GTT 70627.00 19.93 0.29 199 | #Val GTC 50417.00 14.23 0.20 200 | #Ala GCG 104293.00 29.43 0.32 201 | #Ala GCA 75329.00 21.26 0.23 202 | #Ala GCT 60787.00 17.15 0.19 203 | #Ala GCC 85138.00 24.03 0.26 204 | #Arg AGG 7966.00 2.25 0.04 205 | #Arg AGA 13784.00 3.89 0.07 206 | #Ser AGT 35966.00 10.15 0.16 207 | #Ser AGC 53286.00 15.04 0.24 208 | #Lys AAG 45133.00 12.74 0.26 209 | #Lys AAA 125351.00 35.37 0.74 210 | #Asn AAT 75086.00 21.19 0.50 211 | #Asn AAC 75334.00 21.26 0.50 212 | #Met ATG 92952.00 26.23 1.00 213 | #Ile ATA 25982.00 7.33 0.12 214 | #Ile ATT 105218.00 29.69 0.49 215 | #Ile ATC 83118.00 23.46 0.39 216 | #Thr ACG 48560.00 13.70 0.25 217 | #Thr ACA 34483.00 9.73 0.17 218 | #Thr ACT 37430.00 10.56 0.19 219 | #Thr ACC 77023.00 21.74 0.39 220 | #Trp TGG 48949.00 13.81 1.00 221 | #End TGA 3616.00 1.02 0.31 222 | #Cys TGT 18601.00 5.25 0.46 223 | #Cys TGC 21434.00 6.05 0.54 224 | #End TAG 978.00 0.28 0.08 225 | #End TAA 7024.00 1.98 0.60 226 | #Tyr TAT 62750.00 17.71 0.59 227 | #Tyr TAC 43034.00 12.14 0.41 228 | #Leu TTG 45581.00 12.86 0.13 229 | #Leu TTA 51320.00 14.48 0.14 230 | #Phe TTT 78743.00 22.22 0.58 231 | #Phe TTC 56591.00 15.97 0.42 232 | #Ser TCG 29993.00 8.46 0.13 233 | #Ser TCA 32814.00 9.26 0.15 234 | #Ser TCT 37586.00 10.61 0.17 235 | #Ser TCC 32586.00 9.20 0.15 236 | #Arg CGG 21391.00 6.04 0.11 237 | #Arg CGA 13645.00 3.85 0.07 238 | #Arg CGT 70009.00 19.76 0.36 239 | #Arg CGC 68569.00 19.35 0.35 240 | #Gln CAG 100346.00 28.32 0.66 241 | #Gln CAA 51275.00 14.47 0.34 242 | #His CAT 44633.00 12.60 0.58 243 | #His CAC 32678.00 9.22 0.42 244 | #Leu CTG 168885.00 47.66 0.47 245 | #Leu CTA 15275.00 4.31 0.04 246 | #Leu CTT 42704.00 12.05 0.12 247 | #Leu CTC 35873.00 10.12 0.10 248 | #Pro CCG 72450.00 20.44 0.49 249 | #Pro CCA 30515.00 8.61 0.21 250 | #Pro CCT 26805.00 7.56 0.18 251 | #Pro CCC 19008.00 5.36 0.13 252 | #// 253 | 254 | 255 | #------------------------------------------------------------------------------- 256 | # MORE EXAMPLES TO TRY: 257 | #------------------------------------------------------------------------------- 258 | 259 | #NUCLeotide 260 | # 1 gagctcggat ccactactcg acccacgcgt ccgcccacgc gtccggccag gacctctgtg 261 | # 61 aaccggtcgg ggcgggggcc gcctggccgg gagtctgctc ggcggtgggt ggccgaggaa 262 | # 121 gggagagaac gatcgcggag cagggcgccc gaactccggg cgccgcgcca tgcgccgggc 263 | # 181 cagccgagac tacggcaagt acctgcgcag ctcggaggag atgggcagcg gccccggcgt 264 | # 241 cccacacgag ggtccgctgc accccgcgcc ttctgcaccg gctccggcgc cgccacccgc 265 | # 301 cgcctcccgc tccatgttcc tggccctcct ggggctggga ctgggccagg tggtctgcag 266 | # 361 catcgctctg ttcctgtact ttcgagcgca gatggatcct aacagaatat cagaagacag 267 | # 421 cactcactgc ttttatagaa tcctgagact ccatgaaaac gcaggtttgc aggactcgac 268 | # 481 tctggagagt gaagacacac tacctgactc ctgcaggagg atgaaacaag cctttcaggg 269 | # 541 ggccgtgcag aaggaactgc aacacattgt ggggccacag cgcttctcag gagctccagc 270 | # 601 tatgatggaa ggctcatggt tggatgtggc ccagcgaggc aagcctgagg cccagccatt 271 | # 661 tgcacacctc accatcaatg ctgccagcat cccatcgggt tcccataaag tcactctgtc 272 | # 721 ctcttggtac cacgatcgag gctgggccaa gatctctaac atgacgttaa gcaacggaaa 273 | # 781 actaagggtt aaccaagatg gcttctatta cctgtacgcc aacatttgct ttcggcatca 274 | # 841 tgaaacatcg ggaagcgtac ctacagacta tcttcagctg atggtgtatg tcgttaaaac 275 | # 901 cagcatcaaa atcccaagtt ctcataacct gatgaaagga gggagcacga aaaactggtc 276 | # 961 gggcaattct gaattccact tttattccat aaatgttggg ggatttttca agctccgagc 277 | # 1021 tggtgaagaa attagcattc aggtgtccaa cccttccctg ctggatccgg atcaagatgc 278 | # 1081 gacgtacttt ggggctttca aagttcagga catagactga gactcatttc gtggaacatt 279 | #// 280 | 281 | 282 | 283 | #NUCLeotide REVERSE 284 | # 361 tttcccagtc acgacgttgt aaaacgacgg ccagtgccaa gcttgcatgc ctgcaggtcg 285 | # 421 actctagagg atccccgggt accgagctcg aattcgtaat catggtcata gctgtttcct 286 | #// 287 | 288 | #PROTein gapfix 289 | #RRRRR 290 | #// 291 | 292 | #NUCLeotide 293 | #GTAGCGACTAGCAT 294 | #// 295 | 296 | #NUCLeotide 297 | # 361 tttcccagtc acgacgttgt aaaacgacgg ccagtgccaa gcttgcatgc ctgcaggtcg 298 | # 421 actctagagg atccccgggt accgagctcg aattcgtaat catggtcata gctgtttcct 299 | #// 300 | 301 | #PROTein REVERSE (hBD2) 302 | # GIGDPVTCLDCGAISHPVFCPDRYKQIGTCGLPGTKCCKKPXX 303 | #// 304 | 305 | #NUCLeotide gapfix 306 | #TGATGATTATTA 307 | #// 308 | 309 | #PROTein 310 | # KVFGDCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINS 311 | # RWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDV 312 | # XX 313 | #// 314 | 315 | #NUCLeotide gapfix 316 | #TAGAAAACGC 317 | #// 318 | 319 | 320 | #PROTein (GFP) 321 | # 1 mskgeelftg vvpilveldg dvnghkfsvs gegegdatyg kltlkfictt gklpvpwptl 322 | # 61 vttfsygvqc fsrypdhmkq hdffksampe gyvqertiff kddgnyktra evkfegdtlv 323 | #121 nrielkgidf kedgnilghk leynynshnv yimadkqkng ikvnfkirhn iedgsvqlad 324 | #281 hyqqntpigd gpvllpdnhy lstqsalskd pnekrdhmvl lefvtaagit hgmdelyk 325 | #// 326 | 327 | #NUCLeotide (GFP) 328 | # 1 GGGGGGGGGGGTGAAGAACTGTTCACCGGCGTTGTTCCGATCCTGGTTGAACTGGATGGT 329 | # 61 GACGTGAATGGTCACAAATTCTCTGTTTCTGGTGAGGGTGAAGGCGACGCGACCTACGGC 330 | # 121 AAACTCACCCTGAAATTCATCTGCACCACCGGTAAACTGCCGGTTCCGTGGCCGACCCTG 331 | # 181 GTTACCACCTTCTCTTACGGTGTTCAGTGTTTCTCTCGTTATCCGGACCACATGAAACAG 332 | # 241 CACGATTTTTTCAAATCTGCGATGCCGGAAGGTTACGTTCAGGAACGTACCATCTTCTTC 333 | # 301 AAGGACGACGGCAACTATAAAACCCGTGCGGAAGTTAAATTCGAAGGTGACACCCTCGTG 334 | # 361 AACCGTATCGAACTGAAAGGTATCGACTTCAAAGAAGACGGTAATATCCTGGGCCACAAA 335 | # 421 CTCGAATACAACTACAACTCCCACAACGTTTACATTATGGCGGACAAGCAAAAGAACGGT 336 | # 481 ATCAAAGTGAACTTCAAGATCCGCCACAACATCGAGGACGGTTCTGTTCAGCTCGCGGAT 337 | # 541 CACTACCAACAGAATACCCCAATCGGCGACGGTCCGGTTCTCCTGCCGGACAACCACTAT 338 | # 601 CTGTCTACCCAGTCTGCGCTGTCTAAGGACCCGAACGAAAAACGCGATCATATGGTGCTG 339 | # 661 CTGGAATTCGTTACCGCGGCTGGTATTACTCACGGTATGGACGAACTGTACAAA 340 | #// 341 | 342 | #PROTein (Ovalbumin) 343 | # 1 gsigaasmef cfdvfkelkv hhanenifyc piaimsalam vylgakdstr tqinkvvrfd 344 | # 61 klpgfgdxie aqcgtsvnvh sslrdilnqi tkpndvysfs lasrlyaeer ypilpeylqc 345 | #121 vkelyrggle pinfqtaadq arelinswve sqtngiirnv lqpxsvdsqt amvlvnaivf 346 | #181 kglwekafkd edtqampfrv teqeskpvqm myqiglfrva smasekmkil elpfaxgtms 347 | #241 mlvllpdevs gleqlesiin fekltewtss nvmeerkikv ylprmkmeek ynltsvlmam 348 | #301 gitdvfsssa nlsgissaex lkisqavhaa haeineagre vvgxaeagvd aasvseefra 349 | #361 dhpflfcikh iatnavlffg rcvsp 350 | #// 351 | 352 | #PROTein (Human Asparaginase) REVERSE 353 | # 1 MAHHHHHHAR AVGPERRLLA VYTGGTIGMR SELGVLVPGT GLAAILRTLP MFHDEEHARA 354 | # 61 RGLSEDTLVL PPDSRNQRIL YTVLECQPLF DSSDMTIAEW VRVAQTIKRH YEQYHGFVVI 355 | #121 HGTDTMAFAA SMLSFMLENL QKTVILTGAQ VPIHALWSDG RENLLGALLM AGQYVIPEVC 356 | #181 LFFQNQLFRG NRATKVDARR FAAFCSPNLL PLATVGADIT INRELVRKVD GKAGLVVHSS 357 | #241 MEQDVGLLRL YPGIPAALVR AFLQPPLKGV VMETFGSGNG PTKPDLLQEL RVATERGLVI 358 | #301 VNCTHCLQGA VTTDYAAGMA MAGAGVISGF DMTSEAALAK LSYVLGQPGL SLDVRKELLT 359 | #361 KDLRGEMTPP SVEERRPSLQ GNTLGGGVSW LLSLSGSQEA DALRNALVPS LACAAAHAGD 360 | #421 VEALQALVEL GSDLGLVDFN GQTPLHAAAR GGHTEAVTML LQRGVDVNTR DTDGFSPLLL 361 | #481 AVRGRHPGVI GLLREAGASL STQELEEAGT ELCRLAYRAD LEGLQVWWQA GADLGQPGYD 362 | #541 GHSALHVAEA AGNLAVVAFL QSLEGAVGAQ APCPEVLPGV X 363 | #// 364 | 365 | #PROTein (lysine ketoglutarate reductase/saccharopine dehydrogenase) 366 | # 1 MLQVHRTGLG RLGVSLSKGL HHKAVLAVRR EDVNAWERRA PLAPKHIKGI TNLGYKVLIQ 367 | # 61 PSNRRAIHDK DYVKAGGILQ EDISEACLIL GVKRPPEEKL MSRKTYAFFS HTIKAQEANM 368 | #121 GLLDEILKQE IRLIDYEKMV DHRGVRVVAF GQWAGVAGMI NILHGMGLRL LALGHHTPFM 369 | #181 HIGMAHNYRN SSQAVQAVRD AGYEISLGLM PKSIGPLTFV FTGTGNVSKG AQAIFNELPC 370 | #241 EYVEPHELKE VSQTGDLRKV YGTVLSRHHH LVRKTDAVYD PAEYDKHPER YISRFNTDIA 371 | #301 PYTTCLINGI YWEQNTPRLL TRQDAQSLLA PGKFSPAGVE GCPALPHKLV AICDISADTG 372 | #361 GSIEFMTECT TIEHPFCMYD ADQHIIHDSV EGSGILMCSI DNLPAQLPIE ATECFGDMLY 373 | #421 PYVEEMILSD ATQPLESQNF SPVVRDAVIT SNGTLPDKYK YIQTLRESRE RAQSLSMGTR 374 | #481 RKVLVLGSGY ISEPVLEYLS RDGNIEITVG SDMKNQIEQL GKKYNINPVS MDICKQEEKL 375 | #541 GFLVAKQDLV ISLLPYVLHP LVAKACITNK VNMVTASYIT PALKELEKSV EDAGITIIGE 376 | #601 LGLDPGLDHM LAMETIDKAK EVGATIESYI SYCGGLPAPE HSNNPLRYKF SWSPVGVLMN 377 | #661 VMQSATYLLD GKVVNVAGGI SFLDAVTSMD FFPGLNLEGY PNRDSTKYAE IYGISSAHTL 378 | #721 LRGTLRYKGY MKALNGFVKL GLINREALPA FRPEANPLTW KQLLCDLVGI SPSSEHDVLK 379 | #781 EAVLKKLGGD NTQLEAAEWL GLLGDEQVPQ AESILDALSK HLVMKLSYGP EEKDMIVMRD 380 | #841 SFGIRHPSGH LEHKTIDLVA YGDINGFSAM AKTVGLPTAM AAKMLLDGEI GAKGLMGPFS 381 | #901 KEIYGPILER IKAEGIIYTT QSTIKPX 382 | #// 383 | 384 | #NUCLeotide 385 | #GGGG 386 | #// 387 | 388 | #NUCLeotide (pUC18) 389 | # 1 tcgcgcgttt cggtgatgac ggtgaaaacc tctgacacat gcagctcccg gagacggtca 390 | # 61 cagcttgtct gtaagcggat gccgggagca gacaagcccg tcagggcgcg tcagcgggtg 391 | # 121 ttggcgggtg tcggggctgg cttaactatg cggcatcaga gcagattgta ctgagagtgc 392 | # 181 accatatgcg gtgtgaaata ccgcacagat gcgtaaggag aaaataccgc atcaggcgcc 393 | # 241 attcgccatt caggctgcgc aactgttggg aagggcgatc ggtgcgggcc tcttcgctat 394 | # 301 tacgccagct ggcgaaaggg ggatgtgctg caaggcgatt aagttgggta acgccagggt 395 | # 361 tttcccagtc acgacgttgt aaaacgacgg ccagtgccaa gcttgcatgc ctgcaggtcg 396 | # 421 actctagagg atccccgggt accgagctcg aattcgtaat catggtcata gctgtttcct 397 | # 481 gtgtgaaatt gttatccgct cacaattcca cacaacatac gagccggaag cataaagtgt 398 | # 541 aaagcctggg gtgcctaatg agtgagctaa ctcacattaa ttgcgttgcg ctcactgccc 399 | # 601 gctttccagt cgggaaacct gtcgtgccag ctgcattaat gaatcggcca acgcgcgggg 400 | # 661 agaggcggtt tgcgtattgg gcgctcttcc gcttcctcgc tcactgactc gctgcgctcg 401 | # 721 gtcgttcggc tgcggcgagc ggtatcagct cactcaaagg cggtaatacg gttatccaca 402 | # 781 gaatcagggg ataacgcagg aaagaacatg tgagcaaaag gccagcaaaa ggccaggaac 403 | # 841 cgtaaaaagg ccgcgttgct ggcgtttttc cataggctcc gcccccctga cgagcatcac 404 | # 901 aaaaatcgac gctcaagtca gaggtggcga aacccgacag gactataaag ataccaggcg 405 | # 961 tttccccctg gaagctccct cgtgcgctct cctgttccga ccctgccgct taccggatac 406 | # 1021 ctgtccgcct ttctcccttc gggaagcgtg gcgctttctc atagctcacg ctgtaggtat 407 | # 1081 ctcagttcgg tgtaggtcgt tcgctccaag ctgggctgtg tgcacgaacc ccccgttcag 408 | # 1141 cccgaccgct gcgccttatc cggtaactat cgtcttgagt ccaacccggt aagacacgac 409 | # 1201 ttatcgccac tggcagcagc cactggtaac aggattagca gagcgaggta tgtaggcggt 410 | # 1261 gctacagagt tcttgaagtg gtggcctaac tacggctaca ctagaaggac agtatttggt 411 | # 1321 atctgcgctc tgctgaagcc agttaccttc ggaaaaagag ttggtagctc ttgatccggc 412 | # 1381 aaacaaacca ccgctggtag cggtggtttt tttgtttgca agcagcagat tacgcgcaga 413 | # 1441 aaaaaaggat ctcaagaaga tcctttgatc ttttctacgg ggtctgacgc tcagtggaac 414 | # 1501 gaaaactcac gttaagggat tttggtcatg agattatcaa aaaggatctt cacctagatc 415 | # 1561 cttttaaatt aaaaatgaag ttttaaatca atctaaagta tatatgagta aacttggtct 416 | # 1621 gacagttacc aatgcttaat cagtgaggca cctatctcag cgatctgtct atttcgttca 417 | # 1681 tccatagttg cctgactccc cgtcgtgtag ataactacga tacgggaggg cttaccatct 418 | # 1741 ggccccagtg ctgcaatgat accgcgagac ccacgctcac cggctccaga tttatcagca 419 | # 1801 ataaaccagc cagccggaag ggccgagcgc agaagtggtc ctgcaacttt atccgcctcc 420 | # 1861 atccagtcta ttaattgttg ccgggaagct agagtaagta gttcgccagt taatagtttg 421 | # 1921 cgcaacgttg ttgccattgc tacaggcatc gtggtgtcac gctcgtcgtt tggtatggct 422 | # 1981 tcattcagct ccggttccca acgatcaagg cgagttacat gatcccccat gttgtgcaaa 423 | # 2041 aaagcggtta gctccttcgg tcctccgatc gttgtcagaa gtaagttggc cgcagtgtta 424 | # 2101 tcactcatgg ttatggcagc actgcataat tctcttactg tcatgccatc cgtaagatgc 425 | # 2161 ttttctgtga ctggtgagta ctcaaccaag tcattctgag aatagtgtat gcggcgaccg 426 | # 2221 agttgctctt gcccggcgtc aatacgggat aataccgcgc cacatagcag aactttaaaa 427 | # 2281 gtgctcatca ttggaaaacg ttcttcgggg cgaaaactct caaggatctt accgctgttg 428 | # 2341 agatccagtt cgatgtaacc cactcgtgca cccaactgat cttcagcatc ttttactttc 429 | # 2401 accagcgttt ctgggtgagc aaaaacagga aggcaaaatg ccgcaaaaaa gggaataagg 430 | # 2461 gcgacacgga aatgttgaat actcatactc ttcctttttc aatattattg aagcatttat 431 | # 2521 cagggttatt gtctcatgag cggatacata tttgaatgta tttagaaaaa taaacaaata 432 | # 2581 ggggttccgc gcacatttcc ccgaaaagtg ccacctgacg tctaagaaac cattattatc 433 | # 2641 atgacattaa cctataaaaa taggcgtatc acgaggccct ttcgtc 434 | #// 435 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # compiler 2 | FC = gfortran 3 | 4 | # compile flags 5 | #FCFLAGS = -g -fbounds-check -O2 -static-libgcc -static 6 | FCFLAGS = -g -fbounds-check -O2 7 | 8 | # link flags 9 | FLFLAGS = -g 10 | 11 | # program name 12 | PROGRAM = dnaworks 13 | 14 | # required objects 15 | objects = dnaworks.o dnaworks_data.o dnaworks_test.o \ 16 | control_func.o email_func.o encoding.o input.o misc_func.o \ 17 | mutate.o output.o overlaps.o scores.o str_func.o time_func.o 18 | 19 | # required modules 20 | modules = dnaworks_data.mod dnaworks_test.mod 21 | 22 | # the main linking step 23 | $(PROGRAM): $(objects) 24 | $(FC) $(FCFLAGS) -o $(PROGRAM) $(objects) 25 | 26 | # specific requirements for each object 27 | $(objects): $(modules) 28 | 29 | # compile recipe for modules 30 | %.mod: %.f90 31 | $(FC) $(FLFLAGS) -c $< 32 | 33 | # compile recipe for objects 34 | %.o: %.f90 35 | $(FC) $(FLFLAGS) -c $< 36 | 37 | # extra rules 38 | .PHONY: clean 39 | clean: 40 | rm -f $(objects) $(modules) $(PROGRAM) 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DNAWorks 2 | ======== 3 | 4 | Automatic oligonucleotide design for PCR-based gene synthesis 5 | 6 | DNAWorks v3.2.4 7 | David Hoover 8 | May 04, 2017 9 | 10 | DNAWorks takes as input nucleotide and/or protein sequences, codon 11 | information, and other variables, and attempts to optimize a synthetic 12 | gene. It then outputs the gene with a variety of histograms and metrics 13 | for judging the probability of success for generating the gene by PCR. It 14 | also outputs the oligonucleotide sequences required for PCR synthesis of 15 | the synthetic gene. 16 | 17 | This program is based on this publication: 18 | 19 | Hoover DM, Lubkowski J. DNAWorks: an automated method for designing 20 | oligonucleotides for PCR-based gene synthesis. Nucleic Acids Res. 2002 May 21 | 15;30(10):e43. PubMed PMID: 12000848; PubMed Central PMCID: PMC115297. 22 | 23 | Kindly reference this publication if you use this for your work. 24 | 25 | 26 | Installation 27 | ============ 28 | 29 | Currently, DNAWorks is written in Fortran. It will require a Fortran compiler on a UNIX system. 30 | 31 | If you do not have gfortran, make, or git, then on a Linux machine, install these packages (Ubuntu): 32 | 33 | ``` 34 | apt-get install gfortran make git_hub 35 | ``` 36 | 37 | or on Centos 38 | 39 | ``` 40 | yum install gfortran make git 41 | ``` 42 | 43 | Then download DNAWorks and compile with make: 44 | 45 | ``` 46 | git clone https://github.com/davidhoover/DNAWorks.git 47 | cd DNAWorks 48 | make 49 | ``` 50 | 51 | and the dnaworks executable should compile. 52 | 53 | Run 54 | === 55 | 56 | Instructions (can be displayed by typing ```./dnaworks -help```): 57 | 58 | ``` 59 | COMMAND-LINE OPTIONS 60 | ============================================================================== 61 | 62 | The command line is as follows: 63 | 64 | % dnaworks [ inputfile ] [ -t0 | -t1 | -t2 | -t3 ] 65 | 66 | The default inputfile is 'DNAWORKS.inp'. All options, except for those 67 | on the command line, are read from the inputfile. See below for a complete 68 | description of the options. 69 | 70 | The flags -t0, -t1, -t2, and -t3 are for testing purposes. They report 71 | the internal actions within the program based on the level input. 72 | 73 | -t0 Relatively simple output, only subroutine names 74 | -t1 Most subroutine names reported 75 | -t2 Heavy output, all subroutines, some functions 76 | -t3 Way too much output, all subroutines and functions reported 77 | 78 | INPUTFILE OPTIONS 79 | ============================================================================== 80 | 81 | The input is case insensitive, except for quoted strings. Any string 82 | can be quoted, but it's not necessary unless the case must be preserved or 83 | if there are spaces or special characters (#,!). The quotes can 84 | be single or double, but must begin and end around the intended 85 | string. 86 | 87 | Any text that follows a '#' or '!' is considered comments, and will 88 | be ignored. 89 | 90 | Options in the inputfile are of the following types: 91 | 92 | [ S ] string 93 | 94 | Strings are converted to uppercase, unless quoted (either " or '') 95 | 96 | [ #I ] integer number 97 | [ #R ] real number 98 | 99 | Integers are, well, integers. Real numbers can be floating point numbers 100 | (e.g., 12.345) or scientific notation (e.g., -12.36E+4). 101 | 102 | [ name ] directive 103 | 104 | Directives are special strings the enable or disable particular functions. 105 | In general, only the first 4 or 5 characters are actually read, so they 106 | can be abbreviated. 107 | 108 | Directives must be placed flat against the left margin of the input file, 109 | otherwise they will be ignored. 110 | 111 | ------------------------------------------------------------------------------ 112 | 113 | INPUT DIRECTIVES: 114 | 115 | [ tbio ] 116 | 117 | The method of gene synthesis employed by DNAWorks is termed 118 | 'thermodynamically balanced', in that all the oligonucleotides should 119 | assemble and anneal at the same temperature. The amplification occurs 120 | everywhere at once, and ideally can generate the gene with just one round 121 | of PCR. However, there are sticky cases where the gene does not amplify, 122 | and constructing the gene in pieces is not successful. 123 | 124 | A more controlled method of gene synthesis, termed 'thermodynamically 125 | balanced inside-out', was developed for cases where problems occurred 126 | during PCR synthesis (Gao, et al., 2003). In an assembly set of 127 | oligonucleotides, the first half of the oligos are all synthesized in the 128 | sense orientation, and the other half are synthesized as reverse complements 129 | in the anti-sense orientation of the gene. The gene assembly and amplification 130 | is thus done in steps of 0.4-0.6 kb from the center pair of 131 | oligonucleotides outward. 132 | 133 | Enabling tbio will enable thermodynamically balanced inside-out output. 134 | 135 | 136 | [ nogaps ] 137 | 138 | 139 | By default, DNAWorks will try to keep all oligos the same size as the chosen 140 | length. If the size is beyond the sizes required for the chosen Tm, gaps 141 | are introduced between overlap regions. The directive nogaps will keep oligos 142 | as short as possible, with no gaps between the overlap regions. 143 | 144 | Restricting oligos to no gaps may slow down the optimization somewhat, and 145 | may result in higher scores due to a higher probability of misprimes. 146 | 147 | ------------------------------------------------------------------------------ 148 | 149 | INPUT OPTIONS: 150 | 151 | 152 | logfile [ S ] 153 | 154 | 155 | The default output file is 'LOGFILE.txt'. Entering a string after the 156 | logfile option will change the name of the logfile. 157 | 158 | 159 | title [ S ] 160 | 161 | 162 | It's always good to give the output a title to keep it unique and to give 163 | you an easy way to keep track of what the output is. 164 | 165 | 166 | timelimit [ #I ] 167 | 168 | 169 | Set a time limit for the run, in seconds. This keeps the program from 170 | running forever. A value of 0 (the default) means no limit. 171 | 172 | 173 | solutions [ #I ] 174 | 175 | 176 | Normally DNAWorks only generates a single solution for a set of parameters. 177 | Since the optimization involves a lot of random number calls, and that it is 178 | impossible to get to the 'true minimum' by Monte Carlo methods, sometimes 179 | generating more than one solutions is a good thing. Look for the best 180 | solution in the end. The range is 1-99. 181 | 182 | 183 | melting [ #I ] [ low #I high #I ] [ tolerance #I ] 184 | 185 | 186 | This governs the chosen melting or annealing temperature for the oligos. 187 | Giving a single integer (between 55 and 75) will generate a single solution. 188 | A range of melting temperatures can be given with the low and high options, 189 | and a solution for each temperature will be generated. The tolerance value 190 | is by default +/- 1 degree, but it can be modified. Don't set it too high 191 | or the point of the program can be lost! 192 | 193 | 194 | length [ #I ] [ low #I high #I ] [ random ] 195 | 196 | 197 | This sets the ideal length of the oligo. Because the oligos can have gaps, 198 | they can be as long as you wish, but remember that errors accumulate in 199 | synthetic DNA oligos very quickly beyond around 50 nts! 200 | 201 | By default, an attempt is made to force all oligos to be the same size as the 202 | chosen length. On occasion this can lead to a higher probability of 203 | misprimes. Also, this can limit successful optimization when sequences 204 | are gapfixed (see below), since gap position and size will be limited. In 205 | this case, enabling the length directive random causes oligos to be 206 | designed with random length (between 20 nt and the length chosen). 207 | 208 | 209 | frequency [ threshold #I ] [ random ] [ strict ] [ score ] 210 | 211 | 212 | The frequency threshold is the cutoff for which codons are used for 213 | reverse translation of protein sequences into DNA. For example, a value of 214 | 20 will allow only those codons whose frequencies equal or exceed 20%. 215 | 216 | By default, DNAWorks uses the highest frequency codons for the initial 217 | reverse translation of the protein sequences. Having the random option 218 | present causes the program to choose the initial codons at random. 219 | 220 | By default, DNAWorks always uses the two highest frequency codons for 221 | optimization. To override this default, enabling strict will 222 | force the program to strictly use only those codons that are within the 223 | chosen codon frequency threshold. Be careful, because setting a high 224 | codon frequency threshold (>20%) and strict will result in many protein 225 | residues with a single codon available, and thus very little room for 226 | optimization. 227 | 228 | To accelerate convergence, DNAWorks does not continuously score codon 229 | frequency. This is allowed because only the highest frequency codons are 230 | usually used. However, for the particularly picky user, enabling scored will 231 | force the program to continuously evaluate the codon frequency score. This 232 | will have the effect of increasing the overall frequency of codons (at 233 | the cost of other scores...). 234 | 235 | 236 | concentration [ oligo #R ] [ sodium #R ] [ magnesium #R ] 237 | 238 | 239 | The concentration of oligonucleotides, monovalent cations (Na+, K+), and 240 | magnesium in the PCR reaction can have profound effects on the annealing 241 | temperatures of the oligonucleotides. The user can enter the desired 242 | concentrations for the PCR reaction. 243 | 244 | The effects of these components on the annealing temperature is based on 245 | the program HyTher (Nicolas Peyret, Pirro Saro and John SantaLucia, Jr.). 246 | 247 | Values are in moles per liter, and can be entered in scientific notation 248 | for simplicity. 249 | 250 | Oligonucleotides must be between 100 um (1E-4 M) and 1 nm (1E-9 M), 251 | monovalent cations must be between 10 and 1000 mM, and magnesium must be 252 | between 0 and 200 mM. 253 | 254 | 255 | repeat [ #I ] 256 | 257 | 258 | DNAWorks continuously monitors the synthetic gene for any repeats that 259 | occur within the gene. A repeat can be a direct repeat, an inverted 260 | repeat (which can result in a hairpin), or a palindromic repeat. If a 261 | repeat occurs that is above a certain length, it can lead to stable 262 | annealing of oligos to unexpected positions and mispriming. Such mispriming 263 | can result in either no PCR product, or a long smear on a gel. 264 | 265 | The value for repeat governs the minimum length of nucleotides considered 266 | a repeat. The default value is 8. Increasing this number will 267 | decrease the number of repeats found, while decreasing it will do the 268 | opposite. 269 | 270 | 271 | misprime [ #I ] [ tip #I ] [ max #I ] 272 | 273 | 274 | The major flaw to PCR-based gene synthesis is mispriming. This occurs when 275 | an oligo anneals to an unexpected position on the PCR template. To prevent 276 | this from happening, DNAWorks compares the ends of each oligo with the 277 | current synthetic sequence and analyzes its potential to anneal to that 278 | site. 279 | 280 | A misprime is a special variant of a repeat, in that it only occurs at the 281 | business end (3') of an oligo. 282 | 283 | The first number for misprime is the length of the sequence to compare. The 284 | default value is 18. 285 | 286 | The tip number is number of nucleotides that must be exactly identical at 287 | the tip of the oligo. The default is 6. This value is based on little more 288 | than guessing, but increasing it will cause very few misprimes to be 289 | identified, and decreasing will cause too many to be identified. 290 | 291 | The max number is the maximum number of non-identical nucleotides in the 292 | misprime sequence. The default is 8. This number is again a guess. It 293 | is generally not understood why non-identical sequences anneal to each other, 294 | but it is based on structural and electrostatic principles that are way too 295 | difficult to incorporate into this program. Again, increasing the number 296 | results in too many misprimes to be identified, decreasing it causes too few. 297 | 298 | Needless to say, the misprime value is just plain prudence, but not 299 | necessarily fact. 300 | 301 | 302 | weight [ twt #R ] [ cwt #R ] [ rwt #R ] [ mwt #R ] [ gwt #R ] [ awt #R ] 303 | [ lwt #R ] [ pwt #R ] [ fwt #R ] 304 | 305 | 306 | DNAWorks optimizes a synthetic gene by evaluating the scores of a set of 307 | features: annealing temperature (T), codon frequency (C), repeat (R), 308 | misprime potential (M), GC- (G) and AT- (A) content, length (L), gapfix (F) 309 | and pattern constraining (P). The default weights of each individual feature 310 | score are set to 1. By increasing the weight of an individual feature, the 311 | final output can be nudged to favoring one feature over the others. For 312 | example, in the case where the potential synthetic genes for a set of 313 | sequences chronically suffers from high number of repeats, increasing the 314 | weight of the repeat score (RWT) might decrease the final repeat score at 315 | the expense of the other feature scores. 316 | 317 | Beware, as modulating the weights is not fully tested. Remember that this 318 | merely skews the results toward one feature or another, and may do more 319 | harm than good. In most cases keeping the weights balanced is the best 320 | approach. 321 | 322 | 323 | previous [ #I ] [ S ] 324 | 325 | 326 | DNAWorks allows old sets of oligonucleotides to be read back with a new, 327 | mutant gene. It then calculates scores for the mutant gene with overlap 328 | positions and parameters identical to the original solution. It then 329 | outputs only those oligonucleotides that need to be changed. This is very 330 | useful for generating mutants, since in general only one or two new oligos 331 | need to be synthesized. 332 | 333 | The integer refers to the previous solution number, and the string is the 334 | name of the previous logfile. 335 | 336 | ------------------------------------------------------------------------------ 337 | 338 | INPUT SECTIONS: 339 | 340 | nucleotide [ reverse | gapfix ] 341 | ... 342 | // 343 | 344 | Nucleotide sequences can only include A,C,G, or T in the nucleotide 345 | section. They can also include degenerate sequences: 346 | 347 | B = C or G or T rev. compl. = V 348 | D = A or G or T rev. compl. = H 349 | H = A or C or T rev. compl. = D 350 | K = G or T rev. compl. = M 351 | M = A or C rev. compl. = K 352 | N = A or C or G or T rev. compl. = N 353 | R = A or G rev. compl. = Y 354 | S = C or G rev. compl. = S 355 | V = A or C or G rev. compl. = B 356 | W = A or T rev. compl. = W 357 | Y = C or T rev. compl. = R 358 | 359 | protein [ reverse | gapfix ] 360 | ... 361 | // 362 | 363 | Protein sequences can be input through the protein section, but can only 364 | include the single-letter abbreviations of the 20 standard amino acids 365 | (A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y). Stop codons are designated by X. 366 | 367 | The reverse directive causes the nucleotide sequence (either original or 368 | translated from the protein sequence) to be reversed on incorporation in 369 | the synthetic gene. 370 | 371 | The gapfix directive is used when the sequence should not fall within 372 | overlap regions, but rather only in the gaps or overhangs that are single 373 | stranded in the annealed assembly prior to PCR. This is advantageous for 374 | subsequent mutations by oligonucleotide replacement. For example, if a 375 | synthetic gene will be exhaustively mutated at a single codon, having the 376 | codon entirely within a gap region will allow its mutation by replacing a 377 | single oligonucleotide, rather than two or three. 378 | 379 | The gapfix directive will enable Fixed Gap Scoring. Any nt that are 380 | designated as gapfixed but fall within overlap regions will increase the 381 | global score. DNAWorks will then try to minimize the score by moving the 382 | gap regions toward the gapfixed nucleotides. Because gap regions are 383 | generally short (less than 10 nt), the sequence should be very short. 384 | Otherwise the global score will remain quite high, and other features (Tm, 385 | repeats, misprimes) will not receive as much attention. 386 | 387 | Gapfixing is much more effective when oligo lengths are allowed be 388 | randomized, rather than fixed to the length chosen by default. See 389 | length option, above, for more details. 390 | 391 | 392 | codon [ ecoli2 | E. coli | C. elegans | D. melanogaster | H. sapiens | 393 | M. musculus | R. novegicus | S. cerevesiae | X. laevis | P. pastoris ] 394 | [ ... 395 | // ] 396 | 397 | Codon frequencies can be entered manually in the codon section using 398 | GCG-format codon frequencies. If a directive corresponding to a given 399 | organism is present, the codon frequency for that organism will be used. 400 | 401 | pattern 402 | ... 403 | // 404 | 405 | Nucleotide patterns can be screened if entered in the pattern section. 406 | Pattern sequences can be normal or degenerate nucleotide sequences. 407 | ``` 408 | 409 | 410 | Good luck! 411 | -------------------------------------------------------------------------------- /control_func.f90: -------------------------------------------------------------------------------- 1 | SUBROUTINE Get_Args 2 | 3 | USE dnaworks_data 4 | USE dnaworks_test 5 | IMPLICIT NONE 6 | 7 | CHARACTER(LEN=30) :: ARGV(100) ! command line arguments 8 | INTEGER :: ARGC ! number of command line arguments 9 | INTEGER :: i,j 10 | 11 | ! IARGC returns the total number of arguments on the command line 12 | 13 | ARGC=IARGC() 14 | 15 | ! GETARG returns the argument that corresponds to the argument number, with 16 | ! zero equal to the command itself 17 | 18 | DO i=1,ARGC 19 | CALL GETARG(i,ARGV(i)) 20 | 21 | ! Turn on testing mode 22 | 23 | IF (INDEX(ARGV(i),"-t3").eq.1) THEN 24 | TEST3=.TRUE. 25 | TEST2=.TRUE. 26 | TEST1=.TRUE. 27 | TEST0=.TRUE. 28 | ELSE IF (INDEX(ARGV(i),"-t2").eq.1) THEN 29 | TEST2=.TRUE. 30 | TEST1=.TRUE. 31 | TEST0=.TRUE. 32 | ELSE IF (INDEX(ARGV(i),"-t1").eq.1) THEN 33 | TEST1=.TRUE. 34 | TEST0=.TRUE. 35 | ELSE IF (INDEX(ARGV(i),"-t0").eq.1) THEN 36 | TEST0=.TRUE. 37 | ELSE IF (INDEX(ARGV(i),"-q").eq.1) THEN 38 | QUIET=.TRUE. 39 | ELSE IF (INDEX(ARGV(i),"-fast").eq.1) THEN 40 | FAST=.TRUE. 41 | ELSE IF (INDEX(ARGV(i),"-help").eq.1) THEN 42 | CALL Print_Help 43 | ELSE 44 | 45 | ! Assign inputfile from ARGV(1) if possible 46 | 47 | inputfile=ARGV(i) 48 | END IF 49 | END DO 50 | 51 | END SUBROUTINE Get_Args 52 | SUBROUTINE Oligo_Design(SolutionNo,num) 53 | ! 54 | ! This subroutine is the main engine of the program. 55 | 56 | USE dnaworks_data 57 | USE dnaworks_test 58 | IMPLICIT NONE 59 | 60 | INTEGER :: i,j,k,main_count,z,timediff 61 | INTEGER :: nlimit ! max number of successful changes before continuing 62 | INTEGER :: nover ! max number of changes before dropping the temperature 63 | INTEGER :: maxlimit ! max number of counts before quitting 64 | INTEGER :: nsucc 65 | INTEGER :: count 66 | INTEGER :: SolutionNo ! current solution number 67 | INTEGER :: num ! where to print output? 68 | LOGICAL :: ans 69 | REAL :: t ! initial temperature 70 | REAL :: tfactr ! how much to drop temperature 71 | REAL :: gain 72 | REAL :: guess 73 | REAL :: rand 74 | INTEGER,EXTERNAL :: CurrentTimeSeconds 75 | 76 | IF (TEST0) PRINT *,"Oligo_Design" !TEST0 77 | 78 | nlimit=50 79 | nover=500 80 | nsucc=0 81 | count=0 82 | t=0.5 83 | tfactr=0.96 84 | main_count=0 85 | maxlimit=1500 86 | IF (FAST) maxlimit=300 87 | 88 | WRITE(UNIT=console,FMT="('')") 89 | 90 | BestDNA = CurrDNA ! initialize BestDNA 91 | 92 | tempdrop: DO i=1,1000 ! There will be a total of 1000 drops in temperature 93 | 94 | ! just dump out if there are no protein residues 95 | 96 | IF (PROTlen.eq.0.and.(.not.OligoLenRandom)) THEN 97 | BestDNA = CurrDNA 98 | EXIT tempdrop 99 | END IF 100 | 101 | nsucc=0 102 | 103 | mutate: DO j=1,nover ! within which there will be mutation/length change rounds. 104 | 105 | StoreDNA = CurrDNA ! Make a backup of the current solution 106 | 107 | IF (PROTlen.gt.0) CALL Mutate_Sequence ! only mutate protein sequence 108 | main_count=main_count+1 109 | 110 | CALL Generate_Overlaps(SolutionNo) ! Generate overlaps for the mutated sequence 111 | 112 | IF (MOD(CurrDNA%NumOlaps,2).eq.0) CALL Stop_Program("Even number of overlaps. Try adjusting parameters.") 113 | 114 | gain = CurrDNA%OverallScore - StoreDNA%OverallScore ! Determine whether the mutated solution is any better than the old one 115 | 116 | ! If the gain is good enough or if the temperature is high enough, we have a successful mutation round. 117 | 118 | CALL RANDOM_NUMBER(rand) 119 | ans=(gain.lt.0.0).or.(rand.lt.exp(-gain/t)) ! Metropolis 120 | IF (ans) THEN 121 | nsucc=nsucc+1 122 | ELSE 123 | CurrDNA=StoreDNA ! If not, go back to the original sequence and try again. 124 | END IF 125 | 126 | IF (CurrDNA%OverallScore.lt.BestDNA%OverallScore) THEN 127 | BestDNA = CurrDNA ! If the current sequence is better than the best sequence, replace the BestDNA with CurrDNA. 128 | count=0 129 | ELSE 130 | count=count+1 ! If the current score does not achieve a better value than the best score, then start counting. 131 | END IF 132 | 133 | IF ((MOD(main_count,100)).eq.0) WRITE(UNIT=num,FMT=& 134 | "(6x,i5,' optimization rounds, best = ',f9.3,' Rep =',i4,' Mis =',i4)") & 135 | main_count,BestDNA%OverallScore,BestDNA%RN,BestDNA%MSN ! Keep the user informed 136 | 137 | IF (count.gt.maxlimit) THEN ! If the count between drops is greater than 300, then quit. 138 | EXIT tempdrop 139 | END IF 140 | 141 | IF (nsucc.ge.nlimit) EXIT mutate ! If there are more than successful rounds, exit the mutation loop and move to the next temperature drop. 142 | 143 | timediff = CurrentTimeSeconds()-MainTimeStart; 144 | IF (MainTimeLimit.GT.0.and.timediff.GE.MainTimeLimit) THEN ! Dump out if out of time 145 | WRITE(UNIT=console,FMT="(/,'Main time limit reached.')") 146 | WRITE(UNIT=outputnum,FMT="(/,'Main time limit reached.')") 147 | TimesUp=.TRUE. 148 | EXIT mutate 149 | END IF 150 | 151 | CALL FLUSH(console) 152 | 153 | END DO mutate 154 | 155 | t=t*tfactr ! Drop the temperature 156 | 157 | IF (nsucc.eq.0.or.t.lt.0.0001.or.TimesUp) THEN ! If within rounds of mutation there are no successes or the temperature is too low, or out of time, then quit. 158 | WRITE(UNIT=num,FMT="('Limit of simulated annealing, quitting.')") 159 | EXIT tempdrop 160 | END IF 161 | 162 | IF (BestDNA%OverallScore.lt.0.001) EXIT tempdrop ! If the best score is good enough, then quit. 163 | 164 | END DO tempdrop 165 | 166 | CurrDNA = BestDNA ! Push the best solution from Oligo_Design into the current solution 167 | 168 | CALL Revert_Degenerates 169 | CALL Print_FinalDNA_Log(outputnum,SolutionNo) 170 | CALL Print_Scores_Log(console) 171 | CALL Print_Scores_Log(outputnum) 172 | CALL Print_Histogram(outputnum,SolutionNo) 173 | CALL Print_Pattern_Screen(outputnum) 174 | CALL Print_Oligo_Log(outputnum) 175 | 176 | END SUBROUTINE Oligo_Design 177 | SUBROUTINE Run_Dnaworks() 178 | 179 | USE dnaworks_data 180 | USE dnaworks_test 181 | IMPLICIT NONE 182 | 183 | INTEGER :: i,j,k,l,timediff 184 | INTEGER,EXTERNAL :: CurrentTimeSeconds 185 | 186 | IF (TEST0) PRINT *,"Run_Dnaworks" !TEST0 187 | 188 | ! start the loops 189 | 190 | melt: DO j=MeltTempLo,MeltTempHi 191 | MeltTemp=j 192 | oligo: DO l=OligoLenLo,OligoLenHi 193 | OligoLen=l 194 | main: DO i=1,NumberOfSolutions 195 | SequenceTranslated=.FALSE. 196 | CALL Translate_Protein 197 | 198 | ! Dump out if out of time 199 | 200 | timediff = CurrentTimeSeconds()-MainTimeStart; 201 | IF (MainTimeLimit.GT.0.and.timediff.GE.MainTimeLimit) THEN 202 | WRITE(UNIT=console,FMT="(/,'Main time limit reached.')") 203 | WRITE(UNIT=outputnum,FMT="(/,'Main time limit reached.')") 204 | TimesUp=.TRUE. 205 | FinalScore(CurrSolutionNo)%Oligo=OligoLen 206 | FinalScore(CurrSolutionNo)%MeltT=MeltTemp 207 | EXIT melt 208 | END IF 209 | 210 | CurrSolutionNo = CurrSolutionNo+1 211 | 212 | CALL Print_Param_Log(console,CurrSolutionNo) 213 | CALL Print_Param_Log(outputnum,CurrSolutionNo) 214 | CALL FLUSH(console) 215 | 216 | CALL Generate_Overlaps(CurrSolutionNo) 217 | 218 | IF (MOD(CurrDNA%NumOlaps,2).eq.0) THEN 219 | IF (.not.QUIET) THEN 220 | WRITE(UNIT=console,FMT="('Even number of overlaps - trial ',i4,' abandoned')") CurrSolutionNo 221 | WRITE(UNIT=outputnum,FMT="('Even number of overlaps - trial ',i4,' abandoned')") CurrSolutionNo 222 | END IF 223 | FinalScore(CurrSolutionNo)%FinaScore=999999 224 | CYCLE main 225 | END IF 226 | 227 | ! If everything is ok, go to Oligo_Design 228 | 229 | CALL Oligo_Design(CurrSolutionNo,console) 230 | 231 | ! Keep track of times 232 | 233 | CALL Print_Estimated_Time(CurrSolutionNo) 234 | 235 | ! Update FinalScore tally 236 | 237 | FinalScore(CurrSolutionNo)%Oligo=OligoLen 238 | FinalScore(CurrSolutionNo)%MeltT=MeltTemp 239 | 240 | END DO main 241 | END DO oligo 242 | END DO melt 243 | 244 | ! in case optimization stopped prematurely 245 | 246 | TotalNumberOfSolutions=CurrSolutionNo 247 | 248 | END SUBROUTINE Run_Dnaworks 249 | SUBROUTINE Stop_Program(message) 250 | ! 251 | ! This subroutine stops the program and displays an error message. 252 | 253 | USE dnaworks_data 254 | USE dnaworks_test 255 | IMPLICIT NONE 256 | 257 | CHARACTER(LEN=*) :: message 258 | INTEGER :: i 259 | 260 | IF (TEST0) PRINT *,"Stop_Program" !TEST0 261 | 262 | WRITE(UNIT=console,FMT="(' ')") 263 | WRITE(UNIT=console,FMT="('Program error:')") 264 | WRITE(UNIT=console,FMT="(a)") message 265 | WRITE(UNIT=console,FMT="('Exiting program now')") 266 | CALL FLUSH(console) 267 | 268 | WRITE(UNIT=outputnum,FMT="(' ')") 269 | WRITE(UNIT=outputnum,FMT="('Program error:')") 270 | WRITE(UNIT=outputnum,FMT="(a)") message 271 | WRITE(UNIT=outputnum,FMT="('Exiting program now')") 272 | 273 | CLOSE (UNIT=outputnum) 274 | STOP 275 | 276 | END SUBROUTINE Stop_Program 277 | -------------------------------------------------------------------------------- /dnaworks.f90: -------------------------------------------------------------------------------- 1 | PROGRAM dnaworks 2 | 3 | USE dnaworks_data 4 | USE dnaworks_test 5 | IMPLICIT NONE 6 | 7 | INTEGER,EXTERNAL :: CurrentTimeSeconds 8 | 9 | IF (TEST0) PRINT *,"DNAWORKS start" !TEST0 10 | CALL RANDOM_SEED() 11 | 12 | MainTimeStart=CurrentTimeSeconds() ! when does the run begin? 13 | 14 | CALL Get_Args ! get the command arguments, if any 15 | CALL Default_Param 16 | CALL Read_Input 17 | 18 | IF (DNAlen.LE.50) CALL Stop_Program("DNA length is less than 50 nt.") 19 | 20 | ! Reset weights if there is no protein to mutate 21 | 22 | IF (PROTlen.eq.0) THEN 23 | Cwt=0.0 ! weight for codon scoring 24 | Rwt=0.0 ! weight for repeat scoring 25 | Gwt=0.0 ! weight for GC scoring 26 | Awt=0.0 ! weight for AT scoring 27 | Pwt=0.0 ! weight for pattern scoring 28 | END IF 29 | 30 | ! start logfile 31 | 32 | OPEN (UNIT=outputnum,FILE=outputfile,FORM="FORMATTED",STATUS="REPLACE") 33 | 34 | CALL Print_Output_Start(outputnum) 35 | CALL Print_Output_Start(console) 36 | CALL FLUSH(console) 37 | CALL Print_Seq_Log(outputnum) 38 | IF (PROTlen.gt.0) CALL Print_Codon_Log(outputnum) 39 | CALL Print_Pattern_Log(outputnum) 40 | ! CALL Print_TranslatedDNA(outputnum) 41 | 42 | ! determine the number of solutions 43 | 44 | TotalNumberOfSolutions=(NumberOfSolutions*(MeltTempHi-MeltTempLo+1)* & 45 | & (OligoLenHi-OligoLenLo+1)) 46 | IF (TotalNumberOfSolutions.gt.9999) CALL Stop_Program("Too many trials. Limit the range of parameters.") 47 | WRITE(UNIT=console,FMT="('')") 48 | WRITE(UNIT=console,FMT="(20x,'Starting ',i3,' trial',$)") TotalNumberOfSolutions 49 | IF (TotalNumberOfSolutions.gt.1) WRITE(UNIT=console,FMT="('s',$)") 50 | WRITE(UNIT=console,FMT="('...')") 51 | CALL FLUSH(console) 52 | 53 | CALL Run_Dnaworks() 54 | 55 | CALL Print_Final_Tally(console) 56 | CALL Print_Output_End(console) 57 | CALL FLUSH(console) 58 | CALL Print_Final_Tally(outputnum) 59 | CALL Print_Output_End(outputnum) 60 | 61 | CLOSE (UNIT=outputnum) 62 | 63 | IF (LEN_TRIM(email).GT.5) THEN 64 | CALL Send_Email 65 | END IF 66 | 67 | WRITE(UNIT=console,FMT="(' ')") 68 | WRITE(UNIT=console,FMT="('Finished ')") 69 | 70 | END PROGRAM dnaworks 71 | -------------------------------------------------------------------------------- /dnaworks_data.f90: -------------------------------------------------------------------------------- 1 | MODULE dnaworks_data 2 | 3 | IMPLICIT NONE 4 | SAVE 5 | 6 | ! GLOBAL 7 | 8 | INTEGER :: console=6 ! print to console 9 | INTEGER :: inputnum=9 ! input files 10 | INTEGER :: outputnum=10 ! output logfile 11 | INTEGER :: oldlognum=11 ! old logfile output 12 | INTEGER :: PrevTrial=0 ! previous trial to fix oligos 13 | INTEGER :: OligoLen=40 ! user input oligo size 14 | INTEGER :: OligoLenHi=40 ! user input oligo size (upper limit) 15 | INTEGER :: OligoLenLo=40 ! user input oligo size (lower limit) 16 | LOGICAL :: OligoLenRandom=.FALSE. ! allow oligolen to vary between 20 17 | INTEGER :: MeltTemp=60 ! Ideal melting temperature 18 | INTEGER :: MeltTempHi=60 ! Ideal melting temperature (upper limit) 19 | INTEGER :: MeltTempLo=60 ! Ideal melting temperature (lower limit) 20 | INTEGER :: MeltTol=1 ! Tolerance for melting temperature deviation 21 | INTEGER :: SeqOptimToler=50 ! Lowest allowed codon frequency 22 | INTEGER :: TotalNumberOfSolutions 23 | INTEGER :: NumberOfSolutions=1 24 | INTEGER :: RepLen=8 ! determines the size of repeats to minimize 25 | INTEGER :: MPLn=18 ! length of misprimes 26 | INTEGER :: MPTip=6 ! identical tip of the misprime, in nts 27 | INTEGER :: MaxPROTlen=3333 ! maximum number of protein residues 28 | INTEGER :: MaxDNAlen=9999 ! maximum number of nucleotide residues 29 | INTEGER :: MaxNonId=8 ! maximum number of non-identical nts in misprime 30 | INTEGER :: MutProtPos=0 ! which codon should be mutated 31 | INTEGER :: MutNtPos(3) ! which nts are mutated 32 | INTEGER :: MutNtNum=0 ! how many nts are mutated (zero if none) 33 | INTEGER :: nt2aa(9999) ! DNApos to aa (1-21) or 0 34 | INTEGER :: nt2overlap(9999) ! DNApos to overlap or 0 35 | INTEGER :: nt2Solig(9999) ! DNApos to oligo or 0 36 | INTEGER :: nt2Aolig(9999) ! DNApos to antisense oligo or 0 37 | INTEGER :: nt2prot(9999) ! DNApos to PROTpos or 0 38 | INTEGER :: prot2aa(3333) ! PROTpos to aa (1-21) 39 | INTEGER :: prot2nt(3333) ! PROTpos to DNApos (middle nt of codon) 40 | INTEGER :: DNAlen=0 ! the length of the entire DNA 41 | INTEGER :: PROTlen=0 ! the length of the all the proteins 42 | INTEGER :: mutPROTnum=0 ! the number of mutated aa 43 | INTEGER :: mutPROT2prot(3333) ! mutPROTpos to PROTpos 44 | INTEGER :: NumberOfChains=0 ! number of isolated protein chains 45 | INTEGER :: prot2chain(3333) ! prot pos to protein number (NumberOfChains) 46 | LOGICAL :: ChainReverse(99) ! true if chain is reversed, indexed by 47 | LOGICAL :: ChainGapFix(99) ! true if chain is reversed, indexed by 48 | 49 | CHARACTER(LEN=9999) :: INITseq='' ! initial input sequence (DNA and prot) 50 | 51 | ! for degenerate nt 52 | CHARACTER(LEN=9999) :: ORIGDNAseq='' ! the original dna sequence 53 | INTEGER :: NumDegPos=0 ! total number of degenerate nt 54 | INTEGER :: DegPos(999) ! positions of degenerate sequences 55 | INTEGER :: CurrSolutionNo=0 56 | 57 | INTEGER :: INITlen=0 ! the length of the initial input sequence 58 | INTEGER :: NumberOfSeq=0 ! number of sequences, DNA or protein 59 | INTEGER :: INIT2Seq(9999) ! 60 | LOGICAL :: SeqIsProt(99) ! true if sequence is prot, false if DNA 61 | LOGICAL :: SeqReverse(99) ! true if sequence is reversed, indexed by seq number (NumberOfSeq) 62 | LOGICAL :: SeqGapFix(99) ! true if sequence is to be gapfixed, indexed by seq number (NumberOfSeq) 63 | CHARACTER(LEN=80) :: email='' 64 | CHARACTER(LEN=80) :: jobname='' 65 | CHARACTER(LEN=80) :: OLDjobname='' 66 | CHARACTER(LEN=30) :: oldlogfile="OLDLOGFILE.txt" 67 | CHARACTER(LEN=30) :: inputfile="DNAWORKS.inp" 68 | CHARACTER(LEN=30) :: outputfile="LOGFILE.txt" 69 | CHARACTER(LEN=256) :: InputArray(9999) ! contents of DNAWORKS.inp 70 | CHARACTER(LEN=256) :: InputArrayUC(9999) ! contents of DNAWORKS.inp, uppercase 71 | INTEGER :: InputArrayNum ! number of lines in DNAWORKS.inp 72 | 73 | CHARACTER(LEN=9999) :: SCRATCH='' ! scratch string for various calls 74 | CHARACTER(LEN=9999) :: OLDDNAseq='' ! DNA sequence from previous trial 75 | CHARACTER(LEN=3333) :: PROTseq='' ! protein sequence 76 | CHARACTER(LEN=3333) :: OLDPROTseq='' ! protein sequence from previous trial 77 | 78 | CHARACTER(LEN=64) :: bar64 = "----------------------------------------------------------------" 79 | CHARACTER(LEN=80) :: bar80 = "--------------------------------------------------------------------------------" 80 | INTEGER :: MainTimeLimit=0 ! time limit for entire run 81 | INTEGER :: MainTimeStart ! for time control of the program 82 | REAL :: Twt=1.0 ! weight for MeltTm scoring 83 | REAL :: Cwt=1.0 ! weight for codon scoring 84 | REAL :: Rwt=1.0 ! weight for repeat scoring 85 | REAL :: Mwt=1.0 ! weight for mispriming scoring 86 | REAL :: Gwt=1.0 ! weight for GC scoring 87 | REAL :: Awt=1.0 ! weight for AT scoring 88 | REAL :: Lwt=1.0 ! weight for length scoring 89 | REAL :: Pwt=1.0 ! weight for pattern scoring 90 | REAL :: Fwt=1.0 ! weight for gap fixing 91 | REAL :: XScore(3333) ! cocon-based total score for mutation 92 | LOGICAL :: CodonStrict=.FALSE. ! use strict frequency threshold 93 | LOGICAL :: ScoreCodons=.FALSE. ! calculate codon scores 94 | LOGICAL :: CodonRandom=.FALSE. ! translate using random codons 95 | LOGICAL :: MutantRun=.FALSE. ! if this is a mutation only run 96 | LOGICAL :: GapFix=.FALSE. ! are any positions fixed in the gaps? 97 | INTEGER :: LogfileOffset=1 ! how many blank characters precede the line? 98 | LOGICAL :: JACEK=.FALSE. 99 | CHARACTER(LEN=80) :: MAILPATH="/usr/bin/Mail" 100 | LOGICAL :: TBIO=.FALSE. 101 | LOGICAL :: NOGAPS=.FALSE. ! if no gaps are desired 102 | LOGICAL :: QUIET=.FALSE. 103 | LOGICAL :: FAST=.FALSE. ! cut corners 104 | LOGICAL :: TimesUp=.FALSE. 105 | LOGICAL :: SequenceTranslated=.FALSE. ! if false, generate all scores; if 106 | ! true, only generate scores that 107 | ! change when overlaps change 108 | REAL :: OligoConc=2e-7 ! 200 nM oligo 109 | REAL :: SodiumConc=5e-2 ! 50 mM sodium 110 | REAL :: MgConc=2e-3 ! 2 mM magnesium 111 | REAL :: RGasConstant=1.9872 ! gas constant 112 | REAL :: Kelvin=273.15 ! conversion from kelvin to celsius 113 | REAL :: OligoCorr ! correction factor for oligo conc. 114 | REAL :: OligoCorrSC ! correction factor for self-comp. oligo 115 | REAL :: SaltCorr ! correction factor for cations 116 | 117 | ! PATTERNS 118 | 119 | TYPE Pattern 120 | CHARACTER(LEN=80) :: SeqRC 121 | CHARACTER(LEN=80) :: Seq 122 | INTEGER :: Len 123 | CHARACTER(LEN=80) :: Name 124 | LOGICAL :: SelfCompl 125 | LOGICAL :: Degen 126 | LOGICAL :: Isoschiz 127 | END TYPE 128 | 129 | TYPE(Pattern) :: PTN(999) 130 | INTEGER :: PTNnum=0 131 | 132 | ! SOLUTIONS 133 | 134 | TYPE Tally 135 | REAL :: InitScore 136 | REAL :: FinaScore 137 | REAL :: TmRange 138 | INTEGER :: NumOligs 139 | INTEGER :: Oligo 140 | INTEGER :: MeltT 141 | INTEGER :: LongestOligo 142 | INTEGER :: Repeats 143 | INTEGER :: Misprimes 144 | INTEGER :: LowestOlap 145 | END TYPE 146 | 147 | TYPE(Tally) :: FinalScore(9999) 148 | 149 | ! TEST 150 | 151 | TYPE Test_Tally 152 | REAL :: Score 153 | INTEGER :: Oligo 154 | INTEGER :: MeltT 155 | INTEGER :: Count 156 | INTEGER :: Time 157 | END TYPE 158 | 159 | TYPE(Test_Tally) :: Test_Scores(400) 160 | 161 | ! TABLES 162 | 163 | TYPE KnownCodon 164 | CHARACTER(LEN=3) :: Seq 165 | CHARACTER(LEN=3) :: AA3 166 | CHARACTER(LEN=1) :: AA1 167 | CHARACTER(LEN=3) :: SeqRC ! Reverse complement of sequence 168 | INTEGER :: num(3) ! numerical representation of codon 169 | INTEGER :: numRC(3) ! numerical representation of codon 170 | REAL :: Freq 171 | REAL :: Number 172 | LOGICAL :: Check 173 | END TYPE KnownCodon 174 | 175 | TYPE(KnownCodon) :: CFT(64) ! Codon Frequency Table 176 | 177 | TYPE KnownAA 178 | CHARACTER(LEN=3) :: AA3 179 | CHARACTER(LEN=1) :: AA1 180 | REAL :: Freq(10) 181 | REAL :: NumberSum 182 | INTEGER :: NumOfCodons 183 | INTEGER :: NumOfActiveCodons 184 | INTEGER :: Codon(10) 185 | END TYPE KnownAA 186 | 187 | TYPE(KnownAA) :: AAT(21) ! Amino Acid Table 188 | 189 | ! Degenerate sequences 190 | 191 | TYPE DegenerateSeq ! Table of degenerate sequences 192 | CHARACTER(LEN=1) :: DegNT 193 | INTEGER :: NumOfNT 194 | INTEGER :: NumSeq(4) 195 | CHARACTER(LEN=1) :: Seq(4) 196 | END TYPE DegenerateSeq 197 | 198 | TYPE(DegenerateSeq) :: DegenSeq(11) 199 | 200 | ! PRE-EXISTANT CFTs 201 | 202 | CHARACTER(LEN=30) :: Organism 203 | 204 | CHARACTER(LEN=5),DIMENSION(3,64) :: ecoli2CFT = & 205 | RESHAPE( (/& 206 | "Gly ","GGG ","0.044","Gly ","GGA ","0.020","Gly ","GGT ","0.508","Gly ","GGC ","0.428",& 207 | "Glu ","GAG ","0.247","Glu ","GAA ","0.754","Asp ","GAT ","0.461","Asp ","GAC ","0.540",& 208 | "Val ","GTG ","0.268","Val ","GTA ","0.200","Val ","GTT ","0.398","Val ","GTC ","0.135",& 209 | "Ala ","GCG ","0.323","Ala ","GCA ","0.240","Ala ","GCT ","0.275","Ala ","GCC ","0.161",& 210 | "Arg ","AGG ","0.003","Arg ","AGA ","0.006","Ser ","AGT ","0.045","Ser ","AGC ","0.243",& 211 | "Lys ","AAG ","0.215","Lys ","AAA ","0.786","Asn ","AAT ","0.173","Asn ","AAC ","0.828",& 212 | "Met ","ATG ","1.000","Ile ","ATA ","0.006","Ile ","ATT ","0.335","Ile ","ATC ","0.659",& 213 | "Thr ","ACG ","0.127","Thr ","ACA ","0.047","Thr ","ACT ","0.291","Thr ","ACC ","0.536",& 214 | "Trp ","TGG ","1.000","End ","TGA ","0.352","Cys ","TGT ","0.389","Cys ","TGC ","0.612",& 215 | "End ","TAG ","0.076","End ","TAA ","0.630","Tyr ","TAT ","0.352","Tyr ","TAC ","0.648",& 216 | "Leu ","TTG ","0.055","Leu ","TTA ","0.034","Phe ","TTT ","0.291","Phe ","TTC ","0.709",& 217 | "Ser ","TCG ","0.074","Ser ","TCA ","0.048","Ser ","TCT ","0.324","Ser ","TCC ","0.266",& 218 | "Arg ","CGG ","0.008","Arg ","CGA ","0.011","Arg ","CGT ","0.643","Arg ","CGC ","0.330",& 219 | "Gln ","CAG ","0.814","Gln ","CAA ","0.187","His ","CAT ","0.298","His ","CAC ","0.702",& 220 | "Leu ","CTG ","0.767","Leu ","CTA ","0.008","Leu ","CTT ","0.056","Leu ","CTC ","0.080",& 221 | "Pro ","CCG ","0.719","Pro ","CCA ","0.153","Pro ","CCT ","0.112","Pro ","CCC ","0.016"/), (/3,64/) ) 222 | 223 | CHARACTER(LEN=5),DIMENSION(3,64) :: celCFT = & 224 | RESHAPE( (/& 225 | "Gly ","GGG ","0.08 ","Gly ","GGA ","0.59 ","Gly ","GGT ","0.20 ","Gly ","GGC ","0.12 ", & 226 | "Glu ","GAG ","0.38 ","Glu ","GAA ","0.62 ","Asp ","GAT ","0.68 ","Asp ","GAC ","0.32 ", & 227 | "Val ","GTG ","0.23 ","Val ","GTA ","0.16 ","Val ","GTT ","0.39 ","Val ","GTC ","0.22 ", & 228 | "Ala ","GCG ","0.13 ","Ala ","GCA ","0.31 ","Ala ","GCT ","0.36 ","Ala ","GCC ","0.20 ", & 229 | "Arg ","AGG ","0.08 ","Arg ","AGA ","0.29 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.10 ", & 230 | "Lys ","AAG ","0.41 ","Lys ","AAA ","0.59 ","Asn ","AAT ","0.62 ","Asn ","AAC ","0.38 ", & 231 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.16 ","Ile ","ATT ","0.53 ","Ile ","ATC ","0.31 ", & 232 | "Thr ","ACG ","0.15 ","Thr ","ACA ","0.34 ","Thr ","ACT ","0.32 ","Thr ","ACC ","0.18 ", & 233 | "Trp ","TGG ","1.00 ","End ","TGA ","0.39 ","Cys ","TGT ","0.55 ","Cys ","TGC ","0.45 ", & 234 | "End ","TAG ","0.18 ","End ","TAA ","0.44 ","Tyr ","TAT ","0.56 ","Tyr ","TAC ","0.44 ", & 235 | "Leu ","TTG ","0.23 ","Leu ","TTA ","0.11 ","Phe ","TTT ","0.49 ","Phe ","TTC ","0.51 ", & 236 | "Ser ","TCG ","0.15 ","Ser ","TCA ","0.25 ","Ser ","TCT ","0.21 ","Ser ","TCC ","0.13 ", & 237 | "Arg ","CGG ","0.09 ","Arg ","CGA ","0.23 ","Arg ","CGT ","0.21 ","Arg ","CGC ","0.10 ", & 238 | "Gln ","CAG ","0.34 ","Gln ","CAA ","0.66 ","His ","CAT ","0.60 ","His ","CAC ","0.40 ", & 239 | "Leu ","CTG ","0.14 ","Leu ","CTA ","0.09 ","Leu ","CTT ","0.25 ","Leu ","CTC ","0.17 ", & 240 | "Pro ","CCG ","0.20 ","Pro ","CCA ","0.53 ","Pro ","CCT ","0.18 ","Pro ","CCC ","0.09 "/), (/3,64/) ) 241 | 242 | CHARACTER(LEN=5),DIMENSION(3,64) :: dmeCFT = & 243 | RESHAPE( (/& 244 | "Gly ","GGG ","0.07 ","Gly ","GGA ","0.28 ","Gly ","GGT ","0.21 ","Gly ","GGC ","0.43 ", & 245 | "Glu ","GAG ","0.67 ","Glu ","GAA ","0.33 ","Asp ","GAT ","0.53 ","Asp ","GAC ","0.47 ", & 246 | "Val ","GTG ","0.47 ","Val ","GTA ","0.11 ","Val ","GTT ","0.18 ","Val ","GTC ","0.24 ", & 247 | "Ala ","GCG ","0.19 ","Ala ","GCA ","0.17 ","Ala ","GCT ","0.19 ","Ala ","GCC ","0.45 ", & 248 | "Arg ","AGG ","0.11 ","Arg ","AGA ","0.09 ","Ser ","AGT ","0.14 ","Ser ","AGC ","0.25 ", & 249 | "Lys ","AAG ","0.70 ","Lys ","AAA ","0.30 ","Asn ","AAT ","0.44 ","Asn ","AAC ","0.56 ", & 250 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.19 ","Ile ","ATT ","0.34 ","Ile ","ATC ","0.47 ", & 251 | "Thr ","ACG ","0.26 ","Thr ","ACA ","0.20 ","Thr ","ACT ","0.17 ","Thr ","ACC ","0.38 ", & 252 | "Trp ","TGG ","1.00 ","End ","TGA ","0.25 ","Cys ","TGT ","0.29 ","Cys ","TGC ","0.71 ", & 253 | "End ","TAG ","0.33 ","End ","TAA ","0.41 ","Tyr ","TAT ","0.37 ","Tyr ","TAC ","0.63 ", & 254 | "Leu ","TTG ","0.18 ","Leu ","TTA ","0.05 ","Phe ","TTT ","0.37 ","Phe ","TTC ","0.63 ", & 255 | "Ser ","TCG ","0.20 ","Ser ","TCA ","0.09 ","Ser ","TCT ","0.08 ","Ser ","TCC ","0.23 ", & 256 | "Arg ","CGG ","0.15 ","Arg ","CGA ","0.15 ","Arg ","CGT ","0.16 ","Arg ","CGC ","0.33 ", & 257 | "Gln ","CAG ","0.70 ","Gln ","CAA ","0.30 ","His ","CAT ","0.40 ","His ","CAC ","0.60 ", & 258 | "Leu ","CTG ","0.43 ","Leu ","CTA ","0.09 ","Leu ","CTT ","0.10 ","Leu ","CTC ","0.15 ", & 259 | "Pro ","CCG ","0.29 ","Pro ","CCA ","0.25 ","Pro ","CCT ","0.13 ","Pro ","CCC ","0.33 "/), (/3,64/) ) 260 | 261 | CHARACTER(LEN=5),DIMENSION(3,64) :: hsaCFT = & 262 | RESHAPE( (/& 263 | "Gly ","GGG ","0.25 ","Gly ","GGA ","0.25 ","Gly ","GGT ","0.16 ","Gly ","GGC ","0.34 ", & 264 | "Glu ","GAG ","0.58 ","Glu ","GAA ","0.42 ","Asp ","GAT ","0.46 ","Asp ","GAC ","0.54 ", & 265 | "Val ","GTG ","0.47 ","Val ","GTA ","0.12 ","Val ","GTT ","0.18 ","Val ","GTC ","0.24 ", & 266 | "Ala ","GCG ","0.11 ","Ala ","GCA ","0.23 ","Ala ","GCT ","0.26 ","Ala ","GCC ","0.40 ", & 267 | "Arg ","AGG ","0.21 ","Arg ","AGA ","0.21 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.24 ", & 268 | "Lys ","AAG ","0.57 ","Lys ","AAA ","0.43 ","Asn ","AAT ","0.47 ","Asn ","AAC ","0.53 ", & 269 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.17 ","Ile ","ATT ","0.36 ","Ile ","ATC ","0.47 ", & 270 | "Thr ","ACG ","0.11 ","Thr ","ACA ","0.28 ","Thr ","ACT ","0.25 ","Thr ","ACC ","0.36 ", & 271 | "Trp ","TGG ","1.00 ","End ","TGA ","0.47 ","Cys ","TGT ","0.45 ","Cys ","TGC ","0.55 ", & 272 | "End ","TAG ","0.23 ","End ","TAA ","0.30 ","Tyr ","TAT ","0.44 ","Tyr ","TAC ","0.56 ", & 273 | "Leu ","TTG ","0.13 ","Leu ","TTA ","0.08 ","Phe ","TTT ","0.46 ","Phe ","TTC ","0.54 ", & 274 | "Ser ","TCG ","0.06 ","Ser ","TCA ","0.15 ","Ser ","TCT ","0.19 ","Ser ","TCC ","0.22 ", & 275 | "Arg ","CGG ","0.20 ","Arg ","CGA ","0.11 ","Arg ","CGT ","0.08 ","Arg ","CGC ","0.19 ", & 276 | "Gln ","CAG ","0.74 ","Gln ","CAA ","0.26 ","His ","CAT ","0.42 ","His ","CAC ","0.58 ", & 277 | "Leu ","CTG ","0.40 ","Leu ","CTA ","0.07 ","Leu ","CTT ","0.13 ","Leu ","CTC ","0.20 ", & 278 | "Pro ","CCG ","0.11 ","Pro ","CCA ","0.28 ","Pro ","CCT ","0.28 ","Pro ","CCC ","0.33 "/), (/3,64/) ) 279 | 280 | CHARACTER(LEN=5),DIMENSION(3,64) :: mmuCFT = & 281 | RESHAPE( (/& 282 | "Gly ","GGG ","0.24 ","Gly ","GGA ","0.26 ","Gly ","GGT ","0.18 ","Gly ","GGC ","0.33 ", & 283 | "Glu ","GAG ","0.60 ","Glu ","GAA ","0.40 ","Asp ","GAT ","0.44 ","Asp ","GAC ","0.56 ", & 284 | "Val ","GTG ","0.46 ","Val ","GTA ","0.12 ","Val ","GTT ","0.17 ","Val ","GTC ","0.25 ", & 285 | "Ala ","GCG ","0.10 ","Ala ","GCA ","0.23 ","Ala ","GCT ","0.29 ","Ala ","GCC ","0.38 ", & 286 | "Arg ","AGG ","0.22 ","Arg ","AGA ","0.21 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.24 ", & 287 | "Lys ","AAG ","0.61 ","Lys ","AAA ","0.39 ","Asn ","AAT ","0.43 ","Asn ","AAC ","0.57 ", & 288 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.16 ","Ile ","ATT ","0.34 ","Ile ","ATC ","0.50 ", & 289 | "Thr ","ACG ","0.11 ","Thr ","ACA ","0.29 ","Thr ","ACT ","0.25 ","Thr ","ACC ","0.35 ", & 290 | "Trp ","TGG ","1.00 ","End ","TGA ","0.49 ","Cys ","TGT ","0.48 ","Cys ","TGC ","0.52 ", & 291 | "End ","TAG ","0.23 ","End ","TAA ","0.28 ","Tyr ","TAT ","0.43 ","Tyr ","TAC ","0.57 ", & 292 | "Leu ","TTG ","0.13 ","Leu ","TTA ","0.06 ","Phe ","TTT ","0.44 ","Phe ","TTC ","0.56 ", & 293 | "Ser ","TCG ","0.05 ","Ser ","TCA ","0.14 ","Ser ","TCT ","0.20 ","Ser ","TCC ","0.22 ", & 294 | "Arg ","CGG ","0.19 ","Arg ","CGA ","0.12 ","Arg ","CGT ","0.09 ","Arg ","CGC ","0.17 ", & 295 | "Gln ","CAG ","0.75 ","Gln ","CAA ","0.25 ","His ","CAT ","0.40 ","His ","CAC ","0.60 ", & 296 | "Leu ","CTG ","0.40 ","Leu ","CTA ","0.08 ","Leu ","CTT ","0.13 ","Leu ","CTC ","0.20 ", & 297 | "Pro ","CCG ","0.10 ","Pro ","CCA ","0.28 ","Pro ","CCT ","0.31 ","Pro ","CCC ","0.31 "/), (/3,64/) ) 298 | 299 | CHARACTER(LEN=5),DIMENSION(3,64) :: rnoCFT = & 300 | RESHAPE( (/& 301 | "Gly ","GGG ","0.24 ","Gly ","GGA ","0.25 ","Gly ","GGT ","0.17 ","Gly ","GGC ","0.34 ", & 302 | "Glu ","GAG ","0.61 ","Glu ","GAA ","0.39 ","Asp ","GAT ","0.43 ","Asp ","GAC ","0.57 ", & 303 | "Val ","GTG ","0.47 ","Val ","GTA ","0.11 ","Val ","GTT ","0.16 ","Val ","GTC ","0.25 ", & 304 | "Ala ","GCG ","0.10 ","Ala ","GCA ","0.22 ","Ala ","GCT ","0.28 ","Ala ","GCC ","0.39 ", & 305 | "Arg ","AGG ","0.21 ","Arg ","AGA ","0.20 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.24 ", & 306 | "Lys ","AAG ","0.62 ","Lys ","AAA ","0.38 ","Asn ","AAT ","0.41 ","Asn ","AAC ","0.59 ", & 307 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.15 ","Ile ","ATT ","0.33 ","Ile ","ATC ","0.52 ", & 308 | "Thr ","ACG ","0.11 ","Thr ","ACA ","0.28 ","Thr ","ACT ","0.24 ","Thr ","ACC ","0.37 ", & 309 | "Trp ","TGG ","1.00 ","End ","TGA ","0.50 ","Cys ","TGT ","0.45 ","Cys ","TGC ","0.55 ", & 310 | "End ","TAG ","0.22 ","End ","TAA ","0.28 ","Tyr ","TAT ","0.40 ","Tyr ","TAC ","0.60 ", & 311 | "Leu ","TTG ","0.13 ","Leu ","TTA ","0.06 ","Phe ","TTT ","0.42 ","Phe ","TTC ","0.58 ", & 312 | "Ser ","TCG ","0.06 ","Ser ","TCA ","0.14 ","Ser ","TCT ","0.19 ","Ser ","TCC ","0.23 ", & 313 | "Arg ","CGG ","0.20 ","Arg ","CGA ","0.12 ","Arg ","CGT ","0.09 ","Arg ","CGC ","0.18 ", & 314 | "Gln ","CAG ","0.75 ","Gln ","CAA ","0.25 ","His ","CAT ","0.39 ","His ","CAC ","0.61 ", & 315 | "Leu ","CTG ","0.41 ","Leu ","CTA ","0.08 ","Leu ","CTT ","0.12 ","Leu ","CTC ","0.20 ", & 316 | "Pro ","CCG ","0.11 ","Pro ","CCA ","0.28 ","Pro ","CCT ","0.30 ","Pro ","CCC ","0.31 "/), (/3,64/) ) 317 | 318 | CHARACTER(LEN=5),DIMENSION(3,64) :: sceCFT = & 319 | RESHAPE( (/& 320 | "Gly ","GGG ","0.12 ","Gly ","GGA ","0.22 ","Gly ","GGT ","0.47 ","Gly ","GGC ","0.19 ", & 321 | "Glu ","GAG ","0.30 ","Glu ","GAA ","0.70 ","Asp ","GAT ","0.65 ","Asp ","GAC ","0.35 ", & 322 | "Val ","GTG ","0.19 ","Val ","GTA ","0.21 ","Val ","GTT ","0.39 ","Val ","GTC ","0.21 ", & 323 | "Ala ","GCG ","0.11 ","Ala ","GCA ","0.29 ","Ala ","GCT ","0.38 ","Ala ","GCC ","0.22 ", & 324 | "Arg ","AGG ","0.21 ","Arg ","AGA ","0.48 ","Ser ","AGT ","0.16 ","Ser ","AGC ","0.11 ", & 325 | "Lys ","AAG ","0.42 ","Lys ","AAA ","0.58 ","Asn ","AAT ","0.59 ","Asn ","AAC ","0.41 ", & 326 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.27 ","Ile ","ATT ","0.46 ","Ile ","ATC ","0.26 ", & 327 | "Thr ","ACG ","0.14 ","Thr ","ACA ","0.30 ","Thr ","ACT ","0.35 ","Thr ","ACC ","0.22 ", & 328 | "Trp ","TGG ","1.00 ","End ","TGA ","0.30 ","Cys ","TGT ","0.63 ","Cys ","TGC ","0.37 ", & 329 | "End ","TAG ","0.23 ","End ","TAA ","0.47 ","Tyr ","TAT ","0.56 ","Tyr ","TAC ","0.44 ", & 330 | "Leu ","TTG ","0.29 ","Leu ","TTA ","0.28 ","Phe ","TTT ","0.59 ","Phe ","TTC ","0.41 ", & 331 | "Ser ","TCG ","0.10 ","Ser ","TCA ","0.21 ","Ser ","TCT ","0.26 ","Ser ","TCC ","0.16 ", & 332 | "Arg ","CGG ","0.04 ","Arg ","CGA ","0.07 ","Arg ","CGT ","0.15 ","Arg ","CGC ","0.06 ", & 333 | "Gln ","CAG ","0.31 ","Gln ","CAA ","0.69 ","His ","CAT ","0.64 ","His ","CAC ","0.36 ", & 334 | "Leu ","CTG ","0.11 ","Leu ","CTA ","0.14 ","Leu ","CTT ","0.13 ","Leu ","CTC ","0.06 ", & 335 | "Pro ","CCG ","0.12 ","Pro ","CCA ","0.41 ","Pro ","CCT ","0.31 ","Pro ","CCC ","0.16 "/), (/3,64/) ) 336 | 337 | CHARACTER(LEN=5),DIMENSION(3,64) :: xlaCFT = & 338 | RESHAPE( (/& 339 | "Gly ","GGG ","0.21 ","Gly ","GGA ","0.35 ","Gly ","GGT ","0.21 ","Gly ","GGC ","0.23 ", & 340 | "Glu ","GAG ","0.48 ","Glu ","GAA ","0.52 ","Asp ","GAT ","0.57 ","Asp ","GAC ","0.43 ", & 341 | "Val ","GTG ","0.36 ","Val ","GTA ","0.17 ","Val ","GTT ","0.27 ","Val ","GTC ","0.20 ", & 342 | "Ala ","GCG ","0.07 ","Ala ","GCA ","0.32 ","Ala ","GCT ","0.33 ","Ala ","GCC ","0.27 ", & 343 | "Arg ","AGG ","0.22 ","Arg ","AGA ","0.28 ","Ser ","AGT ","0.18 ","Ser ","AGC ","0.20 ", & 344 | "Lys ","AAG ","0.49 ","Lys ","AAA ","0.51 ","Asn ","AAT ","0.52 ","Asn ","AAC ","0.48 ", & 345 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.23 ","Ile ","ATT ","0.42 ","Ile ","ATC ","0.35 ", & 346 | "Thr ","ACG ","0.09 ","Thr ","ACA ","0.35 ","Thr ","ACT ","0.30 ","Thr ","ACC ","0.26 ", & 347 | "Trp ","TGG ","1.00 ","End ","TGA ","0.39 ","Cys ","TGT ","0.50 ","Cys ","TGC ","0.50 ", & 348 | "End ","TAG ","0.18 ","End ","TAA ","0.43 ","Tyr ","TAT ","0.51 ","Tyr ","TAC ","0.49 ", & 349 | "Leu ","TTG ","0.16 ","Leu ","TTA ","0.11 ","Phe ","TTT ","0.56 ","Phe ","TTC ","0.44 ", & 350 | "Ser ","TCG ","0.05 ","Ser ","TCA ","0.16 ","Ser ","TCT ","0.23 ","Ser ","TCC ","0.19 ", & 351 | "Arg ","CGG ","0.12 ","Arg ","CGA ","0.12 ","Arg ","CGT ","0.12 ","Arg ","CGC ","0.13 ", & 352 | "Gln ","CAG ","0.64 ","Gln ","CAA ","0.36 ","His ","CAT ","0.50 ","His ","CAC ","0.50 ", & 353 | "Leu ","CTG ","0.30 ","Leu ","CTA ","0.10 ","Leu ","CTT ","0.19 ","Leu ","CTC ","0.14 ", & 354 | "Pro ","CCG ","0.09 ","Pro ","CCA ","0.37 ","Pro ","CCT ","0.32 ","Pro ","CCC ","0.22 "/), (/3,64/) ) 355 | 356 | CHARACTER(LEN=5),DIMENSION(3,64) :: ecoCFT = & 357 | RESHAPE( (/& 358 | "Gly ","GGG ","0.16 ","Gly ","GGA ","0.15 ","Gly ","GGT ","0.34 ","Gly ","GGC ","0.35 ", & 359 | "Glu ","GAG ","0.33 ","Glu ","GAA ","0.67 ","Asp ","GAT ","0.64 ","Asp ","GAC ","0.36 ", & 360 | "Val ","GTG ","0.34 ","Val ","GTA ","0.17 ","Val ","GTT ","0.29 ","Val ","GTC ","0.20 ", & 361 | "Ala ","GCG ","0.31 ","Ala ","GCA ","0.24 ","Ala ","GCT ","0.19 ","Ala ","GCC ","0.26 ", & 362 | "Arg ","AGG ","0.05 ","Arg ","AGA ","0.08 ","Ser ","AGT ","0.17 ","Ser ","AGC ","0.23 ", & 363 | "Lys ","AAG ","0.27 ","Lys ","AAA ","0.73 ","Asn ","AAT ","0.52 ","Asn ","AAC ","0.48 ", & 364 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.14 ","Ile ","ATT ","0.49 ","Ile ","ATC ","0.37 ", & 365 | "Thr ","ACG ","0.24 ","Thr ","ACA ","0.19 ","Thr ","ACT ","0.19 ","Thr ","ACC ","0.38 ", & 366 | "Trp ","TGG ","1.00 ","End ","TGA ","0.31 ","Cys ","TGT ","0.47 ","Cys ","TGC ","0.53 ", & 367 | "End ","TAG ","0.09 ","End ","TAA ","0.60 ","Tyr ","TAT ","0.60 ","Tyr ","TAC ","0.40 ", & 368 | "Leu ","TTG ","0.13 ","Leu ","TTA ","0.15 ","Phe ","TTT ","0.59 ","Phe ","TTC ","0.41 ", & 369 | "Ser ","TCG ","0.13 ","Ser ","TCA ","0.15 ","Ser ","TCT ","0.17 ","Ser ","TCC ","0.14 ", & 370 | "Arg ","CGG ","0.12 ","Arg ","CGA ","0.07 ","Arg ","CGT ","0.34 ","Arg ","CGC ","0.34 ", & 371 | "Gln ","CAG ","0.66 ","Gln ","CAA ","0.34 ","His ","CAT ","0.59 ","His ","CAC ","0.41 ", & 372 | "Leu ","CTG ","0.46 ","Leu ","CTA ","0.04 ","Leu ","CTT ","0.12 ","Leu ","CTC ","0.10 ", & 373 | "Pro ","CCG ","0.47 ","Pro ","CCA ","0.21 ","Pro ","CCT ","0.19 ","Pro ","CCC ","0.14 "/), (/3,64/) ) 374 | 375 | CHARACTER(LEN=5),DIMENSION(3,64) :: ppaCFT = & 376 | RESHAPE( (/& 377 | "Phe ","TTT ","0.54 ","Phe ","TTC ","0.46 ","Ser ","TCT ","0.29 ","Ser ","TCC ","0.20 ", & 378 | "Ser ","TCA ","0.19 ","Ser ","TCG ","0.09 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.09 ", & 379 | "Tyr ","TAT ","0.46 ","Tyr ","TAC ","0.55 ","Cys ","TGT ","0.65 ","Cys ","TGC ","0.35 ", & 380 | "Leu ","TTA ","0.16 ","Leu ","TTG ","0.33 ","Leu ","CTT ","0.16 ","Leu ","CTC ","0.08 ", & 381 | "Leu ","CTA ","0.11 ","Leu ","CTG ","0.16 ","End ","TAA ","0.53 ","End ","TGA ","0.18 ", & 382 | "End ","TAG ","0.29 ","Trp ","TGG ","1.00 ","Pro ","CCT ","0.35 ","Pro ","CCC ","0.15 ", & 383 | "Pro ","CCA ","0.41 ","Pro ","CCG ","0.09 ","His ","CAT ","0.57 ","His ","CAC ","0.43 ", & 384 | "Arg ","CGT ","0.16 ","Arg ","CGC ","0.05 ","Arg ","CGA ","0.10 ","Arg ","CGG ","0.05 ", & 385 | "Arg ","AGA ","0.48 ","Arg ","AGG ","0.16 ","Gln ","CAA ","0.61 ","Gln ","CAG ","0.39 ", & 386 | "Ile ","ATT ","0.50 ","Ile ","ATC ","0.30 ","Ile ","ATA ","0.19 ","Thr ","ACT ","0.40 ", & 387 | "Thr ","ACC ","0.25 ","Thr ","ACA ","0.24 ","Thr ","ACG ","0.11 ","Asn ","AAC ","0.51 ", & 388 | "Asn ","AAT ","0.49 ","Lys ","AAA ","0.47 ","Lys ","AAG ","0.53 ","Met ","ATG ","1.00 ", & 389 | "Val ","GTT ","0.42 ","Val ","GTC ","0.23 ","Val ","GTA ","0.15 ","Val ","GTG ","0.19 ", & 390 | "Ala ","GCT ","0.45 ","Ala ","GCC ","0.26 ","Ala ","GCA ","0.23 ","Ala ","GCG ","0.06 ", & 391 | "Asp ","GAT ","0.58 ","Asp ","GAC ","0.42 ","Gly ","GGT ","0.44 ","Gly ","GGC ","0.14 ", & 392 | "Gly ","GGA ","0.32 ","Gly ","GGG ","0.10 ","Glu ","GAA ","0.57 ","Glu ","GAG ","0.43 "/), (/3,64/) ) 393 | 394 | ! SOLUTIONS 395 | 396 | TYPE DNA 397 | CHARACTER(LEN=9999) :: DNAseq='' ! the actual DNA sequence,in ACGT nts 398 | INTEGER :: NumOlaps=0 399 | ! INTEGER :: NumOlaps=0 ! the total number of overlaps 400 | INTEGER :: OlapsPos(999,2) ! the positions of the first and last 401 | ! nucleotides in the overlap 402 | INTEGER(KIND=1) :: NUMseq(9999) ! the nt sequence as numbers (-1,-3,3,1) 403 | INTEGER(KIND=1) :: prot2cod(3333) ! PROTpos to codon (1-64) 404 | INTEGER(KIND=1) :: nt2cod(9999) ! DNApos to codon (1-64) or 0 405 | REAL :: MeltT(999) ! melting temps for the overlaps 406 | REAL :: TScore(999) ! overlap-based score of MeltTm deviance 407 | REAL :: CScore(3333) ! codon-based score of codon frequency 408 | REAL :: LScore(9999) ! nt-based score of oligo length 409 | INTEGER :: RScore(9999) ! nt-based score of repeats 410 | INTEGER :: PScore(9999) ! nt-based score of pattern matching 411 | INTEGER :: MScore(9999) ! nt-based score of potential mispriming 412 | INTEGER :: AScore(9999) ! nt-based score of AT content 413 | INTEGER :: GScore(9999) ! nt-based score of GC content 414 | INTEGER :: FScore(9999) ! nt-based score of gap-fixed positions 415 | REAL :: TotalGScore=0 ! total score for GC content 416 | REAL :: TotalAScore=0 ! total score for AT content 417 | REAL :: TotalLScore=0 ! total score for oligo length 418 | REAL :: TotalCScore=0 ! total score for codons 419 | REAL :: TotalTScore=0 ! total score for temperature 420 | REAL :: TotalRScore=0 ! total score for repeats 421 | REAL :: TotalPScore=0 ! total score for patterns 422 | REAL :: TotalMScore=0 ! total score for mispriming 423 | REAL :: TotalFScore=0 ! total score for gap-fixed positions 424 | REAL :: OverallScore=0 ! Sum of all the total scores 425 | INTEGER :: RN=0 ! number of tandem repeats 426 | INTEGER :: RS1(9999) ! starting position for primary seq 427 | INTEGER :: RS2(9999) ! starting position for secondary seq 428 | INTEGER :: RLn(9999) ! size of repeat (not oligo ends) 429 | ! INTEGER(KIND=1) :: RX(9999) ! direct=1,inverse=-1 430 | INTEGER :: RX(9999) ! direct=1,inverse=-1 431 | INTEGER :: MN=0 ! number of potential misprimes 432 | INTEGER :: M1(9999) ! starting position for potential misprime in prim 433 | INTEGER :: M2(9999) ! starting position for potential misprime in seco 434 | INTEGER :: MX(9999) ! Type of potential misprime (DS,IS,DA,IA) 435 | INTEGER :: MSN=0 ! number of actual misprimes 436 | INTEGER :: MS1(9999) ! starting position for actual misprime in prim 437 | INTEGER :: MS2(9999) ! starting position for actual misprime in seco 438 | INTEGER :: MSX(9999) ! Type of actual misprime (DS,IS,DA,IA) 439 | INTEGER :: MOL(9999) ! overlap the misprime is in 440 | INTEGER :: ntID_GC(9999) ! window of GC content 441 | INTEGER :: ntID_AT(9999) ! window of AT content 442 | INTEGER :: ntID_Tip(9999) ! unique number for Tip matching 443 | INTEGER :: ntID_TipRC(9999) ! unique number for Tip (reverse complement) 444 | INTEGER :: ntID_Rep(9999) ! unique number for Repeat matching 445 | INTEGER :: ntID_RepRC(9999) ! repeat matching (reverse complement) 446 | LOGICAL :: GapFixPos(9999) ! should nt be fixed within a gap? 447 | LOGICAL :: Degen(9999) ! true if the nt is degenerate 448 | INTEGER :: DegenNum(9999) ! numerical index for degenerate sequence (1-11) 449 | END TYPE 450 | 451 | TYPE(DNA) :: CurrDNA 452 | TYPE(DNA) :: StoreDNA 453 | TYPE(DNA) :: BestDNA 454 | TYPE(DNA) :: BestOverlapDNA 455 | 456 | END MODULE dnaworks_data 457 | -------------------------------------------------------------------------------- /dnaworks_test.f90: -------------------------------------------------------------------------------- 1 | MODULE dnaworks_test 2 | 3 | IMPLICIT NONE 4 | SAVE 5 | 6 | LOGICAL :: TEST0=.FALSE. ! Print TEST0 messages 7 | LOGICAL :: TEST1=.FALSE. ! Print TEST1 messages 8 | LOGICAL :: TEST2=.FALSE. ! Print TEST2 messages 9 | LOGICAL :: TEST3=.FALSE. ! Print TEST3 messages 10 | 11 | END MODULE dnaworks_test 12 | -------------------------------------------------------------------------------- /email_func.f90: -------------------------------------------------------------------------------- 1 | SUBROUTINE Send_Email 2 | 3 | USE dnaworks_data 4 | USE dnaworks_test 5 | IMPLICIT NONE 6 | 7 | INTEGER,EXTERNAL :: CurrentTimeSeconds 8 | INTEGER :: start 9 | CHARACTER(LEN=1000) :: text 10 | CHARACTER(LEN=500) :: text1,text2 11 | 12 | IF (TEST0) PRINT *,"Send_Email" !TEST0 13 | 14 | WRITE(text1,FMT="(a80,' -s ""DNAWorks Output - ',a80)") MAILPATH,jobname 15 | WRITE(text2,FMT="('"" <',a,' ',a80)") outputfile,email 16 | 17 | text=text1(1:LEN_TRIM(text1))//text2(1:LEN_TRIM(text2)) 18 | 19 | ! PRINT *,text 20 | 21 | CALL SYSTEM(text) 22 | 23 | ! The following is a waste of time. It should take about 10 seconds to 24 | ! go through the loop. This should give the program enough time to send out 25 | ! an email. 26 | 27 | start=CurrentTimeSeconds() 28 | DO WHILE (CurrentTimeSeconds()-start.LT.10) 29 | END DO 30 | 31 | END SUBROUTINE Send_Email 32 | -------------------------------------------------------------------------------- /encoding.f90: -------------------------------------------------------------------------------- 1 | SUBROUTINE Create_ntID_Arrays() 2 | ! 3 | ! Create or update (if MutProtPos isn't zero) nucleotide id arrays. 4 | ! Note that INTEGER(KIND=4) can have only 9 digits! INTEGER(KIND=8) can 5 | ! hold 17 digits... 6 | ! 7 | ! A ntID array holds an n-digit integer in place of the sequence. 8 | ! A=-1 T=1 C=-3 G=3 9 | ! 10 | ! ACGTACGTACGTACGT with a RepLen = 8 would be shown as 11 | ! ........ ntID_Rep(1) = 12341234 12 | ! ........ ntID_Rep(2) = 23412341 13 | ! ........ ntID_Rep(3) = 34123412 14 | ! ........ ntID_Rep(4) = 41234123 15 | ! 16 | ! and so on... 17 | 18 | USE dnaworks_data 19 | USE dnaworks_test 20 | IMPLICIT NONE 21 | 22 | INTEGER :: i,j,m,n,a1,a2,b1,b2,t1,t2,fin 23 | 24 | IF (TEST2) PRINT *,'Create_ntID_Arrays' 25 | 26 | IF (MutProtPos.eq.0) THEN 27 | a1=1 28 | a2=DNAlen-MPTip+1 29 | b1=1 30 | b2=DNAlen-RepLen+1 31 | ELSE 32 | a1=(MAX(1,(MutNtPos(1)-MPTip))) 33 | a2=(MIN((DNAlen-MPTip+1),(MutNtPos(MutNtNum)+1))) 34 | b1=(MAX(1,(MutNtPos(1)-RepLen))) 35 | b2=(MIN((DNAlen-RepLen+1),(MutNtPos(MutNtNum)+1))) 36 | END IF 37 | 38 | ! update misprime arrays 39 | 40 | fin=MPTip-1 41 | DO i=a1,a2 42 | CurrDNA%ntID_Tip(i)=0 43 | CurrDNA%ntID_TipRC(i)=0 44 | DO j=0,fin 45 | SELECT CASE(CurrDNA%NUMseq(i+fin-j)) 46 | CASE(-1) 47 | CurrDNA%ntID_Tip(i)=CurrDNA%ntID_Tip(i)+(1*(10**j)) 48 | CASE(-3) 49 | CurrDNA%ntID_Tip(i)=CurrDNA%ntID_Tip(i)+(2*(10**j)) 50 | CASE(3) 51 | CurrDNA%ntID_Tip(i)=CurrDNA%ntID_Tip(i)+(3*(10**j)) 52 | CASE(1) 53 | CurrDNA%ntID_Tip(i)=CurrDNA%ntID_Tip(i)+(4*(10**j)) 54 | END SELECT 55 | SELECT CASE(CurrDNA%NUMseq(i+j)) 56 | CASE(-1) 57 | CurrDNA%ntID_TipRC(i)=CurrDNA%ntID_TipRC(i)+(4*(10**j)) 58 | CASE(-3) 59 | CurrDNA%ntID_TipRC(i)=CurrDNA%ntID_TipRC(i)+(3*(10**j)) 60 | CASE(3) 61 | CurrDNA%ntID_TipRC(i)=CurrDNA%ntID_TipRC(i)+(2*(10**j)) 62 | CASE(1) 63 | CurrDNA%ntID_TipRC(i)=CurrDNA%ntID_TipRC(i)+(1*(10**j)) 64 | END SELECT 65 | END DO 66 | END DO 67 | 68 | ! update repeat arrays 69 | 70 | fin=RepLen-1 71 | DO i=b1,b2 72 | CurrDNA%ntID_Rep(i)=0 73 | CurrDNA%ntID_RepRC(i)=0 74 | DO j=0,fin 75 | SELECT CASE(CurrDNA%NUMseq(i+fin-j)) 76 | CASE(-1) 77 | CurrDNA%ntID_Rep(i)=CurrDNA%ntID_Rep(i)+(1*(10**j)) 78 | CASE(-3) 79 | CurrDNA%ntID_Rep(i)=CurrDNA%ntID_Rep(i)+(2*(10**j)) 80 | CASE(3) 81 | CurrDNA%ntID_Rep(i)=CurrDNA%ntID_Rep(i)+(3*(10**j)) 82 | CASE(1) 83 | CurrDNA%ntID_Rep(i)=CurrDNA%ntID_Rep(i)+(4*(10**j)) 84 | END SELECT 85 | SELECT CASE(CurrDNA%NUMseq(i+j)) 86 | CASE(-1) 87 | CurrDNA%ntID_RepRC(i)=CurrDNA%ntID_RepRC(i)+(4*(10**j)) 88 | CASE(-3) 89 | CurrDNA%ntID_RepRC(i)=CurrDNA%ntID_RepRC(i)+(3*(10**j)) 90 | CASE(3) 91 | CurrDNA%ntID_RepRC(i)=CurrDNA%ntID_RepRC(i)+(2*(10**j)) 92 | CASE(1) 93 | CurrDNA%ntID_RepRC(i)=CurrDNA%ntID_RepRC(i)+(1*(10**j)) 94 | END SELECT 95 | END DO 96 | END DO 97 | 98 | ! update GC array 99 | 100 | fin=RepLen-1 101 | DO i=b1,b2 102 | CurrDNA%ntID_GC(i)=0 103 | DO j=(i+0),(i+fin) 104 | IF (ABS(CurrDNA%NUMseq(j)).eq.1) CurrDNA%ntID_GC(i)=CurrDNA%ntID_GC(i)+1 105 | END DO 106 | END DO 107 | 108 | ! update AT array 109 | 110 | fin=RepLen-1 111 | DO i=b1,b2 112 | CurrDNA%ntID_AT(i)=0 113 | DO j=(i+0),(i+fin) 114 | IF (ABS(CurrDNA%NUMseq(j)).eq.3) CurrDNA%ntID_AT(i)=CurrDNA%ntID_AT(i)+1 115 | END DO 116 | END DO 117 | 118 | END SUBROUTINE Create_ntID_Arrays 119 | SUBROUTINE Sort_Misprime_Arrays() 120 | 121 | USE dnaworks_data 122 | USE dnaworks_test 123 | IMPLICIT NONE 124 | 125 | INTEGER :: i,j,k 126 | 127 | IF (TEST2) PRINT *,"Sort_Misprime_Arrays" !TEST2 128 | 129 | ! Sort misprime pairs 130 | 131 | DO i=1,CurrDNA%MN-1 ! integer sort 132 | DO j=i+1,CurrDNA%MN 133 | IF (CurrDNA%M1(i).gt.CurrDNA%M1(j)) THEN 134 | CALL IntSwap(CurrDNA%M1(i),CurrDNA%M1(j)) 135 | CALL IntSwap(CurrDNA%M2(i),CurrDNA%M2(j)) 136 | CALL IntSwap(CurrDNA%MX(i),CurrDNA%MX(j)) 137 | END IF 138 | END DO 139 | END DO 140 | 141 | END SUBROUTINE Sort_Misprime_Arrays 142 | SUBROUTINE Sort_Repeat_Arrays 143 | 144 | USE dnaworks_data 145 | USE dnaworks_test 146 | IMPLICIT NONE 147 | 148 | INTEGER :: i,j,k 149 | 150 | IF (TEST2) PRINT *,"Sort_Repeat_Arrays" !TEST2 151 | 152 | ! Rearrange repeat pairs 153 | 154 | DO i=1,CurrDNA%RN 155 | IF (CurrDNA%RS1(i).gt.CurrDNA%RS2(i)) THEN 156 | CALL IntSwap(CurrDNA%RS1(i),CurrDNA%RS2(i)) 157 | END IF 158 | END DO 159 | 160 | ! Sort repeat pairs 161 | 162 | DO i=1,CurrDNA%RN-1 ! integer sort 163 | DO j=i+1,CurrDNA%RN 164 | IF (CurrDNA%RS1(i).gt.CurrDNA%RS1(j)) THEN 165 | CALL IntSwap(CurrDNA%RS1(i),CurrDNA%RS1(j)) 166 | CALL IntSwap(CurrDNA%RS2(i),CurrDNA%RS2(j)) 167 | CALL IntSwap(CurrDNA%RLn(i),CurrDNA%RLn(j)) 168 | CALL IntSwap(CurrDNA%RX(i),CurrDNA%RX(j)) 169 | END IF 170 | END DO 171 | END DO 172 | 173 | END SUBROUTINE Sort_Repeat_Arrays 174 | SUBROUTINE Translate_Protein 175 | ! 176 | ! Translate the mutatable protein residues into DNA sequence 177 | 178 | USE dnaworks_data 179 | USE dnaworks_test 180 | IMPLICIT NONE 181 | 182 | INTEGER :: i,k,p,d,x 183 | REAL :: rand 184 | LOGICAL :: no_codons 185 | INTEGER,EXTERNAL :: NT2Int 186 | CHARACTER(LEN=3) :: tempCodonSeq 187 | 188 | IF (TEST0) PRINT *,"Translate_Protein" !TEST0 189 | 190 | ! Reset MutProtPos 191 | 192 | MutProtPos=0 193 | 194 | IF (.not.SequenceTranslated) THEN ! avoid the first time 195 | main: DO i=1,mutPROTnum 196 | p=mutPROT2prot(i) 197 | d=prot2nt(p) 198 | 199 | ! Choose the codon randomly unless the codon is not allowed 200 | 201 | k = 1 202 | IF (CodonRandom) THEN 203 | CALL RANDOM_NUMBER(rand) 204 | k=(INT(rand*(AAT(prot2aa(p))%NumOfActiveCodons)))+1 205 | END IF 206 | tempCodonSeq=CFT(AAT(prot2aa(p))%Codon(k))%Seq 207 | 208 | ! Create the codon and insert it into the DNA sequence. 209 | 210 | ! If the chain is reversed, put in reverse complement 211 | 212 | IF (ChainReverse(prot2chain(p))) CALL RevComplStr(tempCodonSeq) 213 | CurrDNA%DNAseq(d-1:d+1)=tempCodonSeq 214 | 215 | ! Fill prot2cod array 216 | 217 | CurrDNA%prot2cod(p) = AAT(prot2aa(p))%Codon(k) 218 | CurrDNA%nt2cod(d) = AAT(prot2aa(p))%Codon(k) 219 | 220 | ! Fill the numerical sequence array 221 | 222 | CurrDNA%NUMseq(d-1)=NT2Int(CurrDNA%DNAseq(d-1:d-1)) 223 | CurrDNA%NUMseq(d)=NT2Int(CurrDNA%DNAseq(d:d)) 224 | CurrDNA%NUMseq(d+1)=NT2Int(CurrDNA%DNAseq(d+1:d+1)) 225 | 226 | END DO main 227 | SequenceTranslated=.TRUE. 228 | END IF 229 | 230 | END SUBROUTINE Translate_Protein 231 | -------------------------------------------------------------------------------- /misc_func.f90: -------------------------------------------------------------------------------- 1 | SUBROUTINE Fix_Degenerates 2 | ! 3 | ! Fix degenerate sequences to A,C,G or T 4 | 5 | USE dnaworks_data 6 | USE dnaworks_test 7 | IMPLICIT NONE 8 | 9 | INTEGER :: i,j,k 10 | REAL :: rand 11 | 12 | IF (TEST0) PRINT *,"Fix_Degenerates" !TEST0 13 | 14 | DO i=1,NumDegPos 15 | 16 | ! Choose a nt at random from possible 17 | CALL RANDOM_NUMBER(rand) 18 | j = CurrDNA%DegenNum(DegPos(i)) ! index for degenerate sequence 19 | k=(INT(rand*(DegenSeq(j)%NumOfNT)))+1 ! choice for index 20 | CurrDNA%DNAseq(DegPos(i):DegPos(i)) = DegenSeq(j)%Seq(k) ! assign seq 21 | CurrDNA%NumSeq(DegPos(i)) = DegenSeq(j)%NumSeq(k) ! assign NumSeq 22 | 23 | END DO 24 | 25 | END SUBROUTINE Fix_Degenerates 26 | SUBROUTINE IntSwap(firstelement,lastelement) 27 | 28 | ! Swap integers 29 | 30 | USE dnaworks_test 31 | IMPLICIT NONE 32 | 33 | INTEGER :: firstelement,lastelement,dummy 34 | 35 | IF (TEST3) PRINT *,"IntSwap" !TEST3 36 | 37 | dummy=firstelement 38 | firstelement=lastelement 39 | lastelement=dummy 40 | 41 | END SUBROUTINE IntSwap 42 | SUBROUTINE RealSwap(firstelement,lastelement) 43 | 44 | ! Swap real numbers 45 | 46 | USE dnaworks_test 47 | IMPLICIT NONE 48 | 49 | REAL :: firstelement,lastelement,dummy 50 | 51 | IF (TEST3) PRINT *,"RealSwap" !TEST3 52 | 53 | dummy=firstelement 54 | firstelement=lastelement 55 | lastelement=dummy 56 | 57 | END SUBROUTINE RealSwap 58 | SUBROUTINE Revert_Degenerates 59 | ! 60 | ! Revert degenerate sequences back to original 61 | 62 | USE dnaworks_data 63 | USE dnaworks_test 64 | IMPLICIT NONE 65 | 66 | INTEGER :: i,j 67 | 68 | IF (TEST0) PRINT *,"Revert_Degenerates" !TEST0 69 | 70 | DO i=1,NumDegPos 71 | 72 | j = CurrDNA%DegenNum(DegPos(i)) ! index for degenerate sequence at that position 73 | CurrDNA%DNAseq(DegPos(i):DegPos(i)) = DegenSeq(j)%DegNT ! assign seq 74 | 75 | END DO 76 | 77 | END SUBROUTINE Revert_Degenerates 78 | -------------------------------------------------------------------------------- /mutate.f90: -------------------------------------------------------------------------------- 1 | SUBROUTINE Find_Mut_Pot_Misprimes() 2 | ! 3 | ! This is a position dependent replacement for Find_Potential_Misprimes. 4 | 5 | USE dnaworks_data 6 | USE dnaworks_test 7 | IMPLICIT NONE 8 | 9 | INTEGER :: i,j,start,finish 10 | LOGICAL,EXTERNAL :: HMatchNum 11 | 12 | IF (TEST2) PRINT *,"Find_Mut_Pot_Misprimes" !TEST2 13 | 14 | ! Get rid of potential misprimes in the current mutant range 15 | 16 | CALL Decrement_Misprime_Arrays 17 | 18 | ! Make sure the search doesn't go beyond the possible ranges 19 | 20 | start=MAX(1,MutNtPos(1)-MPLn) 21 | finish=MIN((MutNtPos(MutNtNum)+1),(DNAlen-MPLn+1)) 22 | 23 | ! If MutNtPos(1) <= MPLn+1, only run the second half-search 24 | 25 | IF (MutNtPos(1).gt.(MPLn+1)) THEN 26 | 27 | ! First half-search 28 | 29 | DO i=1,MutNtPos(1)-MPLn-1 30 | DO j=MutNtPos(1)-MPLn,finish 31 | IF (HMatchNum(i,j,1)) THEN 32 | IF (CurrDNA%ntID_Tip(i+MPLn-MPTip).eq.CurrDNA%ntID_Tip(j+MPLn-MPTip)) & 33 | CALL Increment_Misprime_Arrays(i,j,1) 34 | IF (CurrDNA%ntID_Tip(i).eq.CurrDNA%ntID_Tip(j)) & 35 | CALL Increment_Misprime_Arrays(i,j,4) 36 | END IF 37 | IF (HMatchNum(i,j,-1)) THEN 38 | IF (CurrDNA%ntID_Tip(i).eq.CurrDNA%ntID_TipRC(j+MPLn-MPTip)) & 39 | CALL Increment_Misprime_Arrays(i,j,2) 40 | IF (CurrDNA%ntID_Tip(i+MPLn-MPTip).eq.CurrDNA%ntID_TipRC(j)) & 41 | CALL Increment_Misprime_Arrays(i,j,3) 42 | END IF 43 | END DO 44 | END DO 45 | END IF 46 | 47 | ! Second half-search 48 | 49 | DO i=start,finish 50 | DO j=i,DNAlen-MPLn+1 51 | IF (HMatchNum(i,j,1)) THEN 52 | IF (CurrDNA%ntID_Tip(i+MPLn-MPTip).eq.CurrDNA%ntID_Tip(j+MPLn-MPTip)) & 53 | CALL Increment_Misprime_Arrays(i,j,1) 54 | IF (CurrDNA%ntID_Tip(i).eq.CurrDNA%ntID_Tip(j)) & 55 | CALL Increment_Misprime_Arrays(i,j,4) 56 | END IF 57 | IF (HMatchNum(i,j,-1)) THEN 58 | IF (CurrDNA%ntID_Tip(i).eq.CurrDNA%ntID_TipRC(j+MPLn-MPTip)) & 59 | CALL Increment_Misprime_Arrays(i,j,2) 60 | IF (CurrDNA%ntID_Tip(i+MPLn-MPTip).eq.CurrDNA%ntID_TipRC(j)) & 61 | CALL Increment_Misprime_Arrays(i,j,3) 62 | END IF 63 | END DO 64 | END DO 65 | 66 | CALL Sort_Misprime_Arrays 67 | 68 | END SUBROUTINE Find_Mut_Pot_Misprimes 69 | SUBROUTINE Find_Mutated_Repeats() 70 | 71 | USE dnaworks_data 72 | USE dnaworks_test 73 | IMPLICIT NONE 74 | 75 | INTEGER :: i,j,start,finish 76 | LOGICAL,EXTERNAL :: PairWithinKnownRepeat 77 | 78 | IF (TEST2) PRINT *,"Find_Mutated_Repeats" !TEST2 79 | 80 | CALL Decrement_Repeat_Arrays 81 | 82 | ! Make sure the search doesn't go beyond the possible ranges 83 | 84 | start=MAX(1,MutNtPos(1)-RepLen) 85 | finish=MIN((MutNtPos(MutNtNum)+1),(DNAlen-RepLen+1)) 86 | 87 | ! If MutNtPos(1) <= RepLen+1, only run the second half-search 88 | 89 | IF (MutNtPos(1).gt.(RepLen+1)) THEN 90 | 91 | ! First half-search 92 | 93 | DO i=1,(MutNtPos(1)-RepLen-1) 94 | DO j=(MutNtPos(1)-RepLen),finish 95 | 96 | ! Direct repeat search 97 | 98 | IF (i.ne.j) THEN 99 | IF (.not.PairWithinKnownRepeat(i,j,1)) THEN 100 | IF (CurrDNA%ntID_Rep(i).eq.CurrDNA%ntID_Rep(j)) & 101 | CALL Increment_Repeat_Arrays(i,j,1) 102 | END IF 103 | END IF 104 | 105 | ! Inverse repeat search 106 | 107 | IF (.not.PairWithinKnownRepeat(i,j,-1)) THEN 108 | IF (CurrDNA%ntID_Rep(i).eq.CurrDNA%ntID_RepRC(j)) & 109 | CALL Increment_Repeat_Arrays(i,j,-1) 110 | END IF 111 | END DO 112 | END DO 113 | END IF 114 | 115 | ! Second half-search 116 | 117 | DO i=start,finish 118 | DO j=i,DNAlen-RepLen+1 119 | 120 | ! Direct repeat search 121 | 122 | IF (i.ne.j) THEN 123 | IF (.not.PairWithinKnownRepeat(i,j,1)) THEN 124 | IF (CurrDNA%ntID_Rep(i).eq.CurrDNA%ntID_Rep(j)) & 125 | CALL Increment_Repeat_Arrays(i,j,1) 126 | END IF 127 | END IF 128 | 129 | ! Inverse repeat search 130 | 131 | IF (.not.PairWithinKnownRepeat(i,j,-1)) THEN 132 | IF (CurrDNA%ntID_Rep(i).eq.CurrDNA%ntID_RepRC(j)) & 133 | CALL Increment_Repeat_Arrays(i,j,-1) 134 | END IF 135 | 136 | END DO 137 | END DO 138 | 139 | CALL Sort_Repeat_Arrays 140 | 141 | END SUBROUTINE Find_Mutated_Repeats 142 | SUBROUTINE Mutate_Sequence 143 | ! 144 | ! Mutate a single codon to an alternate codon. The residue choice is 145 | ! determined in Mutate_Wheel, and the codon choice is made here. 146 | 147 | USE dnaworks_data 148 | USE dnaworks_test 149 | IMPLICIT NONE 150 | 151 | INTEGER :: i,j,choice 152 | INTEGER :: AAN ! amino acid choice (1-21) 153 | ! INTEGER :: refCN ! old codon number (1-64) 154 | INTEGER :: i1,i2,i3 ! nt positions of choice 155 | REAL :: rand 156 | ! CHARACTER(LEN=3) :: refCOD ! old codon sequence 157 | 158 | IF (TEST1) PRINT *,"Mutate_Sequence" 159 | 160 | ! Generate XScores 161 | 162 | CALL Equalize_Scores 163 | 164 | ! Determine position to mutate 165 | 166 | CALL Mutate_Wheel 167 | 168 | ! Determine what amino acid exists at the selected residue MutProtPos. 169 | ! AAN is a number between 1 and 21, corresponding to a specific amino acid 170 | 171 | AAN=prot2aa(MutProtPos) 172 | 173 | ! Determine the nt positions and the actual codon sequence for that residue 174 | 175 | i1=prot2nt(MutProtPos)-1 176 | i2=prot2nt(MutProtPos) 177 | i3=prot2nt(MutProtPos)+1 178 | ! 179 | ! randomly choose codon and make sure it's different and available for that AAN 180 | 181 | IF (AAT(AAN)%NumOfActiveCodons.eq.2) THEN 182 | choice=1 183 | IF (AAT(AAN)%Codon(choice).eq.CurrDNA%prot2cod(MutProtPos)) choice=2 184 | ELSE 185 | choose: DO i=1,1000 186 | CALL RANDOM_NUMBER(rand) 187 | choice=(INT(rand*(AAT(AAN)%NumOfActiveCodons)))+1 188 | IF (AAT(AAN)%Codon(choice).ne.CurrDNA%prot2cod(MutProtPos)) EXIT choose 189 | END DO choose 190 | END IF 191 | 192 | ! update the arrays and change the DNA sequence, then quit 193 | 194 | CurrDNA%prot2cod(MutProtPos)=AAT(AAN)%Codon(choice) 195 | CurrDNA%nt2cod(i2) = AAT(AAN)%Codon(choice) 196 | 197 | IF (ChainReverse(prot2chain(MutProtPos))) THEN 198 | CurrDNA%DNAseq(i1:i3)=CFT(AAT(AAN)%Codon(choice))%SeqRC 199 | CurrDNA%NUMseq(i1)=CFT(AAT(AAN)%Codon(choice))%numRC(1) 200 | CurrDNA%NUMseq(i2)=CFT(AAT(AAN)%Codon(choice))%numRC(2) 201 | CurrDNA%NUMseq(i3)=CFT(AAT(AAN)%Codon(choice))%numRC(3) 202 | ELSE 203 | CurrDNA%DNAseq(i1:i3)=CFT(AAT(AAN)%Codon(choice))%Seq 204 | CurrDNA%NUMseq(i1)=CFT(AAT(AAN)%Codon(choice))%num(1) 205 | CurrDNA%NUMseq(i2)=CFT(AAT(AAN)%Codon(choice))%num(2) 206 | CurrDNA%NUMseq(i3)=CFT(AAT(AAN)%Codon(choice))%num(3) 207 | END IF 208 | 209 | ! Set the MutNtPos values 210 | 211 | MutNtNum=0 212 | MutNtPos(1)=0 213 | MutNtPos(2)=0 214 | MutNtPos(3)=0 215 | 216 | IF (CurrDNA%NUMseq(i1).ne.StoreDNA%NUMseq(i1)) THEN 217 | MutNtNum=MutNtNum+1 218 | MutNtPos(MutNtNum)=i1 219 | END IF 220 | IF (CurrDNA%NUMseq(i2).ne.StoreDNA%NUMseq(i2)) THEN 221 | MutNtNum=MutNtNum+1 222 | MutNtPos(MutNtNum)=i2 223 | END IF 224 | IF (CurrDNA%NUMseq(i3).ne.StoreDNA%NUMseq(i3)) THEN 225 | MutNtNum=MutNtNum+1 226 | MutNtPos(MutNtNum)=i3 227 | END IF 228 | 229 | SequenceTranslated=.TRUE. 230 | 231 | END SUBROUTINE Mutate_Sequence 232 | SUBROUTINE Mutate_Wheel 233 | ! 234 | ! Choose a position to mutate based on the score of mutatable codons 235 | 236 | USE dnaworks_data 237 | USE dnaworks_test 238 | IMPLICIT NONE 239 | 240 | INTEGER :: i,j,k 241 | REAL :: rand 242 | REAL :: ZScore(3333) ! an accumulated codon-based overall score 243 | REAL :: choice 244 | 245 | IF (TEST1) PRINT *,"Mutate_Wheel" 246 | 247 | ! Generate ZScore array 248 | 249 | ZScore(1)=XScore(1) 250 | 251 | ! If there are more than one codon to be mutated, 252 | 253 | IF (mutPROTnum.gt.1) THEN 254 | 255 | DO i=2,mutPROTnum 256 | ZScore(i)=ZScore(i-1)+XScore(i) 257 | END DO 258 | 259 | ! Pick a random number between 0 and the sum of all the xScore values. 260 | 261 | CALL RANDOM_NUMBER(rand) 262 | choice=rand*ZScore(mutPROTnum) 263 | 264 | ! Find the codon that corresponds to this number, assign to MutProtPos 265 | 266 | inner: DO j=1,mutPROTnum 267 | IF (ZScore(j).ge.choice) THEN 268 | MutProtPos=mutPROT2prot(j) 269 | EXIT inner 270 | END IF 271 | END DO inner 272 | ELSE 273 | 274 | ! Otherwise, just choose the first codon 275 | 276 | MutProtPos=mutPROT2prot(1) 277 | 278 | END IF 279 | 280 | END SUBROUTINE Mutate_Wheel 281 | -------------------------------------------------------------------------------- /overlaps.f90: -------------------------------------------------------------------------------- 1 | INTEGER FUNCTION ForOlap(first) 2 | 3 | USE dnaworks_data 4 | USE dnaworks_test 5 | IMPLICIT NONE 6 | 7 | INTEGER :: first,last ! positions in DNAseq 8 | REAL,EXTERNAL :: TmCalc 9 | REAL :: diff,diff_lo,diff_hi,diff2 10 | LOGICAL :: done 11 | INTEGER :: shift 12 | 13 | IF (TEST3) PRINT *,"ForOlap" !TEST3 14 | 15 | done=.FALSE. 16 | shift=32 17 | 18 | last=first+shift 19 | shift=shift/2 20 | 21 | loop: DO WHILE (.not.done) 22 | IF (last.ge.DNAlen) THEN 23 | last=last-shift 24 | ELSE 25 | diff=MeltTemp-(TmCalc(first,last)) 26 | IF (ABS(diff).gt.MeltTol) THEN 27 | IF (diff.gt.0) THEN 28 | last=last+shift 29 | ELSE 30 | last=last-shift 31 | END IF 32 | ELSE 33 | done=.TRUE. 34 | END IF 35 | END IF 36 | shift=shift/2 37 | IF (shift.le.1) EXIT loop 38 | END DO loop 39 | 40 | ! For the final step, determine which of the final two positions is best 41 | 42 | IF (.not.done) THEN 43 | IF (last.le.(DNAlen-1)) THEN 44 | diff=MeltTemp-(TmCalc(first,last)) 45 | IF (diff.gt.0) THEN 46 | shift=1 47 | ELSE 48 | shift=-1 49 | END IF 50 | last=last+shift 51 | diff2=MeltTemp-(TmCalc(first,last)) 52 | IF (ABS(diff).lt.ABS(diff2)) last=last-shift 53 | END IF 54 | IF ((DNAlen-last).le.2) last=DNAlen 55 | END IF 56 | 57 | ForOlap = last 58 | 59 | END FUNCTION ForOlap 60 | SUBROUTINE Generate_Overlaps(SolutionNo) 61 | ! 62 | ! The nucleotide sequence is broken into overlaps of around 20 nucleotides 63 | ! each, depending on the calculated Tm. The set of potential oligos is 64 | ! then analyzed and the best trial is kept. A gap is allowed between 65 | ! overlaps to give oligos of size oligoLen. 66 | ! 67 | ! The structure of the overlap array is as follows 68 | ! 69 | ! 1,1 1,2 2,1 2,2 3,1 3,2 4,1 4,2 5,1 5,2 70 | ! ................ .................... .................. 71 | ! ......................... ...................... .......... 72 | ! ------- --------- ------ ------ ------- 73 | ! 74 | ! OVERLAP: 1 2 3 4 5 75 | ! 76 | 77 | USE dnaworks_data 78 | USE dnaworks_test 79 | IMPLICIT NONE 80 | 81 | INTEGER :: i,j,k,l,m 82 | INTEGER :: first ! first nt of the overlap 83 | INTEGER :: last=1 ! last nt of the overlap 84 | INTEGER :: SolutionNo ! current solution number 85 | INTEGER :: shift ! number of nt before first overlap 86 | INTEGER :: reset ! first overlap size 87 | REAL :: rand 88 | INTEGER :: olength ! the number of nt to skip ahead 89 | LOGICAL :: changed ! true if CurrDNA%OverallScore.lt.BestOverlapDNA%OverallScore 90 | 91 | IF (TEST1) PRINT *,"Generate_Overlaps" 92 | 93 | changed=.FALSE. 94 | 95 | CALL Fix_Degenerates ! pin down degenerate sequences 96 | 97 | IF (.not.MutantRun) THEN 98 | 99 | generate: DO k=1,10000 100 | 101 | ! initialize the nt2overlap array 102 | 103 | shift=0 104 | last=1 105 | changed=.FALSE. 106 | 107 | DO i=1,DNAlen 108 | nt2overlap=0 109 | END DO 110 | 111 | BestOverlapDNA = CurrDNA ! initialize BestOverlapDNA values 112 | BestOverlapDNA%OverallScore = 9999 113 | 114 | DO i=1,999 ! initialize the arrays 115 | CurrDNA%OlapsPos(i,1)=0 116 | CurrDNA%OlapsPos(i,2)=0 117 | END DO 118 | CurrDNA%NumOlaps=0 119 | 120 | ! Determine the new CurrDNA%OlapsPos values 121 | 122 | outer: DO i=1,1000 ! keep shifting 123 | 124 | IF (NOGAPS) THEN 125 | olength = 0 126 | ELSE 127 | IF (OligoLenRandom) THEN 128 | CALL RANDOM_NUMBER(rand) 129 | rand = (rand*(OligoLen-20)) 130 | olength = (INT(rand))+20 ! randomize oligo length 131 | ELSE 132 | olength = OligoLen 133 | END IF 134 | END IF 135 | 136 | CurrDNA%NumOlaps=0 ! initialize the number of overlaps 137 | first=1+shift 138 | CALL Make_Olap(first,last) 139 | IF ((shift.gt.0).and.(CurrDNA%OlapsPos(1,2).ge.OligoLen)) EXIT outer 140 | last=first+olength-1 141 | first=last-7 ! the minimal overlap size is 7 142 | IF (first.le.CurrDNA%OlapsPos(1,2)) THEN 143 | first = CurrDNA%OlapsPos(1,2)+1 144 | last = first+7 145 | END IF 146 | 147 | inner: DO j=1,999 148 | 149 | IF (NOGAPS) THEN 150 | olength = 0 151 | ELSE 152 | IF (OligoLenRandom) THEN 153 | CALL RANDOM_NUMBER(rand) 154 | rand = (rand*(OligoLen-20)) 155 | olength = (INT(rand))+20 ! randomize oligo length 156 | ELSE 157 | olength = OligoLen 158 | END IF 159 | END IF 160 | 161 | IF (last.ge.DNAlen) EXIT inner 162 | CALL Make_Olap(first,last) 163 | last=first+olength-1 164 | first=last-7 165 | IF (first.le.CurrDNA%OlapsPos(CurrDNA%NumOlaps,2)) THEN 166 | first = CurrDNA%OlapsPos(CurrDNA%NumOlaps,2)+1 167 | last = first+7 168 | END IF 169 | END DO inner 170 | 171 | shift=shift+1 172 | 173 | IF ((MOD(CurrDNA%NumOlaps,2)).eq.0) THEN 174 | CYCLE outer 175 | END IF 176 | 177 | CALL Evaluate_Scores 178 | 179 | IF (CurrDNA%OverallScore.lt.BestOverlapDNA%OverallScore) THEN 180 | BestOverlapDNA = CurrDNA 181 | changed=.TRUE. 182 | END IF 183 | 184 | END DO outer 185 | 186 | CurrDNA=BestOverlapDNA ! revert to best solution 187 | 188 | IF (MOD(CurrDNA%NumOlaps,2).eq.1) THEN 189 | EXIT generate 190 | ELSE 191 | IF (TEST0) PRINT *,k,"EVEN OVERLAPS" !TEST0 192 | 193 | ! Take drastic action to get optimization moving 194 | 195 | IF ((MOD(k,200)).eq.0) THEN 196 | OligoLenLo=OligoLenLo+1 197 | OligoLenHi=OligoLenHi+1 198 | OligoLen=OligoLen+1 199 | WRITE(UNIT=console,FMT="('')") 200 | WRITE(UNIT=outputnum,FMT="('')") 201 | WRITE(UNIT=console,FMT="(' Too many sets of even overlaps -- increasing oligo length to',i4)") OligoLen 202 | WRITE(UNIT=outputnum,FMT="(' Too many sets of even overlaps -- increasing oligo length to',i4)") OligoLen 203 | END IF 204 | END IF 205 | END DO generate 206 | 207 | END IF 208 | 209 | ! Assign the nt2overlap array 210 | 211 | DO i=1,DNAlen 212 | DO j=1,CurrDNA%NumOlaps 213 | IF (i.ge.CurrDNA%OlapsPos(j,1).and.i.le.CurrDNA%OlapsPos(j,2)) THEN 214 | nt2overlap(i)=j 215 | END IF 216 | END DO 217 | END DO 218 | 219 | IF (.not.changed) CALL Evaluate_Scores ! in case the CurrDNA is never better than BestOverlapDNA 220 | 221 | FinalScore(SolutionNo)%InitScore=CurrDNA%OverallScore 222 | 223 | END SUBROUTINE Generate_Overlaps 224 | SUBROUTINE Make_Olap(first,last) 225 | ! 226 | ! Simplifies the process of finding overlaps. The OlapsPos and MeltT values 227 | ! are recorded in this subroutine for each overlap. It also automates the 228 | ! decision making about forward or reverse methods of generating overlaps. 229 | 230 | USE dnaworks_data 231 | USE dnaworks_test 232 | IMPLICIT NONE 233 | 234 | INTEGER :: first,last 235 | INTEGER,EXTERNAL :: ForOlap 236 | INTEGER,EXTERNAL :: RevOlap 237 | REAL,EXTERNAL :: TmCalc 238 | 239 | IF (TEST2) PRINT *,"Make_Olap" !TEST2 240 | 241 | IF (CurrDNA%NumOlaps.eq.0) THEN 242 | 243 | last=ForOlap(first) 244 | 245 | CurrDNA%NumOlaps=1 246 | CurrDNA%OlapsPos(1,1)=first 247 | CurrDNA%OlapsPos(1,2)=last 248 | CurrDNA%MeltT(1)=TmCalc(first,last) 249 | 250 | ! PRINT *,first,last,CurrDNA%MeltT(1) 251 | ELSE 252 | 253 | first=RevOlap(last) 254 | 255 | IF (first.le.CurrDNA%OlapsPos(CurrDNA%NumOlaps,2)) THEN 256 | first = CurrDNA%OlapsPos(CurrDNA%NumOlaps,2)+1 257 | last = ForOlap(first) 258 | END IF 259 | 260 | IF (last.lt.DNAlen) THEN 261 | CurrDNA%NumOlaps=CurrDNA%NumOlaps+1 262 | CurrDNA%OlapsPos(CurrDNA%NumOlaps,1)=first 263 | CurrDNA%OlapsPos(CurrDNA%NumOlaps,2)=last 264 | CurrDNA%MeltT(CurrDNA%NumOlaps)=TmCalc(first,last) 265 | END IF 266 | END IF 267 | 268 | END SUBROUTINE Make_Olap 269 | INTEGER FUNCTION RevOlap(last) 270 | 271 | USE dnaworks_data 272 | USE dnaworks_test 273 | IMPLICIT NONE 274 | 275 | INTEGER :: first,last ! positions in DNAseq 276 | REAL,EXTERNAL :: TmCalc 277 | REAL :: diff,diff_lo,diff_hi,diff2 278 | LOGICAL :: done 279 | INTEGER :: shift 280 | 281 | IF (TEST3) PRINT *,"RevOlap" !TEST3 282 | 283 | shift=32 284 | done=.FALSE. 285 | 286 | first=last-shift 287 | shift=shift/2 288 | 289 | loop: DO WHILE (.not.done) 290 | IF (first.le.1) THEN 291 | first=first+shift 292 | ELSE 293 | diff=MeltTemp-(TmCalc(first,last)) 294 | IF (ABS(diff).gt.MeltTol) THEN 295 | IF (diff.gt.0) THEN 296 | first=first-shift 297 | ELSE 298 | first=first+shift 299 | END IF 300 | ELSE 301 | done=.TRUE. 302 | END IF 303 | END IF 304 | shift=shift/2 305 | IF (shift.le.1) EXIT loop 306 | END DO loop 307 | 308 | ! For the final step, determine which of the final two positions is best 309 | 310 | IF (.not.done) THEN 311 | IF (first.ge.2) THEN 312 | diff=MeltTemp-(TmCalc(first,last)) 313 | IF (diff.gt.0) THEN 314 | shift=-1 315 | ELSE 316 | shift=1 317 | END IF 318 | first=first+shift 319 | diff2=MeltTemp-(TmCalc(first,last)) 320 | IF (ABS(diff).lt.ABS(diff2)) first=first-shift 321 | END IF 322 | IF (first.le.2) first=1 323 | END IF 324 | 325 | RevOlap = first 326 | 327 | END FUNCTION RevOlap 328 | -------------------------------------------------------------------------------- /scores.f90: -------------------------------------------------------------------------------- 1 | SUBROUTINE AT_Score 2 | ! 3 | ! Find all the 8 nt windows of solid AT content and update AScore. 4 | 5 | USE dnaworks_data 6 | USE dnaworks_test 7 | IMPLICIT NONE 8 | 9 | INTEGER :: i,j 10 | 11 | IF (TEST1) PRINT *,"AT_Score" !TEST1 12 | 13 | DO i=1,DNAlen 14 | CurrDNA%AScore(i) = 0 15 | END DO 16 | 17 | DO i=1,DNAlen-7 18 | IF (CurrDNA%ntID_AT(i).eq.0) THEN 19 | DO j=i,(i+7) 20 | CurrDNA%AScore(j)=CurrDNA%AScore(j)+1 21 | END DO 22 | END IF 23 | END DO 24 | 25 | CurrDNA%TotalAScore = 0.0 ! Initialize the repeat scores 26 | DO i=1,DNAlen 27 | CurrDNA%TotalAScore=CurrDNA%TotalAScore+CurrDNA%AScore(i) 28 | END DO 29 | CurrDNA%TotalAScore=CurrDNA%TotalAScore*20/DNAlen 30 | 31 | END SUBROUTINE AT_Score 32 | SUBROUTINE Average_Evaluate_Scores() 33 | ! 34 | ! This subroutine determines the average scores for the current sequence, each time 35 | ! changing the degenerate sequences. It updates 36 | ! TScore, CScore, RScore, and PScore arrays, the Total*Score values. 37 | 38 | USE dnaworks_data 39 | USE dnaworks_test 40 | IMPLICIT NONE 41 | 42 | INTEGER :: i 43 | REAL :: dC, dL, dT, dR, dM, dG, dA, dF, dP, dTotal 44 | 45 | IF (TEST1) PRINT *,"Average_Evaluate_Scores" !TEST1 46 | 47 | dC=0 48 | dL=0 49 | dT=0 50 | dR=0 51 | dM=0 52 | dG=0 53 | dA=0 54 | dF=0 55 | dP=0 56 | dTotal=0 57 | 58 | DO i=1,NumDegPos*10 59 | CALL Fix_Degenerates 60 | CALL Create_ntID_Arrays 61 | CALL Temp_Score ! TScore(i) based on olaps 62 | CALL Misprime_Score ! MScore(i) based on nt 63 | CALL Length_Score ! LScore(i) based on nt 64 | CALL GapFix_Score ! FScore(i) based on nt 65 | IF (ScoreCodons) CALL Codon_Score ! CScore(i) based on codons 66 | CALL Repeat_Score ! RScore(i) based on nt 67 | CALL GC_Score ! GScore(i) based on nt 68 | CALL AT_Score ! AScore(i) based on nt 69 | CALL Pattern_Score ! PScore(i) based on nt 70 | 71 | dC=CurrDNA%TotalCScore+dC 72 | dL=CurrDNA%TotalLScore+dL 73 | dT=CurrDNA%TotalTScore+dT 74 | dR=CurrDNA%TotalRScore+dR 75 | dM=CurrDNA%TotalMScore+dM 76 | dG=CurrDNA%TotalGScore+dG 77 | dA=CurrDNA%TotalAScore+dA 78 | dF=CurrDNA%TotalFScore+dF 79 | dP=CurrDNA%TotalPScore+dP 80 | dTotal=dC+dL+dT+dR+dM+dG+dA+dF+dP+dTotal 81 | 82 | END DO 83 | 84 | CurrDNA%TotalCScore=dC/(NumDegPos*10) 85 | CurrDNA%TotalLScore=dL/(NumDegPos*10) 86 | CurrDNA%TotalTScore=dT/(NumDegPos*10) 87 | CurrDNA%TotalRScore=dR/(NumDegPos*10) 88 | CurrDNA%TotalMScore=dM/(NumDegPos*10) 89 | CurrDNA%TotalGScore=dG/(NumDegPos*10) 90 | CurrDNA%TotalAScore=dA/(NumDegPos*10) 91 | CurrDNA%TotalFScore=dF/(NumDegPos*10) 92 | CurrDNA%TotalPScore=dP/(NumDegPos*10) 93 | 94 | CurrDNA%OverallScore = (Cwt*CurrDNA%TotalCScore)+& 95 | (Lwt*CurrDNA%TotalLScore)+& 96 | (Twt*CurrDNA%TotalTScore)+& 97 | (Rwt*CurrDNA%TotalRScore)+& 98 | (Mwt*CurrDNA%TotalMScore)+& 99 | (Gwt*CurrDNA%TotalGScore)+& 100 | (Awt*CurrDNA%TotalAScore)+& 101 | (Fwt*CurrDNA%TotalFScore)+& 102 | (Pwt*CurrDNA%TotalPScore) 103 | 104 | CALL Revert_Degenerates 105 | 106 | END SUBROUTINE Average_Evaluate_Scores 107 | SUBROUTINE Codon_Score 108 | ! 109 | ! This subroutine calculates a global score for codons based on frequency. 110 | 111 | USE dnaworks_data 112 | USE dnaworks_test 113 | IMPLICIT NONE 114 | 115 | INTEGER :: i 116 | 117 | IF (TEST1) PRINT *,"Codon_Score" !TEST1 118 | 119 | CurrDNA%TotalCScore=0.0 120 | 121 | IF (MutProtPos.eq.0) THEN 122 | DO i=1,PROTlen 123 | CurrDNA%CScore(i)=(1-(CFT(CurrDNA%prot2cod(i))%Freq/AAT(prot2aa(i))%Freq(1)))**4 124 | END DO 125 | ELSE 126 | CurrDNA%CScore(MutProtPos)=(1-(CFT(CurrDNA%prot2cod(MutProtPos))%Freq/AAT(prot2aa(MutProtPos))%Freq(1)))**4 127 | END IF 128 | 129 | DO i=1,PROTlen 130 | CurrDNA%TotalCScore=CurrDNA%TotalCScore+CurrDNA%CScore(i) 131 | END DO 132 | 133 | CurrDNA%TotalCScore = CurrDNA%TotalCScore/DNAlen 134 | 135 | END SUBROUTINE Codon_Score 136 | SUBROUTINE Decrement_Misprime_Arrays() 137 | ! 138 | ! Removes potential misprime pairs within the current mutant range 139 | 140 | USE dnaworks_data 141 | USE dnaworks_test 142 | IMPLICIT NONE 143 | 144 | INTEGER :: i,j,y,ct 145 | INTEGER :: TempM1(9999) 146 | INTEGER :: TempM2(9999) 147 | INTEGER :: TempMX(9999) 148 | 149 | IF (TEST2) PRINT *,"Decrement_Misprime_Arrays" !TEST2 150 | 151 | ct=0 152 | y=MutNtPos(MutNtNum)+1 153 | 154 | IF (CurrDNA%MN.gt.0) THEN 155 | loop: DO i=1,CurrDNA%MN 156 | IF ((((CurrDNA%M1(i)+MPLn).ge.MutNtPos(1)).and.& 157 | (CurrDNA%M1(i).le.y)).or.& 158 | (((CurrDNA%M2(i)+MPLn).ge.MutNtPos(1)).and.& 159 | (CurrDNA%M2(i).le.y))) THEN 160 | CYCLE loop 161 | ELSE 162 | ct=ct+1 163 | TempM1(ct)=CurrDNA%M1(i) 164 | TempM2(ct)=CurrDNA%M2(i) 165 | TempMX(ct)=CurrDNA%MX(i) 166 | END IF 167 | END DO loop 168 | 169 | CurrDNA%MN=ct 170 | 171 | DO i=1,CurrDNA%MN 172 | CurrDNA%M1(i)=TempM1(i) 173 | CurrDNA%M2(i)=TempM2(i) 174 | CurrDNA%MX(i)=TempMX(i) 175 | END DO 176 | END IF 177 | 178 | END SUBROUTINE Decrement_Misprime_Arrays 179 | SUBROUTINE Decrement_Repeat_Arrays() 180 | ! 181 | ! Remove repeat pairs and erase scores 182 | 183 | USE dnaworks_data 184 | USE dnaworks_test 185 | IMPLICIT NONE 186 | 187 | INTEGER :: i,j,k,y,ct 188 | INTEGER :: TempRS1(9999) 189 | INTEGER :: TempRS2(9999) 190 | INTEGER :: TempLn(9999) 191 | INTEGER :: TempRX(9999) 192 | 193 | IF (TEST2) PRINT *,"Decrement_Repeat_Arrays" !TEST2 194 | 195 | ct=0 196 | y=MutNtPos(MutNtNum)+1 197 | 198 | IF (CurrDNA%RN.gt.0) THEN 199 | loop: DO i=1,CurrDNA%RN 200 | IF ((((CurrDNA%RS1(i)+CurrDNA%RLn(i)).ge.MutNtPos(1)).and.& 201 | (CurrDNA%RS1(i).le.y)).or.& 202 | (((CurrDNA%RS2(i)+CurrDNA%RLn(i)).ge.MutNtPos(1)).and.& 203 | (CurrDNA%RS2(i).le.y))) THEN 204 | DO k=CurrDNA%RS1(i),(CurrDNA%RS1(i)+CurrDNA%RLn(i)-1) 205 | CurrDNA%RScore(k) = CurrDNA%RScore(k)-1 206 | END DO 207 | DO k=CurrDNA%RS2(i),(CurrDNA%RS2(i)+CurrDNA%RLn(i)-1) 208 | CurrDNA%RScore(k) = CurrDNA%RScore(k)-1 209 | END DO 210 | CYCLE loop 211 | ELSE 212 | ct=ct+1 213 | TempRS1(ct)=CurrDNA%RS1(i) 214 | TempRS2(ct)=CurrDNA%RS2(i) 215 | TempLn(ct)=CurrDNA%RLn(i) 216 | TempRX(ct)=CurrDNA%RX(i) 217 | END IF 218 | END DO loop 219 | 220 | CurrDNA%RN=ct 221 | 222 | DO i=1,CurrDNA%RN 223 | CurrDNA%RS1(i)=TempRS1(i) 224 | CurrDNA%RS2(i)=TempRS2(i) 225 | CurrDNA%RLn(i)=TempLn(i) 226 | CurrDNA%RX(i)=TempRX(i) 227 | END DO 228 | END IF 229 | 230 | END SUBROUTINE Decrement_Repeat_Arrays 231 | LOGICAL FUNCTION DegCmpr(instr,seq) 232 | ! 233 | ! This function compares a restriction site in degenerate form with a sequence. 234 | ! It returns .TRUE. if the site matches, and .FALSE. if it does not. The two 235 | ! strings MUST be the same length. 236 | ! 237 | ! The function uses the NEB format of nucleotide degeneracy: 238 | ! 239 | ! B = C or G or T rev. compl. = V 240 | ! D = A or G or T rev. compl. = H 241 | ! H = A or C or T rev. compl. = D 242 | ! K = G or T rev. compl. = M 243 | ! M = A or C rev. compl. = K 244 | ! N = A or C or G or T rev. compl. = N 245 | ! R = A or G rev. compl. = Y 246 | ! S = C or G rev. compl. = S 247 | ! V = A or C or G rev. compl. = B 248 | ! W = A or T rev. compl. = W 249 | ! Y = C or T rev. compl. = R 250 | ! 251 | USE dnaworks_data 252 | USE dnaworks_test 253 | IMPLICIT NONE 254 | 255 | CHARACTER(LEN=100) :: instr,seq 256 | INTEGER :: i 257 | INTEGER :: Slen 258 | INTEGER :: stot 259 | 260 | IF (TEST3) PRINT *,"DegCmpr" !TEST3 261 | 262 | Slen=LEN_TRIM(instr) 263 | DegCmpr=.FALSE. 264 | 265 | stot=0 266 | 267 | ! First, check site in sense orientation 268 | 269 | DO i=1,Slen 270 | SELECT CASE(instr(i:i)) 271 | CASE('A') 272 | IF (seq(i:i).EQ.'A') THEN 273 | stot=stot+1 ; ELSE ; EXIT ; END IF 274 | CASE('C') 275 | IF (seq(i:i).EQ.'C') THEN 276 | stot=stot+1 ; ELSE ; EXIT ; END IF 277 | CASE('G') 278 | IF (seq(i:i).EQ.'G') THEN 279 | stot=stot+1 ; ELSE ; EXIT ; END IF 280 | CASE('T') 281 | IF (seq(i:i).EQ.'T') THEN 282 | stot=stot+1 ; ELSE ; EXIT ; END IF 283 | CASE('B') 284 | IF (seq(i:i).EQ.'C'.OR.seq(i:i).EQ.'G'.OR.seq(i:i).EQ.'T') THEN 285 | stot=stot+1 ; ELSE ; EXIT ; END IF 286 | CASE('D') 287 | IF (seq(i:i).EQ.'A'.OR.seq(i:i).EQ.'G'.OR.seq(i:i).EQ.'T') THEN 288 | stot=stot+1 ; ELSE ; EXIT ; END IF 289 | CASE('H') 290 | IF (seq(i:i).EQ.'A'.OR.seq(i:i).EQ.'C'.OR.seq(i:i).EQ.'T') THEN 291 | stot=stot+1 ; ELSE ; EXIT ; END IF 292 | CASE('K') 293 | IF (seq(i:i).EQ.'G'.OR.seq(i:i).EQ.'T') THEN 294 | stot=stot+1 ; ELSE ; EXIT ; END IF 295 | CASE('M') 296 | IF (seq(i:i).EQ.'A'.OR.seq(i:i).EQ.'C') THEN 297 | stot=stot+1 ; ELSE ; EXIT ; END IF 298 | CASE('N') 299 | IF (seq(i:i).EQ.'A'.OR.seq(i:i).EQ.'C'.OR.seq(i:i).EQ.'G'.OR.seq(i:i).EQ.'T') THEN 300 | stot=stot+1 ; ELSE ; EXIT ; END IF 301 | CASE('R') 302 | IF (seq(i:i).EQ.'A'.OR.seq(i:i).EQ.'G') THEN 303 | stot=stot+1 ; ELSE ; EXIT ; END IF 304 | CASE('S') 305 | IF (seq(i:i).EQ.'C'.OR.seq(i:i).EQ.'G') THEN 306 | stot=stot+1 ; ELSE ; EXIT ; END IF 307 | CASE('V') 308 | IF (seq(i:i).EQ.'A'.OR.seq(i:i).EQ.'C'.OR.seq(i:i).EQ.'G') THEN 309 | stot=stot+1 ; ELSE ; EXIT ; END IF 310 | CASE('W') 311 | IF (seq(i:i).EQ.'A'.OR.seq(i:i).EQ.'T') THEN 312 | stot=stot+1 ; ELSE ; EXIT ; END IF 313 | CASE('Y') 314 | IF (seq(i:i).EQ.'C'.OR.seq(i:i).EQ.'T') THEN 315 | stot=stot+1 ; ELSE ; EXIT ; END IF 316 | END SELECT 317 | END DO 318 | 319 | IF (stot.EQ.Slen) DegCmpr=.TRUE. 320 | 321 | END FUNCTION DegCmpr 322 | SUBROUTINE Equalize_Scores() 323 | ! 324 | ! Converts individual scores to codon-based Xscores for mutation rounds. 325 | ! XScore is a combination of all scores applied to each codon. This should 326 | ! allow for a more targeted mutation. 327 | 328 | USE dnaworks_data 329 | USE dnaworks_test 330 | IMPLICIT NONE 331 | 332 | INTEGER :: i,j 333 | REAL :: CodPerOlap(999) ! number of codons per overlap, for each overlap 334 | REAL :: TScorePerCod(999) ! average TScore per codon, for each overlap 335 | REAL :: r1 336 | 337 | IF (TEST1) PRINT *,"Equalize_Scores" 338 | 339 | ! Initialize XScore 340 | 341 | DO i=1,mutPROTnum ! only use the mutatable codons 342 | XScore(i)=0 343 | END DO 344 | 345 | ! Initialize values for CodPerOlap 346 | 347 | DO i=1,999 348 | CodPerOlap(i)=0 349 | END DO 350 | 351 | ! Find how many codons are in each overlap, avoiding non-coding regions 352 | ! If nt is within a codon, is within an overlap, and is unique protein residue 353 | 354 | DO i=1,DNAlen 355 | IF ((nt2prot(i).ne.0).and.(nt2overlap(i).ne.0).and.(nt2prot(i).ne.nt2prot(i-1))) THEN 356 | CodPerOlap(nt2overlap(i))=CodPerOlap(nt2overlap(i))+1 357 | END IF 358 | END DO 359 | 360 | ! The TScore for each codon a fraction of the total TScore(i) for the overlap 361 | 362 | DO j=1,CurrDNA%NumOlaps 363 | IF (CodPerOlap(j).ne.0) TScorePerCod(j)=Twt*(CurrDNA%TScore(j)/CodPerOlap(j)) 364 | END DO 365 | 366 | ! Assign the XScore for TScore, CScore, RScore, PScore, GScore, LScore, 367 | ! and AScore for each codon 368 | 369 | DO i=1,mutPROTnum 370 | 371 | j=prot2nt(mutPROT2prot(i)) ! the middle nt of the codon 372 | 373 | ! if the middle nt of a codon is within an overlap, the XScore for that codon 374 | ! is the average TScore per codon for the overlap 375 | 376 | IF (nt2overlap(j).ne.0) XScore(i)=TScorePerCod(nt2overlap(j)) 377 | 378 | ! the CScore contribution is already for the codon 379 | 380 | XScore(i)=XScore(i)+(Cwt*CurrDNA%CScore(i))+& 381 | (Rwt*REAL(CurrDNA%RScore(j-1)+CurrDNA%RScore(j)+CurrDNA%RScore(j+1)))+& 382 | (Mwt*REAL(CurrDNA%MScore(j-1)+CurrDNA%MScore(j)+CurrDNA%MScore(j+1)))+& 383 | (Gwt*REAL(CurrDNA%GScore(j-1)+CurrDNA%GScore(j)+CurrDNA%GScore(j+1)))+& 384 | (Awt*REAL(CurrDNA%AScore(j-1)+CurrDNA%AScore(j)+CurrDNA%AScore(j+1)))+& 385 | (Lwt*REAL(CurrDNA%LScore(j-1)+CurrDNA%LScore(j)+CurrDNA%LScore(j+1)))+& 386 | (Fwt*REAL(CurrDNA%FScore(j-1)+CurrDNA%FScore(j)+CurrDNA%FScore(j+1)))+& 387 | (Pwt*REAL(CurrDNA%PScore(j-1)+CurrDNA%PScore(j)+CurrDNA%PScore(j+1))) 388 | 389 | END DO 390 | 391 | END SUBROUTINE Equalize_Scores 392 | SUBROUTINE Evaluate_Scores() 393 | ! 394 | ! This subroutine determines the scores for the current sequence. It updates 395 | ! TScore, CScore, RScore, and PScore arrays, the Total*Score values. 396 | 397 | USE dnaworks_data 398 | USE dnaworks_test 399 | IMPLICIT NONE 400 | 401 | INTEGER :: i,j 402 | REAL :: dC, dL, dT, dR, dM, dG, dA, dF, dP, dTotal 403 | 404 | IF (TEST1) PRINT *,"Evaluate_Scores" !TEST1 405 | 406 | ! Degenerate sequences: loop several times and get average scores 407 | 408 | IF (NumDegPos.eq.0) THEN 409 | 410 | ! If the sequence is recently translated, create the ntID arrays 411 | 412 | CALL Create_ntID_Arrays 413 | CALL Temp_Score ! TScore(i) based on olaps 414 | CALL Misprime_Score ! MScore(i) based on nt 415 | CALL Length_Score ! LScore(i) based on nt 416 | CALL GapFix_Score ! FScore(i) based on nt 417 | 418 | ! The following scores will not change when the overlap positions are moved, 419 | ! but only when the sequence is re-translated after a mutation (also does not 420 | ! apply for DNA-only runs) 421 | 422 | IF (ScoreCodons) CALL Codon_Score ! CScore(i) based on codons 423 | CALL Repeat_Score ! RScore(i) based on nt 424 | CALL GC_Score ! GScore(i) based on nt 425 | CALL AT_Score ! AScore(i) based on nt 426 | CALL Pattern_Score ! PScore(i) based on nt 427 | 428 | ! Update CurrDNA%OverallScore 429 | 430 | CurrDNA%OverallScore = (Cwt*CurrDNA%TotalCScore)+& 431 | (Lwt*CurrDNA%TotalLScore)+& 432 | (Twt*CurrDNA%TotalTScore)+& 433 | (Rwt*CurrDNA%TotalRScore)+& 434 | (Mwt*CurrDNA%TotalMScore)+& 435 | (Gwt*CurrDNA%TotalGScore)+& 436 | (Awt*CurrDNA%TotalAScore)+& 437 | (Fwt*CurrDNA%TotalFScore)+& 438 | (Pwt*CurrDNA%TotalPScore) 439 | ELSE 440 | CALL Average_Evaluate_Scores 441 | END IF 442 | 443 | END SUBROUTINE Evaluate_Scores 444 | SUBROUTINE Find_Actual_Misprimes() 445 | ! 446 | ! If one of the positions in a misprime pair aligns to the end of an overlap, 447 | ! and if the tip of the overlap is identical (direct or inverse), then raise 448 | ! the score on the nts (CurrDNA%MScore). 449 | 450 | ! 1. direct-sense(DS): forward primer mispriming on the sense strand 451 | ! 452 | ! --------------> --------------> 453 | ! ||||||||||||||| ..........||||| 454 | ! ------------------------------------------------------- 455 | ! 456 | ! 2. inverse-sense(IS): reverse primer mispriming on the sense strand 457 | ! NOTE THAT IF THE FORWARD OLIGO MATCHES M2, MSX = 5, NOT 2 458 | ! 459 | ! --------------> 460 | ! ..........||||| 461 | ! ------------------------------------------------------- 462 | ! ||||||||||||||| 463 | ! <-------------- 464 | ! 465 | ! 3. inverse-antisense(IA): forward primer mispriming on the antisense strand 466 | ! NOTE THAT IF THE REVERSE OLIGO MATCHES M2, MSX = 6, NOT 3 467 | ! 468 | ! --------------> 469 | ! ||||||||||||||| 470 | ! ------------------------------------------------------- 471 | ! |||||.......... 472 | ! <-------------- 473 | ! 474 | ! 4. direct-antisense(DA): reverse primer mispriming on the antisense strand 475 | ! 476 | ! ------------------------------------------------------- 477 | ! ||||||||||||||| |||||.......... 478 | ! <-------------- <-------------- 479 | ! 480 | USE dnaworks_data 481 | USE dnaworks_test 482 | IMPLICIT NONE 483 | 484 | INTEGER :: i,j,mp 485 | INTEGER :: o1,o2,m1,m2,mx 486 | 487 | IF (TEST2) PRINT *,"Find_Actual_Misprimes" !TEST2 488 | 489 | mp=(CurrDNA%NumOlaps+1)/2 490 | 491 | ! Initialize actual misprime arrays 492 | 493 | DO i=1,DNAlen 494 | CurrDNA%MScore(i) = 0 495 | END DO 496 | CurrDNA%MSN=0 497 | 498 | mpair: DO i=1,CurrDNA%MN 499 | m1=CurrDNA%M1(i) 500 | m2=CurrDNA%M2(i) 501 | mx=CurrDNA%MX(i) 502 | olap: DO j=1,CurrDNA%NumOlaps 503 | o1=CurrDNA%OlapsPos(j,1) 504 | o2=(CurrDNA%OlapsPos(j,2)-MPLn+1) 505 | IF (TBIO) THEN 506 | IF (j.lt.mp) THEN 507 | SELECT CASE(mx) 508 | CASE(1) ! direct-sense 509 | IF (o2.eq.m1) THEN 510 | CALL Increment_Misprime_Scores(o2,m2,1,j) 511 | ELSE IF (o2.eq.m2) THEN 512 | CALL Increment_Misprime_Scores(o2,m1,1,j) 513 | END IF 514 | CASE(2) ! inverse-sense 515 | IF (o2.eq.m2) CALL Increment_Misprime_Scores(m2,m1,5,j) 516 | CASE(3) ! inverse-antisense 517 | IF (o2.eq.m1) CALL Increment_Misprime_Scores(o2,m2,3,j) 518 | END SELECT 519 | ELSE IF (j.gt.mp) THEN 520 | SELECT CASE(mx) 521 | CASE(2) ! inverse-sense 522 | IF (o1.eq.m1) CALL Increment_Misprime_Scores(o1,m2,2,j) 523 | CASE(3) ! inverse-antisense 524 | IF (o1.eq.m2) CALL Increment_Misprime_Scores(o1,m1,6,j) 525 | CASE(4) ! direct-antisense 526 | IF (o1.eq.m1) THEN 527 | CALL Increment_Misprime_Scores(o1,m2,4,j) 528 | ELSE IF (o1.eq.m2) THEN 529 | CALL Increment_Misprime_Scores(o1,m1,4,j) 530 | END IF 531 | END SELECT 532 | ELSE 533 | SELECT CASE(mx) 534 | CASE(1) ! direct-sense 535 | IF (o2.eq.m1) THEN 536 | CALL Increment_Misprime_Scores(o2,m2,1,j) 537 | ELSE IF (o2.eq.m2) THEN 538 | CALL Increment_Misprime_Scores(o2,m1,1,j) 539 | END IF 540 | CASE(2) ! inverse-sense 541 | IF (o2.eq.m2) THEN 542 | CALL Increment_Misprime_Scores(m2,m1,5,j) 543 | ELSE IF (o1.eq.m1) THEN 544 | CALL Increment_Misprime_Scores(o1,m2,2,j) 545 | END IF 546 | CASE(3) ! inverse-antisense 547 | IF (o2.eq.m1) THEN 548 | CALL Increment_Misprime_Scores(o2,m2,3,j) 549 | ELSE IF (o1.eq.m2) THEN 550 | CALL Increment_Misprime_Scores(o1,m1,6,j) 551 | END IF 552 | CASE(4) ! direct-antisense 553 | IF (o1.eq.m1) THEN 554 | CALL Increment_Misprime_Scores(o1,m2,4,j) 555 | ELSE IF (o1.eq.m2) THEN 556 | CALL Increment_Misprime_Scores(o1,m1,4,j) 557 | END IF 558 | END SELECT 559 | END IF 560 | ELSE 561 | IF (MOD(j,2).eq.0) THEN 562 | CYCLE olap 563 | ELSE 564 | SELECT CASE(mx) 565 | CASE(1) ! direct-sense 566 | IF (o2.eq.m1) THEN 567 | CALL Increment_Misprime_Scores(o2,m2,1,j) 568 | ELSE IF (o2.eq.m2) THEN 569 | CALL Increment_Misprime_Scores(o2,m1,1,j) 570 | END IF 571 | CASE(2) ! inverse-sense 572 | IF (o2.eq.m2) THEN 573 | CALL Increment_Misprime_Scores(m2,m1,5,j) 574 | ELSE IF (o1.eq.m1) THEN 575 | CALL Increment_Misprime_Scores(o1,m2,2,j) 576 | END IF 577 | CASE(3) ! inverse-antisense 578 | IF (o2.eq.m1) THEN 579 | CALL Increment_Misprime_Scores(o2,m2,3,j) 580 | ELSE IF (o1.eq.m2) THEN 581 | CALL Increment_Misprime_Scores(o1,m1,6,j) 582 | END IF 583 | CASE(4) ! direct-antisense 584 | IF (o1.eq.m1) THEN 585 | CALL Increment_Misprime_Scores(o1,m2,4,j) 586 | ELSE IF (o1.eq.m2) THEN 587 | CALL Increment_Misprime_Scores(o1,m1,4,j) 588 | END IF 589 | END SELECT 590 | END IF 591 | END IF 592 | END DO olap 593 | END DO mpair 594 | 595 | END SUBROUTINE Find_Actual_Misprimes 596 | SUBROUTINE Find_Potential_Misprimes 597 | ! 598 | ! This subroutine finds all misprimes in the sequence, both direct and 599 | ! inverse, regardless of position, equal or longer than MPLn. 600 | ! The inverse search allows palindromic misprimes (i=j). 601 | ! The number of potential misprimes is in CurrDNA%MN 602 | ! It records the positions and sizes in the global arrays CurrDNA%M1, 603 | ! CurrDNA%M2, and CurrDNA%MX. 604 | ! The actual misprimes are determined by Find_Actual_Misprimes 605 | 606 | ! 1. direct-sense(DS): forward primer mispriming on the sense strand 607 | ! 608 | ! --------------> --------------> 609 | ! ||||||||||||||| ..........||||| 610 | ! ------------------------------------------------------- 611 | ! 612 | ! 2. inverse-sense(IS): reverse primer mispriming on the sense strand 613 | ! NOTE THAT IF THE FORWARD OLIGO MATCHES M2, MSX = 5, NOT 2 614 | ! 615 | ! --------------> 616 | ! ..........||||| 617 | ! ------------------------------------------------------- 618 | ! ||||||||||||||| 619 | ! <-------------- 620 | ! 621 | ! 3. inverse-antisense(IA): forward primer mispriming on the antisense strand 622 | ! NOTE THAT IF THE REVERSE OLIGO MATCHES M2, MSX = 6, NOT 3 623 | ! 624 | ! --------------> 625 | ! ||||||||||||||| 626 | ! ------------------------------------------------------- 627 | ! |||||.......... 628 | ! <-------------- 629 | ! 630 | ! 4. direct-antisense(DA): reverse primer mispriming on the antisense strand 631 | ! 632 | ! ------------------------------------------------------- 633 | ! ||||||||||||||| |||||.......... 634 | ! <-------------- <-------------- 635 | ! 636 | USE dnaworks_data 637 | USE dnaworks_test 638 | IMPLICIT NONE 639 | 640 | INTEGER :: i,j 641 | LOGICAL,EXTERNAL :: HMatchNum 642 | 643 | IF (TEST2) PRINT *,"Find_Potential_Misprimes" !TEST2 644 | 645 | ! Initialize the potential misprime arrays 646 | 647 | CurrDNA%MN=0 648 | 649 | ! Find the potential misprimes 650 | 651 | DO i=1,DNAlen-MPLn+1 652 | DO j=i,DNAlen-MPLn+1 653 | IF (HMatchNum(i,j,1)) THEN 654 | IF (CurrDNA%ntID_Tip(i+MPLn-MPTip).eq.CurrDNA%ntID_Tip(j+MPLn-MPTip)) & 655 | CALL Increment_Misprime_Arrays(i,j,1) 656 | IF (CurrDNA%ntID_Tip(i).eq.CurrDNA%ntID_Tip(j)) & 657 | CALL Increment_Misprime_Arrays(i,j,4) 658 | END IF 659 | IF (HMatchNum(i,j,-1)) THEN 660 | IF (CurrDNA%ntID_Tip(i).eq.CurrDNA%ntID_TipRC(j+MPLn-MPTip)) & 661 | CALL Increment_Misprime_Arrays(i,j,2) 662 | IF (CurrDNA%ntID_Tip(i+MPLn-MPTip).eq.CurrDNA%ntID_TipRC(j)) & 663 | CALL Increment_Misprime_Arrays(i,j,3) 664 | END IF 665 | END DO 666 | END DO 667 | 668 | END SUBROUTINE Find_Potential_Misprimes 669 | SUBROUTINE Find_Repeats() 670 | ! 671 | ! This subroutine finds all repeats in the sequence, both direct and 672 | ! inverse, regardless of position, equal or longer than RepLen. 673 | ! The inverse search allows palindromic repeats (i=j). 674 | ! It records the positions and sizes in the global arrays CurrDNA%RS1, 675 | ! CurrDNA%RS2, and CurrDNA%RLn. Then it overwrites the array CurrDNA%RScore. 676 | 677 | USE dnaworks_data 678 | USE dnaworks_test 679 | IMPLICIT NONE 680 | 681 | INTEGER :: i,j 682 | LOGICAL,EXTERNAL :: PairWithinKnownRepeat 683 | 684 | IF (TEST2) PRINT *,"Find_Repeats" !TEST2 685 | 686 | DO i=1,DNAlen 687 | CurrDNA%RScore(i) = 0 688 | END DO 689 | CurrDNA%RN=0 690 | 691 | DO i=1,DNAlen-RepLen+1 692 | DO j=i,DNAlen-RepLen+1 693 | IF (i.ne.j) THEN 694 | IF (.not.PairWithinKnownRepeat(i,j,1)) THEN 695 | IF (CurrDNA%ntID_Rep(i).eq.CurrDNA%ntID_Rep(j)) & 696 | CALL Increment_Repeat_Arrays(i,j,1) 697 | END IF 698 | END IF 699 | IF (.not.PairWithinKnownRepeat(i,j,-1)) THEN 700 | IF (CurrDNA%ntID_Rep(i).eq.CurrDNA%ntID_RepRC(j)) & 701 | CALL Increment_Repeat_Arrays(i,j,-1) 702 | END IF 703 | END DO 704 | END DO 705 | 706 | END SUBROUTINE Find_Repeats 707 | SUBROUTINE GC_Score 708 | ! 709 | ! Find all the 8 nt windows of solid GC content and update GScore. 710 | 711 | USE dnaworks_data 712 | USE dnaworks_test 713 | IMPLICIT NONE 714 | 715 | INTEGER :: i,j 716 | 717 | IF (TEST1) PRINT *,"GC_Score" !TEST1 718 | 719 | DO i=1,DNAlen 720 | CurrDNA%GScore(i) = 0 721 | END DO 722 | 723 | DO i=1,DNAlen-7 724 | IF (CurrDNA%ntID_GC(i).eq.0) THEN 725 | DO j=i,(i+7) 726 | CurrDNA%GScore(j)=CurrDNA%GScore(j)+1 727 | END DO 728 | END IF 729 | END DO 730 | 731 | CurrDNA%TotalGScore = 0.0 ! Initialize the repeat scores 732 | DO i=1,DNAlen 733 | CurrDNA%TotalGScore=CurrDNA%TotalGScore+CurrDNA%GScore(i) 734 | END DO 735 | CurrDNA%TotalGScore=CurrDNA%TotalGScore*20/DNAlen 736 | 737 | END SUBROUTINE GC_Score 738 | SUBROUTINE GapFix_Score 739 | ! 740 | ! This subroutine returns the GapFix scores for each nt position in the 741 | ! GapFixPos array. 742 | ! 743 | USE dnaworks_data 744 | USE dnaworks_test 745 | IMPLICIT NONE 746 | 747 | INTEGER :: i,j,c 748 | 749 | IF (TEST1) PRINT *,"GapFix_Score" !TEST1 750 | 751 | ! initialize scores 752 | 753 | CurrDNA%TotalFScore=0 754 | DO i = 1,DNAlen 755 | CurrDNA%FScore(i)=0 756 | END DO 757 | 758 | DO i=1,DNAlen 759 | 760 | ! if the position should be within gap 761 | 762 | IF(CurrDNA%GapFixPos(i)) THEN 763 | DO j=1,CurrDNA%NumOlaps 764 | 765 | ! and it is not within a gap (it's in an overlap instead) 766 | 767 | IF (i.ge.CurrDNA%OlapsPos(j,1).and.i.le.CurrDNA%OlapsPos(j,2)) THEN 768 | 769 | ! increase its score 770 | 771 | CurrDNA%Fscore(i)=10 772 | 773 | END IF 774 | END DO 775 | END IF 776 | END DO 777 | 778 | ! generate summary of scores 779 | 780 | DO i=1,DNAlen 781 | CurrDNA%TotalFScore=CurrDNA%TotalFScore+CurrDNA%FScore(i) 782 | END DO 783 | CurrDNA%TotalFScore=CurrDNA%TotalFScore*20/DNAlen 784 | 785 | END SUBROUTINE GapFix_Score 786 | LOGICAL FUNCTION HMatchNum(pos1,pos2,dir) 787 | ! 788 | ! If two positions of equal length are homologous (MaxNonId or fewer 789 | ! non-identical nts), returns true 790 | 791 | USE dnaworks_data 792 | USE dnaworks_test 793 | IMPLICIT NONE 794 | 795 | INTEGER :: pos1,pos2,i,a,b,dir,ct 796 | 797 | IF (TEST3) PRINT *,"HMatchNum" !TEST3 798 | 799 | ct=0 800 | 801 | IF (dir.eq.1) THEN 802 | direct: DO i=1,MPLn 803 | HMatchNum=.FALSE. 804 | IF (pos1.eq.pos2) EXIT direct 805 | a=pos1+i-1 806 | b=pos2+i-1 807 | IF (b.gt.DNAlen) EXIT direct 808 | IF ((CurrDNA%NUMseq(a)-CurrDNA%NUMseq(b)).ne.0) THEN 809 | ct=ct+1 810 | IF (ct.gt.MaxNonId) EXIT direct 811 | END IF 812 | ! PRINT *,dir,pos1,pos2,CurrDNA%NUMseq(a),CurrDNA%NUMseq(b) 813 | HMatchNum=.TRUE. 814 | END DO direct 815 | ELSE 816 | inverse: DO i=1,MPLn 817 | HMatchNum=.FALSE. 818 | a=pos1+i-1 819 | b=pos2+MPLn-i 820 | IF (b.gt.DNAlen) EXIT inverse 821 | IF ((CurrDNA%NUMseq(a)+CurrDNA%NUMseq(b)).ne.0) THEN 822 | ct=ct+1 823 | IF (ct.gt.MaxNonId) EXIT inverse 824 | END IF 825 | HMatchNum=.TRUE. 826 | END DO inverse 827 | END IF 828 | 829 | END FUNCTION HMatchNum 830 | SUBROUTINE Increment_Misprime_Arrays(pos1,pos2,dir) 831 | ! 832 | ! Add another misprime pair to the arrays and update scores 833 | 834 | USE dnaworks_data 835 | USE dnaworks_test 836 | IMPLICIT NONE 837 | 838 | INTEGER :: pos1,pos2,dir,i 839 | CHARACTER(LEN=80) :: text 840 | 841 | IF (TEST2) PRINT *,"Increment_Misprime_Arrays" !TEST2 842 | 843 | CurrDNA%MN=CurrDNA%MN+1 844 | IF (CurrDNA%MN.ge.MaxDNAlen) THEN 845 | WRITE(text,FMT="('MN = ',i9,' Too many misprimes.')") CurrDNA%MN 846 | ! DO i=1,CurrDNA%MN 847 | ! PRINT *,CurrDNA%M1(i),CurrDNA%M2(i),CurrDNA%MX(i) 848 | ! END DO 849 | CALL Stop_Program(text) 850 | END IF 851 | CurrDNA%M1(CurrDNA%MN)=pos1 852 | CurrDNA%M2(CurrDNA%MN)=pos2 853 | CurrDNA%MX(CurrDNA%MN)=dir 854 | 855 | END SUBROUTINE Increment_Misprime_Arrays 856 | SUBROUTINE Increment_Misprime_Scores(o,m,t,j) 857 | ! 858 | ! Increment the scores and update the actual misprime arrays 859 | 860 | USE dnaworks_data 861 | USE dnaworks_test 862 | IMPLICIT NONE 863 | 864 | INTEGER :: o,m,t,i,j 865 | LOGICAL :: x 866 | 867 | IF (TEST2) PRINT *,"Increment_Misprime_Scores" !TEST2 868 | 869 | CurrDNA%MSN=CurrDNA%MSN+1 870 | CurrDNA%MS1(CurrDNA%MSN)=o 871 | CurrDNA%MS2(CurrDNA%MSN)=m 872 | CurrDNA%MSX(CurrDNA%MSN)=t 873 | CurrDNA%MOL(CurrDNA%MSN)=j 874 | DO i=o,o+MPLn-1 875 | CurrDNA%MScore(i)=CurrDNA%MScore(i)+1 876 | END DO 877 | DO i=m,m+MPLn-1 878 | CurrDNA%MScore(i)=CurrDNA%MScore(i)+1 879 | END DO 880 | 881 | END SUBROUTINE Increment_Misprime_Scores 882 | SUBROUTINE Increment_Repeat_Arrays(i,j,dir) 883 | ! 884 | ! Add another repeat pair to the arrays and update scores after expansion 885 | 886 | USE dnaworks_data 887 | USE dnaworks_test 888 | IMPLICIT NONE 889 | 890 | INTEGER :: i,j,k 891 | INTEGER :: pos1,pos2,dir,length 892 | INTEGER :: last,diff,a 893 | 894 | IF (TEST2) PRINT *,"Increment_Repeat_Arrays" !TEST2 895 | 896 | pos1=i 897 | pos2=j 898 | diff=j-i 899 | length=RepLen 900 | 901 | ! Expand direct repeats 902 | 903 | IF (dir.eq.1) THEN 904 | starting: DO pos1=(i-1),1,-1 905 | pos2=pos1+diff 906 | IF (CurrDNA%NUMseq(pos1).ne.CurrDNA%NUMseq(pos2)) THEN 907 | pos2=pos2+1 908 | EXIT starting 909 | END IF 910 | END DO starting 911 | pos1=pos2-diff 912 | length=RepLen+(i-pos1) ! In case pos2 is DNAlen-RepLen-1 913 | ending: DO last=(j+RepLen),DNAlen+1 914 | IF (last.eq.(DNAlen+1)) EXIT ending 915 | IF (CurrDNA%NUMseq(last-diff).ne.CurrDNA%NUMseq(last)) EXIT ending 916 | END DO ending 917 | length=last-pos2 ! Final answer 918 | 919 | ELSE 920 | 921 | ! Expand inverse repeats 922 | 923 | startingRC: DO a=1,MaxDNAlen 924 | pos1=i-a 925 | last=j+length-1+a 926 | IF ((pos1.lt.1).or.(last.gt.DNAlen).or.& 927 | (CurrDNA%NUMseq(pos1).ne.(-1*(CurrDNA%NUMseq(last))))) THEN 928 | pos1=pos1+1 929 | last=last-1 930 | EXIT startingRC 931 | END IF 932 | END DO startingRC 933 | endingRC: DO a=1,MaxDNAlen 934 | pos2=j-a 935 | last=i+length-1+a 936 | IF ((pos2.lt.1).or.(last.gt.DNAlen).or.& 937 | (CurrDNA%NUMseq(last).ne.(-1*(CurrDNA%NUMseq(pos2))))) THEN 938 | pos2=pos2+1 939 | last=last-1 940 | EXIT endingRC 941 | END IF 942 | END DO endingRC 943 | length=last-pos1+1 ! Final answer 944 | END IF 945 | 946 | CurrDNA%RN=CurrDNA%RN+1 947 | IF (CurrDNA%RN.ge.MaxDNAlen) CALL Stop_Program("Too many repeats.") 948 | CurrDNA%RS1(CurrDNA%RN)=pos1 949 | CurrDNA%RS2(CurrDNA%RN)=pos2 950 | CurrDNA%RLn(CurrDNA%RN)=length 951 | CurrDNA%RX(CurrDNA%RN)=dir 952 | DO k=pos1,(pos1+length-1) 953 | CurrDNA%RScore(k) = CurrDNA%RScore(k)+1 954 | END DO 955 | DO k=pos2,(pos2+length-1) 956 | CurrDNA%RScore(k) = CurrDNA%RScore(k)+1 957 | END DO 958 | 959 | END SUBROUTINE Increment_Repeat_Arrays 960 | SUBROUTINE Length_Score 961 | ! 962 | ! This subroutine evaluates the length of the oligos and gives a penalty to 963 | ! all the nts in the oligo if it exceeds OligoLen (except for the first and 964 | ! last oligos, of course). 965 | 966 | USE dnaworks_data 967 | USE dnaworks_test 968 | IMPLICIT NONE 969 | 970 | INTEGER :: i,j 971 | INTEGER :: overrun !the length of the oligo goes past OligoLen 972 | 973 | IF (TEST1) PRINT *,"Length_Score" !TEST1 974 | 975 | DO i=CurrDNA%OlapsPos(1,1),CurrDNA%OlapsPos(CurrDNA%NumOlaps,2) 976 | CurrDNA%LScore(i)=0 977 | END DO 978 | 979 | !PRINT *,'START' 980 | 981 | DO i=2,CurrDNA%NumOlaps 982 | overrun=CurrDNA%OlapsPos(i,2)-CurrDNA%OlapsPos((i-1),1)-OligoLen+1 983 | 984 | !PRINT *,i,CurrDNA%OlapsPos(i,2),CurrDNA%OlapsPos((i-1),1),OligoLen,overrun,CurrDNA%MeltT(i) 985 | 986 | IF (overrun.gt.0) THEN 987 | DO j=CurrDNA%OlapsPos((i-1),1),CurrDNA%OlapsPos(i,2) 988 | CurrDNA%LScore(j)=(overrun+2)**2 989 | END DO 990 | END IF 991 | END DO 992 | 993 | !PRINT *,'FINISH' 994 | 995 | CurrDNA%TotalLScore = 0.0 996 | DO i=CurrDNA%OlapsPos(1,1),CurrDNA%OlapsPos(CurrDNA%NumOlaps,2) 997 | CurrDNA%TotalLScore = CurrDNA%TotalLScore + CurrDNA%LScore(i) 998 | END DO 999 | CurrDNA%TotalLScore=CurrDNA%TotalLScore*20/DNAlen 1000 | 1001 | END SUBROUTINE Length_Score 1002 | SUBROUTINE Misprime_Score 1003 | ! 1004 | ! Determine the current mispriming score. If at the beginning of a run 1005 | ! (MutProtPos=0), then generate the potential misprime arrays, and then 1006 | ! find the actual misprimes. 1007 | ! 1008 | ! During the run, the potential misprime arrays only need to be regenerated 1009 | ! once after each mutation. The potential misprime arrays are only modified 1010 | ! around the site of mutation. The actual misprimes are then determined after 1011 | ! every overlap set generation. 1012 | ! 1013 | ! Only evaluating the mutation site speeds up the calculation more than 10-fold. 1014 | 1015 | USE dnaworks_data 1016 | USE dnaworks_test 1017 | IMPLICIT NONE 1018 | 1019 | INTEGER :: i 1020 | 1021 | IF (TEST1) PRINT *,"Misprime_Score" !TEST1 1022 | 1023 | IF (MutProtPos.eq.0) THEN 1024 | IF (SequenceTranslated) CALL Find_Potential_Misprimes 1025 | CALL Find_Actual_Misprimes 1026 | ELSE 1027 | IF (SequenceTranslated) CALL Find_Mut_Pot_Misprimes 1028 | CALL Find_Actual_Misprimes 1029 | END IF 1030 | 1031 | CurrDNA%TotalMScore = 0.0 ! Initialize the mispriming scores 1032 | DO i=1,DNAlen 1033 | CurrDNA%TotalMScore=CurrDNA%TotalMScore+CurrDNA%MScore(i) 1034 | END DO 1035 | CurrDNA%TotalMScore=CurrDNA%TotalMScore*20/DNAlen 1036 | 1037 | END SUBROUTINE Misprime_Score 1038 | LOGICAL FUNCTION PairWithinKnownRepeat(i,j,dir) 1039 | ! 1040 | ! Returns true if pair of residues is already part of a repeat pair 1041 | 1042 | USE dnaworks_data 1043 | USE dnaworks_test 1044 | IMPLICIT NONE 1045 | 1046 | INTEGER :: i,j,k,dir 1047 | 1048 | IF (TEST3) PRINT *,"PairWithinKnownRepeat" !TEST3 1049 | 1050 | PairWithinKnownRepeat=.FALSE. 1051 | 1052 | IF (CurrDNA%RN.gt.0) THEN 1053 | loop2: DO k=1,CurrDNA%RN 1054 | IF ((CurrDNA%RS1(k).le.i).and.& 1055 | ((CurrDNA%RS1(k)+CurrDNA%RLn(k)-RepLen).ge.i).and.& 1056 | (CurrDNA%RS2(k).le.j).and.& 1057 | ((CurrDNA%RS2(k)+CurrDNA%RLn(k)-RepLen).ge.j).and.& 1058 | (CurrDNA%RX(k).eq.dir)) THEN 1059 | PairWithinKnownRepeat=.TRUE. 1060 | EXIT loop2 1061 | END IF 1062 | END DO loop2 1063 | END IF 1064 | 1065 | END FUNCTION PairWithinKnownRepeat 1066 | SUBROUTINE Pattern_Score 1067 | ! 1068 | ! This subroutine looks through the DNA sequence corresponding to the protein 1069 | ! region and identifies sequence patterns, either for restriction sites or 1070 | ! user-input sequences. When it finds the pattern, it increases the score 1071 | ! for those nucleotides in the pattern. 1072 | ! 1073 | ! There are two situtations -- when degenerate patterns are present, and when 1074 | ! they are not. 1075 | 1076 | ! 1077 | ! This scoring evaluation currently uses text-based comparisons, so it will 1078 | ! be slow... 1079 | 1080 | USE dnaworks_data 1081 | USE dnaworks_test 1082 | IMPLICIT NONE 1083 | 1084 | INTEGER :: i,j,k,n 1085 | CHARACTER(LEN=9999) :: text,ftext,rtext 1086 | LOGICAL,EXTERNAL :: DegCmpr 1087 | INTEGER :: start,curr,finis 1088 | 1089 | IF (TEST1) PRINT *,"Pattern_Score" !TEST1 1090 | 1091 | CurrDNA%TotalPScore = 0.0 1092 | DO k=1,DNAlen 1093 | CurrDNA%PScore(k) = 0 1094 | END DO 1095 | 1096 | main: DO i=1,PTNnum 1097 | 1098 | ! skip the site if it is an isoschizomer 1099 | 1100 | IF (PTN(i)%Isoschiz) CYCLE main 1101 | 1102 | ! treat degenerate sites differently 1103 | 1104 | IF (PTN(i)%Degen) THEN 1105 | ftext=PTN(i)%Seq(1:PTN(i)%Len) 1106 | rtext=PTN(i)%SeqRC(1:PTN(i)%Len) 1107 | deg: DO j=1,(DNAlen-PTN(i)%Len+1) 1108 | IF (DegCmpr(ftext(1:PTN(i)%Len),CurrDNA%DNAseq(j:j+PTN(i)%Len-1))) THEN 1109 | DO k=j,j+PTN(i)%Len-1 1110 | CurrDNA%PScore(k)=CurrDNA%PScore(k)+1 1111 | END DO 1112 | END IF 1113 | IF (.not.PTN(i)%SelfCompl) THEN 1114 | IF (DegCmpr(rtext(1:PTN(i)%Len),CurrDNA%DNAseq(j:j+PTN(i)%Len-1))) THEN 1115 | DO k=j,j+PTN(i)%Len-1 1116 | CurrDNA%PScore(k)=CurrDNA%PScore(k)+1 1117 | END DO 1118 | END IF 1119 | END IF 1120 | END DO deg 1121 | 1122 | ELSE 1123 | 1124 | ! not degenerate 1125 | 1126 | curr=0 1127 | start=1 1128 | finis=DNAlen 1129 | ! forward direction 1130 | forward: DO n=1,DNAlen 1131 | j=INDEX(CurrDNA%DNAseq(start:finis),PTN(i)%Seq(1:PTN(i)%Len)) 1132 | curr=curr+j 1133 | IF (j.eq.0) THEN 1134 | EXIT forward 1135 | ELSE 1136 | DO k=curr,(curr+PTN(i)%Len) 1137 | CurrDNA%PScore(k)=CurrDNA%PScore(k)+1 1138 | END DO 1139 | start=curr+1 1140 | END IF 1141 | END DO forward 1142 | ! reverse direction if needed 1143 | IF (.not.PTN(i)%SelfCompl) THEN 1144 | curr=0 1145 | start=1 1146 | reverse: DO n=1,DNAlen 1147 | j=INDEX(CurrDNA%DNAseq(start:finis),PTN(i)%SeqRC(1:PTN(i)%Len)) 1148 | curr=curr+j 1149 | IF (j.eq.0) THEN 1150 | EXIT reverse 1151 | ELSE 1152 | DO k=curr,(curr+PTN(i)%Len) 1153 | CurrDNA%PScore(k)=CurrDNA%PScore(k)+1 1154 | END DO 1155 | start=curr+1 1156 | END IF 1157 | END DO reverse 1158 | END IF 1159 | END IF 1160 | END DO main 1161 | 1162 | DO i=1,DNAlen 1163 | CurrDNA%TotalPScore=CurrDNA%TotalPScore+CurrDNA%PScore(i) 1164 | END DO 1165 | CurrDNA%TotalPScore=CurrDNA%TotalPScore*20/DNAlen 1166 | 1167 | END SUBROUTINE Pattern_Score 1168 | SUBROUTINE Repeat_Score 1169 | ! 1170 | ! This subroutine calculates the score for finding tandem repeats, both 1171 | ! direct and inverted (RC), in the trial DNA sequence. The entire sequence 1172 | ! is queried against itself in the first run. If a codon has been mutated, 1173 | ! only the small region around the mutation is queried against the sequence. 1174 | ! The score is then applied to the nts it is found within. 1175 | ! 1176 | ! Only evaluating the mutation site speeds up the calculation more than 10-fold. 1177 | 1178 | USE dnaworks_data 1179 | USE dnaworks_test 1180 | IMPLICIT NONE 1181 | 1182 | INTEGER :: i,j,pos 1183 | 1184 | IF (TEST1) PRINT *,"Repeat_Score" ! TEST1 1185 | 1186 | IF (MutProtPos.eq.0) THEN 1187 | CALL Find_Repeats 1188 | ELSE 1189 | CALL Find_Mutated_Repeats 1190 | END IF 1191 | 1192 | CurrDNA%TotalRScore = 0.0 ! Initialize the repeat scores 1193 | DO i=1,DNAlen 1194 | CurrDNA%TotalRScore=CurrDNA%TotalRScore+CurrDNA%RScore(i) 1195 | END DO 1196 | CurrDNA%TotalRScore=CurrDNA%TotalRScore*20/DNAlen 1197 | 1198 | END SUBROUTINE Repeat_Score 1199 | SUBROUTINE Temp_Score 1200 | ! 1201 | ! This subroutine returns the Melting Temperature scores for each overlap as 1202 | ! well as the total Tm score. 1203 | ! 1204 | ! The score is calculated as follow: 1205 | ! - for the temperatures within the range (MeltTemp-MeltTol ... MeltTemp + 1206 | ! MeltTol) score is calculated using a quadratic function of difference 1207 | ! - outside of this range also the 10 times square of the second difference 1208 | ! is added 1209 | ! 1210 | USE dnaworks_data 1211 | USE dnaworks_test 1212 | IMPLICIT NONE 1213 | 1214 | INTEGER :: i,c 1215 | REAL :: diff,maxT,minT 1216 | REAL,EXTERNAL :: TmCalc 1217 | 1218 | IF (TEST1) PRINT *,"Temp_Score" !TEST1 1219 | 1220 | maxT=0 1221 | minT=999 1222 | 1223 | CurrDNA%TotalTScore=0 1224 | DO i = 1,CurrDNA%NumOlaps 1225 | CurrDNA%TScore(i)=0 1226 | END DO 1227 | 1228 | DO i=1,CurrDNA%NumOlaps 1229 | CurrDNA%MeltT(i)=TmCalc(CurrDNA%OlapsPos(i,1),CurrDNA%OlapsPos(i,2)) 1230 | END DO 1231 | 1232 | DO i = 1,CurrDNA%NumOlaps 1233 | diff=ABS(MeltTemp-CurrDNA%MeltT(i)) 1234 | IF(diff.gt.MeltTol)THEN 1235 | diff=MAX(1.0,(diff-MeltTol)) 1236 | CurrDNA%TScore(i)=(diff**2)/10 1237 | ELSE 1238 | CurrDNA%TScore(i)=0 1239 | END IF 1240 | END DO 1241 | 1242 | DO i=1,CurrDNA%NumOlaps 1243 | CurrDNA%TotalTScore=CurrDNA%TotalTScore+CurrDNA%TScore(i) 1244 | END DO 1245 | CurrDNA%TotalTScore=CurrDNA%TotalTScore*20/DNAlen 1246 | 1247 | END SUBROUTINE Temp_Score 1248 | REAL FUNCTION TmCalc(start,finish) 1249 | ! 1250 | ! This function returns the melting temperature for an overlap of nucleotides 1251 | ! start to finish. 1252 | ! 1253 | ! The melting temperature is based on the paper by John SantaLucia 1254 | ! Jr., "A unified view of polymer, dumbbell, and oligonucleotide DNA 1255 | ! nearest-neighbor thermodynamics", Biochemistry Vol. 95, Issue 4, 1256 | ! 1460-1465, 1998. Tm in Celcius is calculated using the following formula: 1257 | ! 1258 | ! Tm = [dH/(dS+(R*ln(OligoConc))+(0.368*ln(Na)*N)+(??*??MgConc*N))]-273.15 1259 | ! 1260 | ! where 1261 | ! dH = sum of individual dH for each nucleotide pair, kcal/mol 1262 | ! dS = sum of individual dS for each nucleotide pair, cal/k*mol 1263 | ! RGasConstant = gas constant, 1.987 cal/K*mol 1264 | ! OligoConc = template concentration, mol/liter 1265 | ! Na = monovalent cation concentration (sodium), mol/liter 1266 | ! MgConc = magnesium concentration, mol/liter 1267 | ! N = number of backbone phosphates (number of nucleotides - 1) 1268 | ! Kelvin = convert Kelvin to Celsius, 273.15 1269 | ! 1270 | ! Additionally, the Tm is modified by the terminal nucleotides: 1271 | ! 1272 | ! dH = dH + 2.2 for A/T 1273 | ! dS = dS + 6.935 for A/T 1274 | ! 1275 | ! The Tm is also modified by the presence of self-complementarity: 1276 | ! 1277 | ! dS = dS - 1.4 1278 | ! 1279 | ! Numerical sequences are used instead of strings 1280 | ! 1281 | ! This subroutine is by far the most heavily used subroutine in the program. 1282 | 1283 | USE dnaworks_data 1284 | USE dnaworks_test 1285 | IMPLICIT NONE 1286 | 1287 | INTEGER :: start,finish,i 1288 | REAL :: dh,ds 1289 | LOGICAL :: self_compl 1290 | 1291 | IF (TEST3) PRINT *,"TmCalc" !TEST3 1292 | 1293 | ! Initialize values 1294 | 1295 | self_compl=.FALSE. 1296 | dh=0.2 1297 | ds=-5.68 1298 | 1299 | ! Make sure the overlap is more than 7 nt long 1300 | 1301 | IF ((finish-start+1).le.7) THEN 1302 | tmcalc=0 1303 | RETURN 1304 | END IF 1305 | 1306 | ! Sum the dH, dS values 1307 | 1308 | DO i=start,finish-1 1309 | 1310 | SELECT CASE(CurrDNA%NUMseq(i)) 1311 | CASE(-1) ! A-> 1312 | SELECT CASE(CurrDNA%NUMseq(i+1)) 1313 | CASE(-1) ! AA 1314 | dh=dh-7.9 1315 | ds=ds-22.2473 ! 22.25 1316 | CASE(1) ! AT 1317 | dh=dh-7.2 1318 | ds=ds-20.38082 ! 20.35 1319 | CASE(-3) ! AC 1320 | dh=dh-8.4 1321 | ds=ds-22.44082 ! 22.44 1322 | CASE(3) ! AG 1323 | dh=dh-7.8 1324 | ds=ds-21.02469 ! 21.03 1325 | END SELECT 1326 | CASE(1) ! T-> 1327 | SELECT CASE(CurrDNA%NUMseq(i+1)) 1328 | CASE(-1) ! TA 1329 | dh=dh-7.2 1330 | ds=ds-21.34081 ! 20.32 1331 | CASE(1) ! TT 1332 | dh=dh-7.9 1333 | ds=ds-22.2473 ! 22.25 1334 | CASE(-3) ! TC 1335 | dh=dh-8.2 1336 | ds=ds-22.24469 ! 22.25 1337 | CASE(3) ! TG 1338 | dh=dh-8.5 1339 | ds=ds-22.73082 ! 22.73 1340 | END SELECT 1341 | CASE(-3) ! C-> 1342 | SELECT CASE(CurrDNA%NUMseq(i+1)) 1343 | CASE(-1) ! CA 1344 | dh=dh-8.5 1345 | ds=ds-22.73082 ! 22.73 1346 | CASE(1) ! CT 1347 | dh=dh-7.8 1348 | ds=ds-21.02469 ! 21.03 1349 | CASE(-3) ! CC 1350 | dh=dh-8.0 1351 | ds=ds-19.8612 ! 19.86 1352 | CASE(3) ! CG 1353 | dh=dh-10.6 1354 | ds=ds-27.17776 ! 27.15 1355 | END SELECT 1356 | CASE(3) ! G-> 1357 | SELECT CASE(CurrDNA%NUMseq(i+1)) 1358 | CASE(-1) ! GA 1359 | dh=dh-8.2 1360 | ds=ds-22.24469 ! 22.25 1361 | CASE(1) ! GT 1362 | dh=dh-8.4 1363 | ds=ds-22.44082 ! 22.44 1364 | CASE(-3) ! GC 1365 | dh=dh-9.8 1366 | ds=ds-24.37776 ! 24.35 1367 | CASE(3) ! GG 1368 | dh=dh-8.0 1369 | ds=ds-19.8612 ! 19.86 1370 | END SELECT 1371 | END SELEcT 1372 | END DO 1373 | 1374 | ! Correct for A or T at the termini 1375 | 1376 | IF (ABS(CurrDNA%NUMseq(start)).eq.1) THEN 1377 | dh=dh+2.2 1378 | ds=ds+6.935 1379 | END IF 1380 | 1381 | IF (ABS(CurrDNA%NUMseq(finish)).eq.1) THEN 1382 | dh=dh+2.2 1383 | ds=ds+6.935 1384 | END IF 1385 | 1386 | ! Correct for self-complementarity 1387 | 1388 | inner1: DO i=start,finish 1389 | IF ((CurrDNA%NUMseq(i)+CurrDNA%NUMseq(finish-start-i)).eq.0) THEN 1390 | self_compl=.TRUE. 1391 | ELSE 1392 | self_compl=.FALSE. 1393 | EXIT inner1 1394 | END IF 1395 | END DO inner1 1396 | 1397 | IF (self_compl) ds=ds-1.4 1398 | 1399 | ! Make corrections for oligo concentration 1400 | 1401 | IF (self_compl) THEN 1402 | ds=ds+OligoCorrSC 1403 | ELSE 1404 | ds=ds+OligoCorr 1405 | END IF 1406 | 1407 | ! Make corrections for cation concentrations 1408 | 1409 | ds=ds+(SaltCorr*(finish-start)) 1410 | 1411 | ! Now find the actual Tm 1412 | 1413 | TmCalc=(1000*dh/ds)-Kelvin 1414 | 1415 | END FUNCTION TmCalc 1416 | SUBROUTINE TmCorrect 1417 | ! 1418 | ! Create salt and oligo corrections for Tm 1419 | 1420 | USE dnaworks_data 1421 | USE dnaworks_test 1422 | IMPLICIT NONE 1423 | 1424 | IF (TEST0) PRINT *,"TmCorrect" !TEST0 1425 | 1426 | ! Find adjustment values 1427 | 1428 | ! PRINT *,OligoConc,SodiumConc,MgConc 1429 | 1430 | IF (OligoConc.lt.1e-9) OligoConc=1e-9 1431 | IF (OligoConc.gt.1e-4) OligoConc=1e-4 1432 | 1433 | IF (SodiumConc.gt.1.000) SodiumConc=1.000 1434 | IF (SodiumConc.le.1.000.and.SodiumConc.gt.0.750) SodiumConc=1.000 1435 | IF (SodiumConc.le.0.750.and.SodiumConc.gt.0.500) SodiumConc=0.750 1436 | IF (SodiumConc.le.0.500.and.SodiumConc.gt.0.250) SodiumConc=0.500 1437 | IF (SodiumConc.le.0.250.and.SodiumConc.gt.0.200) SodiumConc=0.250 1438 | IF (SodiumConc.le.0.200.and.SodiumConc.gt.0.150) SodiumConc=0.200 1439 | IF (SodiumConc.le.0.150.and.SodiumConc.gt.0.100) SodiumConc=0.150 1440 | IF (SodiumConc.le.0.100.and.SodiumConc.gt.0.075) SodiumConc=0.100 1441 | IF (SodiumConc.le.0.075.and.SodiumConc.gt.0.050) SodiumConc=0.075 1442 | IF (SodiumConc.le.0.050.and.SodiumConc.gt.0.025) SodiumConc=0.050 1443 | IF (SodiumConc.le.0.025.and.SodiumConc.gt.0.010) SodiumConc=0.025 1444 | IF (SodiumConc.le.0.010) SodiumConc=0.010 1445 | 1446 | IF (MgConc.gt.0.2000) MgConc=0.200 1447 | IF (MgConc.le.0.2000.and.MgConc.gt.0.1000) MgConc=0.2000 1448 | IF (MgConc.le.0.1000.and.MgConc.gt.0.0500) MgConc=0.1000 1449 | IF (MgConc.le.0.0500.and.MgConc.gt.0.0200) MgConc=0.0500 1450 | IF (MgConc.le.0.0200.and.MgConc.gt.0.0100) MgConc=0.0200 1451 | IF (MgConc.le.0.0100.and.MgConc.gt.0.0050) MgConc=0.0100 1452 | IF (MgConc.le.0.0050.and.MgConc.gt.0.0040) MgConc=0.0050 1453 | IF (MgConc.le.0.0040.and.MgConc.gt.0.0030) MgConc=0.0040 1454 | IF (MgConc.le.0.0030.and.MgConc.gt.0.0020) MgConc=0.0030 1455 | IF (MgConc.le.0.0020.and.MgConc.gt.0.0015) MgConc=0.0020 1456 | IF (MgConc.le.0.0015.and.MgConc.gt.0.0010) MgConc=0.0015 1457 | IF (MgConc.le.0.0010.and.MgConc.gt.0.0005) MgConc=0.0010 1458 | IF (MgConc.le.0.0005.and.MgConc.gt.0.0000) MgConc=0.0005 1459 | IF (MgConc.le.0) MgConc=0 1460 | 1461 | ! PRINT *,OligoConc,SodiumConc,MgConc 1462 | 1463 | OligoCorr=RGasConstant*(LOG(((OligoConc/100)/2))) 1464 | OligoCorrSC=RGasConstant*(LOG((OligoConc/100))) 1465 | 1466 | ! Sorry about this, I couldn't figure out the equation. 1467 | 1468 | IF (SodiumConc.eq.10.and.MgConc.eq.0.0) SaltCorr=-1.6960 1469 | IF (SodiumConc.eq.10.and.MgConc.eq.0.5) SaltCorr=-0.9125 1470 | IF (SodiumConc.eq.10.and.MgConc.eq.1.0) SaltCorr=-0.7996 1471 | IF (SodiumConc.eq.10.and.MgConc.eq.1.5) SaltCorr=-0.7287 1472 | IF (SodiumConc.eq.10.and.MgConc.eq.2.0) SaltCorr=-0.6803 1473 | IF (SodiumConc.eq.10.and.MgConc.eq.3.0) SaltCorr=-0.6094 1474 | IF (SodiumConc.eq.10.and.MgConc.eq.4.0) SaltCorr=-0.5578 1475 | IF (SodiumConc.eq.10.and.MgConc.eq.5.0) SaltCorr=-0.5191 1476 | IF (SodiumConc.eq.10.and.MgConc.eq.10.0) SaltCorr=-0.3966 1477 | IF (SodiumConc.eq.10.and.MgConc.eq.20.0) SaltCorr=-0.2741 1478 | IF (SodiumConc.eq.10.and.MgConc.eq.50.0) SaltCorr=-0.1064 1479 | IF (SodiumConc.eq.10.and.MgConc.eq.100.0) SaltCorr=0.0193 1480 | IF (SodiumConc.eq.10.and.MgConc.eq.200.0) SaltCorr=0.1451 1481 | IF (SodiumConc.eq.25.and.MgConc.eq.0.0) SaltCorr=-1.3574 1482 | IF (SodiumConc.eq.25.and.MgConc.eq.0.5) SaltCorr=-0.8512 1483 | IF (SodiumConc.eq.25.and.MgConc.eq.1.0) SaltCorr=-0.7545 1484 | IF (SodiumConc.eq.25.and.MgConc.eq.1.5) SaltCorr=-0.6900 1485 | IF (SodiumConc.eq.25.and.MgConc.eq.2.0) SaltCorr=-0.6481 1486 | IF (SodiumConc.eq.25.and.MgConc.eq.3.0) SaltCorr=-0.5836 1487 | IF (SodiumConc.eq.25.and.MgConc.eq.4.0) SaltCorr=-0.5352 1488 | IF (SodiumConc.eq.25.and.MgConc.eq.5.0) SaltCorr=-0.4965 1489 | IF (SodiumConc.eq.25.and.MgConc.eq.10.0) SaltCorr=-0.3805 1490 | IF (SodiumConc.eq.25.and.MgConc.eq.20.0) SaltCorr=-0.2612 1491 | IF (SodiumConc.eq.25.and.MgConc.eq.50.0) SaltCorr=-0.1000 1492 | IF (SodiumConc.eq.25.and.MgConc.eq.100.0) SaltCorr=0.0226 1493 | IF (SodiumConc.eq.25.and.MgConc.eq.200.0) SaltCorr=0.1483 1494 | IF (SodiumConc.eq.50.and.MgConc.eq.0.0) SaltCorr=-1.1027 1495 | IF (SodiumConc.eq.50.and.MgConc.eq.0.5) SaltCorr=-0.7706 1496 | IF (SodiumConc.eq.50.and.MgConc.eq.1.0) SaltCorr=-0.6868 1497 | IF (SodiumConc.eq.50.and.MgConc.eq.1.5) SaltCorr=-0.6352 1498 | IF (SodiumConc.eq.50.and.MgConc.eq.2.0) SaltCorr=-0.5965 1499 | IF (SodiumConc.eq.50.and.MgConc.eq.3.0) SaltCorr=-0.5385 1500 | IF (SodiumConc.eq.50.and.MgConc.eq.4.0) SaltCorr=-0.4965 1501 | IF (SodiumConc.eq.50.and.MgConc.eq.5.0) SaltCorr=-0.4643 1502 | IF (SodiumConc.eq.50.and.MgConc.eq.10.0) SaltCorr=-0.3547 1503 | IF (SodiumConc.eq.50.and.MgConc.eq.20.0) SaltCorr=-0.2418 1504 | IF (SodiumConc.eq.50.and.MgConc.eq.50.0) SaltCorr=-0.0871 1505 | IF (SodiumConc.eq.50.and.MgConc.eq.100.0) SaltCorr=0.0322 1506 | IF (SodiumConc.eq.50.and.MgConc.eq.200.0) SaltCorr=0.1548 1507 | IF (SodiumConc.eq.75.and.MgConc.eq.0.0) SaltCorr=-0.9544 1508 | IF (SodiumConc.eq.75.and.MgConc.eq.0.5) SaltCorr=-0.6997 1509 | IF (SodiumConc.eq.75.and.MgConc.eq.1.0) SaltCorr=-0.6320 1510 | IF (SodiumConc.eq.75.and.MgConc.eq.1.5) SaltCorr=-0.5868 1511 | IF (SodiumConc.eq.75.and.MgConc.eq.2.0) SaltCorr=-0.5546 1512 | IF (SodiumConc.eq.75.and.MgConc.eq.3.0) SaltCorr=-0.5030 1513 | IF (SodiumConc.eq.75.and.MgConc.eq.4.0) SaltCorr=-0.4643 1514 | IF (SodiumConc.eq.75.and.MgConc.eq.5.0) SaltCorr=-0.4320 1515 | IF (SodiumConc.eq.75.and.MgConc.eq.10.0) SaltCorr=-0.3321 1516 | IF (SodiumConc.eq.75.and.MgConc.eq.20.0) SaltCorr=-0.2257 1517 | IF (SodiumConc.eq.75.and.MgConc.eq.50.0) SaltCorr=-0.0774 1518 | IF (SodiumConc.eq.75.and.MgConc.eq.100.0) SaltCorr=0.0419 1519 | IF (SodiumConc.eq.75.and.MgConc.eq.200.0) SaltCorr=0.1612 1520 | IF (SodiumConc.eq.100.and.MgConc.eq.0.0) SaltCorr=-0.8480 1521 | IF (SodiumConc.eq.100.and.MgConc.eq.0.5) SaltCorr=-0.6449 1522 | IF (SodiumConc.eq.100.and.MgConc.eq.1.0) SaltCorr=-0.5836 1523 | IF (SodiumConc.eq.100.and.MgConc.eq.1.5) SaltCorr=-0.5449 1524 | IF (SodiumConc.eq.100.and.MgConc.eq.2.0) SaltCorr=-0.5127 1525 | IF (SodiumConc.eq.100.and.MgConc.eq.3.0) SaltCorr=-0.4675 1526 | IF (SodiumConc.eq.100.and.MgConc.eq.4.0) SaltCorr=-0.4320 1527 | IF (SodiumConc.eq.100.and.MgConc.eq.5.0) SaltCorr=-0.4030 1528 | IF (SodiumConc.eq.100.and.MgConc.eq.10.0) SaltCorr=-0.3095 1529 | IF (SodiumConc.eq.100.and.MgConc.eq.20.0) SaltCorr=-0.2096 1530 | IF (SodiumConc.eq.100.and.MgConc.eq.50.0) SaltCorr=-0.0645 1531 | IF (SodiumConc.eq.100.and.MgConc.eq.100.0) SaltCorr=0.0484 1532 | IF (SodiumConc.eq.100.and.MgConc.eq.200.0) SaltCorr=0.1677 1533 | IF (SodiumConc.eq.150.and.MgConc.eq.0.0) SaltCorr=-0.6964 1534 | IF (SodiumConc.eq.150.and.MgConc.eq.0.5) SaltCorr=-0.5514 1535 | IF (SodiumConc.eq.150.and.MgConc.eq.1.0) SaltCorr=-0.5030 1536 | IF (SodiumConc.eq.150.and.MgConc.eq.1.5) SaltCorr=-0.4707 1537 | IF (SodiumConc.eq.150.and.MgConc.eq.2.0) SaltCorr=-0.4449 1538 | IF (SodiumConc.eq.150.and.MgConc.eq.3.0) SaltCorr=-0.4063 1539 | IF (SodiumConc.eq.150.and.MgConc.eq.4.0) SaltCorr=-0.3772 1540 | IF (SodiumConc.eq.150.and.MgConc.eq.5.0) SaltCorr=-0.3514 1541 | IF (SodiumConc.eq.150.and.MgConc.eq.10.0) SaltCorr=-0.2708 1542 | IF (SodiumConc.eq.150.and.MgConc.eq.20.0) SaltCorr=-0.1773 1543 | IF (SodiumConc.eq.150.and.MgConc.eq.50.0) SaltCorr=-0.0451 1544 | IF (SodiumConc.eq.150.and.MgConc.eq.100.0) SaltCorr=0.0645 1545 | IF (SodiumConc.eq.150.and.MgConc.eq.200.0) SaltCorr=0.1805 1546 | IF (SodiumConc.eq.200.and.MgConc.eq.0.0) SaltCorr=-0.5933 1547 | IF (SodiumConc.eq.200.and.MgConc.eq.0.5) SaltCorr=-0.4772 1548 | IF (SodiumConc.eq.200.and.MgConc.eq.1.0) SaltCorr=-0.4385 1549 | IF (SodiumConc.eq.200.and.MgConc.eq.1.5) SaltCorr=-0.4095 1550 | IF (SodiumConc.eq.200.and.MgConc.eq.2.0) SaltCorr=-0.3901 1551 | IF (SodiumConc.eq.200.and.MgConc.eq.3.0) SaltCorr=-0.3547 1552 | IF (SodiumConc.eq.200.and.MgConc.eq.4.0) SaltCorr=-0.3289 1553 | IF (SodiumConc.eq.200.and.MgConc.eq.5.0) SaltCorr=-0.3095 1554 | IF (SodiumConc.eq.200.and.MgConc.eq.10.0) SaltCorr=-0.2354 1555 | IF (SodiumConc.eq.200.and.MgConc.eq.20.0) SaltCorr=-0.1483 1556 | IF (SodiumConc.eq.200.and.MgConc.eq.50.0) SaltCorr=-0.0226 1557 | IF (SodiumConc.eq.200.and.MgConc.eq.100.0) SaltCorr=0.0806 1558 | IF (SodiumConc.eq.200.and.MgConc.eq.200.0) SaltCorr=0.1902 1559 | IF (SodiumConc.eq.250.and.MgConc.eq.0.0) SaltCorr=-0.5094 1560 | IF (SodiumConc.eq.250.and.MgConc.eq.0.5) SaltCorr=-0.4159 1561 | IF (SodiumConc.eq.250.and.MgConc.eq.1.0) SaltCorr=-0.3805 1562 | IF (SodiumConc.eq.250.and.MgConc.eq.1.5) SaltCorr=-0.3579 1563 | IF (SodiumConc.eq.250.and.MgConc.eq.2.0) SaltCorr=-0.3386 1564 | IF (SodiumConc.eq.250.and.MgConc.eq.3.0) SaltCorr=-0.3095 1565 | IF (SodiumConc.eq.250.and.MgConc.eq.4.0) SaltCorr=-0.2870 1566 | IF (SodiumConc.eq.250.and.MgConc.eq.5.0) SaltCorr=-0.2676 1567 | IF (SodiumConc.eq.250.and.MgConc.eq.10.0) SaltCorr=-0.1999 1568 | IF (SodiumConc.eq.250.and.MgConc.eq.20.0) SaltCorr=-0.1225 1569 | IF (SodiumConc.eq.250.and.MgConc.eq.50.0) SaltCorr=-0.0032 1570 | IF (SodiumConc.eq.250.and.MgConc.eq.100.0) SaltCorr=0.0935 1571 | IF (SodiumConc.eq.250.and.MgConc.eq.200.0) SaltCorr=0.1999 1572 | IF (SodiumConc.eq.500.and.MgConc.eq.0.0) SaltCorr=-0.2547 1573 | IF (SodiumConc.eq.500.and.MgConc.eq.0.5) SaltCorr=-0.2031 1574 | IF (SodiumConc.eq.500.and.MgConc.eq.1.0) SaltCorr=-0.1838 1575 | IF (SodiumConc.eq.500.and.MgConc.eq.1.5) SaltCorr=-0.1709 1576 | IF (SodiumConc.eq.500.and.MgConc.eq.2.0) SaltCorr=-0.1612 1577 | IF (SodiumConc.eq.500.and.MgConc.eq.3.0) SaltCorr=-0.1419 1578 | IF (SodiumConc.eq.500.and.MgConc.eq.4.0) SaltCorr=-0.1258 1579 | IF (SodiumConc.eq.500.and.MgConc.eq.5.0) SaltCorr=-0.1129 1580 | IF (SodiumConc.eq.500.and.MgConc.eq.10.0) SaltCorr=-0.0677 1581 | IF (SodiumConc.eq.500.and.MgConc.eq.20.0) SaltCorr=-0.0129 1582 | IF (SodiumConc.eq.500.and.MgConc.eq.50.0) SaltCorr=0.0774 1583 | IF (SodiumConc.eq.500.and.MgConc.eq.100.0) SaltCorr=0.1612 1584 | IF (SodiumConc.eq.500.and.MgConc.eq.200.0) SaltCorr=0.2515 1585 | IF (SodiumConc.eq.750.and.MgConc.eq.0.0) SaltCorr=-0.1064 1586 | IF (SodiumConc.eq.750.and.MgConc.eq.0.5) SaltCorr=-0.0709 1587 | IF (SodiumConc.eq.750.and.MgConc.eq.1.0) SaltCorr=-0.0580 1588 | IF (SodiumConc.eq.750.and.MgConc.eq.1.5) SaltCorr=-0.0484 1589 | IF (SodiumConc.eq.750.and.MgConc.eq.2.0) SaltCorr=-0.0387 1590 | IF (SodiumConc.eq.750.and.MgConc.eq.3.0) SaltCorr=-0.0258 1591 | IF (SodiumConc.eq.750.and.MgConc.eq.4.0) SaltCorr=-0.0161 1592 | IF (SodiumConc.eq.750.and.MgConc.eq.5.0) SaltCorr=-0.0064 1593 | IF (SodiumConc.eq.750.and.MgConc.eq.10.0) SaltCorr=0.0290 1594 | IF (SodiumConc.eq.750.and.MgConc.eq.20.0) SaltCorr=0.0709 1595 | IF (SodiumConc.eq.750.and.MgConc.eq.50.0) SaltCorr=0.1451 1596 | IF (SodiumConc.eq.750.and.MgConc.eq.100.0) SaltCorr=0.2160 1597 | IF (SodiumConc.eq.750.and.MgConc.eq.200.0) SaltCorr=0.2934 1598 | IF (SodiumConc.eq.1000.and.MgConc.eq.0.0) SaltCorr=0.0000 1599 | IF (SodiumConc.eq.1000.and.MgConc.eq.0.5) SaltCorr=0.0258 1600 | IF (SodiumConc.eq.1000.and.MgConc.eq.1.0) SaltCorr=0.0355 1601 | IF (SodiumConc.eq.1000.and.MgConc.eq.1.5) SaltCorr=0.0451 1602 | IF (SodiumConc.eq.1000.and.MgConc.eq.2.0) SaltCorr=0.0516 1603 | IF (SodiumConc.eq.1000.and.MgConc.eq.3.0) SaltCorr=0.0613 1604 | IF (SodiumConc.eq.1000.and.MgConc.eq.4.0) SaltCorr=0.0709 1605 | IF (SodiumConc.eq.1000.and.MgConc.eq.5.0) SaltCorr=0.0774 1606 | IF (SodiumConc.eq.1000.and.MgConc.eq.10.0) SaltCorr=0.1064 1607 | IF (SodiumConc.eq.1000.and.MgConc.eq.20.0) SaltCorr=0.1419 1608 | IF (SodiumConc.eq.1000.and.MgConc.eq.50.0) SaltCorr=0.2031 1609 | IF (SodiumConc.eq.1000.and.MgConc.eq.100.0) SaltCorr=0.2644 1610 | IF (SodiumConc.eq.1000.and.MgConc.eq.200.0) SaltCorr=0.3353 1611 | 1612 | END SUBROUTINE TmCorrect 1613 | -------------------------------------------------------------------------------- /str_func.f90: -------------------------------------------------------------------------------- 1 | CHARACTER(LEN=80) FUNCTION CenterStr(str) 2 | 3 | ! Centers the input string within an 80 character output string. 4 | 5 | USE dnaworks_data 6 | USE dnaworks_test 7 | IMPLICIT NONE 8 | 9 | CHARACTER(LEN=80) :: str ! the string input 10 | INTEGER :: length ! length of the string without trailing blanks 11 | INTEGER :: midpoint ! the midpoint in the string 12 | INTEGER :: i 13 | 14 | IF (TEST3) PRINT *,"CenterStr" !TEST3 15 | 16 | str=TRIM(str) 17 | length=LEN_TRIM(str) 18 | midpoint=(length/2)+1 19 | 20 | CenterStr="" 21 | 22 | DO i=1,length 23 | CenterStr(40-midpoint+i:40-midpoint+i)=str(i:i) 24 | END DO 25 | 26 | CenterStr(1:1) = '|' 27 | CenterStr(80:80) = '|' 28 | 29 | END FUNCTION CenterStr 30 | SUBROUTINE ComplStr(str) 31 | ! 32 | ! Returns the DNA-complement of the section of a string 33 | ! 34 | ! 35 | ! A .................... = T 36 | ! C .................... = G 37 | ! G .................... = C 38 | ! T .................... = A 39 | ! M = A or C ........... = K 40 | ! R = A or G ........... = Y 41 | ! W = A or T ........... = W 42 | ! S = C or G ........... = S 43 | ! Y = C or T ........... = R 44 | ! K = G or T ........... = M 45 | ! V = A or C or G ...... = B 46 | ! H = A or C or T ...... = D 47 | ! D = A or G or T ...... = H 48 | ! B = C or G or T ...... = V 49 | ! N = A or C or G or T . = N 50 | 51 | USE dnaworks_test 52 | IMPLICIT NONE 53 | 54 | CHARACTER(LEN=*) :: str 55 | INTEGER :: i 56 | 57 | IF (TEST3) PRINT *,'ComplStr' 58 | 59 | DO i=1,LEN_TRIM(str) 60 | SELECT CASE(str(i:i)) 61 | CASE('A') 62 | str(i:i)="T" 63 | CASE('T') 64 | str(i:i)="A" 65 | CASE('G') 66 | str(i:i)="C" 67 | CASE('C') 68 | str(i:i)="G" 69 | CASE('M') 70 | str(i:i)="K" 71 | CASE('R') 72 | str(i:i)="Y" 73 | CASE('W') 74 | str(i:i)="W" 75 | CASE('S') 76 | str(i:i)="S" 77 | CASE('Y') 78 | str(i:i)="R" 79 | CASE('K') 80 | str(i:i)="M" 81 | CASE('V') 82 | str(i:i)="B" 83 | CASE('H') 84 | str(i:i)="D" 85 | CASE('D') 86 | str(i:i)="H" 87 | CASE('B') 88 | str(i:i)="V" 89 | CASE('N') 90 | str(i:i)="N" 91 | END SELECT 92 | END DO 93 | 94 | END SUBROUTINE ComplStr 95 | INTEGER FUNCTION NT2Int(nt) 96 | ! 97 | ! Converts a nt into an integer. 98 | 99 | USE dnaworks_test 100 | IMPLICIT NONE 101 | 102 | CHARACTER(LEN=1) :: nt 103 | 104 | IF (TEST3) PRINT *,"NT2Int" !TEST3 105 | 106 | SELECT CASE(nt(1:1)) ! convert sequence to num representation 107 | CASE('A') 108 | NT2Int=-1 109 | CASE('T') 110 | NT2Int=1 111 | CASE('C') 112 | NT2Int=-3 113 | CASE('G') 114 | NT2Int=3 115 | END SELECT 116 | 117 | END FUNCTION NT2Int 118 | SUBROUTINE RevComplStr(str) 119 | ! 120 | ! Returns the reverse complement of a string 121 | 122 | USE dnaworks_test 123 | IMPLICIT NONE 124 | 125 | CHARACTER(LEN=*) :: str 126 | 127 | IF (TEST3) PRINT *,'RevComplStr' 128 | 129 | CALL RevStr(str) 130 | CALL ComplStr(str) 131 | 132 | END SUBROUTINE RevComplStr 133 | SUBROUTINE RevStr(str) 134 | ! 135 | ! Returns the reverse of a string 136 | 137 | USE dnaworks_data 138 | USE dnaworks_test 139 | IMPLICIT NONE 140 | 141 | CHARACTER(LEN=*) :: str 142 | INTEGER :: i,j 143 | 144 | IF (TEST3) PRINT *,'RevStr' 145 | 146 | DO i=1,LEN_TRIM(str) 147 | j=(LEN_TRIM(str))-i+1 148 | SCRATCH(i:i)=str(j:j) 149 | END DO 150 | 151 | str=SCRATCH(1:(LEN_TRIM(str))) 152 | 153 | END SUBROUTINE RevStr 154 | INTEGER FUNCTION StrToInt(str) 155 | ! 156 | ! Converts a string into an integer. All spaces are assigned zero. 157 | 158 | USE dnaworks_test 159 | IMPLICIT NONE 160 | 161 | CHARACTER(LEN=*) :: str 162 | INTEGER :: i,strlen,val,a,j 163 | 164 | IF (TEST3) PRINT *,"StrToInt" 165 | 166 | ! initial values 167 | 168 | j=1 169 | StrToInt=0 170 | 171 | ! find length of string 172 | 173 | strlen=LEN(str) 174 | 175 | ! convert all non-numerical characters to spaces, except for '-' sign 176 | 177 | DO i=1,strlen 178 | a=IACHAR(str(i:i)) 179 | IF ((a.ge.48.and.a.le.57).or.(str(i:i).eq."-")) THEN 180 | str(i:i)=str(i:i) 181 | ELSE 182 | str(i:i)=" " 183 | END IF 184 | END DO 185 | 186 | ! shift string to right 187 | 188 | str=ADJUSTR(str) 189 | 190 | ! convert to integer,going from right to left 191 | 192 | DO i=strlen,1,-1 193 | a=IACHAR(str(i:i)) 194 | IF (a.ge.48.and.a.le.57) THEN 195 | val=a-48 196 | ELSE 197 | val=0 198 | END IF 199 | StrToInt=(val*j)+StrToInt 200 | j=j*10 201 | END DO 202 | 203 | ! find sign 204 | 205 | IF ((INDEX(str,'-')).gt.0) StrToInt=StrToInt*(-1) 206 | 207 | END FUNCTION StrToInt 208 | REAL FUNCTION StrToReal(str) 209 | ! 210 | ! Converts a string into an real number. Blanks are ignored. 211 | 212 | USE dnaworks_test 213 | IMPLICIT NONE 214 | 215 | CHARACTER(LEN=*) :: str 216 | INTEGER :: i,strlen,a 217 | INTEGER :: de 218 | INTEGER,EXTERNAL :: StrToInt 219 | REAL :: front_real,back_real,ex_real,de_real,si 220 | CHARACTER(LEN=1) :: b 221 | 222 | IF (TEST3) PRINT *,"StrToReal" !TEST3 223 | 224 | ! initial values 225 | 226 | StrToReal=0 227 | front_real=0 228 | back_real=0 229 | si=1 230 | 231 | ! convert all useless characters to spaces 232 | 233 | DO i=1,LEN(str) 234 | b=str(i:i) 235 | a=IACHAR(b) 236 | IF ((a.ge.48.and.a.le.57).or.(b.eq."-").or.(b.eq.'.').or.& 237 | (b.eq.'e').or.(b.eq.'E')) THEN 238 | str(i:i)=str(i:i) 239 | ELSE 240 | str(i:i)=" " 241 | END IF 242 | END DO 243 | 244 | ! shift string to left 245 | 246 | str=ADJUSTL(str) 247 | strlen=LEN_TRIM(str) 248 | 249 | ! find sign 250 | 251 | IF (str(1:1).eq.'-') si=-1 252 | 253 | ! find exponents, if there 254 | 255 | IF ((INDEX(str,'e')).gt.0) THEN 256 | ex_real = (REAL(10))**(REAL(StrToInt(str(((INDEX(str,'e'))+1):strlen)))) 257 | strlen = (INDEX(str,'e'))-1 258 | ELSE IF ((INDEX(str,'E')).gt.0) THEN 259 | 260 | ex_real = (REAL(10))**(REAL(StrToInt(str(((INDEX(str,'E'))+1):strlen)))) 261 | strlen = (INDEX(str,'E'))-1 262 | ELSE 263 | ex_real=1 264 | END IF 265 | 266 | ! find decimal position 267 | 268 | de=INDEX(str,'.') 269 | 270 | ! find values 271 | 272 | IF (de.gt.0) THEN 273 | front_real = REAL(ABS(StrToInt(str(1:(de-1))))) 274 | de_real=(REAL(1))/((REAL(10))**(strlen-de)) 275 | back_real = (REAL(ABS(StrToInt(str((de+1):strlen)))))*de_real 276 | ELSE 277 | front_real = REAL(ABS(StrToInt(str(1:strlen)))) 278 | back_real=0 279 | END IF 280 | 281 | StrToReal=(front_real+back_real)*si*ex_real 282 | 283 | END FUNCTION StrToReal 284 | SUBROUTINE ToLowerCase(str) 285 | 286 | ! Converts a string to lower case 287 | 288 | USE dnaworks_test 289 | IMPLICIT NONE 290 | 291 | CHARACTER(LEN=*) :: str 292 | INTEGER :: i,j 293 | 294 | IF (TEST3) PRINT *,'ToLowerCase' 295 | 296 | DO i=1,LEN_TRIM(str) 297 | j = ICHAR(str(i:i)) 298 | IF (65.LE.j.AND.j.LE.90) str(i:i)=CHAR(j+32) 299 | END DO 300 | 301 | END SUBROUTINE ToLowerCase 302 | SUBROUTINE ToUpperCase(str) 303 | 304 | ! Converts a string to upper case 305 | 306 | USE dnaworks_test 307 | IMPLICIT NONE 308 | 309 | CHARACTER(LEN=*) :: str 310 | INTEGER :: i,j 311 | 312 | IF (TEST3) PRINT *,'ToUpperCase' 313 | 314 | DO i=1,LEN_TRIM(str) 315 | j = ICHAR(str(i:i)) 316 | IF (97.LE.j.AND.j.LE.122) str(i:i)=CHAR(j-32) 317 | END DO 318 | 319 | END SUBROUTINE ToUpperCase 320 | -------------------------------------------------------------------------------- /time_func.f90: -------------------------------------------------------------------------------- 1 | CHARACTER(LEN=10) FUNCTION CurrentDate() 2 | ! 3 | ! Returns the date as DD/MM/YYYY in a 10 character string 4 | 5 | USE dnaworks_data 6 | USE dnaworks_test 7 | IMPLICIT NONE 8 | 9 | INTEGER :: values(8) 10 | CHARACTER(LEN=8) :: date 11 | CHARACTER(LEN=10) :: time 12 | CHARACTER(LEN=5) :: zone 13 | CHARACTER(LEN=2) :: MM,DD 14 | CHARACTER(LEN=4) :: YYYY 15 | 16 | IF (TEST3) PRINT *,"CurrentDate" !TEST3 17 | 18 | CALL DATE_AND_TIME(date,time,zone,values) 19 | 20 | ! Capture months 21 | 22 | IF (values(2).GE.10) THEN 23 | WRITE(UNIT=MM,FMT="(i2)") values(2) 24 | ELSE 25 | WRITE(UNIT=MM,FMT="('0',i1)") values(2) 26 | END IF 27 | 28 | ! Capture days 29 | 30 | IF (values(3).GE.10) THEN 31 | WRITE(UNIT=DD,FMT="(i2)") values(3) 32 | ELSE 33 | WRITE(UNIT=DD,FMT="('0',i1)") values(3) 34 | END IF 35 | 36 | ! Capture year 37 | 38 | WRITE(UNIT=YYYY,FMT="(i4)") values(1) 39 | 40 | CurrentDate = MM//'/'//DD//'/'//YYYY 41 | 42 | END FUNCTION CurrentDate 43 | CHARACTER(LEN=6) FUNCTION CurrentDateNice() 44 | ! 45 | ! Returns the date as YYMMDD in an 6 character string 46 | 47 | USE dnaworks_data 48 | USE dnaworks_test 49 | IMPLICIT NONE 50 | 51 | INTEGER :: values(8) 52 | CHARACTER(LEN=8) :: date 53 | CHARACTER(LEN=10) :: time 54 | CHARACTER(LEN=5) :: zone 55 | CHARACTER(LEN=2) :: MM,DD 56 | CHARACTER(LEN=4) :: YYYY 57 | 58 | IF (TEST3) PRINT *,"CurrentDateNice" !TEST3 59 | 60 | CALL DATE_AND_TIME(date,time,zone,values) 61 | 62 | ! Capture months 63 | 64 | IF (values(2).GE.10) THEN 65 | WRITE(UNIT=MM,FMT="(i2)") values(2) 66 | ELSE 67 | WRITE(UNIT=MM,FMT="('0',i1)") values(2) 68 | END IF 69 | 70 | ! Capture days 71 | 72 | IF (values(3).GE.10) THEN 73 | WRITE(UNIT=DD,FMT="(i2)") values(3) 74 | ELSE 75 | WRITE(UNIT=DD,FMT="('0',i1)") values(3) 76 | END IF 77 | 78 | ! Capture year 79 | 80 | WRITE(UNIT=YYYY,FMT="(i4)") values(1) 81 | 82 | CurrentDateNice = YYYY(3:4)//MM//DD 83 | 84 | END FUNCTION CurrentDateNice 85 | CHARACTER(LEN=8) FUNCTION CurrentTime() 86 | ! 87 | ! Returns the time as HH:MM:SS in a 8 character string 88 | 89 | USE dnaworks_data 90 | USE dnaworks_test 91 | IMPLICIT NONE 92 | 93 | INTEGER :: values(8) 94 | CHARACTER(LEN=8) :: date 95 | CHARACTER(LEN=10) :: time 96 | CHARACTER(LEN=5) :: zone 97 | CHARACTER(LEN=2) :: HH,MM,SS 98 | 99 | IF (TEST3) PRINT *,"CurrentTime" !TEST3 100 | 101 | CALL DATE_AND_TIME(date,time,zone,values) 102 | 103 | ! Capture hours 104 | 105 | IF (values(5).GE.10) THEN 106 | WRITE(UNIT=HH,FMT="(i2)") values(5) 107 | ELSE IF (values(5).EQ.0) THEN 108 | WRITE(UNIT=HH,FMT="('00')") 109 | ELSE 110 | WRITE(UNIT=HH,FMT="('0',i1)") values(5) 111 | END IF 112 | 113 | ! Capture minutes 114 | 115 | IF (values(6).GE.10) THEN 116 | WRITE(UNIT=MM,FMT="(i2)") values(6) 117 | ELSE IF (values(6).EQ.0) THEN 118 | WRITE(UNIT=MM,FMT="('00')") 119 | ELSE 120 | WRITE(UNIT=MM,FMT="('0',i1)") values(6) 121 | END IF 122 | 123 | ! Capture seconds 124 | 125 | IF (values(7).GE.10) THEN 126 | WRITE(UNIT=SS,FMT="(i2)") values(7) 127 | ELSE IF (values(7).EQ.0) THEN 128 | WRITE(UNIT=SS,FMT="('00')") 129 | ELSE 130 | WRITE(UNIT=SS,FMT="('0',i1)") values(7) 131 | END IF 132 | 133 | CurrentTime = HH//':'//MM//':'//SS 134 | 135 | END FUNCTION CurrentTime 136 | INTEGER FUNCTION CurrentTimeSeconds() 137 | ! 138 | ! Returns the time in seconds since Wed Dec 31 19:00:00 1969, adjusting for 139 | ! leap years. However, there is NO adjustment for daylight saving time 140 | 141 | USE dnaworks_test 142 | IMPLICIT NONE 143 | 144 | INTEGER :: values(8),x 145 | CHARACTER(LEN=8) :: date 146 | CHARACTER(LEN=10) :: time 147 | CHARACTER(LEN=5) :: zone 148 | LOGICAL :: leap_year 149 | 150 | IF (TEST3) PRINT *,"CurrentTimeSeconds" !TEST3 151 | ! PRINT *,"CurrentTimeSeconds" !TEST3 152 | 153 | leap_year=.FALSE. 154 | 155 | ! x = number of seconds from 12/31/1969 19:00:00 to 1/1/2001 00:00:00 156 | 157 | x=978325200 158 | 159 | ! find current date and time 160 | 161 | CALL DATE_AND_TIME(date,time,zone,values) 162 | 163 | ! is this year a leap year? 164 | 165 | IF (MOD(values(1),4).eq.0) leap_year=.TRUE. 166 | 167 | ! find yearly sums 168 | 169 | x=x+((values(1)-2001)*(365*86400))+(((values(1)-2001)/4)*86400) 170 | 171 | ! find monthly sums 172 | 173 | SELECT CASE(values(2)) 174 | CASE(2) 175 | x=x+(31*86400) 176 | CASE(3) 177 | x=x+(59*86400) 178 | CASE(4) 179 | x=x+(90*86400) 180 | CASE(5) 181 | x=x+(120*86400) 182 | CASE(6) 183 | x=x+(151*86400) 184 | CASE(7) 185 | x=x+(181*86400) 186 | CASE(8) 187 | x=x+(212*86400) 188 | CASE(9) 189 | x=x+(243*86400) 190 | CASE(10) 191 | x=x+(273*86400) 192 | CASE(11) 193 | x=x+(304*86400) 194 | CASE(12) 195 | x=x+(334*86400) 196 | END SELECT 197 | 198 | ! correct for leap day in February 199 | 200 | IF ((leap_year).and.(values(2).gt.2)) x=x+86400 201 | 202 | ! are we in daylight savings time? 203 | 204 | ! spring=7-(MOD((2800+values(1)-2474+(INT(values(1)/4))),7)) 205 | ! fall=spring+20 206 | ! IF (spring.le.4) fall=fall+7 207 | ! 208 | ! IF ((values(2).ge.4).and.(values(2).le.10).and.& 209 | ! &(values(3).ge.spring).and.(values(3).le.fall).and.& 210 | ! &(values(5).ge.2)) x=x-3600 211 | 212 | ! find final sum 213 | 214 | CurrentTimeSeconds=x+((values(3)-1)*86400)+(values(5)*3600)+(values(6)*60)+values(7) 215 | ! PRINT *,CurrentTimeSeconds !TEST3 216 | 217 | END FUNCTION CurrentTimeSeconds 218 | --------------------------------------------------------------------------------