├── DNAWORKS.inp
├── LICENSE
├── Makefile
├── README.md
├── control_func.f90
├── dnaworks.f90
├── dnaworks_data.f90
├── dnaworks_test.f90
├── email_func.f90
├── encoding.f90
├── input.f90
├── misc_func.f90
├── mutate.f90
├── output.f90
├── overlaps.f90
├── scores.f90
├── str_func.f90
└── time_func.f90
/DNAWORKS.inp:
--------------------------------------------------------------------------------
1 | # DNAWORKS.inp sample
2 | # David Hoover, 2010-11-09
3 | #
4 | # Directives must be flat against the left margin
5 | #
6 | # Comments demarcated by '#'. All text following # is ignored.
7 | #
8 | # $I = integer
9 | # $R = real, floating point number
10 | # $S = string (must be in double quotes)
11 | # [ ] = optional
12 | # | = exclusive conditional
13 | #
14 | # title $S
15 | # title "" # default
16 | # TITLE "mutant1"
17 | # TITLE "test2"
18 | # TITLE "mutant2"
19 |
20 | # timelimit 0 # seconds until giving up, 0 means wait forever
21 |
22 | # email $S
23 | # EMAIl "webtools@helix.nih.gov"
24 |
25 | # melting low $I [ high $I ] [ tolerance $I ]
26 | # melting low 62 # default
27 | melting low 75 # overrides the default of 62
28 |
29 | # length low $I [ high $I ] [ random ]
30 | # length low 40 # default
31 | length low 180 # overrides the default of 40
32 |
33 | # frequency [ threshold $I ] [ random ] [ strict ] [ scored ]
34 | # frequency threshold 10 # default
35 |
36 | # concentration [ oligo $R ] [ sodium $R ] [ magnesium $R ]
37 | # concentration oligo 1E-7 sodium 0.05 magnesium 0.002 # default
38 |
39 | # solutions $I
40 | # solutions 1 # default
41 |
42 | # repeat $I
43 | # repeat 8 # default
44 |
45 | # misprime $I [ tip $I ] [ max $I ]
46 | # misprime 18 tip 6 max 8 # default
47 |
48 | # weight [ twt $R ] [ cwt $R ] [ rwt $R ] [ mwt $R ] [ gwt $R ] [ awt $R ] [ lwt $R ] [ pwt $R ] [ fwt $R ]
49 | # weight twt 1.0 cwt 1.0 rwt 1.0 mwt 1.0 gwt 1.0 awt 1.0 lwt 1.0 pwt 1.0 fwt 1.0 # default
50 |
51 | # tbio
52 | # nogaps
53 | # logfile $S
54 | # logfile "LOGFILE.txt" # default
55 | # LOGFILE "MyOutput.txt"
56 | # LOGFILE "mutant1.out"
57 |
58 | # previous $I [ $S ]
59 | # previous 1 "LOGFILE.txt" # default
60 | #
61 | # Mutant run:
62 | # PREVious 1
63 | # PREVious 1 "mutant1.out"
64 | #
65 |
66 | # pattern
67 | # AflII CTTAAG
68 | # BamHI GGATCC
69 | # //
70 |
71 | codon ecoli2
72 |
73 | # protein #reverse #gapfix
74 | protein
75 | pattdkslkd iliqgtknlp ileiasnnqp qnvdsvcsgt lqktedvhlm
76 | gftlsgqkva dspleaskrw afrtgvppkn veytegeeak tcynisvtdp
77 | //
78 |
79 | # nucleotide #reverse #gapfix
80 | # gagctcggat ccactactcg acccacgcgt ccgcccacgc gtccggccag gacctctgtg
81 | # aaccggtcgg ggcgggggcc gcctggccgg gagtctgctc ggcggtgggt ggccgaggaa
82 | # gggagagaac gatcgcggag cagggcgccc gaactccggg cgccgcgcca tgcgccgggc
83 | # cagccgagac tacggcaagt acctgcgcag ctcggaggag atgggcagcg gccccggcgt
84 | # cccacacgag ggtccgctgc accccgcgcc ttctgcaccg gctccggcgc cgccacccgc
85 | # cgcctcccgc tccatgttcc tggccctcct ggggctggga ctgggccagg tggtctgcag
86 | # catcgctctg ttcctgtact ttcgagcgca gatggatcct aacagaatat cagaagacag
87 | # cactcactgc ttttatagaa tcctgagact ccatgaaaac gcaggtttgc aggactcgac
88 | # tctggagagt gaagacacac tacctgactc ctgcaggagg atgaaacaag cctttcaggg
89 | # ggccgtgcag aaggaactgc aacacattgt ggggccacag cgcttctcag gagctccagc
90 | # tatgatggaa ggctcatggt tggatgtggc ccagcgaggc aagcctgagg cccagccatt
91 | # tgcacacctc accatcaatg ctgccagcat cccatcgggt tcccataaag tcactctgtc
92 | # ctcttggtac cacgatcgag gctgggccaa gatctctaac atgacgttaa gcaacggaaa
93 | # actaagggtt aaccaagatg gcttctatta cctgtacgcc aacatttgct ttcggcatca
94 | # tgaaacatcg ggaagcgtac ctacagacta tcttcagctg atggtgtatg tcgttaaaac
95 | # cagcatcaaa atcccaagtt ctcataacct gatgaaagga gggagcacga aaaactggtc
96 | # gggcaattct gaattccact tttattccat aaatgttggg ggatttttca agctccgagc
97 | # tggtgaagaa attagcattc aggtgtccaa cccttccctg ctggatccgg atcaagatgc
98 | # gacgtacttt ggggctttca aagttcagga catagactga gactcatttc gtggaacatt
99 | # //
100 | #
101 | #
102 | #
103 | #-------------------------------------------------------------------------------
104 | # OTHER EXAMPLES:
105 | #-------------------------------------------------------------------------------
106 |
107 | # Nucleotide examples:
108 | #
109 | # NUCLeotide
110 | # CCATG
111 | # //
112 | #
113 | # NUCLeotide
114 | # GGGTTC
115 | # //
116 | #
117 | # NUCLeotide
118 | # 1 CCATGGCGGCTGGTCAGGCGTTCCGTAAATTCCTGCCGCTGTTCGACCGTGTTCTCGTGG
119 | # 61 AACGCTCTGAAGTTGAAACC
120 | # //
121 | #
122 | # NUCLeotide
123 | # RRW
124 | # //
125 | #
126 | # NUCLeotide REVERSE
127 | # TCTGCGGGTGGTATCGTGCTGACCGGTTCTGCGGCTG
128 | # 121 CGAAAGTGCTGCAGGCGACCGTTGTTGCGGTTGGTTCTGGTTCTAAAGGTAAAGGTGGT
129 | # //
130 | #
131 | # NUCLeotide GAPFIX
132 | # NNN
133 | # //
134 | #
135 | # NUCLeotide
136 | # ATCCAGCCGGTTTCTGTTAAGGTTGGTGACAAAGTTCTGCTGCCGGAATACGGCGGTA
137 | # 241 CCAAAGTTGTTCTGGACGACAAAGACTACTTCCTGTTCCGTGACGGTGACATCCTGGGTA
138 | # 301 AGTACGTTGACTAAGGGTTC
139 | # //
140 | #
141 | # Protein examples:
142 | #
143 | # PROTein
144 | # AAGQAFRKFLPLFDRVLVERSEVET
145 | # //
146 | #
147 | # PROTein GAPFIX
148 | # K
149 | # //
150 | #
151 | # PROTein
152 | # SAGGIVLTGSAAAKVLQATVVAVGSGSKGKGG
153 | # //
154 | #
155 | # PROTein GAPFIX
156 | # E
157 | # //
158 | #
159 | # PROTein
160 | # IQPVSVKVGDKVLLPEYGGTKVVLDDKDYFLFRDGDILGKYVDX
161 | # //
162 | #
163 | # Pattern examples:
164 | #
165 | # PATTern
166 | # EcoRI GAATTC
167 | # PstI CtgcaG
168 | # BamHI GGATCC
169 | # KpnI GGTACC
170 | # NdeI CATATG
171 | # PvuII CAGCTG
172 | # SwaI ATTTAAAT
173 | # FseI GGCCGGCC
174 | # NotI GCGGCCGC
175 | # NcoI CCATGG
176 | # silly RWGGTcGRY
177 | # //
178 | #
179 | # Codon Frequency Tables:
180 | #
181 | # CODOn S. cerevesiae
182 | #
183 | # CODOn E. coli
184 | #
185 | # CODOn ecoli2
186 | #
187 | # CODOn
188 | #Gly GGG 40359.00 11.39 0.16
189 | #Gly GGA 34894.00 9.85 0.13
190 | #Gly GGT 89915.00 25.37 0.35
191 | #Gly GGC 94608.00 26.70 0.36
192 | #Glu GAG 66665.00 18.81 0.33
193 | #Glu GAA 137748.00 38.87 0.67
194 | #Asp GAT 116164.00 32.78 0.63
195 | #Asp GAC 67865.00 19.15 0.37
196 | #Val GTG 85263.00 24.06 0.34
197 | #Val GTA 41283.00 11.65 0.17
198 | #Val GTT 70627.00 19.93 0.29
199 | #Val GTC 50417.00 14.23 0.20
200 | #Ala GCG 104293.00 29.43 0.32
201 | #Ala GCA 75329.00 21.26 0.23
202 | #Ala GCT 60787.00 17.15 0.19
203 | #Ala GCC 85138.00 24.03 0.26
204 | #Arg AGG 7966.00 2.25 0.04
205 | #Arg AGA 13784.00 3.89 0.07
206 | #Ser AGT 35966.00 10.15 0.16
207 | #Ser AGC 53286.00 15.04 0.24
208 | #Lys AAG 45133.00 12.74 0.26
209 | #Lys AAA 125351.00 35.37 0.74
210 | #Asn AAT 75086.00 21.19 0.50
211 | #Asn AAC 75334.00 21.26 0.50
212 | #Met ATG 92952.00 26.23 1.00
213 | #Ile ATA 25982.00 7.33 0.12
214 | #Ile ATT 105218.00 29.69 0.49
215 | #Ile ATC 83118.00 23.46 0.39
216 | #Thr ACG 48560.00 13.70 0.25
217 | #Thr ACA 34483.00 9.73 0.17
218 | #Thr ACT 37430.00 10.56 0.19
219 | #Thr ACC 77023.00 21.74 0.39
220 | #Trp TGG 48949.00 13.81 1.00
221 | #End TGA 3616.00 1.02 0.31
222 | #Cys TGT 18601.00 5.25 0.46
223 | #Cys TGC 21434.00 6.05 0.54
224 | #End TAG 978.00 0.28 0.08
225 | #End TAA 7024.00 1.98 0.60
226 | #Tyr TAT 62750.00 17.71 0.59
227 | #Tyr TAC 43034.00 12.14 0.41
228 | #Leu TTG 45581.00 12.86 0.13
229 | #Leu TTA 51320.00 14.48 0.14
230 | #Phe TTT 78743.00 22.22 0.58
231 | #Phe TTC 56591.00 15.97 0.42
232 | #Ser TCG 29993.00 8.46 0.13
233 | #Ser TCA 32814.00 9.26 0.15
234 | #Ser TCT 37586.00 10.61 0.17
235 | #Ser TCC 32586.00 9.20 0.15
236 | #Arg CGG 21391.00 6.04 0.11
237 | #Arg CGA 13645.00 3.85 0.07
238 | #Arg CGT 70009.00 19.76 0.36
239 | #Arg CGC 68569.00 19.35 0.35
240 | #Gln CAG 100346.00 28.32 0.66
241 | #Gln CAA 51275.00 14.47 0.34
242 | #His CAT 44633.00 12.60 0.58
243 | #His CAC 32678.00 9.22 0.42
244 | #Leu CTG 168885.00 47.66 0.47
245 | #Leu CTA 15275.00 4.31 0.04
246 | #Leu CTT 42704.00 12.05 0.12
247 | #Leu CTC 35873.00 10.12 0.10
248 | #Pro CCG 72450.00 20.44 0.49
249 | #Pro CCA 30515.00 8.61 0.21
250 | #Pro CCT 26805.00 7.56 0.18
251 | #Pro CCC 19008.00 5.36 0.13
252 | #//
253 |
254 |
255 | #-------------------------------------------------------------------------------
256 | # MORE EXAMPLES TO TRY:
257 | #-------------------------------------------------------------------------------
258 |
259 | #NUCLeotide
260 | # 1 gagctcggat ccactactcg acccacgcgt ccgcccacgc gtccggccag gacctctgtg
261 | # 61 aaccggtcgg ggcgggggcc gcctggccgg gagtctgctc ggcggtgggt ggccgaggaa
262 | # 121 gggagagaac gatcgcggag cagggcgccc gaactccggg cgccgcgcca tgcgccgggc
263 | # 181 cagccgagac tacggcaagt acctgcgcag ctcggaggag atgggcagcg gccccggcgt
264 | # 241 cccacacgag ggtccgctgc accccgcgcc ttctgcaccg gctccggcgc cgccacccgc
265 | # 301 cgcctcccgc tccatgttcc tggccctcct ggggctggga ctgggccagg tggtctgcag
266 | # 361 catcgctctg ttcctgtact ttcgagcgca gatggatcct aacagaatat cagaagacag
267 | # 421 cactcactgc ttttatagaa tcctgagact ccatgaaaac gcaggtttgc aggactcgac
268 | # 481 tctggagagt gaagacacac tacctgactc ctgcaggagg atgaaacaag cctttcaggg
269 | # 541 ggccgtgcag aaggaactgc aacacattgt ggggccacag cgcttctcag gagctccagc
270 | # 601 tatgatggaa ggctcatggt tggatgtggc ccagcgaggc aagcctgagg cccagccatt
271 | # 661 tgcacacctc accatcaatg ctgccagcat cccatcgggt tcccataaag tcactctgtc
272 | # 721 ctcttggtac cacgatcgag gctgggccaa gatctctaac atgacgttaa gcaacggaaa
273 | # 781 actaagggtt aaccaagatg gcttctatta cctgtacgcc aacatttgct ttcggcatca
274 | # 841 tgaaacatcg ggaagcgtac ctacagacta tcttcagctg atggtgtatg tcgttaaaac
275 | # 901 cagcatcaaa atcccaagtt ctcataacct gatgaaagga gggagcacga aaaactggtc
276 | # 961 gggcaattct gaattccact tttattccat aaatgttggg ggatttttca agctccgagc
277 | # 1021 tggtgaagaa attagcattc aggtgtccaa cccttccctg ctggatccgg atcaagatgc
278 | # 1081 gacgtacttt ggggctttca aagttcagga catagactga gactcatttc gtggaacatt
279 | #//
280 |
281 |
282 |
283 | #NUCLeotide REVERSE
284 | # 361 tttcccagtc acgacgttgt aaaacgacgg ccagtgccaa gcttgcatgc ctgcaggtcg
285 | # 421 actctagagg atccccgggt accgagctcg aattcgtaat catggtcata gctgtttcct
286 | #//
287 |
288 | #PROTein gapfix
289 | #RRRRR
290 | #//
291 |
292 | #NUCLeotide
293 | #GTAGCGACTAGCAT
294 | #//
295 |
296 | #NUCLeotide
297 | # 361 tttcccagtc acgacgttgt aaaacgacgg ccagtgccaa gcttgcatgc ctgcaggtcg
298 | # 421 actctagagg atccccgggt accgagctcg aattcgtaat catggtcata gctgtttcct
299 | #//
300 |
301 | #PROTein REVERSE (hBD2)
302 | # GIGDPVTCLDCGAISHPVFCPDRYKQIGTCGLPGTKCCKKPXX
303 | #//
304 |
305 | #NUCLeotide gapfix
306 | #TGATGATTATTA
307 | #//
308 |
309 | #PROTein
310 | # KVFGDCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINS
311 | # RWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDV
312 | # XX
313 | #//
314 |
315 | #NUCLeotide gapfix
316 | #TAGAAAACGC
317 | #//
318 |
319 |
320 | #PROTein (GFP)
321 | # 1 mskgeelftg vvpilveldg dvnghkfsvs gegegdatyg kltlkfictt gklpvpwptl
322 | # 61 vttfsygvqc fsrypdhmkq hdffksampe gyvqertiff kddgnyktra evkfegdtlv
323 | #121 nrielkgidf kedgnilghk leynynshnv yimadkqkng ikvnfkirhn iedgsvqlad
324 | #181 hyqqntpigd gpvllpdnhy lstqsalskd pnekrdhmvl lefvtaagit hgmdelyk
325 | #//
326 |
327 | #NUCLeotide (GFP)
328 | # 1 GGGGGGGGGGGTGAAGAACTGTTCACCGGCGTTGTTCCGATCCTGGTTGAACTGGATGGT
329 | # 61 GACGTGAATGGTCACAAATTCTCTGTTTCTGGTGAGGGTGAAGGCGACGCGACCTACGGC
330 | # 121 AAACTCACCCTGAAATTCATCTGCACCACCGGTAAACTGCCGGTTCCGTGGCCGACCCTG
331 | # 181 GTTACCACCTTCTCTTACGGTGTTCAGTGTTTCTCTCGTTATCCGGACCACATGAAACAG
332 | # 241 CACGATTTTTTCAAATCTGCGATGCCGGAAGGTTACGTTCAGGAACGTACCATCTTCTTC
333 | # 301 AAGGACGACGGCAACTATAAAACCCGTGCGGAAGTTAAATTCGAAGGTGACACCCTCGTG
334 | # 361 AACCGTATCGAACTGAAAGGTATCGACTTCAAAGAAGACGGTAATATCCTGGGCCACAAA
335 | # 421 CTCGAATACAACTACAACTCCCACAACGTTTACATTATGGCGGACAAGCAAAAGAACGGT
336 | # 481 ATCAAAGTGAACTTCAAGATCCGCCACAACATCGAGGACGGTTCTGTTCAGCTCGCGGAT
337 | # 541 CACTACCAACAGAATACCCCAATCGGCGACGGTCCGGTTCTCCTGCCGGACAACCACTAT
338 | # 601 CTGTCTACCCAGTCTGCGCTGTCTAAGGACCCGAACGAAAAACGCGATCATATGGTGCTG
339 | # 661 CTGGAATTCGTTACCGCGGCTGGTATTACTCACGGTATGGACGAACTGTACAAA
340 | #//
341 |
342 | #PROTein (Ovalbumin)
343 | # 1 gsigaasmef cfdvfkelkv hhanenifyc piaimsalam vylgakdstr tqinkvvrfd
344 | # 61 klpgfgdxie aqcgtsvnvh sslrdilnqi tkpndvysfs lasrlyaeer ypilpeylqc
345 | #121 vkelyrggle pinfqtaadq arelinswve sqtngiirnv lqpxsvdsqt amvlvnaivf
346 | #181 kglwekafkd edtqampfrv teqeskpvqm myqiglfrva smasekmkil elpfaxgtms
347 | #241 mlvllpdevs gleqlesiin fekltewtss nvmeerkikv ylprmkmeek ynltsvlmam
348 | #301 gitdvfsssa nlsgissaex lkisqavhaa haeineagre vvgxaeagvd aasvseefra
349 | #361 dhpflfcikh iatnavlffg rcvsp
350 | #//
351 |
352 | #PROTein (Human Asparaginase) REVERSE
353 | # 1 MAHHHHHHAR AVGPERRLLA VYTGGTIGMR SELGVLVPGT GLAAILRTLP MFHDEEHARA
354 | # 61 RGLSEDTLVL PPDSRNQRIL YTVLECQPLF DSSDMTIAEW VRVAQTIKRH YEQYHGFVVI
355 | #121 HGTDTMAFAA SMLSFMLENL QKTVILTGAQ VPIHALWSDG RENLLGALLM AGQYVIPEVC
356 | #181 LFFQNQLFRG NRATKVDARR FAAFCSPNLL PLATVGADIT INRELVRKVD GKAGLVVHSS
357 | #241 MEQDVGLLRL YPGIPAALVR AFLQPPLKGV VMETFGSGNG PTKPDLLQEL RVATERGLVI
358 | #301 VNCTHCLQGA VTTDYAAGMA MAGAGVISGF DMTSEAALAK LSYVLGQPGL SLDVRKELLT
359 | #361 KDLRGEMTPP SVEERRPSLQ GNTLGGGVSW LLSLSGSQEA DALRNALVPS LACAAAHAGD
360 | #421 VEALQALVEL GSDLGLVDFN GQTPLHAAAR GGHTEAVTML LQRGVDVNTR DTDGFSPLLL
361 | #481 AVRGRHPGVI GLLREAGASL STQELEEAGT ELCRLAYRAD LEGLQVWWQA GADLGQPGYD
362 | #541 GHSALHVAEA AGNLAVVAFL QSLEGAVGAQ APCPEVLPGV X
363 | #//
364 |
365 | #PROTein (lysine ketoglutarate reductase/saccharopine dehydrogenase)
366 | # 1 MLQVHRTGLG RLGVSLSKGL HHKAVLAVRR EDVNAWERRA PLAPKHIKGI TNLGYKVLIQ
367 | # 61 PSNRRAIHDK DYVKAGGILQ EDISEACLIL GVKRPPEEKL MSRKTYAFFS HTIKAQEANM
368 | #121 GLLDEILKQE IRLIDYEKMV DHRGVRVVAF GQWAGVAGMI NILHGMGLRL LALGHHTPFM
369 | #181 HIGMAHNYRN SSQAVQAVRD AGYEISLGLM PKSIGPLTFV FTGTGNVSKG AQAIFNELPC
370 | #241 EYVEPHELKE VSQTGDLRKV YGTVLSRHHH LVRKTDAVYD PAEYDKHPER YISRFNTDIA
371 | #301 PYTTCLINGI YWEQNTPRLL TRQDAQSLLA PGKFSPAGVE GCPALPHKLV AICDISADTG
372 | #361 GSIEFMTECT TIEHPFCMYD ADQHIIHDSV EGSGILMCSI DNLPAQLPIE ATECFGDMLY
373 | #421 PYVEEMILSD ATQPLESQNF SPVVRDAVIT SNGTLPDKYK YIQTLRESRE RAQSLSMGTR
374 | #481 RKVLVLGSGY ISEPVLEYLS RDGNIEITVG SDMKNQIEQL GKKYNINPVS MDICKQEEKL
375 | #541 GFLVAKQDLV ISLLPYVLHP LVAKACITNK VNMVTASYIT PALKELEKSV EDAGITIIGE
376 | #601 LGLDPGLDHM LAMETIDKAK EVGATIESYI SYCGGLPAPE HSNNPLRYKF SWSPVGVLMN
377 | #661 VMQSATYLLD GKVVNVAGGI SFLDAVTSMD FFPGLNLEGY PNRDSTKYAE IYGISSAHTL
378 | #721 LRGTLRYKGY MKALNGFVKL GLINREALPA FRPEANPLTW KQLLCDLVGI SPSSEHDVLK
379 | #781 EAVLKKLGGD NTQLEAAEWL GLLGDEQVPQ AESILDALSK HLVMKLSYGP EEKDMIVMRD
380 | #841 SFGIRHPSGH LEHKTIDLVA YGDINGFSAM AKTVGLPTAM AAKMLLDGEI GAKGLMGPFS
381 | #901 KEIYGPILER IKAEGIIYTT QSTIKPX
382 | #//
383 |
384 | #NUCLeotide
385 | #GGGG
386 | #//
387 |
388 | #NUCLeotide (pUC18)
389 | # 1 tcgcgcgttt cggtgatgac ggtgaaaacc tctgacacat gcagctcccg gagacggtca
390 | # 61 cagcttgtct gtaagcggat gccgggagca gacaagcccg tcagggcgcg tcagcgggtg
391 | # 121 ttggcgggtg tcggggctgg cttaactatg cggcatcaga gcagattgta ctgagagtgc
392 | # 181 accatatgcg gtgtgaaata ccgcacagat gcgtaaggag aaaataccgc atcaggcgcc
393 | # 241 attcgccatt caggctgcgc aactgttggg aagggcgatc ggtgcgggcc tcttcgctat
394 | # 301 tacgccagct ggcgaaaggg ggatgtgctg caaggcgatt aagttgggta acgccagggt
395 | # 361 tttcccagtc acgacgttgt aaaacgacgg ccagtgccaa gcttgcatgc ctgcaggtcg
396 | # 421 actctagagg atccccgggt accgagctcg aattcgtaat catggtcata gctgtttcct
397 | # 481 gtgtgaaatt gttatccgct cacaattcca cacaacatac gagccggaag cataaagtgt
398 | # 541 aaagcctggg gtgcctaatg agtgagctaa ctcacattaa ttgcgttgcg ctcactgccc
399 | # 601 gctttccagt cgggaaacct gtcgtgccag ctgcattaat gaatcggcca acgcgcgggg
400 | # 661 agaggcggtt tgcgtattgg gcgctcttcc gcttcctcgc tcactgactc gctgcgctcg
401 | # 721 gtcgttcggc tgcggcgagc ggtatcagct cactcaaagg cggtaatacg gttatccaca
402 | # 781 gaatcagggg ataacgcagg aaagaacatg tgagcaaaag gccagcaaaa ggccaggaac
403 | # 841 cgtaaaaagg ccgcgttgct ggcgtttttc cataggctcc gcccccctga cgagcatcac
404 | # 901 aaaaatcgac gctcaagtca gaggtggcga aacccgacag gactataaag ataccaggcg
405 | # 961 tttccccctg gaagctccct cgtgcgctct cctgttccga ccctgccgct taccggatac
406 | # 1021 ctgtccgcct ttctcccttc gggaagcgtg gcgctttctc atagctcacg ctgtaggtat
407 | # 1081 ctcagttcgg tgtaggtcgt tcgctccaag ctgggctgtg tgcacgaacc ccccgttcag
408 | # 1141 cccgaccgct gcgccttatc cggtaactat cgtcttgagt ccaacccggt aagacacgac
409 | # 1201 ttatcgccac tggcagcagc cactggtaac aggattagca gagcgaggta tgtaggcggt
410 | # 1261 gctacagagt tcttgaagtg gtggcctaac tacggctaca ctagaaggac agtatttggt
411 | # 1321 atctgcgctc tgctgaagcc agttaccttc ggaaaaagag ttggtagctc ttgatccggc
412 | # 1381 aaacaaacca ccgctggtag cggtggtttt tttgtttgca agcagcagat tacgcgcaga
413 | # 1441 aaaaaaggat ctcaagaaga tcctttgatc ttttctacgg ggtctgacgc tcagtggaac
414 | # 1501 gaaaactcac gttaagggat tttggtcatg agattatcaa aaaggatctt cacctagatc
415 | # 1561 cttttaaatt aaaaatgaag ttttaaatca atctaaagta tatatgagta aacttggtct
416 | # 1621 gacagttacc aatgcttaat cagtgaggca cctatctcag cgatctgtct atttcgttca
417 | # 1681 tccatagttg cctgactccc cgtcgtgtag ataactacga tacgggaggg cttaccatct
418 | # 1741 ggccccagtg ctgcaatgat accgcgagac ccacgctcac cggctccaga tttatcagca
419 | # 1801 ataaaccagc cagccggaag ggccgagcgc agaagtggtc ctgcaacttt atccgcctcc
420 | # 1861 atccagtcta ttaattgttg ccgggaagct agagtaagta gttcgccagt taatagtttg
421 | # 1921 cgcaacgttg ttgccattgc tacaggcatc gtggtgtcac gctcgtcgtt tggtatggct
422 | # 1981 tcattcagct ccggttccca acgatcaagg cgagttacat gatcccccat gttgtgcaaa
423 | # 2041 aaagcggtta gctccttcgg tcctccgatc gttgtcagaa gtaagttggc cgcagtgtta
424 | # 2101 tcactcatgg ttatggcagc actgcataat tctcttactg tcatgccatc cgtaagatgc
425 | # 2161 ttttctgtga ctggtgagta ctcaaccaag tcattctgag aatagtgtat gcggcgaccg
426 | # 2221 agttgctctt gcccggcgtc aatacgggat aataccgcgc cacatagcag aactttaaaa
427 | # 2281 gtgctcatca ttggaaaacg ttcttcgggg cgaaaactct caaggatctt accgctgttg
428 | # 2341 agatccagtt cgatgtaacc cactcgtgca cccaactgat cttcagcatc ttttactttc
429 | # 2401 accagcgttt ctgggtgagc aaaaacagga aggcaaaatg ccgcaaaaaa gggaataagg
430 | # 2461 gcgacacgga aatgttgaat actcatactc ttcctttttc aatattattg aagcatttat
431 | # 2521 cagggttatt gtctcatgag cggatacata tttgaatgta tttagaaaaa taaacaaata
432 | # 2581 ggggttccgc gcacatttcc ccgaaaagtg ccacctgacg tctaagaaac cattattatc
433 | # 2641 atgacattaa cctataaaaa taggcgtatc acgaggccct ttcgtc
434 | #//
435 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Build rules for the dnaworks executable (GNU make + gfortran).
#
# FCFLAGS are the compile flags and are applied to every "-c" step.
# FLFLAGS are the link flags and are applied when the objects are linked.

# compiler
FC = gfortran

# compile flags
#FCFLAGS = -g -fbounds-check -O2 -static-libgcc -static
FCFLAGS = -g -fbounds-check -O2

# link flags
FLFLAGS = -g

# program name
PROGRAM = dnaworks

# required objects
objects = dnaworks.o dnaworks_data.o dnaworks_test.o \
          control_func.o email_func.o encoding.o input.o misc_func.o \
          mutate.o output.o overlaps.o scores.o str_func.o time_func.o

# required modules
modules = dnaworks_data.mod dnaworks_test.mod

# the main linking step
# FCFLAGS is kept on the link line so that the commented-out static variant
# of FCFLAGS above (which carries link-time options) continues to work.
$(PROGRAM): $(objects)
	$(FC) $(FCFLAGS) $(FLFLAGS) -o $(PROGRAM) $(objects)

# specific requirements for each object: the module files must exist before
# any object that USEs them is compiled
$(objects): $(modules)

# compile recipe for modules (compile flags, not link flags)
%.mod: %.f90
	$(FC) $(FCFLAGS) -c $<

# compile recipe for objects (compile flags, not link flags)
%.o: %.f90
	$(FC) $(FCFLAGS) -c $<

# extra rules
.PHONY: clean
clean:
	rm -f $(objects) $(modules) $(PROGRAM)
41 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | DNAWorks
2 | ========
3 |
4 | Automatic oligonucleotide design for PCR-based gene synthesis
5 |
6 | DNAWorks v3.2.4
7 | David Hoover
8 | May 04, 2017
9 |
10 | DNAWorks takes as input nucleotide and/or protein sequences, codon
11 | information, and other variables, and attempts to optimize a synthetic
12 | gene. It then outputs the gene with a variety of histograms and metrics
13 | for judging the probability of success for generating the gene by PCR. It
14 | also outputs the oligonucleotide sequences required for PCR synthesis of
15 | the synthetic gene.
16 |
17 | This program is based on this publication:
18 |
19 | Hoover DM, Lubkowski J. DNAWorks: an automated method for designing
20 | oligonucleotides for PCR-based gene synthesis. Nucleic Acids Res. 2002 May
21 | 15;30(10):e43. PubMed PMID: 12000848; PubMed Central PMCID: PMC115297.
22 |
23 | Kindly reference this publication if you use this for your work.
24 |
25 |
26 | Installation
27 | ============
28 |
29 | Currently, DNAWorks is written in Fortran. It will require a Fortran compiler on a UNIX system.
30 |
31 | If you do not have gfortran, make, or git, then on a Linux machine, install these packages (Ubuntu):
32 |
33 | ```
34 | apt-get install gfortran make git
35 | ```
36 |
37 | or on CentOS
38 |
39 | ```
40 | yum install gcc-gfortran make git
41 | ```
42 |
43 | Then download DNAWorks and compile with make:
44 |
45 | ```
46 | git clone https://github.com/davidhoover/DNAWorks.git
47 | cd DNAWorks
48 | make
49 | ```
50 |
51 | and the dnaworks executable should compile.
52 |
53 | Run
54 | ===
55 |
56 | Instructions (can be displayed by typing ```./dnaworks -help```):
57 |
58 | ```
59 | COMMAND-LINE OPTIONS
60 | ==============================================================================
61 |
62 | The command line is as follows:
63 |
64 | % dnaworks [ inputfile ] [ -t0 | -t1 | -t2 | -t3 ]
65 |
66 | The default inputfile is 'DNAWORKS.inp'. All options, except for those
67 | on the command line, are read from the inputfile. See below for a complete
68 | description of the options.
69 |
70 | The flags -t0, -t1, -t2, and -t3 are for testing purposes. They report
71 | the internal actions within the program based on the level input.
72 |
73 | -t0 Relatively simple output, only subroutine names
74 | -t1 Most subroutine names reported
75 | -t2 Heavy output, all subroutines, some functions
76 | -t3 Way too much output, all subroutines and functions reported
77 |
78 | INPUTFILE OPTIONS
79 | ==============================================================================
80 |
81 | The input is case insensitive, except for quoted strings. Any string
82 | can be quoted, but it's not necessary unless the case must be preserved or
83 | if there are spaces or special characters (#,!). The quotes can
84 | be single or double, but must begin and end around the intended
85 | string.
86 |
87 | Any text that follows a '#' or '!' is considered comments, and will
88 | be ignored.
89 |
90 | Options in the inputfile are of the following types:
91 |
92 | [ S ] string
93 |
94 | Strings are converted to uppercase, unless quoted (either " or '')
95 |
96 | [ #I ] integer number
97 | [ #R ] real number
98 |
99 | Integers are, well, integers. Real numbers can be floating point numbers
100 | (e.g., 12.345) or scientific notation (e.g., -12.36E+4).
101 |
102 | [ name ] directive
103 |
104 | Directives are special strings that enable or disable particular functions.
105 | In general, only the first 4 or 5 characters are actually read, so they
106 | can be abbreviated.
107 |
108 | Directives must be placed flat against the left margin of the input file,
109 | otherwise they will be ignored.
110 |
111 | ------------------------------------------------------------------------------
112 |
113 | INPUT DIRECTIVES:
114 |
115 | [ tbio ]
116 |
117 | The method of gene synthesis employed by DNAWorks is termed
118 | 'thermodynamically balanced', in that all the oligonucleotides should
119 | assemble and anneal at the same temperature. The amplification occurs
120 | everywhere at once, and ideally can generate the gene with just one round
121 | of PCR. However, there are sticky cases where the gene does not amplify,
122 | and constructing the gene in pieces is not successful.
123 |
124 | A more controlled method of gene synthesis, termed 'thermodynamically
125 | balanced inside-out', was developed for cases where problems occurred
126 | during PCR synthesis (Gao, et al., 2003). In an assembly set of
127 | oligonucleotides, the first half of the oligos are all synthesized in the
128 | sense orientation, and the other half are synthesized as reverse complements
129 | in the anti-sense orientation of the gene. The gene assembly and amplification
130 | is thus done in steps of 0.4-0.6 kb from the center pair of
131 | oligonucleotides outward.
132 |
133 | Enabling tbio will enable thermodynamically balanced inside-out output.
134 |
135 |
136 | [ nogaps ]
137 |
138 |
139 | By default, DNAWorks will try to keep all oligos the same size as the chosen
140 | length. If the size is beyond the sizes required for the chosen Tm, gaps
141 | are introduced between overlap regions. The directive nogaps will keep oligos
142 | as short as possible, with no gaps between the overlap regions.
143 |
144 | Restricting oligos to no gaps may slow down the optimization somewhat, and
145 | may result in higher scores due to a higher probability of misprimes.
146 |
147 | ------------------------------------------------------------------------------
148 |
149 | INPUT OPTIONS:
150 |
151 |
152 | logfile [ S ]
153 |
154 |
155 | The default output file is 'LOGFILE.txt'. Entering a string after the
156 | logfile option will change the name of the logfile.
157 |
158 |
159 | title [ S ]
160 |
161 |
162 | It's always good to give the output a title to keep it unique and to give
163 | you an easy way to keep track of what the output is.
164 |
165 |
166 | timelimit [ #I ]
167 |
168 |
169 | Set a time limit for the run, in seconds. This keeps the program from
170 | running forever. A value of 0 (the default) means no limit.
171 |
172 |
173 | solutions [ #I ]
174 |
175 |
176 | Normally DNAWorks only generates a single solution for a set of parameters.
177 | Since the optimization involves a lot of random number calls, and since it is
178 | impossible to get to the 'true minimum' by Monte Carlo methods, sometimes
179 | generating more than one solution is a good thing. Look for the best
180 | solution in the end. The range is 1-99.
181 |
182 |
183 | melting [ #I ] [ low #I high #I ] [ tolerance #I ]
184 |
185 |
186 | This governs the chosen melting or annealing temperature for the oligos.
187 | Giving a single integer (between 55 and 75) will generate a single solution.
188 | A range of melting temperatures can be given with the low and high options,
189 | and a solution for each temperature will be generated. The tolerance value
190 | is by default +/- 1 degree, but it can be modified. Don't set it too high
191 | or the point of the program can be lost!
192 |
193 |
194 | length [ #I ] [ low #I high #I ] [ random ]
195 |
196 |
197 | This sets the ideal length of the oligo. Because the oligos can have gaps,
198 | they can be as long as you wish, but remember that errors accumulate in
199 | synthetic DNA oligos very quickly beyond around 50 nts!
200 |
201 | By default, an attempt is made to force all oligos to be the same size as the
202 | chosen length. On occasion this can lead to a higher probability of
203 | misprimes. Also, this can limit successful optimization when sequences
204 | are gapfixed (see below), since gap position and size will be limited. In
205 | this case, enabling the length directive random causes oligos to be
206 | designed with random length (between 20 nt and the length chosen).
207 |
208 |
209 | frequency [ threshold #I ] [ random ] [ strict ] [ scored ]
210 |
211 |
212 | The frequency threshold is the cutoff for which codons are used for
213 | reverse translation of protein sequences into DNA. For example, a value of
214 | 20 will allow only those codons whose frequencies equal or exceed 20%.
215 |
216 | By default, DNAWorks uses the highest frequency codons for the initial
217 | reverse translation of the protein sequences. Having the random option
218 | present causes the program to choose the initial codons at random.
219 |
220 | By default, DNAWorks always uses the two highest frequency codons for
221 | optimization. To override this default, enabling strict will
222 | force the program to strictly use only those codons that are within the
223 | chosen codon frequency threshold. Be careful, because setting a high
224 | codon frequency threshold (>20%) and strict will result in many protein
225 | residues with a single codon available, and thus very little room for
226 | optimization.
227 |
228 | To accelerate convergence, DNAWorks does not continuously score codon
229 | frequency. This is allowed because only the highest frequency codons are
230 | usually used. However, for the particularly picky user, enabling scored will
231 | force the program to continuously evaluate the codon frequency score. This
232 | will have the effect of increasing the overall frequency of codons (at
233 | the cost of other scores...).
234 |
235 |
236 | concentration [ oligo #R ] [ sodium #R ] [ magnesium #R ]
237 |
238 |
239 | The concentration of oligonucleotides, monovalent cations (Na+, K+), and
240 | magnesium in the PCR reaction can have profound effects on the annealing
241 | temperatures of the oligonucleotides. The user can enter the desired
242 | concentrations for the PCR reaction.
243 |
244 | The effect of these components on the annealing temperature is based on
245 | the program HyTher (Nicolas Peyret, Pirro Saro and John SantaLucia, Jr.).
246 |
247 | Values are in moles per liter, and can be entered in scientific notation
248 | for simplicity.
249 |
250 | Oligonucleotides must be between 100 uM (1E-4 M) and 1 nM (1E-9 M),
251 | monovalent cations must be between 10 and 1000 mM, and magnesium must be
252 | between 0 and 200 mM.
253 |
254 |
255 | repeat [ #I ]
256 |
257 |
258 | DNAWorks continuously monitors the synthetic gene for any repeats that
259 | occur within the gene. A repeat can be a direct repeat, an inverted
260 | repeat (which can result in a hairpin), or a palindromic repeat. If a
261 | repeat occurs that is above a certain length, it can lead to stable
262 | annealing of oligos to unexpected positions and mispriming. Such mispriming
263 | can result in either no PCR product, or a long smear on a gel.
264 |
265 | The value for repeat governs the minimum length of nucleotides considered
266 | a repeat. The default value is 8. Increasing this number will
267 | decrease the number of repeats found, while decreasing it will do the
268 | opposite.
269 |
270 |
271 | misprime [ #I ] [ tip #I ] [ max #I ]
272 |
273 |
274 | The major flaw to PCR-based gene synthesis is mispriming. This occurs when
275 | an oligo anneals to an unexpected position on the PCR template. To prevent
276 | this from happening, DNAWorks compares the ends of each oligo with the
277 | current synthetic sequence and analyzes its potential to anneal to that
278 | site.
279 |
280 | A misprime is a special variant of a repeat, in that it only occurs at the
281 | business end (3') of an oligo.
282 |
283 | The first number for misprime is the length of the sequence to compare. The
284 | default value is 18.
285 |
286 | The tip number is the number of nucleotides that must be exactly identical at
287 | the tip of the oligo. The default is 6. This value is based on little more
288 | than guessing, but increasing it will cause very few misprimes to be
289 | identified, and decreasing will cause too many to be identified.
290 |
291 | The max number is the maximum number of non-identical nucleotides in the
292 | misprime sequence. The default is 8. This number is again a guess. It
293 | is generally not understood why non-identical sequences anneal to each other,
294 | but it is based on structural and electrostatic principles that are way too
295 | difficult to incorporate into this program. Again, increasing the number
296 | results in too many misprimes to be identified, decreasing it causes too few.
297 |
298 | Needless to say, the misprime value is just plain prudence, but not
299 | necessarily fact.
300 |
301 |
302 | weight [ twt #R ] [ cwt #R ] [ rwt #R ] [ mwt #R ] [ gwt #R ] [ awt #R ]
303 | [ lwt #R ] [ pwt #R ] [ fwt #R ]
304 |
305 |
306 | DNAWorks optimizes a synthetic gene by evaluating the scores of a set of
307 | features: annealing temperature (T), codon frequency (C), repeat (R),
308 | misprime potential (M), GC- (G) and AT- (A) content, length (L), gapfix (F)
309 | and pattern constraining (P). The default weights of each individual feature
310 | score are set to 1. By increasing the weight of an individual feature, the
311 | final output can be nudged to favoring one feature over the others. For
312 | example, in the case where the potential synthetic genes for a set of
313 | sequences chronically suffer from a high number of repeats, increasing the
314 | weight of the repeat score (RWT) might decrease the final repeat score at
315 | the expense of the other feature scores.
316 |
317 | Beware, as modulating the weights is not fully tested. Remember that this
318 | merely skews the results toward one feature or another, and may do more
319 | harm than good. In most cases keeping the weights balanced is the best
320 | approach.
321 |
322 |
323 | previous [ #I ] [ S ]
324 |
325 |
326 | DNAWorks allows old sets of oligonucleotides to be read back with a new,
327 | mutant gene. It then calculates scores for the mutant gene with overlap
328 | positions and parameters identical to the original solution. It then
329 | outputs only those oligonucleotides that need to be changed. This is very
330 | useful for generating mutants, since in general only one or two new oligos
331 | need to be synthesized.
332 |
333 | The integer refers to the previous solution number, and the string is the
334 | name of the previous logfile.
335 |
336 | ------------------------------------------------------------------------------
337 |
338 | INPUT SECTIONS:
339 |
340 | nucleotide [ reverse | gapfix ]
341 | ...
342 | //
343 |
344 | Nucleotide sequences can only include A,C,G, or T in the nucleotide
345 | section. They can also include degenerate sequences:
346 |
347 | B = C or G or T rev. compl. = V
348 | D = A or G or T rev. compl. = H
349 | H = A or C or T rev. compl. = D
350 | K = G or T rev. compl. = M
351 | M = A or C rev. compl. = K
352 | N = A or C or G or T rev. compl. = N
353 | R = A or G rev. compl. = Y
354 | S = C or G rev. compl. = S
355 | V = A or C or G rev. compl. = B
356 | W = A or T rev. compl. = W
357 | Y = C or T rev. compl. = R
358 |
359 | protein [ reverse | gapfix ]
360 | ...
361 | //
362 |
363 | Protein sequences can be input through the protein section, but can only
364 | include the single-letter abbreviations of the 20 standard amino acids
365 | (A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y). Stop codons are designated by X.
366 |
367 | The reverse directive causes the nucleotide sequence (either original or
368 | translated from the protein sequence) to be reversed on incorporation in
369 | the synthetic gene.
370 |
371 | The gapfix directive is used when the sequence should not fall within
372 | overlap regions, but rather only in the gaps or overhangs that are single
373 | stranded in the annealed assembly prior to PCR. This is advantageous for
374 | subsequent mutations by oligonucleotide replacement. For example, if a
375 | synthetic gene will be exhaustively mutated at a single codon, having the
376 | codon entirely within a gap region will allow its mutation by replacing a
377 | single oligonucleotide, rather than two or three.
378 |
379 | The gapfix directive will enable Fixed Gap Scoring. Any nt that are
380 | designated as gapfixed but fall within overlap regions will increase the
381 | global score. DNAWorks will then try to minimize the score by moving the
382 | gap regions toward the gapfixed nucleotides. Because gap regions are
383 | generally short (less than 10 nt), the sequence should be very short.
384 | Otherwise the global score will remain quite high, and other features (Tm,
385 | repeats, misprimes) will not receive as much attention.
386 |
387 | Gapfixing is much more effective when oligo lengths are allowed to be
388 | randomized, rather than fixed to the length chosen by default. See
389 | length option, above, for more details.
390 |
391 |
392 | codon [ ecoli2 | E. coli | C. elegans | D. melanogaster | H. sapiens |
393 | M. musculus | R. novegicus | S. cerevesiae | X. laevis | P. pastoris ]
394 | [ ...
395 | // ]
396 |
397 | Codon frequencies can be entered manually in the codon section using
398 | GCG-format codon frequencies. If a directive corresponding to a given
399 | organism is present, the codon frequency for that organism will be used.
400 |
401 | pattern
402 | ...
403 | //
404 |
405 | Nucleotide patterns can be screened if entered in the pattern section.
406 | Pattern sequences can be normal or degenerate nucleotide sequences.
407 | ```
408 |
409 |
410 | Good luck!
411 |
--------------------------------------------------------------------------------
/control_func.f90:
--------------------------------------------------------------------------------
SUBROUTINE Get_Args
!
! Parse the command line.
!
! Each argument is examined in turn.  Arguments that begin with one of the
! recognised switches set the corresponding flag:
!
!   -t3  sets TEST3, TEST2, TEST1 and TEST0
!   -t2  sets TEST2, TEST1 and TEST0
!   -t1  sets TEST1 and TEST0
!   -t0  sets TEST0
!   -q   sets QUIET
!   -fast sets FAST
!   -help calls Print_Help
!
! Any other argument is taken as the name of the input file; if several
! such arguments are given, the last one wins.
!
! The argument is read into a single scalar buffer rather than a fixed
! array of short strings, so neither the number of arguments nor a file
! name longer than 30 characters can overflow or be cut short here (the
! only remaining length limit is the declared length of inputfile itself).

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=1024) :: ARGV    ! the command line argument being examined
  INTEGER :: ARGC                ! number of command line arguments
  INTEGER :: i

! IARGC returns the total number of arguments on the command line

  ARGC=IARGC()

! GETARG returns the argument that corresponds to the argument number, with
! zero equal to the command itself

  DO i=1,ARGC
    CALL GETARG(i,ARGV)

! Switches are recognised by prefix: the switch text must start at the
! first character of the argument

    IF (INDEX(ARGV,"-t3").eq.1) THEN
      TEST3=.TRUE.
      TEST2=.TRUE.
      TEST1=.TRUE.
      TEST0=.TRUE.
    ELSE IF (INDEX(ARGV,"-t2").eq.1) THEN
      TEST2=.TRUE.
      TEST1=.TRUE.
      TEST0=.TRUE.
    ELSE IF (INDEX(ARGV,"-t1").eq.1) THEN
      TEST1=.TRUE.
      TEST0=.TRUE.
    ELSE IF (INDEX(ARGV,"-t0").eq.1) THEN
      TEST0=.TRUE.
    ELSE IF (INDEX(ARGV,"-q").eq.1) THEN
      QUIET=.TRUE.
    ELSE IF (INDEX(ARGV,"-fast").eq.1) THEN
      FAST=.TRUE.
    ELSE IF (INDEX(ARGV,"-help").eq.1) THEN
      CALL Print_Help
    ELSE

! Anything that is not a switch is the input file name

      inputfile=ARGV
    END IF
  END DO

END SUBROUTINE Get_Args
subroutine Oligo_Design(SolutionNo,num)
!
! Main optimization engine: a simulated-annealing loop around
! Mutate_Sequence and Generate_Overlaps.
!
!   SolutionNo  current solution number, handed on to Generate_Overlaps
!               and to the final print routines
!   num         unit that receives the progress and "limit reached" lines
!
! Starting from CurrDNA, the routine repeatedly backs the current solution
! up into StoreDNA, mutates it, rescores it and accepts or rejects the
! change with the Metropolis criterion.  The lowest-scoring solution seen
! is kept in BestDNA and is copied back into CurrDNA before the final
! reports are printed.

  use dnaworks_data
  use dnaworks_test
  implicit none

  integer :: SolutionNo          ! current solution number
  integer :: num                 ! where to print output?
  integer,external :: CurrentTimeSeconds

  integer,parameter :: nlimit = 50   ! accepted changes that end a temperature step early
  integer,parameter :: nover = 500   ! mutation attempts per temperature step
  real,parameter :: tfactr = 0.96    ! factor applied to the temperature after each step

  integer :: i,j
  integer :: main_count          ! total number of mutation attempts so far
  integer :: timediff            ! seconds elapsed since MainTimeStart
  integer :: maxlimit            ! attempts without a new best before giving up
  integer :: nsucc               ! accepted changes in the current temperature step
  integer :: count               ! attempts since BestDNA was last improved
  logical :: ans                 ! .true. when the Metropolis test accepts the change
  real :: t                      ! annealing temperature
  real :: gain                   ! new overall score minus previous overall score
  real :: rand                   ! uniform random number for the Metropolis test

  if (TEST0) print *,"Oligo_Design" !TEST0

  nsucc = 0
  count = 0
  main_count = 0
  t = 0.5
  if (FAST) then
    maxlimit = 300
  else
    maxlimit = 1500
  end if

  write(unit=console,fmt="('')")

  ! the starting point is also the best solution known so far
  BestDNA = CurrDNA

  tempdrop: do i = 1, 1000       ! at most 1000 temperature steps

    ! nothing can be optimized when there are no protein residues and the
    ! oligo length is not randomized: keep the current solution and leave
    if (PROTlen.eq.0 .and. (.not.OligoLenRandom)) then
      BestDNA = CurrDNA
      exit tempdrop
    end if

    nsucc = 0

    mutate: do j = 1, nover

      ! back up the current solution so a rejected change can be undone
      StoreDNA = CurrDNA

      ! only protein-derived sequence is mutated
      if (PROTlen.gt.0) call Mutate_Sequence
      main_count = main_count + 1

      ! rebuild the overlaps for the (possibly) mutated sequence
      call Generate_Overlaps(SolutionNo)

      if (mod(CurrDNA%NumOlaps,2).eq.0) then
        call Stop_Program("Even number of overlaps. Try adjusting parameters.")
      end if

      gain = CurrDNA%OverallScore - StoreDNA%OverallScore

      ! Metropolis criterion: a lower score is always accepted, a higher
      ! one with probability exp(-gain/t)
      call random_number(rand)
      ans = (gain.lt.0.0).or.(rand.lt.exp(-gain/t))
      if (.not.ans) then
        CurrDNA = StoreDNA       ! rejected: restore the backup
      else
        nsucc = nsucc + 1
      end if

      ! track the best solution and how long it has gone unimproved
      if (CurrDNA%OverallScore.lt.BestDNA%OverallScore) then
        BestDNA = CurrDNA
        count = 0
      else
        count = count + 1
      end if

      ! keep the user informed every 100 attempts
      if (mod(main_count,100).eq.0) then
        write(unit=num, &
              fmt="(6x,i5,' optimization rounds, best = ',f9.3,' Rep =',i4,' Mis =',i4)") &
              main_count,BestDNA%OverallScore,BestDNA%RN,BestDNA%MSN
      end if

      ! too many attempts (more than maxlimit) without a new best: give up
      if (count.gt.maxlimit) exit tempdrop

      ! enough accepted changes at this temperature: go on to the next step
      if (nsucc.ge.nlimit) exit mutate

      ! dump out if out of time (a limit of 0 means no limit)
      timediff = CurrentTimeSeconds() - MainTimeStart
      if (MainTimeLimit.gt.0 .and. timediff.ge.MainTimeLimit) then
        write(unit=console,fmt="(/,'Main time limit reached.')")
        write(unit=outputnum,fmt="(/,'Main time limit reached.')")
        TimesUp = .true.
        exit mutate
      end if

      call flush(console)

    end do mutate

    ! drop the temperature
    t = t*tfactr

    ! stop when a whole step produced no accepted change, the temperature
    ! is too low, or the time limit has been reached
    if (nsucc.eq.0 .or. t.lt.0.0001 .or. TimesUp) then
      write(unit=num,fmt="('Limit of simulated annealing, quitting.')")
      exit tempdrop
    end if

    ! stop when the best score is good enough
    if (BestDNA%OverallScore.lt.0.001) exit tempdrop

  end do tempdrop

  ! push the best solution found back into the current solution
  CurrDNA = BestDNA

  call Revert_Degenerates
  call Print_FinalDNA_Log(outputnum,SolutionNo)
  call Print_Scores_Log(console)
  call Print_Scores_Log(outputnum)
  call Print_Histogram(outputnum,SolutionNo)
  call Print_Pattern_Screen(outputnum)
  call Print_Oligo_Log(outputnum)

end subroutine Oligo_Design
subroutine Run_Dnaworks()
!
! Outer driver of a run.
!
! Loops over every melting temperature from MeltTempLo to MeltTempHi, every
! oligo length from OligoLenLo to OligoLenHi, and NumberOfSolutions trials
! for each combination.  Each trial translates the protein, builds the
! initial overlaps and, when the number of overlaps is odd, hands over to
! Oligo_Design.  The whole nest is abandoned as soon as the main time limit
! is reached.  On return TotalNumberOfSolutions holds the number of trials
! that were actually started.

  use dnaworks_data
  use dnaworks_test
  implicit none

  integer :: i,j,l
  integer :: timediff            ! seconds elapsed since MainTimeStart
  integer,external :: CurrentTimeSeconds

  if (TEST0) print *,"Run_Dnaworks" !TEST0

  melt: do j = MeltTempLo, MeltTempHi
    MeltTemp = j

    oligo: do l = OligoLenLo, OligoLenHi
      OligoLen = l

      main: do i = 1, NumberOfSolutions

        SequenceTranslated = .false.
        call Translate_Protein

        ! dump out if out of time (a limit of 0 means no limit); the
        ! parameters are recorded against the current solution number
        ! before leaving all three loops
        timediff = CurrentTimeSeconds() - MainTimeStart
        if (MainTimeLimit.gt.0 .and. timediff.ge.MainTimeLimit) then
          write(unit=console,fmt="(/,'Main time limit reached.')")
          write(unit=outputnum,fmt="(/,'Main time limit reached.')")
          TimesUp = .true.
          FinalScore(CurrSolutionNo)%Oligo = OligoLen
          FinalScore(CurrSolutionNo)%MeltT = MeltTemp
          exit melt
        end if

        ! a new trial starts here
        CurrSolutionNo = CurrSolutionNo + 1

        call Print_Param_Log(console,CurrSolutionNo)
        call Print_Param_Log(outputnum,CurrSolutionNo)
        call flush(console)

        call Generate_Overlaps(CurrSolutionNo)

        if (mod(CurrDNA%NumOlaps,2).ne.0) then

          ! everything is ok: optimize, report timing, and record the
          ! parameters this solution was generated with
          call Oligo_Design(CurrSolutionNo,console)
          call Print_Estimated_Time(CurrSolutionNo)
          FinalScore(CurrSolutionNo)%Oligo = OligoLen
          FinalScore(CurrSolutionNo)%MeltT = MeltTemp

        else

          ! an even number of overlaps: abandon this trial and mark its
          ! score with a large sentinel value
          if (.not.QUIET) then
            write(unit=console,fmt="('Even number of overlaps - trial ',i4,' abandoned')") CurrSolutionNo
            write(unit=outputnum,fmt="('Even number of overlaps - trial ',i4,' abandoned')") CurrSolutionNo
          end if
          FinalScore(CurrSolutionNo)%FinaScore = 999999

        end if

      end do main
    end do oligo
  end do melt

  ! in case optimization stopped prematurely
  TotalNumberOfSolutions = CurrSolutionNo

end subroutine Run_Dnaworks
249 | SUBROUTINE Stop_Program(message)
250 | !
251 | ! This subroutine stops the program and displays an error message.
252 | ! The message goes to the console and, when the logfile unit is connected,
253 | ! to the logfile too.  Callers can run before the logfile is opened; a
254 | ! write to the unconnected unit typically creates a stray fort.<unit> file.
255 |
256 |   USE dnaworks_data
257 |   USE dnaworks_test
258 |   IMPLICIT NONE
259 |
260 |   CHARACTER(LEN=*),INTENT(IN) :: message   ! text of the error to report
261 |   LOGICAL :: logopen                       ! is the logfile unit connected?
262 |
263 |   IF (TEST0) PRINT *,"Stop_Program" !TEST0
264 |
265 |   WRITE(UNIT=console,FMT="(' ',/,'Program error:',/,a,/,'Exiting program now')") message
266 |   CALL FLUSH(console)
267 |
268 |   INQUIRE(UNIT=outputnum,OPENED=logopen)
269 |   IF (logopen) THEN
270 |     WRITE(UNIT=outputnum,FMT="(' ',/,'Program error:',/,a,/,'Exiting program now')") message
271 |     CLOSE (UNIT=outputnum)
272 |   END IF
273 |
274 |   STOP
275 |
276 | END SUBROUTINE Stop_Program
277 |
--------------------------------------------------------------------------------
/dnaworks.f90:
--------------------------------------------------------------------------------
1 | PROGRAM dnaworks
2 | !
3 | ! Main driver: reads the input, opens the logfile, runs every trial via
4 | ! Run_Dnaworks, prints the final tally and mails the logfile on request.
5 |
6 |   USE dnaworks_data
7 |   USE dnaworks_test
8 |   IMPLICIT NONE
9 |
10 |   INTEGER,EXTERNAL :: CurrentTimeSeconds
11 |
12 |   IF (TEST0) PRINT *,"DNAWORKS start" !TEST0
13 |   CALL RANDOM_SEED()
14 |
15 |   MainTimeStart=CurrentTimeSeconds() ! when does the run begin?
16 |
17 |   CALL Get_Args ! get the command arguments, if any
18 |   CALL Default_Param
19 |   CALL Read_Input
20 |
21 |   ! Reset weights if there is no protein to mutate
22 |
23 |   IF (PROTlen.eq.0) THEN
24 |     Cwt=0.0 ! weight for codon scoring
25 |     Rwt=0.0 ! weight for repeat scoring
26 |     Gwt=0.0 ! weight for GC scoring
27 |     Awt=0.0 ! weight for AT scoring
28 |     Pwt=0.0 ! weight for pattern scoring
29 |   END IF
30 |
31 |   ! Start the logfile first, then validate, so that Stop_Program writes
32 |   ! its error to the opened logfile instead of to an unconnected unit
33 |
34 |   OPEN (UNIT=outputnum,FILE=outputfile,FORM="FORMATTED",STATUS="REPLACE")
35 |   IF (DNAlen.LE.50) CALL Stop_Program("DNA length is less than 50 nt.") ! NOTE(review): rejects exactly 50 too
36 |
37 |   CALL Print_Output_Start(outputnum)
38 |   CALL Print_Output_Start(console)
39 |   CALL FLUSH(console)
40 |   CALL Print_Seq_Log(outputnum)
41 |   IF (PROTlen.gt.0) CALL Print_Codon_Log(outputnum)
42 |   CALL Print_Pattern_Log(outputnum)
43 |   ! CALL Print_TranslatedDNA(outputnum)
44 |
45 |   ! determine the number of solutions (i4 is wide enough for the 9999 cap)
46 |
47 |   TotalNumberOfSolutions=(NumberOfSolutions*(MeltTempHi-MeltTempLo+1)* &
48 |   & (OligoLenHi-OligoLenLo+1))
49 |   IF (TotalNumberOfSolutions.gt.9999) CALL Stop_Program("Too many trials. Limit the range of parameters.")
50 |   WRITE(UNIT=console,FMT="('')")
51 |   WRITE(UNIT=console,FMT="(20x,'Starting ',i4,' trial',$)") TotalNumberOfSolutions
52 |   IF (TotalNumberOfSolutions.gt.1) WRITE(UNIT=console,FMT="('s',$)")
53 |   WRITE(UNIT=console,FMT="('...')")
54 |   CALL FLUSH(console)
55 |
56 |   CALL Run_Dnaworks()
57 |
58 |   CALL Print_Final_Tally(console)
59 |   CALL Print_Output_End(console)
60 |   CALL FLUSH(console)
61 |   CALL Print_Final_Tally(outputnum)
62 |   CALL Print_Output_End(outputnum)
63 |
64 |   CLOSE (UNIT=outputnum)
65 |
66 |   IF (LEN_TRIM(email).GT.5) CALL Send_Email ! mail only when an address was given
67 |
68 |   WRITE(UNIT=console,FMT="(' ')")
69 |   WRITE(UNIT=console,FMT="('Finished ')")
70 | END PROGRAM dnaworks
71 |
--------------------------------------------------------------------------------
/dnaworks_data.f90:
--------------------------------------------------------------------------------
1 | MODULE dnaworks_data
2 |
3 | IMPLICIT NONE
4 | SAVE
5 |
6 | ! GLOBAL variables shared by every program unit that USEs this module
7 | ! NOTE(review): Default_Param runs at start-up and presumably overrides some initial values below - confirm
8 | INTEGER :: console=6 ! print to console
9 | INTEGER :: inputnum=9 ! input files
10 | INTEGER :: outputnum=10 ! output logfile
11 | INTEGER :: oldlognum=11 ! old logfile output
12 | INTEGER :: PrevTrial=0 ! previous trial to fix oligos
13 | INTEGER :: OligoLen=40 ! user input oligo size
14 | INTEGER :: OligoLenHi=40 ! user input oligo size (upper limit)
15 | INTEGER :: OligoLenLo=40 ! user input oligo size (lower limit)
16 | LOGICAL :: OligoLenRandom=.FALSE. ! allow oligolen to vary (original note was cut off after "between 20") - TODO confirm range
17 | INTEGER :: MeltTemp=60 ! Ideal melting temperature
18 | INTEGER :: MeltTempHi=60 ! Ideal melting temperature (upper limit)
19 | INTEGER :: MeltTempLo=60 ! Ideal melting temperature (lower limit)
20 | INTEGER :: MeltTol=1 ! Tolerance for melting temperature deviation
21 | INTEGER :: SeqOptimToler=50 ! Lowest allowed codon frequency
22 | INTEGER :: TotalNumberOfSolutions ! NumberOfSolutions x melt-temp range x oligo-length range; reset to trials actually started
23 | INTEGER :: NumberOfSolutions=1 ! trials per melt-temp / oligo-length combination
24 | INTEGER :: RepLen=8 ! determines the size of repeats to minimize
25 | INTEGER :: MPLn=18 ! length of misprimes
26 | INTEGER :: MPTip=6 ! identical tip of the misprime, in nts
27 | INTEGER :: MaxPROTlen=3333 ! maximum number of protein residues
28 | INTEGER :: MaxDNAlen=9999 ! maximum number of nucleotide residues
29 | INTEGER :: MaxNonId=8 ! maximum number of non-identical nts in misprime
30 | INTEGER :: MutProtPos=0 ! which codon should be mutated
31 | INTEGER :: MutNtPos(3) ! which nts are mutated
32 | INTEGER :: MutNtNum=0 ! how many nts are mutated (zero if none)
33 | INTEGER :: nt2aa(9999) ! DNApos to aa (1-21) or 0
34 | INTEGER :: nt2overlap(9999) ! DNApos to overlap or 0
35 | INTEGER :: nt2Solig(9999) ! DNApos to oligo or 0
36 | INTEGER :: nt2Aolig(9999) ! DNApos to antisense oligo or 0
37 | INTEGER :: nt2prot(9999) ! DNApos to PROTpos or 0
38 | INTEGER :: prot2aa(3333) ! PROTpos to aa (1-21)
39 | INTEGER :: prot2nt(3333) ! PROTpos to DNApos (middle nt of codon)
40 | INTEGER :: DNAlen=0 ! the length of the entire DNA
41 | INTEGER :: PROTlen=0 ! the length of all the proteins
42 | INTEGER :: mutPROTnum=0 ! the number of mutated aa
43 | INTEGER :: mutPROT2prot(3333) ! mutPROTpos to PROTpos
44 | INTEGER :: NumberOfChains=0 ! number of isolated protein chains
45 | INTEGER :: prot2chain(3333) ! prot pos to protein number (NumberOfChains)
46 | LOGICAL :: ChainReverse(99) ! true if chain is reversed, indexed by chain number (presumably 1..NumberOfChains)
47 | LOGICAL :: ChainGapFix(99) ! presumably true if chain is to be gapfixed (cf. SeqGapFix), indexed by chain number - TODO confirm
48 |
49 | CHARACTER(LEN=9999) :: INITseq='' ! initial input sequence (DNA and prot)
50 |
51 | ! for degenerate nt
52 | CHARACTER(LEN=9999) :: ORIGDNAseq='' ! the original dna sequence
53 | INTEGER :: NumDegPos=0 ! total number of degenerate nt
54 | INTEGER :: DegPos(999) ! positions of degenerate sequences
55 | INTEGER :: CurrSolutionNo=0 ! number of the current trial; Run_Dnaworks increments it as each trial starts
56 |
57 | INTEGER :: INITlen=0 ! the length of the initial input sequence
58 | INTEGER :: NumberOfSeq=0 ! number of sequences, DNA or protein
59 | INTEGER :: INIT2Seq(9999) ! presumably INITseq position to sequence number - TODO confirm
60 | LOGICAL :: SeqIsProt(99) ! true if sequence is prot, false if DNA
61 | LOGICAL :: SeqReverse(99) ! true if sequence is reversed, indexed by seq number (NumberOfSeq)
62 | LOGICAL :: SeqGapFix(99) ! true if sequence is to be gapfixed, indexed by seq number (NumberOfSeq)
63 | CHARACTER(LEN=80) :: email='' ! address the logfile is mailed to; mail is sent only when longer than 5 characters
64 | CHARACTER(LEN=80) :: jobname='' ! appears in the subject line of the results email
65 | CHARACTER(LEN=80) :: OLDjobname=''
66 | CHARACTER(LEN=30) :: oldlogfile="OLDLOGFILE.txt"
67 | CHARACTER(LEN=30) :: inputfile="DNAWORKS.inp"
68 | CHARACTER(LEN=30) :: outputfile="LOGFILE.txt" ! logfile opened on unit outputnum
69 | CHARACTER(LEN=256) :: InputArray(9999) ! contents of DNAWORKS.inp
70 | CHARACTER(LEN=256) :: InputArrayUC(9999) ! contents of DNAWORKS.inp, uppercase
71 | INTEGER :: InputArrayNum ! number of lines in DNAWORKS.inp
72 |
73 | CHARACTER(LEN=9999) :: SCRATCH='' ! scratch string for various calls
74 | CHARACTER(LEN=9999) :: OLDDNAseq='' ! DNA sequence from previous trial
75 | CHARACTER(LEN=3333) :: PROTseq='' ! protein sequence
76 | CHARACTER(LEN=3333) :: OLDPROTseq='' ! protein sequence from previous trial
77 |
78 | CHARACTER(LEN=64) :: bar64 = "----------------------------------------------------------------"
79 | CHARACTER(LEN=80) :: bar80 = "--------------------------------------------------------------------------------"
80 | INTEGER :: MainTimeLimit=0 ! time limit for entire run, in seconds; 0 means no limit
81 | INTEGER :: MainTimeStart ! for time control of the program; set from CurrentTimeSeconds() at start-up
82 | REAL :: Twt=1.0 ! weight for MeltTm scoring
83 | REAL :: Cwt=1.0 ! weight for codon scoring
84 | REAL :: Rwt=1.0 ! weight for repeat scoring
85 | REAL :: Mwt=1.0 ! weight for mispriming scoring
86 | REAL :: Gwt=1.0 ! weight for GC scoring
87 | REAL :: Awt=1.0 ! weight for AT scoring
88 | REAL :: Lwt=1.0 ! weight for length scoring
89 | REAL :: Pwt=1.0 ! weight for pattern scoring
90 | REAL :: Fwt=1.0 ! weight for gap fixing
91 | REAL :: XScore(3333) ! codon-based total score for mutation
92 | LOGICAL :: CodonStrict=.FALSE. ! use strict frequency threshold
93 | LOGICAL :: ScoreCodons=.FALSE. ! calculate codon scores
94 | LOGICAL :: CodonRandom=.FALSE. ! translate using random codons
95 | LOGICAL :: MutantRun=.FALSE. ! if this is a mutation only run
96 | LOGICAL :: GapFix=.FALSE. ! are any positions fixed in the gaps?
97 | INTEGER :: LogfileOffset=1 ! how many blank characters precede the line?
98 | LOGICAL :: JACEK=.FALSE.
99 | CHARACTER(LEN=80) :: MAILPATH="/usr/bin/Mail" ! mail program run by Send_Email
100 | LOGICAL :: TBIO=.FALSE. ! presumably set by the 'tbio' input directive - TODO confirm meaning
101 | LOGICAL :: NOGAPS=.FALSE. ! if no gaps are desired
102 | LOGICAL :: QUIET=.FALSE. ! when true, Run_Dnaworks does not print abandoned-trial notices
103 | LOGICAL :: FAST=.FALSE. ! cut corners
104 | LOGICAL :: TimesUp=.FALSE. ! set by Run_Dnaworks once MainTimeLimit has been reached
105 | LOGICAL :: SequenceTranslated=.FALSE. ! if false, generate all scores; if
106 | ! true, only generate scores that
107 | ! change when overlaps change
108 | REAL :: OligoConc=2e-7 ! 200 nM oligo
109 | REAL :: SodiumConc=5e-2 ! 50 mM sodium
110 | REAL :: MgConc=2e-3 ! 2 mM magnesium
111 | REAL :: RGasConstant=1.9872 ! gas constant
112 | REAL :: Kelvin=273.15 ! conversion from kelvin to celsius
113 | REAL :: OligoCorr ! correction factor for oligo conc.
114 | REAL :: OligoCorrSC ! correction factor for self-comp. oligo
115 | REAL :: SaltCorr ! correction factor for cations
116 |
117 | ! PATTERNS
118 | ! One named sequence pattern; field notes are inferred from names - TODO confirm against the pattern code
119 | TYPE Pattern
120 | CHARACTER(LEN=80) :: SeqRC ! presumably the reverse complement of Seq
121 | CHARACTER(LEN=80) :: Seq ! presumably the pattern sequence
122 | INTEGER :: Len ! presumably the number of characters used in Seq
123 | CHARACTER(LEN=80) :: Name
124 | LOGICAL :: SelfCompl ! presumably true when Seq equals its reverse complement
125 | LOGICAL :: Degen ! presumably true when Seq contains degenerate nts
126 | LOGICAL :: Isoschiz
127 | END TYPE
128 |
129 | TYPE(Pattern) :: PTN(999) ! pattern table, capacity 999
130 | INTEGER :: PTNnum=0 ! presumably the number of PTN entries in use
131 |
132 | ! SOLUTIONS
133 | ! Summary of one trial, stored in FinalScore under the trial number
134 | TYPE Tally
135 | REAL :: InitScore
136 | REAL :: FinaScore ! Run_Dnaworks sets this to 999999 for an abandoned trial
137 | REAL :: TmRange
138 | INTEGER :: NumOligs
139 | INTEGER :: Oligo ! OligoLen in effect for the trial
140 | INTEGER :: MeltT ! MeltTemp in effect for the trial
141 | INTEGER :: LongestOligo
142 | INTEGER :: Repeats
143 | INTEGER :: Misprimes
144 | INTEGER :: LowestOlap
145 | END TYPE
146 |
147 | TYPE(Tally) :: FinalScore(9999) ! indexed by trial number (CurrSolutionNo)
148 |
149 | ! TEST
150 | ! One record of test results; field meanings are inferred from names - TODO confirm where Test_Scores is filled
151 | TYPE Test_Tally
152 | REAL :: Score
153 | INTEGER :: Oligo
154 | INTEGER :: MeltT
155 | INTEGER :: Count
156 | INTEGER :: Time
157 | END TYPE
158 |
159 | TYPE(Test_Tally) :: Test_Scores(400) ! capacity 400 records
160 |
161 | ! TABLES
162 | ! One entry per codon of the codon frequency table CFT
163 | TYPE KnownCodon
164 | CHARACTER(LEN=3) :: Seq
165 | CHARACTER(LEN=3) :: AA3
166 | CHARACTER(LEN=1) :: AA1
167 | CHARACTER(LEN=3) :: SeqRC ! Reverse complement of sequence
168 | INTEGER :: num(3) ! numerical representation of codon
169 | INTEGER :: numRC(3) ! presumably numerical representation of the reverse-complement codon (SeqRC) - TODO confirm
170 | REAL :: Freq
171 | REAL :: Number
172 | LOGICAL :: Check
173 | END TYPE KnownCodon
174 |
175 | TYPE(KnownCodon) :: CFT(64) ! Codon Frequency Table
176 |
177 | TYPE KnownAA ! one entry per amino acid of the amino acid table AAT
178 | CHARACTER(LEN=3) :: AA3
179 | CHARACTER(LEN=1) :: AA1
180 | REAL :: Freq(10) ! presumably one frequency per codon slot in Codon - TODO confirm
181 | REAL :: NumberSum
182 | INTEGER :: NumOfCodons
183 | INTEGER :: NumOfActiveCodons
184 | INTEGER :: Codon(10) ! presumably indices into CFT, up to 10 per amino acid - TODO confirm
185 | END TYPE KnownAA
186 |
187 | TYPE(KnownAA) :: AAT(21) ! Amino Acid Table
188 |
189 | ! Degenerate sequences
190 | ! Each entry holds one degenerate nt code and up to four nts (NumSeq / Seq); exact semantics - TODO confirm
191 | TYPE DegenerateSeq ! Table of degenerate sequences
192 | CHARACTER(LEN=1) :: DegNT
193 | INTEGER :: NumOfNT
194 | INTEGER :: NumSeq(4)
195 | CHARACTER(LEN=1) :: Seq(4)
196 | END TYPE DegenerateSeq
197 |
198 | TYPE(DegenerateSeq) :: DegenSeq(11) ! 11 entries; the DNA type's DegenNum index runs 1-11
199 |
200 | ! PRE-EXISTING CFTs (codon frequency tables)
201 | ! Each table is a (3,64) array of LEN=5 strings: per codon, the 3-letter amino acid, the codon and its frequency
202 | CHARACTER(LEN=30) :: Organism
203 | ! ecoli2CFT: presumably an alternative E. coli table (cf. ecoCFT) - TODO confirm source
204 | CHARACTER(LEN=5),DIMENSION(3,64) :: ecoli2CFT = &
205 | RESHAPE( (/&
206 | "Gly ","GGG ","0.044","Gly ","GGA ","0.020","Gly ","GGT ","0.508","Gly ","GGC ","0.428",&
207 | "Glu ","GAG ","0.247","Glu ","GAA ","0.754","Asp ","GAT ","0.461","Asp ","GAC ","0.540",&
208 | "Val ","GTG ","0.268","Val ","GTA ","0.200","Val ","GTT ","0.398","Val ","GTC ","0.135",&
209 | "Ala ","GCG ","0.323","Ala ","GCA ","0.240","Ala ","GCT ","0.275","Ala ","GCC ","0.161",&
210 | "Arg ","AGG ","0.003","Arg ","AGA ","0.006","Ser ","AGT ","0.045","Ser ","AGC ","0.243",&
211 | "Lys ","AAG ","0.215","Lys ","AAA ","0.786","Asn ","AAT ","0.173","Asn ","AAC ","0.828",&
212 | "Met ","ATG ","1.000","Ile ","ATA ","0.006","Ile ","ATT ","0.335","Ile ","ATC ","0.659",&
213 | "Thr ","ACG ","0.127","Thr ","ACA ","0.047","Thr ","ACT ","0.291","Thr ","ACC ","0.536",&
214 | "Trp ","TGG ","1.000","End ","TGA ","0.352","Cys ","TGT ","0.389","Cys ","TGC ","0.612",&
215 | "End ","TAG ","0.076","End ","TAA ","0.630","Tyr ","TAT ","0.352","Tyr ","TAC ","0.648",&
216 | "Leu ","TTG ","0.055","Leu ","TTA ","0.034","Phe ","TTT ","0.291","Phe ","TTC ","0.709",&
217 | "Ser ","TCG ","0.074","Ser ","TCA ","0.048","Ser ","TCT ","0.324","Ser ","TCC ","0.266",&
218 | "Arg ","CGG ","0.008","Arg ","CGA ","0.011","Arg ","CGT ","0.643","Arg ","CGC ","0.330",&
219 | "Gln ","CAG ","0.814","Gln ","CAA ","0.187","His ","CAT ","0.298","His ","CAC ","0.702",&
220 | "Leu ","CTG ","0.767","Leu ","CTA ","0.008","Leu ","CTT ","0.056","Leu ","CTC ","0.080",&
221 | "Pro ","CCG ","0.719","Pro ","CCA ","0.153","Pro ","CCT ","0.112","Pro ","CCC ","0.016"/), (/3,64/) )
222 | ! celCFT (presumably C. elegans - TODO confirm): per codon, 3-letter amino acid, codon, frequency
223 | CHARACTER(LEN=5),DIMENSION(3,64) :: celCFT = &
224 | RESHAPE( (/&
225 | "Gly ","GGG ","0.08 ","Gly ","GGA ","0.59 ","Gly ","GGT ","0.20 ","Gly ","GGC ","0.12 ", &
226 | "Glu ","GAG ","0.38 ","Glu ","GAA ","0.62 ","Asp ","GAT ","0.68 ","Asp ","GAC ","0.32 ", &
227 | "Val ","GTG ","0.23 ","Val ","GTA ","0.16 ","Val ","GTT ","0.39 ","Val ","GTC ","0.22 ", &
228 | "Ala ","GCG ","0.13 ","Ala ","GCA ","0.31 ","Ala ","GCT ","0.36 ","Ala ","GCC ","0.20 ", &
229 | "Arg ","AGG ","0.08 ","Arg ","AGA ","0.29 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.10 ", &
230 | "Lys ","AAG ","0.41 ","Lys ","AAA ","0.59 ","Asn ","AAT ","0.62 ","Asn ","AAC ","0.38 ", &
231 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.16 ","Ile ","ATT ","0.53 ","Ile ","ATC ","0.31 ", &
232 | "Thr ","ACG ","0.15 ","Thr ","ACA ","0.34 ","Thr ","ACT ","0.32 ","Thr ","ACC ","0.18 ", &
233 | "Trp ","TGG ","1.00 ","End ","TGA ","0.39 ","Cys ","TGT ","0.55 ","Cys ","TGC ","0.45 ", &
234 | "End ","TAG ","0.18 ","End ","TAA ","0.44 ","Tyr ","TAT ","0.56 ","Tyr ","TAC ","0.44 ", &
235 | "Leu ","TTG ","0.23 ","Leu ","TTA ","0.11 ","Phe ","TTT ","0.49 ","Phe ","TTC ","0.51 ", &
236 | "Ser ","TCG ","0.15 ","Ser ","TCA ","0.25 ","Ser ","TCT ","0.21 ","Ser ","TCC ","0.13 ", &
237 | "Arg ","CGG ","0.09 ","Arg ","CGA ","0.23 ","Arg ","CGT ","0.21 ","Arg ","CGC ","0.10 ", &
238 | "Gln ","CAG ","0.34 ","Gln ","CAA ","0.66 ","His ","CAT ","0.60 ","His ","CAC ","0.40 ", &
239 | "Leu ","CTG ","0.14 ","Leu ","CTA ","0.09 ","Leu ","CTT ","0.25 ","Leu ","CTC ","0.17 ", &
240 | "Pro ","CCG ","0.20 ","Pro ","CCA ","0.53 ","Pro ","CCT ","0.18 ","Pro ","CCC ","0.09 "/), (/3,64/) )
241 | ! dmeCFT (presumably D. melanogaster - TODO confirm): per codon, 3-letter amino acid, codon, frequency
242 | CHARACTER(LEN=5),DIMENSION(3,64) :: dmeCFT = &
243 | RESHAPE( (/&
244 | "Gly ","GGG ","0.07 ","Gly ","GGA ","0.28 ","Gly ","GGT ","0.21 ","Gly ","GGC ","0.43 ", &
245 | "Glu ","GAG ","0.67 ","Glu ","GAA ","0.33 ","Asp ","GAT ","0.53 ","Asp ","GAC ","0.47 ", &
246 | "Val ","GTG ","0.47 ","Val ","GTA ","0.11 ","Val ","GTT ","0.18 ","Val ","GTC ","0.24 ", &
247 | "Ala ","GCG ","0.19 ","Ala ","GCA ","0.17 ","Ala ","GCT ","0.19 ","Ala ","GCC ","0.45 ", &
248 | "Arg ","AGG ","0.11 ","Arg ","AGA ","0.09 ","Ser ","AGT ","0.14 ","Ser ","AGC ","0.25 ", &
249 | "Lys ","AAG ","0.70 ","Lys ","AAA ","0.30 ","Asn ","AAT ","0.44 ","Asn ","AAC ","0.56 ", &
250 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.19 ","Ile ","ATT ","0.34 ","Ile ","ATC ","0.47 ", &
251 | "Thr ","ACG ","0.26 ","Thr ","ACA ","0.20 ","Thr ","ACT ","0.17 ","Thr ","ACC ","0.38 ", &
252 | "Trp ","TGG ","1.00 ","End ","TGA ","0.25 ","Cys ","TGT ","0.29 ","Cys ","TGC ","0.71 ", &
253 | "End ","TAG ","0.33 ","End ","TAA ","0.41 ","Tyr ","TAT ","0.37 ","Tyr ","TAC ","0.63 ", &
254 | "Leu ","TTG ","0.18 ","Leu ","TTA ","0.05 ","Phe ","TTT ","0.37 ","Phe ","TTC ","0.63 ", &
255 | "Ser ","TCG ","0.20 ","Ser ","TCA ","0.09 ","Ser ","TCT ","0.08 ","Ser ","TCC ","0.23 ", &
256 | "Arg ","CGG ","0.15 ","Arg ","CGA ","0.15 ","Arg ","CGT ","0.16 ","Arg ","CGC ","0.33 ", &
257 | "Gln ","CAG ","0.70 ","Gln ","CAA ","0.30 ","His ","CAT ","0.40 ","His ","CAC ","0.60 ", &
258 | "Leu ","CTG ","0.43 ","Leu ","CTA ","0.09 ","Leu ","CTT ","0.10 ","Leu ","CTC ","0.15 ", &
259 | "Pro ","CCG ","0.29 ","Pro ","CCA ","0.25 ","Pro ","CCT ","0.13 ","Pro ","CCC ","0.33 "/), (/3,64/) )
260 | ! hsaCFT (presumably H. sapiens - TODO confirm): per codon, 3-letter amino acid, codon, frequency
261 | CHARACTER(LEN=5),DIMENSION(3,64) :: hsaCFT = &
262 | RESHAPE( (/&
263 | "Gly ","GGG ","0.25 ","Gly ","GGA ","0.25 ","Gly ","GGT ","0.16 ","Gly ","GGC ","0.34 ", &
264 | "Glu ","GAG ","0.58 ","Glu ","GAA ","0.42 ","Asp ","GAT ","0.46 ","Asp ","GAC ","0.54 ", &
265 | "Val ","GTG ","0.47 ","Val ","GTA ","0.12 ","Val ","GTT ","0.18 ","Val ","GTC ","0.24 ", &
266 | "Ala ","GCG ","0.11 ","Ala ","GCA ","0.23 ","Ala ","GCT ","0.26 ","Ala ","GCC ","0.40 ", &
267 | "Arg ","AGG ","0.21 ","Arg ","AGA ","0.21 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.24 ", &
268 | "Lys ","AAG ","0.57 ","Lys ","AAA ","0.43 ","Asn ","AAT ","0.47 ","Asn ","AAC ","0.53 ", &
269 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.17 ","Ile ","ATT ","0.36 ","Ile ","ATC ","0.47 ", &
270 | "Thr ","ACG ","0.11 ","Thr ","ACA ","0.28 ","Thr ","ACT ","0.25 ","Thr ","ACC ","0.36 ", &
271 | "Trp ","TGG ","1.00 ","End ","TGA ","0.47 ","Cys ","TGT ","0.45 ","Cys ","TGC ","0.55 ", &
272 | "End ","TAG ","0.23 ","End ","TAA ","0.30 ","Tyr ","TAT ","0.44 ","Tyr ","TAC ","0.56 ", &
273 | "Leu ","TTG ","0.13 ","Leu ","TTA ","0.08 ","Phe ","TTT ","0.46 ","Phe ","TTC ","0.54 ", &
274 | "Ser ","TCG ","0.06 ","Ser ","TCA ","0.15 ","Ser ","TCT ","0.19 ","Ser ","TCC ","0.22 ", &
275 | "Arg ","CGG ","0.20 ","Arg ","CGA ","0.11 ","Arg ","CGT ","0.08 ","Arg ","CGC ","0.19 ", &
276 | "Gln ","CAG ","0.74 ","Gln ","CAA ","0.26 ","His ","CAT ","0.42 ","His ","CAC ","0.58 ", &
277 | "Leu ","CTG ","0.40 ","Leu ","CTA ","0.07 ","Leu ","CTT ","0.13 ","Leu ","CTC ","0.20 ", &
278 | "Pro ","CCG ","0.11 ","Pro ","CCA ","0.28 ","Pro ","CCT ","0.28 ","Pro ","CCC ","0.33 "/), (/3,64/) )
279 | ! mmuCFT (presumably M. musculus - TODO confirm): per codon, 3-letter amino acid, codon, frequency
280 | CHARACTER(LEN=5),DIMENSION(3,64) :: mmuCFT = &
281 | RESHAPE( (/&
282 | "Gly ","GGG ","0.24 ","Gly ","GGA ","0.26 ","Gly ","GGT ","0.18 ","Gly ","GGC ","0.33 ", &
283 | "Glu ","GAG ","0.60 ","Glu ","GAA ","0.40 ","Asp ","GAT ","0.44 ","Asp ","GAC ","0.56 ", &
284 | "Val ","GTG ","0.46 ","Val ","GTA ","0.12 ","Val ","GTT ","0.17 ","Val ","GTC ","0.25 ", &
285 | "Ala ","GCG ","0.10 ","Ala ","GCA ","0.23 ","Ala ","GCT ","0.29 ","Ala ","GCC ","0.38 ", &
286 | "Arg ","AGG ","0.22 ","Arg ","AGA ","0.21 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.24 ", &
287 | "Lys ","AAG ","0.61 ","Lys ","AAA ","0.39 ","Asn ","AAT ","0.43 ","Asn ","AAC ","0.57 ", &
288 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.16 ","Ile ","ATT ","0.34 ","Ile ","ATC ","0.50 ", &
289 | "Thr ","ACG ","0.11 ","Thr ","ACA ","0.29 ","Thr ","ACT ","0.25 ","Thr ","ACC ","0.35 ", &
290 | "Trp ","TGG ","1.00 ","End ","TGA ","0.49 ","Cys ","TGT ","0.48 ","Cys ","TGC ","0.52 ", &
291 | "End ","TAG ","0.23 ","End ","TAA ","0.28 ","Tyr ","TAT ","0.43 ","Tyr ","TAC ","0.57 ", &
292 | "Leu ","TTG ","0.13 ","Leu ","TTA ","0.06 ","Phe ","TTT ","0.44 ","Phe ","TTC ","0.56 ", &
293 | "Ser ","TCG ","0.05 ","Ser ","TCA ","0.14 ","Ser ","TCT ","0.20 ","Ser ","TCC ","0.22 ", &
294 | "Arg ","CGG ","0.19 ","Arg ","CGA ","0.12 ","Arg ","CGT ","0.09 ","Arg ","CGC ","0.17 ", &
295 | "Gln ","CAG ","0.75 ","Gln ","CAA ","0.25 ","His ","CAT ","0.40 ","His ","CAC ","0.60 ", &
296 | "Leu ","CTG ","0.40 ","Leu ","CTA ","0.08 ","Leu ","CTT ","0.13 ","Leu ","CTC ","0.20 ", &
297 | "Pro ","CCG ","0.10 ","Pro ","CCA ","0.28 ","Pro ","CCT ","0.31 ","Pro ","CCC ","0.31 "/), (/3,64/) )
298 | ! rnoCFT (presumably R. norvegicus - TODO confirm): per codon, 3-letter amino acid, codon, frequency
299 | CHARACTER(LEN=5),DIMENSION(3,64) :: rnoCFT = &
300 | RESHAPE( (/&
301 | "Gly ","GGG ","0.24 ","Gly ","GGA ","0.25 ","Gly ","GGT ","0.17 ","Gly ","GGC ","0.34 ", &
302 | "Glu ","GAG ","0.61 ","Glu ","GAA ","0.39 ","Asp ","GAT ","0.43 ","Asp ","GAC ","0.57 ", &
303 | "Val ","GTG ","0.47 ","Val ","GTA ","0.11 ","Val ","GTT ","0.16 ","Val ","GTC ","0.25 ", &
304 | "Ala ","GCG ","0.10 ","Ala ","GCA ","0.22 ","Ala ","GCT ","0.28 ","Ala ","GCC ","0.39 ", &
305 | "Arg ","AGG ","0.21 ","Arg ","AGA ","0.20 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.24 ", &
306 | "Lys ","AAG ","0.62 ","Lys ","AAA ","0.38 ","Asn ","AAT ","0.41 ","Asn ","AAC ","0.59 ", &
307 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.15 ","Ile ","ATT ","0.33 ","Ile ","ATC ","0.52 ", &
308 | "Thr ","ACG ","0.11 ","Thr ","ACA ","0.28 ","Thr ","ACT ","0.24 ","Thr ","ACC ","0.37 ", &
309 | "Trp ","TGG ","1.00 ","End ","TGA ","0.50 ","Cys ","TGT ","0.45 ","Cys ","TGC ","0.55 ", &
310 | "End ","TAG ","0.22 ","End ","TAA ","0.28 ","Tyr ","TAT ","0.40 ","Tyr ","TAC ","0.60 ", &
311 | "Leu ","TTG ","0.13 ","Leu ","TTA ","0.06 ","Phe ","TTT ","0.42 ","Phe ","TTC ","0.58 ", &
312 | "Ser ","TCG ","0.06 ","Ser ","TCA ","0.14 ","Ser ","TCT ","0.19 ","Ser ","TCC ","0.23 ", &
313 | "Arg ","CGG ","0.20 ","Arg ","CGA ","0.12 ","Arg ","CGT ","0.09 ","Arg ","CGC ","0.18 ", &
314 | "Gln ","CAG ","0.75 ","Gln ","CAA ","0.25 ","His ","CAT ","0.39 ","His ","CAC ","0.61 ", &
315 | "Leu ","CTG ","0.41 ","Leu ","CTA ","0.08 ","Leu ","CTT ","0.12 ","Leu ","CTC ","0.20 ", &
316 | "Pro ","CCG ","0.11 ","Pro ","CCA ","0.28 ","Pro ","CCT ","0.30 ","Pro ","CCC ","0.31 "/), (/3,64/) )
317 | ! sceCFT (presumably S. cerevisiae - TODO confirm): per codon, 3-letter amino acid, codon, frequency
318 | CHARACTER(LEN=5),DIMENSION(3,64) :: sceCFT = &
319 | RESHAPE( (/&
320 | "Gly ","GGG ","0.12 ","Gly ","GGA ","0.22 ","Gly ","GGT ","0.47 ","Gly ","GGC ","0.19 ", &
321 | "Glu ","GAG ","0.30 ","Glu ","GAA ","0.70 ","Asp ","GAT ","0.65 ","Asp ","GAC ","0.35 ", &
322 | "Val ","GTG ","0.19 ","Val ","GTA ","0.21 ","Val ","GTT ","0.39 ","Val ","GTC ","0.21 ", &
323 | "Ala ","GCG ","0.11 ","Ala ","GCA ","0.29 ","Ala ","GCT ","0.38 ","Ala ","GCC ","0.22 ", &
324 | "Arg ","AGG ","0.21 ","Arg ","AGA ","0.48 ","Ser ","AGT ","0.16 ","Ser ","AGC ","0.11 ", &
325 | "Lys ","AAG ","0.42 ","Lys ","AAA ","0.58 ","Asn ","AAT ","0.59 ","Asn ","AAC ","0.41 ", &
326 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.27 ","Ile ","ATT ","0.46 ","Ile ","ATC ","0.26 ", &
327 | "Thr ","ACG ","0.14 ","Thr ","ACA ","0.30 ","Thr ","ACT ","0.35 ","Thr ","ACC ","0.22 ", &
328 | "Trp ","TGG ","1.00 ","End ","TGA ","0.30 ","Cys ","TGT ","0.63 ","Cys ","TGC ","0.37 ", &
329 | "End ","TAG ","0.23 ","End ","TAA ","0.47 ","Tyr ","TAT ","0.56 ","Tyr ","TAC ","0.44 ", &
330 | "Leu ","TTG ","0.29 ","Leu ","TTA ","0.28 ","Phe ","TTT ","0.59 ","Phe ","TTC ","0.41 ", &
331 | "Ser ","TCG ","0.10 ","Ser ","TCA ","0.21 ","Ser ","TCT ","0.26 ","Ser ","TCC ","0.16 ", &
332 | "Arg ","CGG ","0.04 ","Arg ","CGA ","0.07 ","Arg ","CGT ","0.15 ","Arg ","CGC ","0.06 ", &
333 | "Gln ","CAG ","0.31 ","Gln ","CAA ","0.69 ","His ","CAT ","0.64 ","His ","CAC ","0.36 ", &
334 | "Leu ","CTG ","0.11 ","Leu ","CTA ","0.14 ","Leu ","CTT ","0.13 ","Leu ","CTC ","0.06 ", &
335 | "Pro ","CCG ","0.12 ","Pro ","CCA ","0.41 ","Pro ","CCT ","0.31 ","Pro ","CCC ","0.16 "/), (/3,64/) )
336 | ! xlaCFT (presumably X. laevis - TODO confirm): per codon, 3-letter amino acid, codon, frequency
337 | CHARACTER(LEN=5),DIMENSION(3,64) :: xlaCFT = &
338 | RESHAPE( (/&
339 | "Gly ","GGG ","0.21 ","Gly ","GGA ","0.35 ","Gly ","GGT ","0.21 ","Gly ","GGC ","0.23 ", &
340 | "Glu ","GAG ","0.48 ","Glu ","GAA ","0.52 ","Asp ","GAT ","0.57 ","Asp ","GAC ","0.43 ", &
341 | "Val ","GTG ","0.36 ","Val ","GTA ","0.17 ","Val ","GTT ","0.27 ","Val ","GTC ","0.20 ", &
342 | "Ala ","GCG ","0.07 ","Ala ","GCA ","0.32 ","Ala ","GCT ","0.33 ","Ala ","GCC ","0.27 ", &
343 | "Arg ","AGG ","0.22 ","Arg ","AGA ","0.28 ","Ser ","AGT ","0.18 ","Ser ","AGC ","0.20 ", &
344 | "Lys ","AAG ","0.49 ","Lys ","AAA ","0.51 ","Asn ","AAT ","0.52 ","Asn ","AAC ","0.48 ", &
345 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.23 ","Ile ","ATT ","0.42 ","Ile ","ATC ","0.35 ", &
346 | "Thr ","ACG ","0.09 ","Thr ","ACA ","0.35 ","Thr ","ACT ","0.30 ","Thr ","ACC ","0.26 ", &
347 | "Trp ","TGG ","1.00 ","End ","TGA ","0.39 ","Cys ","TGT ","0.50 ","Cys ","TGC ","0.50 ", &
348 | "End ","TAG ","0.18 ","End ","TAA ","0.43 ","Tyr ","TAT ","0.51 ","Tyr ","TAC ","0.49 ", &
349 | "Leu ","TTG ","0.16 ","Leu ","TTA ","0.11 ","Phe ","TTT ","0.56 ","Phe ","TTC ","0.44 ", &
350 | "Ser ","TCG ","0.05 ","Ser ","TCA ","0.16 ","Ser ","TCT ","0.23 ","Ser ","TCC ","0.19 ", &
351 | "Arg ","CGG ","0.12 ","Arg ","CGA ","0.12 ","Arg ","CGT ","0.12 ","Arg ","CGC ","0.13 ", &
352 | "Gln ","CAG ","0.64 ","Gln ","CAA ","0.36 ","His ","CAT ","0.50 ","His ","CAC ","0.50 ", &
353 | "Leu ","CTG ","0.30 ","Leu ","CTA ","0.10 ","Leu ","CTT ","0.19 ","Leu ","CTC ","0.14 ", &
354 | "Pro ","CCG ","0.09 ","Pro ","CCA ","0.37 ","Pro ","CCT ","0.32 ","Pro ","CCC ","0.22 "/), (/3,64/) )
355 | ! ecoCFT (presumably E. coli - TODO confirm): per codon, 3-letter amino acid, codon, frequency
356 | CHARACTER(LEN=5),DIMENSION(3,64) :: ecoCFT = &
357 | RESHAPE( (/&
358 | "Gly ","GGG ","0.16 ","Gly ","GGA ","0.15 ","Gly ","GGT ","0.34 ","Gly ","GGC ","0.35 ", &
359 | "Glu ","GAG ","0.33 ","Glu ","GAA ","0.67 ","Asp ","GAT ","0.64 ","Asp ","GAC ","0.36 ", &
360 | "Val ","GTG ","0.34 ","Val ","GTA ","0.17 ","Val ","GTT ","0.29 ","Val ","GTC ","0.20 ", &
361 | "Ala ","GCG ","0.31 ","Ala ","GCA ","0.24 ","Ala ","GCT ","0.19 ","Ala ","GCC ","0.26 ", &
362 | "Arg ","AGG ","0.05 ","Arg ","AGA ","0.08 ","Ser ","AGT ","0.17 ","Ser ","AGC ","0.23 ", &
363 | "Lys ","AAG ","0.27 ","Lys ","AAA ","0.73 ","Asn ","AAT ","0.52 ","Asn ","AAC ","0.48 ", &
364 | "Met ","ATG ","1.00 ","Ile ","ATA ","0.14 ","Ile ","ATT ","0.49 ","Ile ","ATC ","0.37 ", &
365 | "Thr ","ACG ","0.24 ","Thr ","ACA ","0.19 ","Thr ","ACT ","0.19 ","Thr ","ACC ","0.38 ", &
366 | "Trp ","TGG ","1.00 ","End ","TGA ","0.31 ","Cys ","TGT ","0.47 ","Cys ","TGC ","0.53 ", &
367 | "End ","TAG ","0.09 ","End ","TAA ","0.60 ","Tyr ","TAT ","0.60 ","Tyr ","TAC ","0.40 ", &
368 | "Leu ","TTG ","0.13 ","Leu ","TTA ","0.15 ","Phe ","TTT ","0.59 ","Phe ","TTC ","0.41 ", &
369 | "Ser ","TCG ","0.13 ","Ser ","TCA ","0.15 ","Ser ","TCT ","0.17 ","Ser ","TCC ","0.14 ", &
370 | "Arg ","CGG ","0.12 ","Arg ","CGA ","0.07 ","Arg ","CGT ","0.34 ","Arg ","CGC ","0.34 ", &
371 | "Gln ","CAG ","0.66 ","Gln ","CAA ","0.34 ","His ","CAT ","0.59 ","His ","CAC ","0.41 ", &
372 | "Leu ","CTG ","0.46 ","Leu ","CTA ","0.04 ","Leu ","CTT ","0.12 ","Leu ","CTC ","0.10 ", &
373 | "Pro ","CCG ","0.47 ","Pro ","CCA ","0.21 ","Pro ","CCT ","0.19 ","Pro ","CCC ","0.14 "/), (/3,64/) )
374 | ! ppaCFT (presumably P. pastoris - TODO confirm); NOTE(review): the Tyr entries 0.46 + 0.55 sum to 1.01
375 | CHARACTER(LEN=5),DIMENSION(3,64) :: ppaCFT = &
376 | RESHAPE( (/&
377 | "Phe ","TTT ","0.54 ","Phe ","TTC ","0.46 ","Ser ","TCT ","0.29 ","Ser ","TCC ","0.20 ", &
378 | "Ser ","TCA ","0.19 ","Ser ","TCG ","0.09 ","Ser ","AGT ","0.15 ","Ser ","AGC ","0.09 ", &
379 | "Tyr ","TAT ","0.46 ","Tyr ","TAC ","0.55 ","Cys ","TGT ","0.65 ","Cys ","TGC ","0.35 ", &
380 | "Leu ","TTA ","0.16 ","Leu ","TTG ","0.33 ","Leu ","CTT ","0.16 ","Leu ","CTC ","0.08 ", &
381 | "Leu ","CTA ","0.11 ","Leu ","CTG ","0.16 ","End ","TAA ","0.53 ","End ","TGA ","0.18 ", &
382 | "End ","TAG ","0.29 ","Trp ","TGG ","1.00 ","Pro ","CCT ","0.35 ","Pro ","CCC ","0.15 ", &
383 | "Pro ","CCA ","0.41 ","Pro ","CCG ","0.09 ","His ","CAT ","0.57 ","His ","CAC ","0.43 ", &
384 | "Arg ","CGT ","0.16 ","Arg ","CGC ","0.05 ","Arg ","CGA ","0.10 ","Arg ","CGG ","0.05 ", &
385 | "Arg ","AGA ","0.48 ","Arg ","AGG ","0.16 ","Gln ","CAA ","0.61 ","Gln ","CAG ","0.39 ", &
386 | "Ile ","ATT ","0.50 ","Ile ","ATC ","0.30 ","Ile ","ATA ","0.19 ","Thr ","ACT ","0.40 ", &
387 | "Thr ","ACC ","0.25 ","Thr ","ACA ","0.24 ","Thr ","ACG ","0.11 ","Asn ","AAC ","0.51 ", &
388 | "Asn ","AAT ","0.49 ","Lys ","AAA ","0.47 ","Lys ","AAG ","0.53 ","Met ","ATG ","1.00 ", &
389 | "Val ","GTT ","0.42 ","Val ","GTC ","0.23 ","Val ","GTA ","0.15 ","Val ","GTG ","0.19 ", &
390 | "Ala ","GCT ","0.45 ","Ala ","GCC ","0.26 ","Ala ","GCA ","0.23 ","Ala ","GCG ","0.06 ", &
391 | "Asp ","GAT ","0.58 ","Asp ","GAC ","0.42 ","Gly ","GGT ","0.44 ","Gly ","GGC ","0.14 ", &
392 | "Gly ","GGA ","0.32 ","Gly ","GGG ","0.10 ","Glu ","GAA ","0.57 ","Glu ","GAG ","0.43 "/), (/3,64/) )
393 |
394 | ! SOLUTIONS
395 | ! Full state of one candidate design: sequence, overlaps, per-position scores, totals and repeat/misprime lists
396 | TYPE DNA
397 | CHARACTER(LEN=9999) :: DNAseq='' ! the actual DNA sequence,in ACGT nts
398 | INTEGER :: NumOlaps=0 ! the total number of overlaps
399 | ! (Run_Dnaworks abandons a trial when NumOlaps is even)
400 | INTEGER :: OlapsPos(999,2) ! the positions of the first and last
401 | ! nucleotides in the overlap
402 | INTEGER(KIND=1) :: NUMseq(9999) ! the nt sequence as numbers (A=-1, C=-3, G=3, T=1)
403 | INTEGER(KIND=1) :: prot2cod(3333) ! PROTpos to codon (1-64)
404 | INTEGER(KIND=1) :: nt2cod(9999) ! DNApos to codon (1-64) or 0
405 | REAL :: MeltT(999) ! melting temps for the overlaps
406 | REAL :: TScore(999) ! overlap-based score of MeltTm deviance
407 | REAL :: CScore(3333) ! codon-based score of codon frequency
408 | REAL :: LScore(9999) ! nt-based score of oligo length
409 | INTEGER :: RScore(9999) ! nt-based score of repeats
410 | INTEGER :: PScore(9999) ! nt-based score of pattern matching
411 | INTEGER :: MScore(9999) ! nt-based score of potential mispriming
412 | INTEGER :: AScore(9999) ! nt-based score of AT content
413 | INTEGER :: GScore(9999) ! nt-based score of GC content
414 | INTEGER :: FScore(9999) ! nt-based score of gap-fixed positions
415 | REAL :: TotalGScore=0 ! total score for GC content
416 | REAL :: TotalAScore=0 ! total score for AT content
417 | REAL :: TotalLScore=0 ! total score for oligo length
418 | REAL :: TotalCScore=0 ! total score for codons
419 | REAL :: TotalTScore=0 ! total score for temperature
420 | REAL :: TotalRScore=0 ! total score for repeats
421 | REAL :: TotalPScore=0 ! total score for patterns
422 | REAL :: TotalMScore=0 ! total score for mispriming
423 | REAL :: TotalFScore=0 ! total score for gap-fixed positions
424 | REAL :: OverallScore=0 ! Sum of all the total scores
425 | INTEGER :: RN=0 ! number of tandem repeats
426 | INTEGER :: RS1(9999) ! starting position for primary seq
427 | INTEGER :: RS2(9999) ! starting position for secondary seq
428 | INTEGER :: RLn(9999) ! size of repeat (not oligo ends)
429 | ! INTEGER(KIND=1) :: RX(9999) ! direct=1,inverse=-1
430 | INTEGER :: RX(9999) ! direct=1,inverse=-1
431 | INTEGER :: MN=0 ! number of potential misprimes
432 | INTEGER :: M1(9999) ! starting position for potential misprime in primary seq
433 | INTEGER :: M2(9999) ! starting position for potential misprime in secondary seq
434 | INTEGER :: MX(9999) ! Type of potential misprime (DS,IS,DA,IA)
435 | INTEGER :: MSN=0 ! number of actual misprimes
436 | INTEGER :: MS1(9999) ! starting position for actual misprime in primary seq
437 | INTEGER :: MS2(9999) ! starting position for actual misprime in secondary seq
438 | INTEGER :: MSX(9999) ! Type of actual misprime (DS,IS,DA,IA)
439 | INTEGER :: MOL(9999) ! overlap the misprime is in
440 | INTEGER :: ntID_GC(9999) ! window of GC content
441 | INTEGER :: ntID_AT(9999) ! window of AT content
442 | INTEGER :: ntID_Tip(9999) ! unique number for Tip matching
443 | INTEGER :: ntID_TipRC(9999) ! unique number for Tip (reverse complement)
444 | INTEGER :: ntID_Rep(9999) ! unique number for Repeat matching
445 | INTEGER :: ntID_RepRC(9999) ! repeat matching (reverse complement)
446 | LOGICAL :: GapFixPos(9999) ! should nt be fixed within a gap?
447 | LOGICAL :: Degen(9999) ! true if the nt is degenerate
448 | LOGICAL :: DegenNum(9999) ! numerical index for degenerate sequence (1-11)
449 | END TYPE
450 | ! Module-level design states; roles other than CurrDNA are inferred from names - TODO confirm
451 | TYPE(DNA) :: CurrDNA ! design being worked on (Run_Dnaworks reads CurrDNA%NumOlaps)
452 | TYPE(DNA) :: StoreDNA
453 | TYPE(DNA) :: BestDNA
454 | TYPE(DNA) :: BestOverlapDNA
455 |
456 | END MODULE dnaworks_data
457 |
--------------------------------------------------------------------------------
/dnaworks_test.f90:
--------------------------------------------------------------------------------
1 | MODULE dnaworks_test
2 | !
3 | ! Switches for diagnostic output: TESTn enables printing of the TESTn
4 | ! messages.  All four are off by default.
5 |
6 |   IMPLICIT NONE
7 |   SAVE
8 |
9 |   LOGICAL :: TEST0=.FALSE., TEST1=.FALSE., TEST2=.FALSE., TEST3=.FALSE.
10 |
11 | END MODULE dnaworks_test
12 |
--------------------------------------------------------------------------------
/email_func.f90:
--------------------------------------------------------------------------------
1 | SUBROUTINE Send_Email
2 | !
3 | ! Mail the output file to the user by passing this command to the shell:
4 | !   MAILPATH -s "DNAWorks Output - jobname" <outputfile email
5 | ! NOTE(review): jobname, outputfile and email reach the shell unescaped --
6 | ! confirm they are validated where they are read in.
7 |
8 |   USE dnaworks_data
9 |   USE dnaworks_test
10 |   IMPLICIT NONE
11 |
12 |   INTEGER,EXTERNAL :: CurrentTimeSeconds
13 |   INTEGER :: t0                        ! time at which the wait began
14 |   CHARACTER(LEN=500) :: head,tail      ! the two halves of the command
15 |   CHARACTER(LEN=1000) :: command
16 |
17 |   IF (TEST0) PRINT *,"Send_Email" !TEST0
18 |
19 |   WRITE(head,FMT="(a80,' -s ""DNAWorks Output - ',a80)") MAILPATH,jobname
20 |   WRITE(tail,FMT="('"" <',a,' ',a80)") outputfile,email
21 |   command=TRIM(head)//TRIM(tail)
22 |
23 |   CALL SYSTEM(command)
24 |
25 | ! Spin for about 10 seconds.  This should give the program enough time to
26 | ! send out the email.
27 |   t0=CurrentTimeSeconds()
28 |   DO
29 |     IF (CurrentTimeSeconds()-t0.GE.10) EXIT
30 |   END DO
31 | END SUBROUTINE Send_Email
32 |
--------------------------------------------------------------------------------
/encoding.f90:
--------------------------------------------------------------------------------
1 | SUBROUTINE Create_ntID_Arrays()
2 | !
3 | ! Create the nucleotide id arrays of CurrDNA, or (if MutProtPos isn't zero)
4 | ! update only the windows around the mutated nucleotides.
5 | ! Note that INTEGER(KIND=4) can have only 9 digits! INTEGER(KIND=8) can
6 | ! hold 17 digits...
7 | !
8 | ! A ntID array holds an n-digit integer in place of the sequence, one
9 | ! decimal digit per nucleotide.  NUMseq uses A=-1 T=1 C=-3 G=3; the digits
10 | ! written here are A=1 C=2 G=3 T=4.
11 | !
12 | ! ACGTACGTACGTACGT with a RepLen = 8 would be shown as
13 | ! ........ ntID_Rep(1) = 12341234
14 | ! ........ ntID_Rep(2) = 23412341
15 | ! ........ ntID_Rep(3) = 34123412
16 | ! ........ ntID_Rep(4) = 41234123
17 | !
18 | ! and so on...  The *RC arrays hold the reverse complement of each window.
19 | !
20 | ! ntID_GC(i) counts the A/T nucleotides in the RepLen window starting at i
21 | ! and ntID_AT(i) counts the G/C ones, so a value of 0 marks a window of
22 | ! solid GC or solid AT content respectively.
23 |
24 |   USE dnaworks_data
25 |   USE dnaworks_test
26 |   IMPLICIT NONE
27 |
28 |   INTEGER :: pos                     ! first nt of the current window
29 |   INTEGER :: k                       ! power of ten being filled
30 |   INTEGER :: span                    ! window length minus one
31 |   INTEGER :: tipLo,tipHi             ! tip windows to (re)build
32 |   INTEGER :: repLo,repHi             ! repeat, GC and AT windows to (re)build
33 |   INTEGER :: fwd,rc                  ! ids being assembled for one window
34 |
35 |   IF (TEST2) PRINT *,'Create_ntID_Arrays'
36 |
37 | ! Pick the window ranges: the whole sequence, or only the windows around
38 | ! the mutated nucleotides.
39 |
40 |   IF (MutProtPos.ne.0) THEN
41 |     tipLo=MAX(1,MutNtPos(1)-MPTip)
42 |     tipHi=MIN(DNAlen-MPTip+1,MutNtPos(MutNtNum)+1)
43 |     repLo=MAX(1,MutNtPos(1)-RepLen)
44 |     repHi=MIN(DNAlen-RepLen+1,MutNtPos(MutNtNum)+1)
45 |   ELSE
46 |     tipLo=1
47 |     tipHi=DNAlen-MPTip+1
48 |     repLo=1
49 |     repHi=DNAlen-RepLen+1
50 |   END IF
51 |
52 | ! update misprime arrays
53 |
54 |   span=MPTip-1
55 |   DO pos=tipLo,tipHi
56 |     fwd=0
57 |     rc=0
58 |     DO k=0,span
59 |       fwd=fwd+(FwdDigit(INT(CurrDNA%NUMseq(pos+span-k)))*(10**k))
60 |       rc=rc+(RcDigit(INT(CurrDNA%NUMseq(pos+k)))*(10**k))
61 |     END DO
62 |     CurrDNA%ntID_Tip(pos)=fwd
63 |     CurrDNA%ntID_TipRC(pos)=rc
64 |   END DO
65 |
66 | ! update repeat arrays
67 |
68 |   span=RepLen-1
69 |   DO pos=repLo,repHi
70 |     fwd=0
71 |     rc=0
72 |     DO k=0,span
73 |       fwd=fwd+(FwdDigit(INT(CurrDNA%NUMseq(pos+span-k)))*(10**k))
74 |       rc=rc+(RcDigit(INT(CurrDNA%NUMseq(pos+k)))*(10**k))
75 |     END DO
76 |     CurrDNA%ntID_Rep(pos)=fwd
77 |     CurrDNA%ntID_RepRC(pos)=rc
78 |   END DO
79 |
80 | ! update GC and AT arrays (same windows as the repeat arrays)
81 |
82 |   DO pos=repLo,repHi
83 |     CurrDNA%ntID_GC(pos)=COUNT(ABS(CurrDNA%NUMseq(pos:pos+span)).eq.1)
84 |     CurrDNA%ntID_AT(pos)=COUNT(ABS(CurrDNA%NUMseq(pos:pos+span)).eq.3)
85 |   END DO
86 |
87 | CONTAINS
88 |   INTEGER FUNCTION FwdDigit(num)
89 | ! Decimal digit for one NUMseq value read on the forward strand:
90 | ! A(-1)=1, C(-3)=2, G(3)=3, T(1)=4; any other value contributes 0.
91 |     INTEGER,INTENT(IN) :: num
92 |
93 |     SELECT CASE(num)
94 |     CASE(-1)
95 |       FwdDigit=1
96 |     CASE(-3)
97 |       FwdDigit=2
98 |     CASE(3)
99 |       FwdDigit=3
100 |     CASE(1)
101 |       FwdDigit=4
102 |     CASE DEFAULT
103 |       FwdDigit=0
104 |     END SELECT
105 |
106 |   END FUNCTION FwdDigit
107 |
108 |   INTEGER FUNCTION RcDigit(num)
109 | ! Digit of the complementary nucleotide: A(-1)=4, C(-3)=3, G(3)=2,
110 | ! T(1)=1; any other value contributes 0.
111 |     INTEGER,INTENT(IN) :: num
112 |
113 |     RcDigit=FwdDigit(num)
114 |     IF (RcDigit.ne.0) RcDigit=5-RcDigit
115 |
116 |   END FUNCTION RcDigit
117 |
118 | END SUBROUTINE Create_ntID_Arrays
119 | SUBROUTINE Sort_Misprime_Arrays()
120 | !
121 | ! Order the potential-misprime records of CurrDNA by ascending M1,
122 | ! moving the matching M2 and MX entries along with each M1.
123 |   USE dnaworks_data
124 |   USE dnaworks_test
125 |   IMPLICIT NONE
126 |
127 |   INTEGER :: lo,hi                   ! the pair of records being compared
128 |
129 |   IF (TEST2) PRINT *,"Sort_Misprime_Arrays" !TEST2
130 |
131 | ! Exchange sort: after pass lo, record lo holds the smallest remaining M1.
132 |   DO lo=1,CurrDNA%MN-1
133 |     DO hi=lo+1,CurrDNA%MN
134 |       IF (CurrDNA%M1(hi).ge.CurrDNA%M1(lo)) CYCLE
135 |       CALL IntSwap(CurrDNA%M1(lo),CurrDNA%M1(hi))
136 |       CALL IntSwap(CurrDNA%M2(lo),CurrDNA%M2(hi))
137 |       CALL IntSwap(CurrDNA%MX(lo),CurrDNA%MX(hi))
138 |     END DO
139 |   END DO
140 |
141 | END SUBROUTINE Sort_Misprime_Arrays
142 | SUBROUTINE Sort_Repeat_Arrays
143 | !
144 | ! Normalise and order the repeat records of CurrDNA: within each record
145 | ! the smaller start goes in RS1, then the records are put in ascending
146 | ! RS1 order with RS2, RLn and RX carried along.
147 |   USE dnaworks_data
148 |   USE dnaworks_test
149 |   IMPLICIT NONE
150 |
151 |   INTEGER :: rec,lo,hi
152 |
153 |   IF (TEST2) PRINT *,"Sort_Repeat_Arrays" !TEST2
154 |
155 | ! Rearrange repeat pairs so that RS1 <= RS2
156 |
157 |   DO rec=1,CurrDNA%RN
158 |     IF (CurrDNA%RS1(rec).gt.CurrDNA%RS2(rec)) CALL IntSwap(CurrDNA%RS1(rec),CurrDNA%RS2(rec))
159 |   END DO
160 |
161 | ! Sort repeat pairs (exchange sort on RS1)
162 |
163 |   DO lo=1,CurrDNA%RN-1
164 |     DO hi=lo+1,CurrDNA%RN
165 |       IF (CurrDNA%RS1(hi).ge.CurrDNA%RS1(lo)) CYCLE
166 |       CALL IntSwap(CurrDNA%RS1(lo),CurrDNA%RS1(hi))
167 |       CALL IntSwap(CurrDNA%RS2(lo),CurrDNA%RS2(hi))
168 |       CALL IntSwap(CurrDNA%RLn(lo),CurrDNA%RLn(hi))
169 |       CALL IntSwap(CurrDNA%RX(lo),CurrDNA%RX(hi))
170 |     END DO
171 |   END DO
172 |
173 | END SUBROUTINE Sort_Repeat_Arrays
174 | SUBROUTINE Translate_Protein
175 | !
176 | ! Translate the mutatable protein residues into DNA sequence.  The work
177 | ! is done only while SequenceTranslated is false; MutProtPos is reset
178 | ! on every call.
179 |
180 |   USE dnaworks_data
181 |   USE dnaworks_test
182 |   IMPLICIT NONE
183 |
184 |   INTEGER :: res                     ! counter over the mutatable residues
185 |   INTEGER :: protPos                 ! position of the residue in the protein
186 |   INTEGER :: mid                     ! middle nt of the residue's codon
187 |   INTEGER :: pick                    ! which of the amino acid's codons to use
188 |   INTEGER :: cod                     ! the chosen codon, as an index into CFT
189 |   INTEGER :: nt
190 |   REAL :: rand
191 |   INTEGER,EXTERNAL :: NT2Int
192 |   CHARACTER(LEN=3) :: codonSeq
193 |
194 |   IF (TEST0) PRINT *,"Translate_Protein" !TEST0
195 |
196 | ! Reset MutProtPos
197 |
198 |   MutProtPos=0
199 |
200 |   IF (SequenceTranslated) RETURN       ! already translated, nothing to do
201 |
202 |   DO res=1,mutPROTnum
203 |     protPos=mutPROT2prot(res)
204 |     mid=prot2nt(protPos)
205 |
206 | ! Take the first codon, or a random active one when CodonRandom is set
207 |
208 |     pick=1
209 |     IF (CodonRandom) THEN
210 |       CALL RANDOM_NUMBER(rand)
211 |       pick=(INT(rand*(AAT(prot2aa(protPos))%NumOfActiveCodons)))+1
212 |     END IF
213 |     cod=AAT(prot2aa(protPos))%Codon(pick)
214 |
215 | ! Insert the codon, as its reverse complement if the chain is reversed
216 |
217 |     codonSeq=CFT(cod)%Seq
218 |     IF (ChainReverse(prot2chain(protPos))) CALL RevComplStr(codonSeq)
219 |     CurrDNA%DNAseq(mid-1:mid+1)=codonSeq
220 |
221 | ! Record the codon, then refresh the numerical sequence for its three nt
222 |
223 |     CurrDNA%prot2cod(protPos)=cod
224 |     CurrDNA%nt2cod(mid)=cod
225 |     DO nt=mid-1,mid+1
226 |       CurrDNA%NUMseq(nt)=NT2Int(CurrDNA%DNAseq(nt:nt))
227 |     END DO
228 |   END DO
229 |   SequenceTranslated=.TRUE.
230 | END SUBROUTINE Translate_Protein
231 |
--------------------------------------------------------------------------------
/misc_func.f90:
--------------------------------------------------------------------------------
1 | SUBROUTINE Fix_Degenerates
2 | !
3 | ! Fix degenerate sequences to A,C,G or T: every position listed in DegPos
4 | ! gets one nucleotide, in both DNAseq and NumSeq of CurrDNA.
5 |
6 |   USE dnaworks_data
7 |   USE dnaworks_test
8 |   IMPLICIT NONE
9 |
10 |   INTEGER :: n,at,code,pick
11 |   REAL :: rand
12 |
13 |   IF (TEST0) PRINT *,"Fix_Degenerates" !TEST0
14 |
15 |   DO n=1,NumDegPos
16 |     at=DegPos(n)                                     ! nt position being fixed
17 |     code=CurrDNA%DegenNum(at)                        ! index for degenerate sequence
18 | ! Choose a nt at random from possible
19 |     CALL RANDOM_NUMBER(rand)
20 |     pick=(INT(rand*(DegenSeq(code)%NumOfNT)))+1      ! choice for index
21 |     CurrDNA%DNAseq(at:at)=DegenSeq(code)%Seq(pick)   ! assign seq
22 |     CurrDNA%NumSeq(at)=DegenSeq(code)%NumSeq(pick)   ! assign NumSeq
23 |   END DO
24 |
25 | END SUBROUTINE Fix_Degenerates
26 | SUBROUTINE IntSwap(firstelement,lastelement)
27 | !
28 | ! Exchange the values of two integers.
29 |
30 |   USE dnaworks_test
31 |   IMPLICIT NONE
32 |
33 |   INTEGER :: firstelement,lastelement   ! both are read and overwritten
34 |   INTEGER :: held                       ! copy of lastelement kept during the exchange
35 |
36 |   IF (TEST3) PRINT *,"IntSwap" !TEST3
37 |
38 |   held=lastelement
39 |   lastelement=firstelement
40 |   firstelement=held
41 | END SUBROUTINE IntSwap
42 | SUBROUTINE RealSwap(firstelement,lastelement)
43 | !
44 | ! Exchange the values of two real numbers.
45 |
46 |   USE dnaworks_test
47 |   IMPLICIT NONE
48 |
49 |   REAL :: firstelement,lastelement      ! both are read and overwritten
50 |   REAL :: held                          ! copy of lastelement kept during the exchange
51 |
52 |   IF (TEST3) PRINT *,"RealSwap" !TEST3
53 |
54 |   held=lastelement
55 |   lastelement=firstelement
56 |   firstelement=held
57 | END SUBROUTINE RealSwap
58 | SUBROUTINE Revert_Degenerates
59 | !
60 | ! Revert degenerate sequences back to original: the degenerate letter is
61 | ! written back into CurrDNA%DNAseq at every position listed in DegPos.
62 |
63 |   USE dnaworks_data
64 |   USE dnaworks_test
65 |   IMPLICIT NONE
66 |
67 |   INTEGER :: n,at
68 |
69 |   IF (TEST0) PRINT *,"Revert_Degenerates" !TEST0
70 |
71 |   DO n=1,NumDegPos
72 |     at=DegPos(n)
73 | ! DegenNum gives the index for the degenerate sequence at that position
74 |     CurrDNA%DNAseq(at:at)=DegenSeq(CurrDNA%DegenNum(at))%DegNT
75 |   END DO
76 |
77 | END SUBROUTINE Revert_Degenerates
78 |
--------------------------------------------------------------------------------
/mutate.f90:
--------------------------------------------------------------------------------
1 | SUBROUTINE Find_Mut_Pot_Misprimes()
2 | !
3 | ! This is a position dependent replacement for Find_Potential_Misprimes.
4 | ! It drops the potential misprimes in the current mutant range, searches
5 | ! again among window pairs (i,j) with a member near the mutation, and
6 | ! sorts the misprime arrays.
7 |
8 |   USE dnaworks_data
9 |   USE dnaworks_test
10 |   IMPLICIT NONE
11 |
12 |   INTEGER :: i,j
13 |   INTEGER :: nearLo,nearHi           ! window starts affected by the mutation
14 |   LOGICAL,EXTERNAL :: HMatchNum
15 |
16 |   IF (TEST2) PRINT *,"Find_Mut_Pot_Misprimes" !TEST2
17 |
18 | ! Get rid of potential misprimes in the current mutant range
19 |
20 |   CALL Decrement_Misprime_Arrays
21 |
22 | ! Make sure the search doesn't go beyond the possible ranges
23 |
24 |   nearLo=MAX(1,MutNtPos(1)-MPLn)
25 |   nearHi=MIN((MutNtPos(MutNtNum)+1),(DNAlen-MPLn+1))
26 |
27 | ! First half-search: i before the affected range, j inside it.
28 | ! If MutNtPos(1) <= MPLn+1, only the second half-search is run.
29 |
30 |   IF (MutNtPos(1).gt.(MPLn+1)) THEN
31 |     DO i=1,MutNtPos(1)-MPLn-1
32 |       DO j=MutNtPos(1)-MPLn,nearHi
33 |         CALL Check_Pair(i,j)
34 |       END DO
35 |     END DO
36 |   END IF
37 |
38 | ! Second half-search: i inside the affected range, j from i onwards
39 |
40 |   DO i=nearLo,nearHi
41 |     DO j=i,DNAlen-MPLn+1
42 |       CALL Check_Pair(i,j)
43 |     END DO
44 |   END DO
45 |
46 |   CALL Sort_Misprime_Arrays
47 |
48 | CONTAINS
49 |
50 |   SUBROUTINE Check_Pair(p,q)
51 | ! Test windows p and q for the four kinds of potential misprime and
52 | ! record each one found.  Types 1 and 4 need HMatchNum(p,q,1), types 2
53 | ! and 3 need HMatchNum(p,q,-1); each also needs a matching pair of tips.
54 |     INTEGER,INTENT(IN) :: p,q
55 |     INTEGER :: off                   ! from a window start to the tip at its end
56 |
57 |     off=MPLn-MPTip
58 |     IF (HMatchNum(p,q,1)) THEN
59 |       IF (CurrDNA%ntID_Tip(p+off).eq.CurrDNA%ntID_Tip(q+off)) CALL Increment_Misprime_Arrays(p,q,1)
60 |       IF (CurrDNA%ntID_Tip(p).eq.CurrDNA%ntID_Tip(q)) CALL Increment_Misprime_Arrays(p,q,4)
61 |     END IF
62 |     IF (HMatchNum(p,q,-1)) THEN
63 |       IF (CurrDNA%ntID_Tip(p).eq.CurrDNA%ntID_TipRC(q+off)) CALL Increment_Misprime_Arrays(p,q,2)
64 |       IF (CurrDNA%ntID_Tip(p+off).eq.CurrDNA%ntID_TipRC(q)) CALL Increment_Misprime_Arrays(p,q,3)
65 |     END IF
66 |   END SUBROUTINE Check_Pair
67 |
68 | END SUBROUTINE Find_Mut_Pot_Misprimes
69 | SUBROUTINE Find_Mutated_Repeats()
70 | !
71 | ! Position dependent repeat search.  It drops the repeats touching the
72 | ! current mutant range, looks again for direct and inverse repeats among
73 | ! window pairs (i,j) with a member near the mutation, and sorts the
74 | ! repeat arrays.
75 |
76 |   USE dnaworks_data
77 |   USE dnaworks_test
78 |   IMPLICIT NONE
79 |
80 |   INTEGER :: i,j
81 |   INTEGER :: nearLo,nearHi           ! window starts affected by the mutation
82 |   LOGICAL,EXTERNAL :: PairWithinKnownRepeat
83 |
84 |   IF (TEST2) PRINT *,"Find_Mutated_Repeats" !TEST2
85 |
86 | ! Get rid of repeats in the current mutant range
87 |
88 |   CALL Decrement_Repeat_Arrays
89 |
90 | ! Make sure the search doesn't go beyond the possible ranges
91 |
92 |   nearLo=MAX(1,MutNtPos(1)-RepLen)
93 |   nearHi=MIN((MutNtPos(MutNtNum)+1),(DNAlen-RepLen+1))
94 |
95 | ! First half-search: i before the affected range, j inside it.
96 | ! If MutNtPos(1) <= RepLen+1, only the second half-search is run.
97 |
98 |   IF (MutNtPos(1).gt.(RepLen+1)) THEN
99 |     DO i=1,(MutNtPos(1)-RepLen-1)
100 |       DO j=(MutNtPos(1)-RepLen),nearHi
101 |         CALL Check_Pair(i,j)
102 |       END DO
103 |     END DO
104 |   END IF
105 |
106 | ! Second half-search: i inside the affected range, j from i onwards
107 |
108 |   DO i=nearLo,nearHi
109 |     DO j=i,DNAlen-RepLen+1
110 |       CALL Check_Pair(i,j)
111 |     END DO
112 |   END DO
113 |
114 |   CALL Sort_Repeat_Arrays
115 |
116 | CONTAINS
117 |
118 |   SUBROUTINE Check_Pair(p,q)
119 | ! Add windows p and q to the repeat arrays if they form a direct (1) or
120 | ! an inverse (-1) repeat, skipping any pair for which
121 | ! PairWithinKnownRepeat is true.
122 |     INTEGER,INTENT(IN) :: p,q
123 |
124 | ! Direct repeat search (a window is never compared with itself)
125 |
126 |     IF (p.ne.q) THEN
127 |       IF (.not.PairWithinKnownRepeat(p,q,1)) THEN
128 |         IF (CurrDNA%ntID_Rep(p).eq.CurrDNA%ntID_Rep(q)) CALL Increment_Repeat_Arrays(p,q,1)
129 |       END IF
130 |     END IF
131 |
132 | ! Inverse repeat search
133 |
134 |     IF (.not.PairWithinKnownRepeat(p,q,-1)) THEN
135 |       IF (CurrDNA%ntID_Rep(p).eq.CurrDNA%ntID_RepRC(q)) CALL Increment_Repeat_Arrays(p,q,-1)
136 |     END IF
137 |
138 |   END SUBROUTINE Check_Pair
139 |
140 |
141 | END SUBROUTINE Find_Mutated_Repeats
142 | SUBROUTINE Mutate_Sequence
143 | !
144 | ! Mutate a single codon to an alternate codon. The residue choice is
145 | ! determined in Mutate_Wheel, and the codon choice is made here.
146 | ! On return MutNtPos(1:MutNtNum) lists the nt of that codon which now
147 | ! differ from StoreDNA.
148 |
149 |   USE dnaworks_data
150 |   USE dnaworks_test
151 |   IMPLICIT NONE
152 |
153 |   INTEGER :: try,k,nt
154 |   INTEGER :: choice                  ! which codon of the amino acid to use
155 |   INTEGER :: AAN                     ! amino acid choice (1-21)
156 |   INTEGER :: cod                     ! the new codon, as an index into CFT
157 |   INTEGER :: i1,i2,i3                ! nt positions of choice
158 |   REAL :: rand
159 |
160 |   IF (TEST1) PRINT *,"Mutate_Sequence"
161 |
162 | ! Generate XScores
163 |
164 |   CALL Equalize_Scores
165 |
166 | ! Determine position to mutate
167 |
168 |   CALL Mutate_Wheel
169 |
170 | ! Determine what amino acid exists at the selected residue MutProtPos.
171 | ! AAN is a number between 1 and 21, corresponding to a specific amino acid
172 |
173 |   AAN=prot2aa(MutProtPos)
174 |
175 | ! Determine the nt positions of the codon for that residue
176 |
177 |   i2=prot2nt(MutProtPos)
178 |   i1=i2-1
179 |   i3=i2+1
180 |
181 | ! Choose a codon that differs from the current one.  With exactly two
182 | ! active codons take the other one; otherwise draw at random until the
183 | ! draw differs, giving up after 1000 draws.
184 |
185 |   IF (AAT(AAN)%NumOfActiveCodons.ne.2) THEN
186 |     DO try=1,1000
187 |       CALL RANDOM_NUMBER(rand)
188 |       choice=(INT(rand*(AAT(AAN)%NumOfActiveCodons)))+1
189 |       IF (AAT(AAN)%Codon(choice).ne.CurrDNA%prot2cod(MutProtPos)) EXIT
190 |     END DO
191 |   ELSE IF (AAT(AAN)%Codon(1).eq.CurrDNA%prot2cod(MutProtPos)) THEN
192 |     choice=2
193 |   ELSE
194 |     choice=1
195 |   END IF
196 |   cod=AAT(AAN)%Codon(choice)
197 |
198 | ! update the arrays and change the DNA sequence
199 |
200 |   CurrDNA%prot2cod(MutProtPos)=cod
201 |   CurrDNA%nt2cod(i2)=cod
202 |
203 |   IF (ChainReverse(prot2chain(MutProtPos))) THEN
204 |     CurrDNA%DNAseq(i1:i3)=CFT(cod)%SeqRC
205 |     DO k=1,3
206 |       CurrDNA%NUMseq(i1+k-1)=CFT(cod)%numRC(k)
207 |     END DO
208 |   ELSE
209 |     CurrDNA%DNAseq(i1:i3)=CFT(cod)%Seq
210 |     DO k=1,3
211 |       CurrDNA%NUMseq(i1+k-1)=CFT(cod)%num(k)
212 |     END DO
213 |   END IF
214 |
215 | ! Set the MutNtPos values: the nt of the codon whose NUMseq now differs
216 | ! from StoreDNA, in ascending order
217 |
218 |   MutNtNum=0
219 |   MutNtPos(1:3)=0
220 |
221 |   DO nt=i1,i3
222 |     IF (CurrDNA%NUMseq(nt).ne.StoreDNA%NUMseq(nt)) THEN
223 |       MutNtNum=MutNtNum+1
224 |       MutNtPos(MutNtNum)=nt
225 |     END IF
226 |   END DO
227 |
228 | ! Flag the sequence as translated
229 |   SequenceTranslated=.TRUE.
230 |
231 | END SUBROUTINE Mutate_Sequence
232 | SUBROUTINE Mutate_Wheel
233 | !
234 | ! Choose a position to mutate based on the score of mutatable codons.
235 | ! The XScore values are laid end to end on a wheel, a random point on the
236 | ! wheel is drawn, and the residue of the codon under that point is
237 | ! stored in MutProtPos.
238 |
239 |   USE dnaworks_data
240 |   USE dnaworks_test
241 |   IMPLICIT NONE
242 |
243 |   INTEGER :: n
244 |   REAL :: rand
245 |   REAL :: ZScore(3333)               ! an accumulated codon-based overall score
246 |   REAL :: mark                       ! the random point on the wheel
247 |
248 |   IF (TEST1) PRINT *,"Mutate_Wheel"
249 |
250 |   ZScore(1)=XScore(1)
251 |
252 | ! With no more than one codon to be mutated, just choose the first codon
253 |
254 |   IF (mutPROTnum.le.1) THEN
255 |     MutProtPos=mutPROT2prot(1)
256 |     RETURN
257 |   END IF
258 |
259 | ! Generate ZScore array: the running total of the XScore values
260 |
261 |   DO n=2,mutPROTnum
262 |     ZScore(n)=ZScore(n-1)+XScore(n)
263 |   END DO
264 |
265 | ! Pick a random number between 0 and the sum of all the XScore values.
266 |
267 |   CALL RANDOM_NUMBER(rand)
268 |   mark=rand*ZScore(mutPROTnum)
269 |
270 | ! Find the first codon whose running total reaches this number and
271 | ! assign its residue to MutProtPos
272 |
273 |   DO n=1,mutPROTnum
274 |     IF (ZScore(n).ge.mark) THEN
275 |       MutProtPos=mutPROT2prot(n)
276 |       EXIT
277 |     END IF
278 |   END DO
279 |
280 | END SUBROUTINE Mutate_Wheel
281 |
--------------------------------------------------------------------------------
/overlaps.f90:
--------------------------------------------------------------------------------
1 | INTEGER FUNCTION ForOlap(first)
2 | !
3 | ! Given the first nt of an overlap, return a last nt for it.  The end
4 | ! starts 32 nt downstream and is moved in halving steps (16, 8, 4, 2)
5 | ! until TmCalc(first,end) is within MeltTol of MeltTemp; if that never
6 | ! happens, the better of the final two neighbouring positions is used.
7 | ! An end within 2 nt of DNAlen is moved to DNAlen in that case.
8 |
9 |   USE dnaworks_data
10 |   USE dnaworks_test
11 |   IMPLICIT NONE
12 |
13 |   INTEGER :: first                   ! first nt of the overlap
14 |   INTEGER :: tail                    ! candidate last nt of the overlap
15 |   INTEGER :: step                    ! how far the candidate moves next
16 |   REAL,EXTERNAL :: TmCalc
17 |   REAL :: diff,diff2                 ! MeltTemp minus Tm, before and after the last move
18 |   LOGICAL :: done                    ! true once Tm is within MeltTol
19 |
20 |   IF (TEST3) PRINT *,"ForOlap" !TEST3
21 |
22 |   done=.FALSE.
23 |   tail=first+32
24 |   step=16
25 |
26 | ! Tm too low (diff > 0) lengthens the overlap, too high shortens it; a
27 | ! candidate at or beyond DNAlen is pulled back without calculating Tm.
28 |
29 |   DO WHILE (.not.done)
30 |     IF (tail.lt.DNAlen) THEN
31 |       diff=MeltTemp-(TmCalc(first,tail))
32 |       IF (ABS(diff).gt.MeltTol) THEN
33 |         tail=tail+MERGE(step,-step,diff.gt.0)
34 |       ELSE
35 |         done=.TRUE.
36 |       END IF
37 |     ELSE
38 |       tail=tail-step
39 |     END IF
40 |     step=step/2
41 |     IF (step.le.1) EXIT
42 |   END DO
43 |
44 | ! For the final step, determine which of the final two positions is best
45 |
46 |   IF (.not.done) THEN
47 |     IF (tail.le.(DNAlen-1)) THEN
48 |       diff=MeltTemp-(TmCalc(first,tail))
49 |       step=MERGE(1,-1,diff.gt.0)
50 |       tail=tail+step
51 |       diff2=MeltTemp-(TmCalc(first,tail))
52 |       IF (ABS(diff).lt.ABS(diff2)) tail=tail-step
53 |     END IF
54 |     IF ((DNAlen-tail).le.2) tail=DNAlen
55 |   END IF
56 |
57 |   ForOlap = tail
58 |
59 | END FUNCTION ForOlap
60 | SUBROUTINE Generate_Overlaps(SolutionNo)
61 | !
62 | ! The nucleotide sequence is broken into overlaps of around 20 nucleotides
63 | ! each, depending on the calculated Tm. The set of potential oligos is
64 | ! then analyzed and the best trial is kept. A gap is allowed between
65 | ! overlaps to give oligos of size oligoLen.
66 | !
67 | ! The structure of the overlap array is as follows
68 | !
69 | ! 1,1 1,2 2,1 2,2 3,1 3,2 4,1 4,2 5,1 5,2
70 | ! ................ .................... ..................
71 | ! ......................... ...................... ..........
72 | ! ------- --------- ------ ------ -------
73 | !
74 | ! OVERLAP: 1 2 3 4 5
75 | !
76 | ! SolutionNo selects the FinalScore entry whose InitScore receives the
77 | ! resulting CurrDNA%OverallScore.
78 | !
79 | ! When MutantRun is set no overlaps are generated; only nt2overlap and
80 | ! the scores are refreshed.  Otherwise up to 10000 attempts are made, and
81 | ! an attempt is accepted only if its best trial has an odd number of
82 | ! overlaps.  Whenever the attempt number is a multiple of 200 and the
83 | ! result is still even, the oligo length settings are raised by one.
84 |
85 |   USE dnaworks_data
86 |   USE dnaworks_test
87 |   IMPLICIT NONE
88 |
89 |   INTEGER :: i,j,k
90 |   INTEGER :: first ! first nt of the overlap
91 |   INTEGER :: last ! last nt of the overlap
92 |   INTEGER :: SolutionNo ! current solution number
93 |   INTEGER :: shift ! number of nt before first overlap
94 |   INTEGER :: olength ! the number of nt to skip ahead
95 |   LOGICAL :: changed ! true if CurrDNA%OverallScore.lt.BestOverlapDNA%OverallScore
96 |
97 |   IF (TEST1) PRINT *,"Generate_Overlaps"
98 |
99 |   changed=.FALSE.
100 |   last=1 ! set here rather than in the declaration, which would imply SAVE
101 |
102 |   CALL Fix_Degenerates ! pin down degenerate sequences
103 |
104 |   IF (.not.MutantRun) THEN
105 |
106 |     generate: DO k=1,10000
107 |
108 | ! initialize the nt2overlap array (one whole-array assignment)
109 |
110 |       shift=0
111 |       last=1
112 |       changed=.FALSE.
113 |       nt2overlap=0
114 |
115 |       BestOverlapDNA = CurrDNA ! initialize BestOverlapDNA values
116 |       BestOverlapDNA%OverallScore = 9999
117 |
118 |       DO i=1,999 ! initialize the arrays
119 |         CurrDNA%OlapsPos(i,1)=0
120 |         CurrDNA%OlapsPos(i,2)=0
121 |       END DO
122 |       CurrDNA%NumOlaps=0
123 |
124 | ! Determine the new CurrDNA%OlapsPos values
125 |
126 |       outer: DO i=1,1000 ! keep shifting
127 |
128 |         olength=Next_Oligo_Length()
129 |
130 |         CurrDNA%NumOlaps=0 ! initialize the number of overlaps
131 |         first=1+shift
132 |         CALL Make_Olap(first,last)
133 |         IF ((shift.gt.0).and.(CurrDNA%OlapsPos(1,2).ge.OligoLen)) EXIT outer
134 |         last=first+olength-1
135 |         first=last-7 ! the minimal overlap size is 7
136 |         IF (first.le.CurrDNA%OlapsPos(1,2)) THEN
137 |           first = CurrDNA%OlapsPos(1,2)+1
138 |           last = first+7
139 |         END IF
140 |
141 |         inner: DO j=1,999
142 |
143 |           olength=Next_Oligo_Length()
144 |
145 |           IF (last.ge.DNAlen) EXIT inner
146 |           CALL Make_Olap(first,last)
147 |           last=first+olength-1
148 |           first=last-7
149 |           IF (first.le.CurrDNA%OlapsPos(CurrDNA%NumOlaps,2)) THEN
150 |             first = CurrDNA%OlapsPos(CurrDNA%NumOlaps,2)+1
151 |             last = first+7
152 |           END IF
153 |         END DO inner
154 |
155 |         shift=shift+1
156 |
157 | ! A trial with an even number of overlaps is not scored
158 |
159 |         IF ((MOD(CurrDNA%NumOlaps,2)).eq.0) CYCLE outer
160 |
161 |         CALL Evaluate_Scores
162 |
163 |         IF (CurrDNA%OverallScore.lt.BestOverlapDNA%OverallScore) THEN
164 |           BestOverlapDNA = CurrDNA
165 |           changed=.TRUE.
166 |         END IF
167 |
168 |       END DO outer
169 |
170 |       CurrDNA=BestOverlapDNA ! revert to best solution
171 |
172 |       IF (MOD(CurrDNA%NumOlaps,2).eq.1) THEN
173 |         EXIT generate
174 |       ELSE
175 |         IF (TEST0) PRINT *,k,"EVEN OVERLAPS" !TEST0
176 |
177 | ! Take drastic action to get optimization moving
178 |
179 |         IF ((MOD(k,200)).eq.0) THEN
180 |           OligoLenLo=OligoLenLo+1
181 |           OligoLenHi=OligoLenHi+1
182 |           OligoLen=OligoLen+1
183 |           WRITE(UNIT=console,FMT="('')")
184 |           WRITE(UNIT=outputnum,FMT="('')")
185 |           WRITE(UNIT=console,FMT="(' Too many sets of even overlaps -- increasing oligo length to',i4)") OligoLen
186 |           WRITE(UNIT=outputnum,FMT="(' Too many sets of even overlaps -- increasing oligo length to',i4)") OligoLen
187 |         END IF
188 |       END IF
189 |     END DO generate
190 |
191 |   END IF
192 |
193 | ! Assign the nt2overlap array: each nt inside an overlap gets its number.
194 | ! Overlaps are taken in ascending order, so a later one wins a shared nt.
195 |
196 |   DO j=1,CurrDNA%NumOlaps
197 |     nt2overlap(MAX(1,CurrDNA%OlapsPos(j,1)):MIN(DNAlen,CurrDNA%OlapsPos(j,2)))=j
198 |   END DO
199 |
200 |   IF (.not.changed) CALL Evaluate_Scores ! in case the CurrDNA is never better than BestOverlapDNA
201 |
202 |   FinalScore(SolutionNo)%InitScore=CurrDNA%OverallScore
203 |
204 | CONTAINS
205 |
206 |   INTEGER FUNCTION Next_Oligo_Length()
207 | ! Number of nt to skip ahead for the next overlap: 0 when NOGAPS is set,
208 | ! a random length from 20 up to OligoLen when OligoLenRandom is set
209 | ! (this draws one random number), and OligoLen otherwise.
210 |     REAL :: rand
211 |
212 |     IF (NOGAPS) THEN
213 |       Next_Oligo_Length=0
214 |     ELSE IF (OligoLenRandom) THEN
215 |       CALL RANDOM_NUMBER(rand)
216 |       Next_Oligo_Length=(INT(rand*(OligoLen-20)))+20 ! randomize oligo length
217 |     ELSE
218 |       Next_Oligo_Length=OligoLen
219 |     END IF
220 |
221 |   END FUNCTION Next_Oligo_Length
222 |
223 | END SUBROUTINE Generate_Overlaps
224 | SUBROUTINE Make_Olap(first,last)
225 | !
226 | ! Find one overlap and record its OlapsPos and MeltT values in CurrDNA.
227 | ! The first overlap is grown forward from `first` (ForOlap).  Later ones
228 | ! are grown backward from `last` (RevOlap), or forward from just past
229 | ! the previous overlap when they would run into it, and are recorded
230 | ! only if they end before DNAlen.  first and/or last are overwritten
231 | ! with the bounds that were found.
232 |
233 |   USE dnaworks_data
234 |   USE dnaworks_test
235 |   IMPLICIT NONE
236 |
237 |   INTEGER :: first,last
238 |   INTEGER :: n                       ! number of the overlap being made
239 |   INTEGER,EXTERNAL :: ForOlap
240 |   INTEGER,EXTERNAL :: RevOlap
241 |   REAL,EXTERNAL :: TmCalc
242 |
243 |   IF (TEST2) PRINT *,"Make_Olap" !TEST2
244 |
245 |   IF (CurrDNA%NumOlaps.ne.0) THEN
246 | ! Later overlap: search backward, but never into the previous overlap
247 |     first=RevOlap(last)
248 |     n=CurrDNA%NumOlaps
249 |     IF (first.le.CurrDNA%OlapsPos(n,2)) THEN
250 |       first=CurrDNA%OlapsPos(n,2)+1
251 |       last=ForOlap(first)
252 |     END IF
253 |     IF (last.ge.DNAlen) RETURN         ! reaches the end: not recorded
254 |     n=n+1
255 |   ELSE
256 | ! First overlap: search forward, always recorded
257 |     last=ForOlap(first)
258 |     n=1
259 |   END IF
260 |
261 | ! Record the overlap
262 |
263 |   CurrDNA%NumOlaps=n
264 |   CurrDNA%OlapsPos(n,1)=first
265 |   CurrDNA%OlapsPos(n,2)=last
266 |   CurrDNA%MeltT(n)=TmCalc(first,last)
267 |
268 | END SUBROUTINE Make_Olap
269 | INTEGER FUNCTION RevOlap(last)
270 | !
271 | ! Given the last nt of an overlap, return a first nt for it.  The start
272 | ! begins 32 nt upstream and is moved in halving steps (16, 8, 4, 2)
273 | ! until TmCalc(start,last) is within MeltTol of MeltTemp; if that never
274 | ! happens, the better of the final two neighbouring positions is used.
275 | ! A start at or below 2 is moved to 1 in that case.
276 |
277 |   USE dnaworks_data
278 |   USE dnaworks_test
279 |   IMPLICIT NONE
280 |
281 |   INTEGER :: last                    ! last nt of the overlap
282 |   INTEGER :: head                    ! candidate first nt of the overlap
283 |   INTEGER :: step                    ! how far the candidate moves next
284 |   REAL,EXTERNAL :: TmCalc
285 |   REAL :: diff,diff2                 ! MeltTemp minus Tm, before and after the last move
286 |   LOGICAL :: done                    ! true once Tm is within MeltTol
287 |
288 |   IF (TEST3) PRINT *,"RevOlap" !TEST3
289 |
290 |   done=.FALSE.
291 |   head=last-32
292 |   step=16
293 |
294 | ! Tm too low (diff > 0) lengthens the overlap, too high shortens it; a
295 | ! candidate at or before nt 1 is pushed back without calculating Tm.
296 |
297 |   DO WHILE (.not.done)
298 |     IF (head.gt.1) THEN
299 |       diff=MeltTemp-(TmCalc(head,last))
300 |       IF (ABS(diff).gt.MeltTol) THEN
301 |         head=head-MERGE(step,-step,diff.gt.0)
302 |       ELSE
303 |         done=.TRUE.
304 |       END IF
305 |     ELSE
306 |       head=head+step
307 |     END IF
308 |     step=step/2
309 |     IF (step.le.1) EXIT
310 |   END DO
311 |
312 | ! For the final step, determine which of the final two positions is best
313 |
314 |   IF (.not.done) THEN
315 |     IF (head.ge.2) THEN
316 |       diff=MeltTemp-(TmCalc(head,last))
317 |       step=MERGE(-1,1,diff.gt.0)
318 |       head=head+step
319 |       diff2=MeltTemp-(TmCalc(head,last))
320 |       IF (ABS(diff).lt.ABS(diff2)) head=head-step
321 |     END IF
322 |     IF (head.le.2) head=1
323 |   END IF
324 |
325 |   RevOlap = head
326 |
327 | END FUNCTION RevOlap
328 |
--------------------------------------------------------------------------------
/scores.f90:
--------------------------------------------------------------------------------
1 | SUBROUTINE AT_Score
2 | !
3 | ! Find all the 8 nt windows of solid AT content and update AScore: each
4 | ! nt gets one point for every such window that covers it.  TotalAScore
5 | ! is the sum of the points times 20/DNAlen.
6 | ! NOTE(review): the window is fixed at 8 nt here, while ntID_AT is built
7 | ! in Create_ntID_Arrays with a window of RepLen -- confirm RepLen is 8.
8 |   USE dnaworks_data
9 |   USE dnaworks_test
10 |   IMPLICIT NONE
11 |
12 |   INTEGER :: w                       ! first nt of the window examined
13 |
14 |   IF (TEST1) PRINT *,"AT_Score" !TEST1
15 |
16 |   CurrDNA%AScore(1:DNAlen)=0
17 |
18 | ! ntID_AT(w) is zero when the window starting at w holds no G or C
19 |
20 |   DO w=1,DNAlen-7
21 |     IF (CurrDNA%ntID_AT(w).ne.0) CYCLE
22 |     CurrDNA%AScore(w:w+7)=CurrDNA%AScore(w:w+7)+1
23 |   END DO
24 |
25 | ! Total the per-nt scores and scale the sum
26 |   CurrDNA%TotalAScore=0.0
27 |   DO w=1,DNAlen
28 |     CurrDNA%TotalAScore=CurrDNA%TotalAScore+CurrDNA%AScore(w)
29 |   END DO
30 |   CurrDNA%TotalAScore=CurrDNA%TotalAScore*20/DNAlen
31 | END SUBROUTINE AT_Score
32 | SUBROUTINE Average_Evaluate_Scores()
33 | !
34 | ! This subroutine determines the average scores for the current sequence, each time
35 | ! changing the degenerate sequences. It updates the TScore, CScore, RScore and
36 | ! PScore arrays, the Total*Score values (as averages) and OverallScore.
37 | ! Ten trials are run per degenerate position, and at least one trial in
38 | ! all; the degenerate letters are put back in DNAseq before returning.
39 |   USE dnaworks_data
40 |   USE dnaworks_test
41 |   IMPLICIT NONE
42 |
43 |   INTEGER :: trial,trials
44 |   REAL :: dC, dL, dT, dR, dM, dG, dA, dF, dP   ! running sums of the Total*Score values
45 |
46 |   IF (TEST1) PRINT *,"Average_Evaluate_Scores" !TEST1
47 |
48 |   dC=0
49 |   dL=0
50 |   dT=0
51 |   dR=0
52 |   dM=0
53 |   dG=0
54 |   dA=0
55 |   dF=0
56 |   dP=0
57 |
58 | ! Never fewer than one trial, so the averages below cannot divide by zero
59 |   trials=MAX(1,NumDegPos*10)
60 |
61 |   DO trial=1,trials
62 |     CALL Fix_Degenerates
63 |     CALL Create_ntID_Arrays
64 |     CALL Temp_Score ! TScore(i) based on olaps
65 |     CALL Misprime_Score ! MScore(i) based on nt
66 |     CALL Length_Score ! LScore(i) based on nt
67 |     CALL GapFix_Score ! FScore(i) based on nt
68 |     IF (ScoreCodons) CALL Codon_Score ! CScore(i) based on codons
69 |     CALL Repeat_Score ! RScore(i) based on nt
70 |     CALL GC_Score ! GScore(i) based on nt
71 |     CALL AT_Score ! AScore(i) based on nt
72 |     CALL Pattern_Score ! PScore(i) based on nt
73 |
74 |     dC=CurrDNA%TotalCScore+dC
75 |     dL=CurrDNA%TotalLScore+dL
76 |     dT=CurrDNA%TotalTScore+dT
77 |     dR=CurrDNA%TotalRScore+dR
78 |     dM=CurrDNA%TotalMScore+dM
79 |     dG=CurrDNA%TotalGScore+dG
80 |     dA=CurrDNA%TotalAScore+dA
81 |     dF=CurrDNA%TotalFScore+dF
82 |     dP=CurrDNA%TotalPScore+dP
83 |   END DO
84 |
85 |   CurrDNA%TotalCScore=dC/trials
86 |   CurrDNA%TotalLScore=dL/trials
87 |   CurrDNA%TotalTScore=dT/trials
88 |   CurrDNA%TotalRScore=dR/trials
89 |   CurrDNA%TotalMScore=dM/trials
90 |   CurrDNA%TotalGScore=dG/trials
91 |   CurrDNA%TotalAScore=dA/trials
92 |   CurrDNA%TotalFScore=dF/trials
93 |   CurrDNA%TotalPScore=dP/trials
94 |
95 |   CurrDNA%OverallScore = (Cwt*CurrDNA%TotalCScore)+&
96 |                          (Lwt*CurrDNA%TotalLScore)+&
97 |                          (Twt*CurrDNA%TotalTScore)+&
98 |                          (Rwt*CurrDNA%TotalRScore)+&
99 |                          (Mwt*CurrDNA%TotalMScore)+&
100 |                          (Gwt*CurrDNA%TotalGScore)+&
101 |                          (Awt*CurrDNA%TotalAScore)+&
102 |                          (Fwt*CurrDNA%TotalFScore)+&
103 |                          (Pwt*CurrDNA%TotalPScore)
104 |
105 |   CALL Revert_Degenerates
106 | END SUBROUTINE Average_Evaluate_Scores
107 | SUBROUTINE Codon_Score
108 | !
109 | ! This subroutine calculates a global score for codons based on frequency.
110 | ! A residue scores (1 - Freq of its codon / Freq(1) of its amino acid)**4.
111 | ! Only residue MutProtPos is rescored, unless MutProtPos is zero.
112 |   USE dnaworks_data
113 |   USE dnaworks_test
114 |   IMPLICIT NONE
115 |
116 |   INTEGER :: res,lo,hi               ! residue counter and the range to rescore
117 |
118 |   IF (TEST1) PRINT *,"Codon_Score" !TEST1
119 |
120 |   lo=MutProtPos
121 |   hi=MutProtPos
122 |   IF (MutProtPos.eq.0) THEN
123 |     lo=1
124 |     hi=PROTlen
125 |   END IF
126 |   DO res=lo,hi
127 |     CurrDNA%CScore(res)=(1-(CFT(CurrDNA%prot2cod(res))%Freq/AAT(prot2aa(res))%Freq(1)))**4
128 |   END DO
129 |
130 |   CurrDNA%TotalCScore=0.0
131 |   DO res=1,PROTlen
132 |     CurrDNA%TotalCScore=CurrDNA%TotalCScore+CurrDNA%CScore(res)
133 |   END DO
134 |   CurrDNA%TotalCScore = CurrDNA%TotalCScore/DNAlen
135 | END SUBROUTINE Codon_Score
136 | SUBROUTINE Decrement_Misprime_Arrays()
137 | !
138 | ! Removes potential misprime pairs within the current mutant range: a
139 | ! pair is dropped when either of its starts (M1 or M2) is in that range.
140 | ! The pairs kept are packed to the front of M1/M2/MX in their original
141 | ! order, and MN is lowered to their count.
142 |
143 |   USE dnaworks_data
144 |   USE dnaworks_test
145 |   IMPLICIT NONE
146 |
147 |   INTEGER :: n
148 |   INTEGER :: kept                    ! number of pairs kept so far
149 |   INTEGER :: lo,hi                   ! MutNtPos(1) and MutNtPos(MutNtNum)+1
150 |
151 |   IF (TEST2) PRINT *,"Decrement_Misprime_Arrays" !TEST2
152 |
153 |   hi=MutNtPos(MutNtNum)+1
154 |   IF (CurrDNA%MN.le.0) RETURN
155 |   lo=MutNtPos(1)
156 |
157 | ! Pack in place: kept never runs ahead of n, so nothing unread is overwritten
158 |
159 |   kept=0
160 |   DO n=1,CurrDNA%MN
161 |     IF (InRange(CurrDNA%M1(n)).or.InRange(CurrDNA%M2(n))) CYCLE
162 |     kept=kept+1
163 |     CurrDNA%M1(kept)=CurrDNA%M1(n)
164 |     CurrDNA%M2(kept)=CurrDNA%M2(n)
165 |     CurrDNA%MX(kept)=CurrDNA%MX(n)
166 |   END DO
167 |   CurrDNA%MN=kept
168 |
169 | CONTAINS
170 |
171 |   LOGICAL FUNCTION InRange(start)
172 | ! True when start is no more than MPLn before the first mutated nt and
173 | ! no later than one past the last mutated nt.
174 |     INTEGER,INTENT(IN) :: start
175 |     InRange=((start+MPLn).ge.lo).and.(start.le.hi)
176 |   END FUNCTION InRange
177 |
178 | END SUBROUTINE Decrement_Misprime_Arrays
179 | SUBROUTINE Decrement_Repeat_Arrays()
180 | !
181 | ! Remove repeat pairs and erase scores.  A repeat is dropped when either
182 | ! of its starts (RS1 or RS2) is within the current mutant range; RScore
183 | ! is then lowered by one over both of its copies.  The repeats kept are
184 | ! packed to the front of RS1/RS2/RLn/RX in their original order, and RN
185 | ! is lowered to their count.
186 |
187 |   USE dnaworks_data
188 |   USE dnaworks_test
189 |   IMPLICIT NONE
190 |
191 |   INTEGER :: n
192 |   INTEGER :: kept                    ! number of repeats kept so far
193 |   INTEGER :: lo,hi                   ! MutNtPos(1) and MutNtPos(MutNtNum)+1
194 |   INTEGER :: a,b,span                ! both starts and the size of one repeat
195 |
196 |   IF (TEST2) PRINT *,"Decrement_Repeat_Arrays" !TEST2
197 |
198 |   hi=MutNtPos(MutNtNum)+1
199 |   IF (CurrDNA%RN.le.0) RETURN
200 |   lo=MutNtPos(1)
201 |
202 |   kept=0
203 |   DO n=1,CurrDNA%RN
204 |     a=CurrDNA%RS1(n)
205 |     b=CurrDNA%RS2(n)
206 |     span=CurrDNA%RLn(n)
207 |     IF (InRange(a,span).or.InRange(b,span)) THEN
208 | ! Dropped: take its contribution out of RScore, first copy then second
209 |       CurrDNA%RScore(a:a+span-1)=CurrDNA%RScore(a:a+span-1)-1
210 |       CurrDNA%RScore(b:b+span-1)=CurrDNA%RScore(b:b+span-1)-1
211 |     ELSE
212 | ! Kept: pack in place (kept never runs ahead of n)
213 |       kept=kept+1
214 |       CurrDNA%RS1(kept)=a
215 |       CurrDNA%RS2(kept)=b
216 |       CurrDNA%RLn(kept)=span
217 |       CurrDNA%RX(kept)=CurrDNA%RX(n)
218 |     END IF
219 |   END DO
220 |   CurrDNA%RN=kept
221 |
222 | CONTAINS
223 |
224 |   LOGICAL FUNCTION InRange(start,length)
225 | ! True when start+length reaches MutNtPos(1) and start is not past hi
226 |     INTEGER,INTENT(IN) :: start,length
227 |     InRange=((start+length).ge.lo).and.(start.le.hi)
228 |   END FUNCTION InRange
229 |
230 | END SUBROUTINE Decrement_Repeat_Arrays
LOGICAL FUNCTION DegCmpr(instr,seq)
!
! This function compares a restriction site in degenerate form (instr) with
! a sequence (seq), position by position, in the orientation given.  It
! returns .TRUE. if every one of the first LEN_TRIM(instr) characters of seq
! is allowed by the corresponding code in instr, and .FALSE. otherwise.
! To test the reverse complement, the caller passes the reverse-complemented
! site as instr.
!
! seq must be at least as long as the trimmed site; if it is shorter the
! function returns .FALSE.  A code in instr that is not listed below never
! matches.  An all-blank instr matches trivially (returns .TRUE.).
!
! The function uses the NEB format of nucleotide degeneracy:
!
!   B = C or G or T          rev. compl. = V
!   D = A or G or T          rev. compl. = H
!   H = A or C or T          rev. compl. = D
!   K = G or T               rev. compl. = M
!   M = A or C               rev. compl. = K
!   N = A or C or G or T     rev. compl. = N
!   R = A or G               rev. compl. = Y
!   S = C or G               rev. compl. = S
!   V = A or C or G          rev. compl. = B
!   W = A or T               rev. compl. = W
!   Y = C or T               rev. compl. = R
!
! The dummy arguments are assumed-length so that the substrings passed by
! the callers are never read beyond their actual extent.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=*),INTENT(IN) :: instr,seq
  CHARACTER(LEN=4) :: ntSet   ! nucleotides allowed by the current code
  INTEGER :: i
  INTEGER :: Slen             ! length of the site without trailing blanks

  IF (TEST3) PRINT *,"DegCmpr" !TEST3

  DegCmpr=.FALSE.
  Slen=LEN_TRIM(instr)

! The sequence window cannot match a site longer than itself

  IF (LEN(seq).lt.Slen) RETURN

  DO i=1,Slen

! Translate the (possibly degenerate) code into the set of nts it allows

    SELECT CASE(instr(i:i))
      CASE('A') ; ntSet='A'
      CASE('C') ; ntSet='C'
      CASE('G') ; ntSet='G'
      CASE('T') ; ntSet='T'
      CASE('B') ; ntSet='CGT'
      CASE('D') ; ntSet='AGT'
      CASE('H') ; ntSet='ACT'
      CASE('K') ; ntSet='GT'
      CASE('M') ; ntSet='AC'
      CASE('N') ; ntSet='ACGT'
      CASE('R') ; ntSet='AG'
      CASE('S') ; ntSet='CG'
      CASE('V') ; ntSet='ACG'
      CASE('W') ; ntSet='AT'
      CASE('Y') ; ntSet='CT'
      CASE DEFAULT ; ntSet=' '      ! unknown code: empty set, never matches
    END SELECT

! TRIM removes the blank padding so that a blank in seq cannot match

    IF (INDEX(TRIM(ntSet),seq(i:i)).eq.0) RETURN

  END DO

  DegCmpr=.TRUE.

END FUNCTION DegCmpr
SUBROUTINE Equalize_Scores()
!
! Converts individual scores to codon-based XScores for mutation rounds.
! XScore(i) combines, for mutatable codon i:
!
!   - the weighted TScore of the overlap containing the codon's middle nt,
!     divided by the number of codons counted in that overlap (zero if the
!     middle nt is not in an overlap, or if no codon was counted there)
!   - the weighted CScore
!   - the weighted R, M, G, A, L, F and P scores summed over the three nts
!     j-1, j, j+1 where j = prot2nt(mutPROT2prot(i))
!
! This should allow for a more targeted mutation.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i,j
  REAL :: CodPerOlap(999)     ! number of codons per overlap, for each overlap
  REAL :: TScorePerCod(999)   ! average TScore per codon, for each overlap
  LOGICAL :: codonStart       ! nt i is the first counted nt of its codon

  IF (TEST1) PRINT *,"Equalize_Scores"

! Initialize XScore (only the mutatable codons are used)

  DO i=1,mutPROTnum
    XScore(i)=0
  END DO

! Initialize the per-overlap work arrays.  TScorePerCod is zeroed as well so
! that an overlap without any counted codon contributes nothing, instead of
! an undefined value, when it is looked up further down.

  DO i=1,999
    CodPerOlap(i)=0
    TScorePerCod(i)=0
  END DO

! Find how many codons are in each overlap, avoiding non-coding regions.
! A nt is counted if it is within a codon, within an overlap, and belongs to
! a different protein residue than the nt before it.  The first nt has no
! predecessor and is treated as starting a codon, so nt2prot(0) is never
! read.

  DO i=1,DNAlen
    IF ((nt2prot(i).eq.0).or.(nt2overlap(i).eq.0)) CYCLE
    IF (i.eq.1) THEN
      codonStart=.TRUE.
    ELSE
      codonStart=(nt2prot(i).ne.nt2prot(i-1))
    END IF
    IF (codonStart) CodPerOlap(nt2overlap(i))=CodPerOlap(nt2overlap(i))+1
  END DO

! The TScore for each codon is a fraction of the total TScore(j) for the
! overlap

  DO j=1,CurrDNA%NumOlaps
    IF (CodPerOlap(j).ne.0) TScorePerCod(j)=Twt*(CurrDNA%TScore(j)/CodPerOlap(j))
  END DO

! Assign the XScore for TScore, CScore, RScore, MScore, GScore, AScore,
! LScore, FScore and PScore for each codon

  DO i=1,mutPROTnum

    j=prot2nt(mutPROT2prot(i))         ! the middle nt of the codon

! if the middle nt of a codon is within an overlap, the XScore for that codon
! starts from the average TScore per codon for the overlap

    IF (nt2overlap(j).ne.0) XScore(i)=TScorePerCod(nt2overlap(j))

! the CScore contribution is already per codon; the nt-based scores are
! summed over the three nts around j
!
! NOTE(review): CScore is indexed here with the mutatable-codon index i,
! while the nt-based scores go through mutPROT2prot(i).  If CScore is indexed
! by protein position this may need to be CScore(mutPROT2prot(i)) -- confirm.

    XScore(i)=XScore(i)+(Cwt*CurrDNA%CScore(i))+&
      (Rwt*REAL(CurrDNA%RScore(j-1)+CurrDNA%RScore(j)+CurrDNA%RScore(j+1)))+&
      (Mwt*REAL(CurrDNA%MScore(j-1)+CurrDNA%MScore(j)+CurrDNA%MScore(j+1)))+&
      (Gwt*REAL(CurrDNA%GScore(j-1)+CurrDNA%GScore(j)+CurrDNA%GScore(j+1)))+&
      (Awt*REAL(CurrDNA%AScore(j-1)+CurrDNA%AScore(j)+CurrDNA%AScore(j+1)))+&
      (Lwt*REAL(CurrDNA%LScore(j-1)+CurrDNA%LScore(j)+CurrDNA%LScore(j+1)))+&
      (Fwt*REAL(CurrDNA%FScore(j-1)+CurrDNA%FScore(j)+CurrDNA%FScore(j+1)))+&
      (Pwt*REAL(CurrDNA%PScore(j-1)+CurrDNA%PScore(j)+CurrDNA%PScore(j+1)))

  END DO

END SUBROUTINE Equalize_Scores
SUBROUTINE Evaluate_Scores()
!
! Determines the scores for the current sequence.
!
! When the sequence has degenerate positions (NumDegPos is non-zero) the
! work is handed to Average_Evaluate_Scores.  Otherwise each individual
! scoring routine is called in turn and CurrDNA%OverallScore is set to the
! weighted sum of the nine Total*Score values.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  IF (TEST1) PRINT *,"Evaluate_Scores" !TEST1

! Degenerate sequences are scored by the averaging routine

  IF (NumDegPos.ne.0) THEN
    CALL Average_Evaluate_Scores
    RETURN
  END IF

! Non-degenerate sequence: build the ntID arrays, then the scores that
! follow the overlap layout

  CALL Create_ntID_Arrays
  CALL Temp_Score                       ! TScore(i) based on olaps
  CALL Misprime_Score                   ! MScore(i) based on nt
  CALL Length_Score                     ! LScore(i) based on nt
  CALL GapFix_Score                     ! FScore(i) based on nt

! According to the original author's note, the remaining scores do not
! change when the overlap positions are moved, only when the sequence is
! re-translated after a mutation (and the codon score does not apply to
! DNA-only runs)

  IF (ScoreCodons) CALL Codon_Score     ! CScore(i) based on codons
  CALL Repeat_Score                     ! RScore(i) based on nt
  CALL GC_Score                         ! GScore(i) based on nt
  CALL AT_Score                         ! AScore(i) based on nt
  CALL Pattern_Score                    ! PScore(i) based on nt

! Weighted sum of all totals

  CurrDNA%OverallScore = (Cwt*CurrDNA%TotalCScore) + (Lwt*CurrDNA%TotalLScore) + &
                         (Twt*CurrDNA%TotalTScore) + (Rwt*CurrDNA%TotalRScore) + &
                         (Mwt*CurrDNA%TotalMScore) + (Gwt*CurrDNA%TotalGScore) + &
                         (Awt*CurrDNA%TotalAScore) + (Fwt*CurrDNA%TotalFScore) + &
                         (Pwt*CurrDNA%TotalPScore)

END SUBROUTINE Evaluate_Scores
SUBROUTINE Find_Actual_Misprimes()
!
! Goes through every potential misprime pair (M1,M2,MX) and every overlap,
! and records an actual misprime (via Increment_Misprime_Scores, which also
! raises CurrDNA%MScore) when one position of the pair coincides with an end
! of the overlap:
!
!   o1 = first nt of the overlap
!   o2 = first nt of the last MPLn nts of the overlap
!
! Misprime types, as described by the original author:
!
!   1. direct-sense (DS):      forward primer mispriming on the sense strand
!   2. inverse-sense (IS):     reverse primer mispriming on the sense strand
!                              (recorded as type 5 when o2 matches M2)
!   3. inverse-antisense (IA): forward primer mispriming on the antisense
!                              strand (recorded as type 6 when o1 matches M2)
!   4. direct-antisense (DA):  reverse primer mispriming on the antisense
!                              strand
!
! Which overlap ends are examined:
!
!   TBIO runs:   overlaps before the middle one (mp) use only the o2 tests,
!                overlaps after it use only the o1 tests, and the middle
!                overlap uses both.
!   other runs:  even-numbered overlaps are skipped; odd-numbered overlaps
!                use both the o2 and the o1 tests.
!
! CurrDNA%MScore(1:DNAlen) and CurrDNA%MSN are reset before the search.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i,j,mp
  INTEGER :: o1,o2,m1,m2,mx
  LOGICAL :: useTail      ! examine o2 (tail window of the overlap)
  LOGICAL :: useHead      ! examine o1 (first nt of the overlap)

  IF (TEST2) PRINT *,"Find_Actual_Misprimes" !TEST2

  mp=(CurrDNA%NumOlaps+1)/2

! Initialize actual misprime arrays

  CurrDNA%MScore(1:DNAlen)=0
  CurrDNA%MSN=0

  mpair: DO i=1,CurrDNA%MN

    m1=CurrDNA%M1(i)
    m2=CurrDNA%M2(i)
    mx=CurrDNA%MX(i)

    olap: DO j=1,CurrDNA%NumOlaps

! Decide which ends of this overlap take part

      IF (TBIO) THEN
        useTail=(j.le.mp)
        useHead=(j.ge.mp)
      ELSE
        IF (MOD(j,2).eq.0) CYCLE olap
        useTail=.TRUE.
        useHead=.TRUE.
      END IF

      o1=CurrDNA%OlapsPos(j,1)
      o2=(CurrDNA%OlapsPos(j,2)-MPLn+1)

      SELECT CASE(mx)

        CASE(1)                                  ! direct-sense
          IF (useTail) THEN
            IF (o2.eq.m1) THEN
              CALL Increment_Misprime_Scores(o2,m2,1,j)
            ELSE IF (o2.eq.m2) THEN
              CALL Increment_Misprime_Scores(o2,m1,1,j)
            END IF
          END IF

        CASE(2)                                  ! inverse-sense
          IF (useTail.and.(o2.eq.m2)) THEN
            CALL Increment_Misprime_Scores(m2,m1,5,j)
          ELSE IF (useHead.and.(o1.eq.m1)) THEN
            CALL Increment_Misprime_Scores(o1,m2,2,j)
          END IF

        CASE(3)                                  ! inverse-antisense
          IF (useTail.and.(o2.eq.m1)) THEN
            CALL Increment_Misprime_Scores(o2,m2,3,j)
          ELSE IF (useHead.and.(o1.eq.m2)) THEN
            CALL Increment_Misprime_Scores(o1,m1,6,j)
          END IF

        CASE(4)                                  ! direct-antisense
          IF (useHead) THEN
            IF (o1.eq.m1) THEN
              CALL Increment_Misprime_Scores(o1,m2,4,j)
            ELSE IF (o1.eq.m2) THEN
              CALL Increment_Misprime_Scores(o1,m1,4,j)
            END IF
          END IF

      END SELECT

    END DO olap
  END DO mpair

END SUBROUTINE Find_Actual_Misprimes
SUBROUTINE Find_Potential_Misprimes
!
! Rebuilds the list of potential misprime pairs for the whole sequence.
!
! Every pair of window starts (i,j) with i <= j <= DNAlen-MPLn+1 is tested
! with HMatchNum, in the direct (dir=1) and in the inverse (dir=-1) sense.
! The inverse test includes the palindromic case i=j.  For each homologous
! pair the tips are then compared through the ntID_Tip / ntID_TipRC arrays,
! and every tip match is stored with Increment_Misprime_Arrays as one of
! four types (descriptions are the original author's):
!
!   1. direct-sense (DS):      forward primer mispriming on the sense strand
!                              (tips at the window ends are identical)
!   4. direct-antisense (DA):  reverse primer mispriming on the antisense
!                              strand (tips at the window starts are
!                              identical)
!   2. inverse-sense (IS):     reverse primer mispriming on the sense strand
!                              (start tip of i equals the RC tip at the end
!                              of j)
!   3. inverse-antisense (IA): forward primer mispriming on the antisense
!                              strand (end tip of i equals the RC tip at the
!                              start of j)
!
! The number of pairs ends up in CurrDNA%MN and their positions and types in
! CurrDNA%M1, CurrDNA%M2 and CurrDNA%MX.  The actual misprimes are determined
! afterwards by Find_Actual_Misprimes.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i,j
  INTEGER :: lastWin      ! last position where an MPLn window still fits
  INTEGER :: tipOff       ! offset from a window start to its end tip
  LOGICAL,EXTERNAL :: HMatchNum

  IF (TEST2) PRINT *,"Find_Potential_Misprimes" !TEST2

! Start from an empty list

  CurrDNA%MN=0

  lastWin=DNAlen-MPLn+1
  tipOff=MPLn-MPTip

! Find the potential misprimes

  DO i=1,lastWin
    DO j=i,lastWin

! direct homology

      IF (HMatchNum(i,j,1)) THEN
        IF (CurrDNA%ntID_Tip(i+tipOff).eq.CurrDNA%ntID_Tip(j+tipOff)) THEN
          CALL Increment_Misprime_Arrays(i,j,1)
        END IF
        IF (CurrDNA%ntID_Tip(i).eq.CurrDNA%ntID_Tip(j)) THEN
          CALL Increment_Misprime_Arrays(i,j,4)
        END IF
      END IF

! inverse homology

      IF (HMatchNum(i,j,-1)) THEN
        IF (CurrDNA%ntID_Tip(i).eq.CurrDNA%ntID_TipRC(j+tipOff)) THEN
          CALL Increment_Misprime_Arrays(i,j,2)
        END IF
        IF (CurrDNA%ntID_Tip(i+tipOff).eq.CurrDNA%ntID_TipRC(j)) THEN
          CALL Increment_Misprime_Arrays(i,j,3)
        END IF
      END IF

    END DO
  END DO

END SUBROUTINE Find_Potential_Misprimes
SUBROUTINE Find_Repeats()
!
! Rebuilds the repeat list for the whole sequence.
!
! CurrDNA%RScore(1:DNAlen) and CurrDNA%RN are cleared first.  Then every
! pair of window starts (i,j) with i <= j <= DNAlen-RepLen+1 is examined:
!
!   direct repeat:   only for i /= j, when the pair is not already inside a
!                    known direct repeat and ntID_Rep(i) equals ntID_Rep(j)
!   inverse repeat:  also for i = j (palindromes), when the pair is not
!                    already inside a known inverse repeat and ntID_Rep(i)
!                    equals ntID_RepRC(j)
!
! Each hit is handed to Increment_Repeat_Arrays, which fills CurrDNA%RS1,
! CurrDNA%RS2, CurrDNA%RLn and CurrDNA%RX and raises CurrDNA%RScore.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i,j
  INTEGER :: lastWin      ! last position where a RepLen window still fits
  LOGICAL,EXTERNAL :: PairWithinKnownRepeat

  IF (TEST2) PRINT *,"Find_Repeats" !TEST2

! Start from scratch

  CurrDNA%RScore(1:DNAlen)=0
  CurrDNA%RN=0

  lastWin=DNAlen-RepLen+1

  DO i=1,lastWin
    DO j=i,lastWin

! direct repeats (a window is never compared with itself)

      IF (i.ne.j) THEN
        IF (.not.PairWithinKnownRepeat(i,j,1)) THEN
          IF (CurrDNA%ntID_Rep(i).eq.CurrDNA%ntID_Rep(j)) THEN
            CALL Increment_Repeat_Arrays(i,j,1)
          END IF
        END IF
      END IF

! inverse repeats

      IF (.not.PairWithinKnownRepeat(i,j,-1)) THEN
        IF (CurrDNA%ntID_Rep(i).eq.CurrDNA%ntID_RepRC(j)) THEN
          CALL Increment_Repeat_Arrays(i,j,-1)
        END IF
      END IF

    END DO
  END DO

END SUBROUTINE Find_Repeats
SUBROUTINE GC_Score
!
! Updates CurrDNA%GScore and CurrDNA%TotalGScore.
!
! For every window start i (1..DNAlen-7) where CurrDNA%ntID_GC(i) is zero,
! the score of the eight nts i..i+7 is raised by one.  According to the
! original author's description, these are the 8 nt windows of solid GC
! content.  The total is the sum over all nts, times 20, divided by DNAlen.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i

  IF (TEST1) PRINT *,"GC_Score" !TEST1

! Clear the per-nt scores

  CurrDNA%GScore(1:DNAlen)=0

! Raise the score of every nt inside a flagged window

  DO i=1,DNAlen-7
    IF (CurrDNA%ntID_GC(i).ne.0) CYCLE
    CurrDNA%GScore(i:i+7)=CurrDNA%GScore(i:i+7)+1
  END DO

! Total GC score, normalized by the sequence length

  CurrDNA%TotalGScore=SUM(CurrDNA%GScore(1:DNAlen))
  CurrDNA%TotalGScore=CurrDNA%TotalGScore*20/DNAlen

END SUBROUTINE GC_Score
SUBROUTINE GapFix_Score
!
! Updates CurrDNA%FScore and CurrDNA%TotalFScore.
!
! Every nt flagged in CurrDNA%GapFixPos (a position that should be within a
! gap) that nevertheless falls inside one of the current overlaps gets a
! score of 10; all other nts get 0.  The total is the sum over all nts,
! times 20, divided by DNAlen.
!
  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i

  IF (TEST1) PRINT *,"GapFix_Score" !TEST1

! initialize scores

  CurrDNA%FScore(1:DNAlen)=0

  DO i=1,DNAlen

! only positions that should be within a gap are of interest

    IF (.not.CurrDNA%GapFixPos(i)) CYCLE

! penalize the position if any overlap covers it

    IF (ANY((CurrDNA%OlapsPos(1:CurrDNA%NumOlaps,1).le.i).and. &
            (CurrDNA%OlapsPos(1:CurrDNA%NumOlaps,2).ge.i))) THEN
      CurrDNA%FScore(i)=10
    END IF

  END DO

! generate summary of scores

  CurrDNA%TotalFScore=SUM(CurrDNA%FScore(1:DNAlen))
  CurrDNA%TotalFScore=CurrDNA%TotalFScore*20/DNAlen

END SUBROUTINE GapFix_Score
LOGICAL FUNCTION HMatchNum(pos1,pos2,dir)
!
! Returns .TRUE. if the two MPLn-long windows starting at pos1 and pos2 are
! homologous, i.e. differ in MaxNonId or fewer positions.
!
!   dir = 1    direct comparison: nt pos1+k is compared with nt pos2+k, and
!              the pair counts as a mismatch when the NUMseq values differ.
!              A window is never homologous to itself (pos1 = pos2 gives
!              .FALSE.).
!   otherwise  inverse comparison: nt pos1+k is compared with nt
!              pos2+MPLn-1-k, and the pair counts as a mismatch when the two
!              NUMseq values do not sum to zero.
!
! The result is .FALSE. whenever a window does not fit inside 1..DNAlen, and
! also when MPLn is less than 1 (nothing to compare), so the function result
! is always defined and CurrDNA%NUMseq is never read out of range.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER,INTENT(IN) :: pos1,pos2,dir
  INTEGER :: i,a,b
  INTEGER :: ct           ! number of non-identical positions found so far

  IF (TEST3) PRINT *,"HMatchNum" !TEST3

  HMatchNum=.FALSE.
  ct=0

! Degenerate requests: empty window or a start before the first nt

  IF (MPLn.lt.1) RETURN
  IF ((pos1.lt.1).or.(pos2.lt.1)) RETURN

  IF (dir.eq.1) THEN

    IF (pos1.eq.pos2) RETURN

    DO i=1,MPLn
      a=pos1+i-1
      b=pos2+i-1
      IF ((a.gt.DNAlen).or.(b.gt.DNAlen)) RETURN
      IF (CurrDNA%NUMseq(a).ne.CurrDNA%NUMseq(b)) THEN
        ct=ct+1
        IF (ct.gt.MaxNonId) RETURN
      END IF
    END DO

  ELSE

    DO i=1,MPLn
      a=pos1+i-1
      b=pos2+MPLn-i
      IF ((a.gt.DNAlen).or.(b.gt.DNAlen)) RETURN
      IF ((CurrDNA%NUMseq(a)+CurrDNA%NUMseq(b)).ne.0) THEN
        ct=ct+1
        IF (ct.gt.MaxNonId) RETURN
      END IF
    END DO

  END IF

! Every position was compared without exceeding the mismatch limit

  HMatchNum=.TRUE.

END FUNCTION HMatchNum
SUBROUTINE Increment_Misprime_Arrays(pos1,pos2,dir)
!
! Appends one potential misprime pair to the arrays.
!
!   pos1, pos2   positions of the pair, stored in CurrDNA%M1 and CurrDNA%M2
!   dir          misprime type, stored in CurrDNA%MX
!
! CurrDNA%MN is raised by one first.  If the new count reaches MaxDNAlen,
! Stop_Program is called with a message that reports the count.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: pos1,pos2,dir
  INTEGER :: slot                 ! index of the entry being added
  CHARACTER(LEN=80) :: text

  IF (TEST2) PRINT *,"Increment_Misprime_Arrays" !TEST2

  slot=CurrDNA%MN+1
  CurrDNA%MN=slot

! Refuse to go past the allowed number of pairs

  IF (slot.ge.MaxDNAlen) THEN
    WRITE(text,FMT="('MN = ',i9,' Too many misprimes.')") slot
    CALL Stop_Program(text)
  END IF

  CurrDNA%M1(slot)=pos1
  CurrDNA%M2(slot)=pos2
  CurrDNA%MX(slot)=dir

END SUBROUTINE Increment_Misprime_Arrays
SUBROUTINE Increment_Misprime_Scores(o,m,t,j)
!
! Records one actual misprime and raises the misprime scores.
!
!   o   first position, stored in CurrDNA%MS1
!   m   second position, stored in CurrDNA%MS2
!   t   misprime type, stored in CurrDNA%MSX
!   j   overlap number, stored in CurrDNA%MOL
!
! CurrDNA%MSN is raised by one, and CurrDNA%MScore is raised by one for the
! MPLn nts starting at o and again for the MPLn nts starting at m.
!
! Before a new entry is added the routine checks that all four MS arrays
! still have room, and calls Stop_Program if they do not, in the same way
! Increment_Misprime_Arrays guards the potential-misprime arrays.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER,INTENT(IN) :: o,m,t,j
  INTEGER :: i
  INTEGER :: room         ! smallest capacity among the four MS arrays

  IF (TEST2) PRINT *,"Increment_Misprime_Scores" !TEST2

! Never write past the end of the actual-misprime arrays

  room=MIN(SIZE(CurrDNA%MS1),SIZE(CurrDNA%MS2),SIZE(CurrDNA%MSX),SIZE(CurrDNA%MOL))
  IF (CurrDNA%MSN.ge.room) CALL Stop_Program("Too many actual misprimes.")

  CurrDNA%MSN=CurrDNA%MSN+1
  CurrDNA%MS1(CurrDNA%MSN)=o
  CurrDNA%MS2(CurrDNA%MSN)=m
  CurrDNA%MSX(CurrDNA%MSN)=t
  CurrDNA%MOL(CurrDNA%MSN)=j

! Both windows of the misprime are penalized

  DO i=o,o+MPLn-1
    CurrDNA%MScore(i)=CurrDNA%MScore(i)+1
  END DO
  DO i=m,m+MPLn-1
    CurrDNA%MScore(i)=CurrDNA%MScore(i)+1
  END DO

END SUBROUTINE Increment_Misprime_Scores
SUBROUTINE Increment_Repeat_Arrays(i,j,dir)
!
! Adds another repeat pair to the arrays and updates the scores, after
! expanding the RepLen-long seed found at positions i and j as far as the
! sequence allows.
!
!   i, j   start positions of the two seed windows
!   dir    1 for a direct repeat, anything else for an inverse repeat
!
! Direct repeats are expanded to the left and to the right while the nts at
! the same offset in both copies are identical.
!
! Inverse repeats are expanded in two steps while the facing nts are
! complementary (NUMseq values of opposite sign):
!   outward:  first copy to the left,  second copy to the right
!   inward:   first copy to the right, second copy to the left
!
! The expanded pair is stored in CurrDNA%RS1, RS2, RLn and RX, CurrDNA%RN is
! raised by one (Stop_Program is called when it reaches MaxDNAlen), and
! CurrDNA%RScore is raised by one over both expanded segments.
!
! All bounds tests are made before CurrDNA%NUMseq is indexed.  Fortran does
! not guarantee short-circuit evaluation of .OR., so the tests must not share
! an expression with the array references they protect.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER,INTENT(IN) :: i,j,dir
  INTEGER :: k
  INTEGER :: pos1,pos2,length
  INTEGER :: last,diff
  INTEGER :: nOutward,nInward   ! inverse repeats: nts gained in each step
  INTEGER :: ntLo,ntHi          ! inverse repeats: the facing nts being tested

  IF (TEST2) PRINT *,"Increment_Repeat_Arrays" !TEST2

  diff=j-i

  IF (dir.eq.1) THEN

! Expand direct repeats to the left: pos2 follows the start of the second
! copy as long as the preceding nts are identical

    pos2=j
    DO k=(i-1),1,-1
      IF (CurrDNA%NUMseq(k).ne.CurrDNA%NUMseq(k+diff)) EXIT
      pos2=k+diff
    END DO
    pos1=pos2-diff

! Expand direct repeats to the right: last ends up on the first nt after the
! second copy (DNAlen+1 if the repeat runs to the end of the sequence)

    last=j+RepLen
    DO WHILE (last.le.DNAlen)
      IF (CurrDNA%NUMseq(last-diff).ne.CurrDNA%NUMseq(last)) EXIT
      last=last+1
    END DO
    length=last-pos2

  ELSE

! Expand inverse repeats outward

    nOutward=0
    outward: DO
      ntLo=i-nOutward-1
      ntHi=j+RepLen+nOutward
      IF ((ntLo.lt.1).or.(ntHi.gt.DNAlen)) EXIT outward
      IF (CurrDNA%NUMseq(ntLo).ne.(-1*(CurrDNA%NUMseq(ntHi)))) EXIT outward
      nOutward=nOutward+1
    END DO outward

! Expand inverse repeats inward

    nInward=0
    inward: DO
      ntLo=j-nInward-1
      ntHi=i+RepLen+nInward
      IF ((ntLo.lt.1).or.(ntHi.gt.DNAlen)) EXIT inward
      IF (CurrDNA%NUMseq(ntHi).ne.(-1*(CurrDNA%NUMseq(ntLo)))) EXIT inward
      nInward=nInward+1
    END DO inward

    pos1=i-nOutward
    pos2=j-nInward
    length=RepLen+nOutward+nInward

  END IF

! Store the expanded pair

  CurrDNA%RN=CurrDNA%RN+1
  IF (CurrDNA%RN.ge.MaxDNAlen) CALL Stop_Program("Too many repeats.")
  CurrDNA%RS1(CurrDNA%RN)=pos1
  CurrDNA%RS2(CurrDNA%RN)=pos2
  CurrDNA%RLn(CurrDNA%RN)=length
  CurrDNA%RX(CurrDNA%RN)=dir

! Both segments of the repeat are penalized

  DO k=pos1,(pos1+length-1)
    CurrDNA%RScore(k)=CurrDNA%RScore(k)+1
  END DO
  DO k=pos2,(pos2+length-1)
    CurrDNA%RScore(k)=CurrDNA%RScore(k)+1
  END DO

END SUBROUTINE Increment_Repeat_Arrays
SUBROUTINE Length_Score
!
! Updates CurrDNA%LScore and CurrDNA%TotalLScore.
!
! For i = 2..NumOlaps, the stretch from the start of overlap i-1 to the end
! of overlap i is examined (the first and last oligos are thereby left out,
! as noted by the original author).  If that stretch is longer than
! OligoLen by overrun nts, every nt in it gets the score (overrun+2)**2;
! where two such stretches share nts the later one overwrites the earlier.
! The total is the sum of the scores between the start of the first overlap
! and the end of the last one, times 20, divided by DNAlen.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i,j
  INTEGER :: overrun            ! number of nts by which OligoLen is exceeded
  INTEGER :: firstNt,lastNt     ! start of first overlap, end of last overlap

  IF (TEST1) PRINT *,"Length_Score" !TEST1

  firstNt=CurrDNA%OlapsPos(1,1)
  lastNt=CurrDNA%OlapsPos(CurrDNA%NumOlaps,2)

! Clear the scores in the region covered by the overlaps

  CurrDNA%LScore(firstNt:lastNt)=0

! Penalize every stretch that is too long

  DO i=2,CurrDNA%NumOlaps
    overrun=CurrDNA%OlapsPos(i,2)-CurrDNA%OlapsPos((i-1),1)-OligoLen+1
    IF (overrun.le.0) CYCLE
    CurrDNA%LScore(CurrDNA%OlapsPos((i-1),1):CurrDNA%OlapsPos(i,2))=(overrun+2)**2
  END DO

! Total length score, normalized by the sequence length

  CurrDNA%TotalLScore = 0.0
  DO j=firstNt,lastNt
    CurrDNA%TotalLScore = CurrDNA%TotalLScore + CurrDNA%LScore(j)
  END DO
  CurrDNA%TotalLScore=CurrDNA%TotalLScore*20/DNAlen

END SUBROUTINE Length_Score
SUBROUTINE Misprime_Score
!
! Determines the current mispriming score.
!
! The potential misprime arrays are refreshed only when SequenceTranslated
! is set: for the whole sequence at the beginning of a run (MutProtPos=0,
! Find_Potential_Misprimes), or through Find_Mut_Pot_Misprimes after a
! mutation.  The actual misprimes are then determined in every call, and
! CurrDNA%TotalMScore becomes the sum of MScore over all nts, times 20,
! divided by DNAlen.
!
! According to the original author, only evaluating the mutation site
! speeds up the calculation more than 10-fold.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: nt

  IF (TEST1) PRINT *,"Misprime_Score" !TEST1

! Refresh the potential misprimes if the sequence was re-translated

  IF (SequenceTranslated) THEN
    IF (MutProtPos.eq.0) THEN
      CALL Find_Potential_Misprimes
    ELSE
      CALL Find_Mut_Pot_Misprimes
    END IF
  END IF

! The actual misprimes depend on the current overlaps

  CALL Find_Actual_Misprimes

! Total mispriming score, normalized by the sequence length

  CurrDNA%TotalMScore = 0.0
  DO nt=1,DNAlen
    CurrDNA%TotalMScore=CurrDNA%TotalMScore+CurrDNA%MScore(nt)
  END DO
  CurrDNA%TotalMScore=CurrDNA%TotalMScore*20/DNAlen

END SUBROUTINE Misprime_Score
LOGICAL FUNCTION PairWithinKnownRepeat(i,j,dir)
!
! Returns .TRUE. if the pair of window starts (i,j) is already covered by a
! stored repeat of the same direction, i.e. if for some stored repeat k
!
!     RX(k) = dir
!     RS1(k) <= i <= RS1(k)+RLn(k)-RepLen
!     RS2(k) <= j <= RS2(k)+RLn(k)-RepLen
!
! Returns .FALSE. otherwise, including when no repeat is stored.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i,j,k,dir

  IF (TEST3) PRINT *,"PairWithinKnownRepeat" !TEST3

  PairWithinKnownRepeat=.FALSE.

  DO k=1,CurrDNA%RN

! wrong direction

    IF (CurrDNA%RX(k).ne.dir) CYCLE

! i outside the first segment

    IF (i.lt.CurrDNA%RS1(k)) CYCLE
    IF (i.gt.(CurrDNA%RS1(k)+CurrDNA%RLn(k)-RepLen)) CYCLE

! j outside the second segment

    IF (j.lt.CurrDNA%RS2(k)) CYCLE
    IF (j.gt.(CurrDNA%RS2(k)+CurrDNA%RLn(k)-RepLen)) CYCLE

    PairWithinKnownRepeat=.TRUE.
    RETURN

  END DO

END FUNCTION PairWithinKnownRepeat
SUBROUTINE Pattern_Score
!
! This subroutine looks through the DNA sequence and identifies the sequence
! patterns stored in PTN (restriction sites or user-input sequences,
! according to the original author).  Each time a pattern is found, the
! score of every nucleotide covered by the match is raised by one.  The
! total is the sum of PScore over all nts, times 20, divided by DNAlen.
!
! Patterns flagged as isoschizomers are skipped.  Unless a pattern is flagged
! as self-complementary, its reverse complement (SeqRC) is searched as well.
!
! There are two situations:
!
!   degenerate patterns      are compared window by window with DegCmpr
!   non-degenerate patterns  are located with the INDEX intrinsic, allowing
!                            overlapping matches
!
! In both cases a match starting at nt p covers exactly the nts
! p .. p+Len-1.
!
! This scoring evaluation uses text-based comparisons, so it will be slow...

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i,j,k
  CHARACTER(LEN=9999) :: ftext,rtext
  LOGICAL,EXTERNAL :: DegCmpr

  IF (TEST1) PRINT *,"Pattern_Score" !TEST1

  CurrDNA%TotalPScore = 0.0
  DO k=1,DNAlen
    CurrDNA%PScore(k) = 0
  END DO

  main: DO i=1,PTNnum

! skip the site if it is an isoschizomer

    IF (PTN(i)%Isoschiz) CYCLE main

    IF (PTN(i)%Degen) THEN

! degenerate site: test every window of the sequence

      ftext=PTN(i)%Seq(1:PTN(i)%Len)
      rtext=PTN(i)%SeqRC(1:PTN(i)%Len)

      deg: DO j=1,(DNAlen-PTN(i)%Len+1)
        k=j+PTN(i)%Len-1                  ! last nt of the window
        IF (DegCmpr(ftext(1:PTN(i)%Len),CurrDNA%DNAseq(j:k))) THEN
          CurrDNA%PScore(j:k)=CurrDNA%PScore(j:k)+1
        END IF
        IF (.not.PTN(i)%SelfCompl) THEN
          IF (DegCmpr(rtext(1:PTN(i)%Len),CurrDNA%DNAseq(j:k))) THEN
            CurrDNA%PScore(j:k)=CurrDNA%PScore(j:k)+1
          END IF
        END IF
      END DO deg

    ELSE

! not degenerate: forward direction, then reverse direction if needed

      CALL Mark_Exact_Matches(PTN(i)%Seq(1:PTN(i)%Len))
      IF (.not.PTN(i)%SelfCompl) THEN
        CALL Mark_Exact_Matches(PTN(i)%SeqRC(1:PTN(i)%Len))
      END IF

    END IF

  END DO main

  DO i=1,DNAlen
    CurrDNA%TotalPScore=CurrDNA%TotalPScore+CurrDNA%PScore(i)
  END DO
  CurrDNA%TotalPScore=CurrDNA%TotalPScore*20/DNAlen

CONTAINS

  SUBROUTINE Mark_Exact_Matches(pat)
!
! Raises PScore by one over every occurrence of pat in the first DNAlen
! characters of CurrDNA%DNAseq.  The search resumes one nt after the start
! of the previous hit, so overlapping occurrences are all found.

    CHARACTER(LEN=*),INTENT(IN) :: pat
    INTEGER :: from       ! position where the next search starts
    INTEGER :: hit        ! INDEX result, relative to from
    INTEGER :: at         ! absolute start of the current match

    from=1
    DO
      IF (from.gt.DNAlen) EXIT
      hit=INDEX(CurrDNA%DNAseq(from:DNAlen),pat)
      IF (hit.eq.0) EXIT
      at=from+hit-1
      CurrDNA%PScore(at:at+LEN(pat)-1)=CurrDNA%PScore(at:at+LEN(pat)-1)+1
      from=at+1
    END DO

  END SUBROUTINE Mark_Exact_Matches

END SUBROUTINE Pattern_Score
SUBROUTINE Repeat_Score
!
! Updates the repeat scores of the trial DNA sequence.
!
! At the beginning of a run (MutProtPos=0) the repeats are searched for in
! the whole sequence with Find_Repeats; otherwise Find_Mutated_Repeats is
! called.  CurrDNA%TotalRScore then becomes the sum of RScore over all nts,
! times 20, divided by DNAlen.
!
! According to the original author, the repeats found are both direct and
! inverted (RC), and only evaluating the mutation site speeds up the
! calculation more than 10-fold.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: nt

  IF (TEST1) PRINT *,"Repeat_Score" ! TEST1

! Bring the repeat arrays and RScore up to date

  SELECT CASE(MutProtPos)
    CASE(0)
      CALL Find_Repeats
    CASE DEFAULT
      CALL Find_Mutated_Repeats
  END SELECT

! Total repeat score, normalized by the sequence length

  CurrDNA%TotalRScore = 0.0
  DO nt=1,DNAlen
    CurrDNA%TotalRScore=CurrDNA%TotalRScore+CurrDNA%RScore(nt)
  END DO
  CurrDNA%TotalRScore=CurrDNA%TotalRScore*20/DNAlen

END SUBROUTINE Repeat_Score
SUBROUTINE Temp_Score
!
! Updates the melting temperature of each overlap (CurrDNA%MeltT), the Tm
! score of each overlap (CurrDNA%TScore) and the total Tm score
! (CurrDNA%TotalTScore).
!
! The score of an overlap is calculated from diff = |MeltTemp - MeltT(i)|:
!   - if diff does not exceed MeltTol, the score is 0
!   - otherwise the excess (diff - MeltTol), but at least 1.0, is squared
!     and divided by 10
!
! The total is the sum over all overlaps, times 20, divided by DNAlen.
!
  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: i
  REAL :: diff
  REAL,EXTERNAL :: TmCalc

  IF (TEST1) PRINT *,"Temp_Score" !TEST1

! Clear the scores

  CurrDNA%TotalTScore=0
  CurrDNA%TScore(1:CurrDNA%NumOlaps)=0

! Melting temperature of every overlap

  DO i=1,CurrDNA%NumOlaps
    CurrDNA%MeltT(i)=TmCalc(CurrDNA%OlapsPos(i,1),CurrDNA%OlapsPos(i,2))
  END DO

! Score every overlap and add it to the total

  DO i=1,CurrDNA%NumOlaps
    diff=ABS(MeltTemp-CurrDNA%MeltT(i))
    IF (diff.gt.MeltTol) THEN
      diff=MAX(1.0,(diff-MeltTol))
      CurrDNA%TScore(i)=(diff**2)/10
    ELSE
      CurrDNA%TScore(i)=0
    END IF
    CurrDNA%TotalTScore=CurrDNA%TotalTScore+CurrDNA%TScore(i)
  END DO

! Normalize by the sequence length

  CurrDNA%TotalTScore=CurrDNA%TotalTScore*20/DNAlen

END SUBROUTINE Temp_Score
REAL FUNCTION TmCalc(start,finish)
!
! Returns the melting temperature, in degrees Celsius, of the overlap that
! spans nucleotides start..finish of CurrDNA%NUMseq.
!
! Arguments:
!   start   first nucleotide position of the overlap
!   finish  last nucleotide position of the overlap
!
! Result:
!   0 when the overlap is 7 nt long or shorter, otherwise
!
!     Tm = 1000*dH / (dS + OligoTerm + SaltCorr*N) - Kelvin
!
!   where
!     dH        = sum of nearest-neighbor enthalpies, kcal/mol
!     dS        = sum of nearest-neighbor entropies, cal/(K*mol)
!     OligoTerm = OligoCorrSC for a self-complementary overlap, otherwise
!                 OligoCorr (module values; TmCorrect assigns them)
!     SaltCorr  = per-phosphate cation correction (module value; TmCorrect
!                 assigns it)
!     N         = number of backbone phosphates, finish-start
!     Kelvin    = module constant used to convert Kelvin to Celsius
!
! The nearest-neighbor parameters follow John SantaLucia Jr., "A unified view
! of polymer, dumbbell, and oligonucleotide DNA nearest-neighbor
! thermodynamics", Proc. Natl. Acad. Sci. USA 95(4), 1460-1465, 1998.
!
! Additional corrections:
!   - each terminal A or T adds 2.2 to dH and 6.935 to dS
!   - a self-complementary overlap subtracts 1.4 from dS
!
! Nucleotides are held as integers (A=-1, T=1, C=-3, G=3), so that a base and
! its complement sum to zero.  Any other value contributes nothing to dH/dS.
!
! This function is by far the most heavily used routine in the program.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER,INTENT(IN) :: start,finish
  INTEGER :: i
  REAL :: dh,ds
  LOGICAL :: self_compl

  IF (TEST3) PRINT *,"TmCalc" !TEST3

! Make sure the overlap is more than 7 nt long

  IF ((finish-start+1).le.7) THEN
    TmCalc=0
    RETURN
  END IF

! Initiation values

  dh=0.2
  ds=-5.68

! Sum the dH, dS values of every adjacent pair (5'->3')

  DO i=start,finish-1

    SELECT CASE(CurrDNA%NUMseq(i))
      CASE(-1)                                   ! A->
        SELECT CASE(CurrDNA%NUMseq(i+1))
          CASE(-1)                               ! AA
            dh=dh-7.9
            ds=ds-22.2473
          CASE(1)                                ! AT
            dh=dh-7.2
            ds=ds-20.38082
          CASE(-3)                               ! AC
            dh=dh-8.4
            ds=ds-22.44082
          CASE(3)                                ! AG
            dh=dh-7.8
            ds=ds-21.02469
        END SELECT
      CASE(1)                                    ! T->
        SELECT CASE(CurrDNA%NUMseq(i+1))
          CASE(-1)                               ! TA
            dh=dh-7.2
            ds=ds-21.34081
          CASE(1)                                ! TT
            dh=dh-7.9
            ds=ds-22.2473
          CASE(-3)                               ! TC
            dh=dh-8.2
            ds=ds-22.24469
          CASE(3)                                ! TG
            dh=dh-8.5
            ds=ds-22.73082
        END SELECT
      CASE(-3)                                   ! C->
        SELECT CASE(CurrDNA%NUMseq(i+1))
          CASE(-1)                               ! CA
            dh=dh-8.5
            ds=ds-22.73082
          CASE(1)                                ! CT
            dh=dh-7.8
            ds=ds-21.02469
          CASE(-3)                               ! CC
            dh=dh-8.0
            ds=ds-19.8612
          CASE(3)                                ! CG
            dh=dh-10.6
            ds=ds-27.17776
        END SELECT
      CASE(3)                                    ! G->
        SELECT CASE(CurrDNA%NUMseq(i+1))
          CASE(-1)                               ! GA
            dh=dh-8.2
            ds=ds-22.24469
          CASE(1)                                ! GT
            dh=dh-8.4
            ds=ds-22.44082
          CASE(-3)                               ! GC
            dh=dh-9.8
            ds=ds-24.37776
          CASE(3)                                ! GG
            dh=dh-8.0
            ds=ds-19.8612
        END SELECT
    END SELECT
  END DO

! Correct for A or T at the termini

  IF (ABS(CurrDNA%NUMseq(start)).eq.1) THEN
    dh=dh+2.2
    ds=ds+6.935
  END IF

  IF (ABS(CurrDNA%NUMseq(finish)).eq.1) THEN
    dh=dh+2.2
    ds=ds+6.935
  END IF

! Check for self-complementarity: position i must pair with its mirror
! position inside the overlap, which is start+finish-i (start pairs with
! finish, start+1 with finish-1, ...).  An odd-length overlap pairs its
! middle base with itself and therefore never qualifies.

  self_compl=.TRUE.
  mirror: DO i=start,finish
    IF ((CurrDNA%NUMseq(i)+CurrDNA%NUMseq(start+finish-i)).ne.0) THEN
      self_compl=.FALSE.
      EXIT mirror
    END IF
  END DO mirror

! Apply the self-complementarity and oligo concentration corrections

  IF (self_compl) THEN
    ds=ds-1.4
    ds=ds+OligoCorrSC
  ELSE
    ds=ds+OligoCorr
  END IF

! Make corrections for cation concentrations, once per backbone phosphate

  ds=ds+(SaltCorr*(finish-start))

! Now find the actual Tm (dh is in kcal/mol, ds in cal/(K*mol))

  TmCalc=(1000*dh/ds)-Kelvin

END FUNCTION TmCalc
SUBROUTINE TmCorrect
!
! Create salt and oligo corrections for Tm.
!
! Side effects on module variables:
!   OligoConc    clamped to the range 1e-9 .. 1e-4
!   SodiumConc   snapped up to the nearest tabulated value (0.010 .. 1.000);
!                values above 1.000 become 1.000
!   MgConc       snapped up to the nearest tabulated value (0 .. 0.2000);
!                values at or below zero become 0, values above 0.2000
!                become 0.2000
!   OligoCorr    RGasConstant*LOG((OligoConc/100)/2)
!   OligoCorrSC  RGasConstant*LOG(OligoConc/100)
!   SaltCorr     tabulated correction for the snapped sodium/magnesium pair
!
! The salt correction is looked up by grid index rather than by comparing
! the concentrations to constants: the concentrations are snapped to
! fractional values (0.010 .. 1.000 and 0 .. 0.2000), while the table is
! labelled with values 1000 times larger (10 .. 1000 for sodium, 0.5 .. 200
! for magnesium), so equality tests against those labels can never succeed
! and would leave SaltCorr unassigned.  Index lookup also avoids exact
! equality tests on reals.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER,PARAMETER :: NumNaSlots=11
  INTEGER,PARAMETER :: NumMgSlots=13

  ! Tabulated sodium concentrations, ascending

  REAL,PARAMETER :: NaGridM(NumNaSlots) = (/ &
    0.010,0.025,0.050,0.075,0.100,0.150,0.200,0.250,0.500,0.750,1.000 /)

  ! Tabulated magnesium concentrations, ascending

  REAL,PARAMETER :: MgGridM(NumMgSlots) = (/ &
    0.0000,0.0005,0.0010,0.0015,0.0020,0.0030,0.0040,0.0050, &
    0.0100,0.0200,0.0500,0.1000,0.2000 /)

  ! SaltCorrTable(mg_slot,na_slot): one row of 13 magnesium entries per
  ! sodium concentration.  Sorry about this, I couldn't figure out the
  ! equation.

  REAL,PARAMETER :: SaltCorrTable(NumMgSlots,NumNaSlots) = RESHAPE( (/ &
    -1.6960,-0.9125,-0.7996,-0.7287,-0.6803,-0.6094,-0.5578,-0.5191,-0.3966,-0.2741,-0.1064,0.0193,0.1451, & ! Na 0.010
    -1.3574,-0.8512,-0.7545,-0.6900,-0.6481,-0.5836,-0.5352,-0.4965,-0.3805,-0.2612,-0.1000,0.0226,0.1483, & ! Na 0.025
    -1.1027,-0.7706,-0.6868,-0.6352,-0.5965,-0.5385,-0.4965,-0.4643,-0.3547,-0.2418,-0.0871,0.0322,0.1548, & ! Na 0.050
    -0.9544,-0.6997,-0.6320,-0.5868,-0.5546,-0.5030,-0.4643,-0.4320,-0.3321,-0.2257,-0.0774,0.0419,0.1612, & ! Na 0.075
    -0.8480,-0.6449,-0.5836,-0.5449,-0.5127,-0.4675,-0.4320,-0.4030,-0.3095,-0.2096,-0.0645,0.0484,0.1677, & ! Na 0.100
    -0.6964,-0.5514,-0.5030,-0.4707,-0.4449,-0.4063,-0.3772,-0.3514,-0.2708,-0.1773,-0.0451,0.0645,0.1805, & ! Na 0.150
    -0.5933,-0.4772,-0.4385,-0.4095,-0.3901,-0.3547,-0.3289,-0.3095,-0.2354,-0.1483,-0.0226,0.0806,0.1902, & ! Na 0.200
    -0.5094,-0.4159,-0.3805,-0.3579,-0.3386,-0.3095,-0.2870,-0.2676,-0.1999,-0.1225,-0.0032,0.0935,0.1999, & ! Na 0.250
    -0.2547,-0.2031,-0.1838,-0.1709,-0.1612,-0.1419,-0.1258,-0.1129,-0.0677,-0.0129,0.0774,0.1612,0.2515, & ! Na 0.500
    -0.1064,-0.0709,-0.0580,-0.0484,-0.0387,-0.0258,-0.0161,-0.0064,0.0290,0.0709,0.1451,0.2160,0.2934, & ! Na 0.750
    0.0000,0.0258,0.0355,0.0451,0.0516,0.0613,0.0709,0.0774,0.1064,0.1419,0.2031,0.2644,0.3353 & ! Na 1.000
    /), (/ NumMgSlots,NumNaSlots /) )

  INTEGER :: grid_k,na_slot,mg_slot

  IF (TEST0) PRINT *,"TmCorrect" !TEST0

! Clamp the oligo concentration

  IF (OligoConc.lt.1e-9) OligoConc=1e-9
  IF (OligoConc.gt.1e-4) OligoConc=1e-4

! Snap sodium up to the first tabulated value that is not below it; anything
! above the largest tabulated value uses the last slot.

  na_slot=NumNaSlots
  DO grid_k=1,NumNaSlots
    IF (SodiumConc.le.NaGridM(grid_k)) THEN
      na_slot=grid_k
      EXIT
    END IF
  END DO
  SodiumConc=NaGridM(na_slot)

! Same for magnesium.  The first grid value is zero, so zero and negative
! concentrations land in the first slot.

  mg_slot=NumMgSlots
  DO grid_k=1,NumMgSlots
    IF (MgConc.le.MgGridM(grid_k)) THEN
      mg_slot=grid_k
      EXIT
    END IF
  END DO
  MgConc=MgGridM(mg_slot)

! Oligo concentration corrections (non-self-complementary and
! self-complementary)

  OligoCorr=RGasConstant*(LOG(((OligoConc/100)/2)))
  OligoCorrSC=RGasConstant*(LOG((OligoConc/100)))

! Salt correction for the snapped concentrations

  SaltCorr=SaltCorrTable(mg_slot,na_slot)

END SUBROUTINE TmCorrect
1613 |
--------------------------------------------------------------------------------
/str_func.f90:
--------------------------------------------------------------------------------
CHARACTER(LEN=80) FUNCTION CenterStr(str)

! Centers the input string within an 80 character output string.
!
! Arguments:
!   str   the text to center; trailing blanks are ignored and the argument
!         is not modified
!
! Result:
!   An 80 character string holding the text centered around column 40, with
!   columns 1 and 80 overwritten by '|'.  Text that would fall in column 1
!   or 80 is therefore replaced by the border character, and an 80 character
!   input loses its first character, which has no column to go to.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=80),INTENT(IN) :: str ! the string input
  INTEGER :: length   ! length of the string without trailing blanks
  INTEGER :: offset   ! output column just before the first character
  INTEGER :: first    ! first input character that has an output column

  IF (TEST3) PRINT *,"CenterStr" !TEST3

  length=LEN_TRIM(str)
  offset=40-((length/2)+1)

  CenterStr=""

  ! offset is -1 for an 80 character input; start at the first character
  ! whose target column is at least 1 instead of writing to column 0.

  first=MAX(1,1-offset)
  IF (first.le.length) THEN
    CenterStr((offset+first):(offset+length))=str(first:length)
  END IF

  CenterStr(1:1) = '|'
  CenterStr(80:80) = '|'

END FUNCTION CenterStr
SUBROUTINE ComplStr(str)
!
! Replaces, in place, every recognised nucleotide code in str (up to its
! last non-blank character) by the code of its DNA complement.  Only upper
! case codes are recognised; every other character is left as it is.
!
!   code  meaning                 complement
!   A     A                       T
!   C     C                       G
!   G     G                       C
!   T     T                       A
!   M     A or C                  K
!   R     A or G                  Y
!   W     A or T                  W
!   S     C or G                  S
!   Y     C or T                  R
!   K     G or T                  M
!   V     A or C or G             B
!   H     A or C or T             D
!   D     A or G or T             H
!   B     C or G or T             V
!   N     A or C or G or T        N

  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=*) :: str

  ! Character k of nt_codes is complemented by character k of nt_compl

  CHARACTER(LEN=15),PARAMETER :: nt_codes="ATGCMRWSYKVHDBN"
  CHARACTER(LEN=15),PARAMETER :: nt_compl="TACGKYWSRMBDHVN"

  INTEGER :: k,slot,last

  IF (TEST3) PRINT *,'ComplStr'

  last=LEN_TRIM(str)

  DO k=1,last
    slot=INDEX(nt_codes,str(k:k))
    IF (slot.gt.0) str(k:k)=nt_compl(slot:slot)
  END DO

END SUBROUTINE ComplStr
INTEGER FUNCTION NT2Int(nt)
!
! Converts a nt into an integer.
!
! Arguments:
!   nt   a single upper case nucleotide character
!
! Result:
!   A = -1, T = 1, C = -3, G = 3, so that a base and its complement sum to
!   zero.  Any other character gives 0, so the result is always defined.

  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=1),INTENT(IN) :: nt

  IF (TEST3) PRINT *,"NT2Int" !TEST3

  SELECT CASE(nt(1:1))      ! convert sequence to num representation
    CASE('A')
      NT2Int=-1
    CASE('T')
      NT2Int=1
    CASE('C')
      NT2Int=-3
    CASE('G')
      NT2Int=3
    CASE DEFAULT
      ! Not one of the four bases: return a defined value rather than
      ! leaving the function result unset.
      NT2Int=0
  END SELECT

END FUNCTION NT2Int
SUBROUTINE RevComplStr(str)
!
! Turns str into its reverse complement in place: the string is first
! reversed by RevStr, and the reversed string is then complemented by
! ComplStr.

  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=*) :: str

  IF (TEST3) THEN
    PRINT *,'RevComplStr'
  END IF

  ! Step 1: reverse

  CALL RevStr(str)

  ! Step 2: complement

  CALL ComplStr(str)

END SUBROUTINE RevComplStr
SUBROUTINE RevStr(str)
!
! Reverses, in place, the part of str that ends at its last non-blank
! character.  The reversed text is assembled in the module buffer SCRATCH
! and then copied back, so SCRATCH is overwritten as a side effect.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=*) :: str
  INTEGER :: src,dst,used

  IF (TEST3) PRINT *,'RevStr'

  used=LEN_TRIM(str)

  ! Walk the input from its last used character to its first, filling
  ! SCRATCH from the front.

  dst=0
  DO src=used,1,-1
    dst=dst+1
    SCRATCH(dst:dst)=str(src:src)
  END DO

  str=SCRATCH(1:used)

END SUBROUTINE RevStr
INTEGER FUNCTION StrToInt(str)
!
! Converts a string into an integer.  All spaces are assigned zero.
!
! Arguments:
!   str   the text to convert.  NOTE: the argument is modified: every
!         character that is neither a digit nor '-' is blanked, and the
!         result is right-justified within str.
!
! Result:
!   The digits of the cleaned string read as a decimal number.  A blank or a
!   '-' lying between digits occupies a decimal place with value zero.  The
!   result is negated when the string contains a '-' anywhere.  An empty or
!   digit-free string gives 0.
!
! The place multiplier is only advanced while there are characters left to
! convert, so a long, mostly blank string (for example an 80 character
! buffer holding "42") no longer overflows the multiplier.

  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=*) :: str
  INTEGER :: i,strlen,val,a,j,first

  IF (TEST3) PRINT *,"StrToInt"

! initial values

  j=1
  StrToInt=0

! find length of string

  strlen=LEN(str)

! convert all non-numerical characters to spaces, except for '-' sign

  DO i=1,strlen
    a=IACHAR(str(i:i))
    IF (.not.((a.ge.48.and.a.le.57).or.(str(i:i).eq."-"))) str(i:i)=" "
  END DO

! shift string to right

  str=ADJUSTR(str)

! position of the leftmost non-blank character (0 if there is none)

  first=VERIFY(str," ")

! convert to integer, going from right to left and stopping at the leftmost
! non-blank character; everything to its left is blank and contributes zero

  IF (first.gt.0) THEN
    DO i=strlen,first,-1
      a=IACHAR(str(i:i))
      IF (a.ge.48.and.a.le.57) THEN
        val=a-48
      ELSE
        val=0
      END IF
      StrToInt=(val*j)+StrToInt
      IF (i.gt.first) j=j*10
    END DO
  END IF

! find sign

  IF ((INDEX(str,'-')).gt.0) StrToInt=StrToInt*(-1)

END FUNCTION StrToInt
REAL FUNCTION StrToReal(str)
!
! Converts a string into a real number.  Blanks are ignored.
!
! The string is cleaned in place (only digits, '-', '.', 'e' and 'E' are
! kept, everything else becomes a blank) and left-justified.  The number is
! then assembled from three integer pieces, each converted by StrToInt:
!   - the exponent following the first 'e' (or, when there is no 'e', the
!     first 'E'), used as a power of ten
!   - the digits in front of the decimal point
!   - the digits behind the decimal point, scaled by their number of places
! The result is negative when the cleaned string starts with '-'.
!
! Because the pieces are handed to StrToInt as substrings of str, StrToInt's
! own clean-up is applied to those substrings of the argument as well.

  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=*) :: str
  CHARACTER(LEN=14),PARAMETER :: keepers="0123456789-.eE"
  INTEGER :: k,last,dot,epos
  INTEGER,EXTERNAL :: StrToInt
  REAL :: whole,frac,place,sgn,scale

  IF (TEST3) PRINT *,"StrToReal" !TEST3

! blank out every character that cannot be part of a number

  DO k=1,LEN(str)
    IF (SCAN(str(k:k),keepers).eq.0) str(k:k)=" "
  END DO

! shift string to left

  str=ADJUSTL(str)
  last=LEN_TRIM(str)

! find sign

  sgn=1
  IF (str(1:1).eq.'-') sgn=-1

! find exponent, if there: lower case 'e' takes precedence over 'E'

  epos=INDEX(str,'e')
  IF (epos.le.0) epos=INDEX(str,'E')

  IF (epos.gt.0) THEN
    scale=(REAL(10))**(REAL(StrToInt(str((epos+1):last))))
    last=epos-1                ! the mantissa ends just before the exponent
  ELSE
    scale=1
  END IF

! find decimal position

  dot=INDEX(str,'.')

! find values

  IF (dot.gt.0) THEN
    whole=REAL(ABS(StrToInt(str(1:(dot-1)))))
    place=(REAL(1))/((REAL(10))**(last-dot))
    frac=(REAL(ABS(StrToInt(str((dot+1):last)))))*place
  ELSE
    whole=REAL(ABS(StrToInt(str(1:last))))
    frac=0
  END IF

  StrToReal=(whole+frac)*sgn*scale

END FUNCTION StrToReal
SUBROUTINE ToLowerCase(str)

! Converts, in place, every character of str (up to its last non-blank
! character) whose character code lies in 65..90 to the character whose
! code is 32 higher.  All other characters are left unchanged.

  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=*) :: str
  INTEGER :: k,code

  IF (TEST3) PRINT *,'ToLowerCase'

  DO k=1,LEN_TRIM(str)
    code=ICHAR(str(k:k))
    SELECT CASE(code)
      CASE(65:90)
        str(k:k)=CHAR(code+32)
    END SELECT
  END DO

END SUBROUTINE ToLowerCase
SUBROUTINE ToUpperCase(str)

! Converts, in place, every character of str (up to its last non-blank
! character) whose character code lies in 97..122 to the character whose
! code is 32 lower.  All other characters are left unchanged.

  USE dnaworks_test
  IMPLICIT NONE

  CHARACTER(LEN=*) :: str
  INTEGER :: k,code

  IF (TEST3) PRINT *,'ToUpperCase'

  DO k=1,LEN_TRIM(str)
    code=ICHAR(str(k:k))
    SELECT CASE(code)
      CASE(97:122)
        str(k:k)=CHAR(code-32)
    END SELECT
  END DO

END SUBROUTINE ToUpperCase
320 |
--------------------------------------------------------------------------------
/time_func.f90:
--------------------------------------------------------------------------------
CHARACTER(LEN=10) FUNCTION CurrentDate()
!
! Returns the current date as MM/DD/YYYY in a 10 character string.  Month
! and day are always written with two digits (leading zero when below 10).

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: values(8)      ! values(1)=year, values(2)=month, values(3)=day

  IF (TEST3) PRINT *,"CurrentDate" !TEST3

  CALL DATE_AND_TIME(VALUES=values)

  ! i2.2 pads month and day with a leading zero

  WRITE(UNIT=CurrentDate,FMT="(i2.2,'/',i2.2,'/',i4)") &
    values(2),values(3),values(1)

END FUNCTION CurrentDate
CHARACTER(LEN=6) FUNCTION CurrentDateNice()
!
! Returns the current date as YYMMDD in a 6 character string: the last two
! digits of the year, then the month and the day, each written with two
! digits.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: values(8)      ! values(1)=year, values(2)=month, values(3)=day

  IF (TEST3) PRINT *,"CurrentDateNice" !TEST3

  CALL DATE_AND_TIME(VALUES=values)

  ! MOD(year,100) keeps the last two digits of a four digit year

  WRITE(UNIT=CurrentDateNice,FMT="(3i2.2)") &
    MOD(values(1),100),values(2),values(3)

END FUNCTION CurrentDateNice
CHARACTER(LEN=8) FUNCTION CurrentTime()
!
! Returns the current time as HH:MM:SS in an 8 character string.  Hours,
! minutes and seconds are always written with two digits.

  USE dnaworks_data
  USE dnaworks_test
  IMPLICIT NONE

  INTEGER :: values(8)      ! values(5)=hour, values(6)=minute, values(7)=second

  IF (TEST3) PRINT *,"CurrentTime" !TEST3

  CALL DATE_AND_TIME(VALUES=values)

  ! i2.2 pads each field with a leading zero, including the value 0

  WRITE(UNIT=CurrentTime,FMT="(i2.2,':',i2.2,':',i2.2)") &
    values(5),values(6),values(7)

END FUNCTION CurrentTime
INTEGER FUNCTION CurrentTimeSeconds()
!
! Returns the time in seconds since Wed Dec 31 19:00:00 1969, adjusting for
! leap years.  However, there is NO adjustment for daylight saving time.
!
! The count is built from the local date and time reported by
! DATE_AND_TIME: a fixed offset up to 1/1/2001, whole years since 2001
! (plus one day for every fourth year elapsed), whole months of the current
! year, whole days of the current month, and the time of day.  A year is
! treated as a leap year when it is divisible by 4.

  USE dnaworks_test
  IMPLICIT NONE

  ! seconds from 12/31/1969 19:00:00 to 1/1/2001 00:00:00

  INTEGER,PARAMETER :: base_2001=978325200

  ! days of a non-leap year that precede the first day of each month

  INTEGER,PARAMETER :: days_before(12) = &
    (/ 0,31,59,90,120,151,181,212,243,273,304,334 /)

  INTEGER :: values(8),total,years

  IF (TEST3) PRINT *,"CurrentTimeSeconds" !TEST3

! find current date and time

  CALL DATE_AND_TIME(VALUES=values)

! whole years since 2001, with one extra day per four elapsed years

  years=values(1)-2001
  total=base_2001+(years*(365*86400))+((years/4)*86400)

! whole months of the current year (nothing is added for a month number
! outside 1..12)

  IF (values(2).ge.1) THEN
    IF (values(2).le.12) total=total+(days_before(values(2))*86400)
  END IF

! correct for leap day in February

  IF (MOD(values(1),4).eq.0) THEN
    IF (values(2).gt.2) total=total+86400
  END IF

! add the days of this month and the time of day

  CurrentTimeSeconds=total+((values(3)-1)*86400)+(values(5)*3600)+(values(6)*60)+values(7)

END FUNCTION CurrentTimeSeconds
218 |
--------------------------------------------------------------------------------