├── README.md
├── Wolff_Presentations
    ├── Feng-Doolittle.pdf
    ├── Gotoh.pdf
    ├── Needleman-Wunsch n=3.pdf
    ├── Needleman-Wunsch.pdf
    ├── Nussinov.pdf
    ├── SumOfPairs.pdf
    └── UPGMA_WPGM.pdf
├── bin
    ├── algorithmsInBioinformatics.py
    ├── blosum62.txt
    └── fengDoolittle.fas
├── report.pdf
└── source
    ├── .idea
        ├── .name
        ├── encodings.xml
        ├── misc.xml
        ├── modules.xml
        ├── scopes
        │   └── scope_settings.xml
        ├── source.iml
        ├── vcs.xml
        └── workspace.xml
    ├── lib
        ├── __init__.py
        ├── helper
        │   ├── IOHelper.py
        │   ├── __init__.py
        │   ├── mathHelper.py
        │   ├── multipleAlignmentHelper.py
        │   ├── pairwiseAlignmentHelper.py
        │   └── test
        │   │   └── IOHelperTest.py
        ├── multiple
        │   ├── __init__.py
        │   ├── fengDoolittle.py
        │   ├── needlemanWunschN3.py
        │   ├── sumOfPairs.py
        │   ├── test
        │   │   ├── fengDoolittleTest.py
        │   │   ├── needlemanWunschN3Test.py
        │   │   ├── sumOfPairsTest.py
        │   │   └── upgmaWpgmaTest.py
        │   └── upgmaWpgma.py
        ├── pairwise
        │   ├── __init__.py
        │   ├── gotoh.py
        │   ├── needlemanWunsch.py
        │   └── test
        │   │   ├── __init__.py
        │   │   ├── gotohTest.py
        │   │   └── needlemanWunschTest.py
        └── structurePrediction
        │   ├── __init__.py
        │   ├── nussinov.py
        │   └── test
        │       └── nussinovTest.py
    └── sequences


/README.md:
--------------------------------------------------------------------------------
 1 | # Algorithms In Bioinformatics
 2 | To run the algorithms execute the file "algorithmsInBioinformatics.py" in the folder source/bin.
 3 | 
 4 | ## Parameters
 5 | 
 6 | #### Help
 7 |   -h, --help            
 8 | 
 9 |   Show this help message and exit
10 | 
11 | #### Algorithms
12 | 
13 |   -a {nw,gotoh,nw3,fengDoolittle,sumOfPairs,upgma,wpgma,nussinov}, 
14 | 
15 |   --algorithm {nw,gotoh,nw3,fengDoolittle,sumOfPairs,upgma,wpgma,nussinov}
16 | 
17 |   Define which algorithm should be executed. Options are: 
18 | 
19 |   * 'nw' for the algorithm of Needleman and Wunsch.
20 |   * 'gotoh' for the algorithm of Osamu Gotoh.
21 |   * 'nw3' for the Needleman-Wunsch algorithm with three sequences. 
22 |   * 'fengDoolittle' for the heuristic multiple sequence alignment algorithm by Da-Fei Feng and Russell F. Doolittle.
23 |   * 'sumOfPairs' for the scoring of a multiple sequence alignment by Humberto Carrillo and David Lipman.
24 |   * 'upgma' or 'wpgma' for the clustering methods that generate phylogenetic trees.
25 |   * 'nussinov' for the RNA secondary structure prediction algorithm by Ruth
26 |   Nussinov.
27 | 
28 | #### Input file
29 | 
30 |   -f INPUTFILE, --inputFile INPUTFILE
31 | 
32 |   Define the file that contains the input sequences. It has to be in FASTA format.
33 | 
34 | #### Output file
35 | 
36 |   -o OUTPUTFILE, --outputFile OUTPUTFILE
37 |   
38 |   Define in which file the output should be written. If
39 |   not defined, it is written to "outputFile.fas" in the
40 |   local directory.
41 | 
42 | #### Weight function
43 | 
44 |   -w WEIGHTFUNCTION, --weightFunction WEIGHTFUNCTION
45 |   
46 |   Name of a weight function defined in class
47 |   PairwiseAlignmentHelper.
48 | 
49 | #### Gap costs    
50 | 
51 |   -gc GAPCOST, --gapCost GAPCOST
52 |   
53 |   Name of a gap function defined in class PairwiseAlignmentHelper.
54 | 
55 | #### Number of solutions     
56 | 
57 |   --numberOfSolutions NUMBEROFSOLUTIONS
58 | 
59 |   Define the number of optimal solutions the Needleman-Wunsch algorithm should compute.
60 | 
61 | #### Output format    
62 | 
63 |   --outputFormat {graphML,newickTree}
64 | 
65 |   Define the output format of the output file. This function is only parsed if you choose 'upgma' or 'wpgma' as an algorithm. Default is Newick tree.
66 | 
67 | #### similarity score   
68 | 
69 |   --scoring SIMILARITYSCORE
70 | 
71 |   Name of a similarity score defined in class PairwiseAlignmentHelper.
72 | 
73 | ## Support
74 | 
75 | If you are having issues, please let me know. Mail address: wolffj[at]informatik[dot]uni-freiburg[dot]de


--------------------------------------------------------------------------------
/Wolff_Presentations/Feng-Doolittle.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/Wolff_Presentations/Feng-Doolittle.pdf


--------------------------------------------------------------------------------
/Wolff_Presentations/Gotoh.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/Wolff_Presentations/Gotoh.pdf


--------------------------------------------------------------------------------
/Wolff_Presentations/Needleman-Wunsch n=3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/Wolff_Presentations/Needleman-Wunsch n=3.pdf


--------------------------------------------------------------------------------
/Wolff_Presentations/Needleman-Wunsch.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/Wolff_Presentations/Needleman-Wunsch.pdf


--------------------------------------------------------------------------------
/Wolff_Presentations/Nussinov.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/Wolff_Presentations/Nussinov.pdf


--------------------------------------------------------------------------------
/Wolff_Presentations/SumOfPairs.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/Wolff_Presentations/SumOfPairs.pdf


--------------------------------------------------------------------------------
/Wolff_Presentations/UPGMA_WPGM.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/Wolff_Presentations/UPGMA_WPGM.pdf


--------------------------------------------------------------------------------
/bin/algorithmsInBioinformatics.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | # Copyright 2014 Joachim Wolff
  3 | # Programming Course: Algorithms in Bioinformatics
  4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
  5 | # Winter semester 2014/2015
  6 | #
  7 | # Chair of Bioinformatics
  8 | # Department of Computer Science
  9 | # Faculty of Engineering
 10 | # Albert-Ludwig-University Freiburg im Breisgau
 11 | #
 12 | # main class
 13 | 
 14 | import argparse
 15 | import os, sys
 16 | if os.name == "posix":
 17 |     lib_path = os.path.abspath('../lib')
 18 | elif os.name == "nt":
 19 |     lib_path = os.path.abspath('..\lib')
 20 | sys.path.append(lib_path)
 21 | 
 22 | from helper import IOHelper as io
 23 | from helper import MultipleAlignmentHelper as mah
 24 | from pairwise import NeedlemanWunsch as nw
 25 | from pairwise import Gotoh
 26 | from multiple import NeedlemanWunschN3 as NW3
 27 | from multiple import UpgmaWpgma
 28 | from multiple import FengDoolittle
 29 | from multiple import SumOfPairs
 30 | from structurePrediction import Nussinov
 31 | def main():
 32 |     """Method to parse the arguments and start the defined algorithms."""
 33 |     parser = argparse.ArgumentParser()
 34 |     parser.add_argument("-a", "--algorithm",
 35 |                         choices=["nw", "gotoh", "nw3", "fengDoolittle", "sumOfPairs","upgma", "wpgma", "nussinov"],
 36 |                         required=True,
 37 |         help="Define which algorithm should be executed. "
 38 |              "\nOptions are: 'nw' for the algorithm of Needleman and Wunsch,\n"
 39 |              "'gotoh' for the algorithm of Osamu Gotoh, \n"
 40 |              "'nw3' for the Needleman-Wunsch algorithm with three sequences, \n"
 41 |             "'fengDoolittle' for the heuristic multiple sequence alignment algorithm by Da-Fei Feng and Russell F. Doolittle,"
 42 |             "'sumOfPairs' for the scoring of a multiple sequence alignment by Humberto Carrillo and David Lipman."
 43 |              "'upgma' or 'wpgma' is a clustering method to generate phylogenetic trees, \n"
 44 |              "'nussinov' for the RNA secondary structure prediction algorithm by Ruth Nussinov.")
 45 |     parser.add_argument("-f", "--inputFile", dest="inputFile",
 46 |                         help="Define the file in which the input sequences are defined. It have to be in fasta-format.")
 47 |     parser.add_argument("-o", "--outputFile", help="Define in which file the output should be written. "
 48 |                                         "If not defined, it is written to \"outputFile.fas\" in the local directory.")
 49 |     parser.add_argument("-gc", "--gapCost", dest="gapCost",
 50 |                         help="Name of a gap function definde in class PairwiseAligmentHelper.")
 51 |     parser.add_argument("--numberOfSolutions", dest="numberOfSolutions",
 52 |                         help="Define the number of optimal solutions the Needleman-Wunsch algorithm should compute.")
 53 |     parser.add_argument("--outputFormat", dest="outputFormat", choices=["graphML", "newickTree"],
 54 |                         help="Define the output format of the output file. "
 55 |                              "This function is only parsed if you choose 'upgma' or 'wpgma' as an algorithm. Default is"
 56 |                              " Newick tree")
 57 |     parser.add_argument("--scoring", dest="similarityScore",
 58 |                         help="Name of a similarity score defined in class PairwiseAligmentHelper. Per default "
 59 |                              "\"pam\" and \"blosum\" (pam250 and blosum62) are implemented. Feel free to extend, you can find the "
 60 |                              "file \"PairwiseAligmentHelper.py\" in lib/helper. If this option is not defined, the pam250 matrix is choosen.")
 61 |     parser.add_argument("--gapPenalty", dest="gapPenalty", help="Define a gap penalty. Default for pam is 8 and blosum 6.")
 62 |     args = parser.parse_args()
 63 | 
 64 |     outputFile = ""
 65 |     weightFunction = ""
 66 |     if args.outputFile:
 67 |         outputFile = args.outputFile
 68 |     if args.similarityScore:
 69 |         weightFunction = args.similarityScore
 70 | 
 71 |     sequences = getSequencesFromFile(args.inputFile)
 72 |     if len(sequences) > 1:
 73 | 
 74 |         # pairwise alignment
 75 |         if args.algorithm == "nw":
 76 |             if outputFile == "":
 77 |                 outputFile = "needlemanWunsch.fas"
 78 |             if weightFunction == "":
 79 |                 weightFunction = "pam"
 80 |             numberOfSolutions = -1
 81 |             if args.numberOfSolutions:
 82 |                 numberOfSolutions = args.numberOfSolutions
 83 |             needlemanWunsch(sequences[0:2], scoreFunction = weightFunction, outputFile = outputFile, numberOfSolutions=numberOfSolutions)
 84 | 
 85 |         elif args.algorithm == "gotoh":
 86 |             if outputFile == "":
 87 |                 outputFile = "gotoh.fas"
 88 |             if weightFunction == "":
 89 |                 weightFunction = "pam"
 90 |             gapCost = "gapCost"
 91 |             if args.gapCost:
 92 |                 gapCost = args.gapCost
 93 |             gotoh(sequences[0:2], scoreFunction = weightFunction, costFunction =  gapCost, outputFile = outputFile)
 94 | 
 95 |         # multiple alignment
 96 | 
 97 |         elif args.algorithm == "upgma" or args.algorithm == "wpgma":
 98 |             newickTree = True
 99 |             if args.outputFormat == "graphML":
100 |                 newickTree = False
101 |             if outputFile == "":
102 |                 if args.algorithm == "upgma":
103 |                     outputFile = "upgma"
104 |                 else:
105 |                     outputFile = "wpgma"
106 |             upgmaWpgma(args.algorithm == "upgma", sequences, outputFile, newickTree)
107 | 
108 |         elif args.algorithm == "fengDoolittle":
109 |             if outputFile == "":
110 |                 outputFile = "fengDoolittle.fas"
111 |             if weightFunction == "":
112 |                 weightFunction = "pam"
113 |             similarityScore = "pam"
114 |             if args.similarityScore:
115 |                 similarityScore = args.similarityScore
116 |             fengDoolittle(sequences, weightFunction, similarityScore, outputFile)
117 |         elif args.algorithm == "sumOfPairs":
118 |             similarityScore = "pam"
119 |             if args.similarityScore:
120 |                 similarityScore = args.similarityScore
121 |             if args.gapPenalty:
122 |                 sumOfPairs(sequences, similarityScore, args.gapPenalty)
123 |             else:
124 |                 sumOfPairs(sequences, similarityScore)
125 | 
126 |         elif args.algorithm == "nw3":
127 |             if not (len(sequences) == 3):
128 |                 print "Wrong number of input sequences. Needleman-Wunsch n=3 needs exactly three sequences; ", \
129 |                     len(sequences) , " sequences are given."
130 |                 sys.exit()
131 |             if weightFunction == "":
132 |                 weightFunction = "pam"
133 |             if outputFile == "":
134 |                outputFile = "nw3.fas"
135 |             needlemanWunschN3(sequences[0:3], weightFunction = weightFunction, outputFile = outputFile)
136 | 
137 |     # multiple alignment
138 | 
139 |     elif len(sequences) == 1:
140 |         # structure prediction
141 |         if args.algorithm == "nussinov":
142 |             if outputFile == "":
143 |                 outputFile = "nussinov.dotBracket"
144 |             nussinov(sequences[0:1], outputFile)
145 |         else:
146 |             print "You have defined only one input sequence, but your defined algorithm \'",\
147 |                 args.algorithm, "\' needs at least two sequences."
148 |     else:
149 |         print "No sequences in input file defined."
150 |         sys.exit(0)
151 | def getSequencesFromFile(inputFile):
152 |     """Parse the input file to get the sequences. Returns the sequences as an array.
153 |             inputFile:  A fasta format file with the input sequences."""
154 |     sequences = io().readFastaFile(inputFile)
155 |     return sequences
156 | def needlemanWunsch(sequences, scoreFunction, outputFile, numberOfSolutions):
157 |     """Executes the Needleman-Wunsch algorithm with a default score function defined as: a == b -> 0 and a !=b --> 1.\n
158 |     Stores the alignments per default in file needlemanWunsch.fas.
159 |     To change the score function define a function in class PairwiseAligmentHelper and define the name as an input paramter.
160 |         scoreFunction:      The name of the weigh function which is defined in class PairwiseAligmentHelper.
161 |         outputFile:         The path to the output file.
162 |         numberOfSolutions:  Maximal number of optimal solutions which should be computed."""
163 |     print "\nThe following sequences are given:"
164 |     for i in sequences:
165 |         print i
166 |     print "\nComputing solution...\n\n"
167 |     result = nw().compute(sequences, scoreFunction, int(numberOfSolutions), scoringValue=True)
168 |     print "\nScore: ", result[1]
169 |     print "Number of optimal solutions: ", len(result[0])
170 |     print "\nOne solution is:\n", result[0][0][0], "\n", result[0][0][1]
171 |     print "\nFor more solutions look in the file \"needlemanWunsch.fas\" in the bin directory.\n"
172 |     io().writeFastaFile(result[0], outputFile)
173 | def gotoh(sequences, scoreFunction="weightFunctionDifference", costFunction="gapCost", outputFile="gotoh.fas"):
174 |     """Executes the Gotoh algorithm with a default score function defined as: a == b -> 0 and a !=b --> 1 and a cost function defined as: g(x) = 2 + k.\n
175 |     Stores the alignments per default in file gotoh.fas.
176 |     To change the score or cost function define a function in class PairwiseAligmentHelper and define the name as an input paramter.
177 |         scoreFunction:  The name of the weigh function which is defined in class PairwiseAligmentHelper.
178 |         costFunction:   The name of the gap cost function which is defined in class PairwiseAligmentHelper.
179 |         outputFile:     The path to the output file.
180 |         """
181 |     print "The following sequences are given:"
182 |     for i in sequences:
183 |         print i
184 |     print "Computing solution..."
185 |     gotoh = Gotoh(sequences[0], sequences[1], scoreFunction, costFunction)
186 |     result = gotoh.compute()
187 |     io().writeFastaFile(result, outputFile)
188 |     print "Number of solutions: ", len(result)
189 |     print "Score:", max(gotoh.computationMatrix[0][-1][-1], max(gotoh.computationMatrix[1][-1][-1], gotoh.computationMatrix[2][-1][-1]))
190 |     print "One solution is:\n", result[0][0], "\n", result[0][1]
191 |     print "For more solutions look in the file \"gotoh.fas\" in the bin directory."
192 | 
193 | def needlemanWunschN3(sequences, weightFunction="weightFunctionDifference", outputFile="nw3.fas"):
194 |     """Executes the Needleman-Wunsch algorithm with three sequences"""
195 |     print "\nThe following sequences are given:"
196 |     for i in sequences:
197 |         print i
198 |     print "\nComputing solution...\n\n"
199 |     nw3 = NW3(sequences[0], sequences[1], sequences[2], weightFunction)
200 |     result = nw3.execute()
201 | 
202 |     io().writeFastaFile(result, outputFile)
203 |     print "\nScore: ", nw3.computation_matrix[-1][-1][-1]
204 |     print "Number of optimal solutions: ", len(result)
205 |     print "\nOne solution is:\n", result[0][0], "\n", result[0][1], "\n", result[0][2]
206 |     print "\nFor more solutions look in the file \"nw3.fas\" in the bin directory.\n"
207 | 
208 | def upgmaWpgma(upgmaWpgma, sequences, outputFile, fileFormat):
209 |     """Executes the a phylogenetic clustering with a upgm or wpgm weighting.
210 |         sequences:  All defined input sequences as a list.
211 |         outputFile: The name of the output file
212 |         fileFormat: The file format of the output file"""
213 |     #create
214 |     print "The following sequences are given:"
215 |     for i in sequences:
216 |         print i
217 |     print "Computing clustering..."
218 |     data = mah().createDataForUpgmaWpgma(sequences)
219 |     if upgmaWpgma:
220 |         upgma = UpgmaWpgma(data[0], len(data[1]))
221 |         upgma.compute_clustering()
222 |         if not fileFormat:
223 |             outputFile += ".graphML"
224 |             io().writeGraphMLFile(upgma.mapping, outputFile)
225 |             print "Clustering written as graphML file: ", os.path.abspath(outputFile)
226 |         else:
227 |             outputFile += ".newickTree"
228 |             cluster = upgma.get_newick_tree(with_edge_weights=True)
229 |             io().writeNewickTree(cluster, outputFile)
230 |             print "Computed upgma cluster: ", cluster
231 |             print "The clustering was also written to: ", os.path.abspath(outputFile)
232 |     else:
233 |         wpgma = UpgmaWpgma(data[0], len(data[1]), False, data[2])
234 |         wpgma.compute_clustering()
235 |         if not fileFormat:
236 |             outputFile += ".graphML"
237 |             io().writeGraphMLFile(wpgma.mapping, outputFile)
238 |             print "Clustering written as graphML file: ", os.path.abspath(outputFile)
239 |         else:
240 |             outputFile += ".newickTree"
241 |             cluster = wpgma.get_newick_tree(with_edge_weights=True)
242 |             io().writeNewickTree(cluster, outputFile)
243 |             print "Computed wpgma cluster: ", cluster
244 |             print "The clustering was also written to: ", os.path.abspath(outputFile)
245 | 
246 | 
247 | def nussinov(sequence, outputFile):
248 |     """Executes the RNA-folding algorithm from Nussinov.
249 |         sequence:   The RNA-sequnce as a list.
250 |         outputFile: The name of the output file."""
251 |     print "\nThe following sequence is given:"
252 |     print sequence[0]
253 |     print "\n"
254 |     nussinov = Nussinov(sequence[0])
255 |     nussinov.execute()
256 |     print "\nDot-bracket: "
257 |     io().writeRnaDotBracketNotation(sequence[0], nussinov.pairedBases, outputFile)
258 |     print "The result was also written to: ", os.path.abspath(outputFile)
259 | 
260 | def sumOfPairs(sequences, scoringFunction, gapPenalty=-1):
261 |     """This method scores a multiple sequence alignment with the sum of pairs algorithm.
262 |         sequences:          The multiple sequence alignment.
263 |         scoringFunction:    Name of a similarity score defined in class PairwiseAligmentHelper."""
264 |     print "The following sequences are given:"
265 |     for i in sequences:
266 |         print i
267 |     if gapPenalty == -1:
268 |         sof = SumOfPairs(sequences, scoringFunction)
269 |     else:
270 |         sof = SumOfPairs(sequences, scoringFunction, gapPenalty)
271 |     print "Sum-of-pairs scoring: ", sof.execute()
272 | def fengDoolittle(sequences, weightFunction, similarityScore, outputFile):
273 |     """Executes the heuristic multiple sequence alignment by Feng and Doolittle.
274 |         sequences:          All input sequnces to align.
275 |         weightFunction:     The weight function defined in class PairwiseAlignmentHelper for the Needleman-Wunsch algorithm to compute the optimal local alignment.
276 |         similarityScore:    Name of a similarity score defined in class PairwiseAligmentHelper.
277 |         outputFile:         The output file name."""
278 |     fd = FengDoolittle(sequences, weightFunction, similarityScore)
279 |     alignmentDict = fd.computeMultipleAlignment()
280 |     alignment = [[]]
281 |     for i in alignmentDict:
282 |         alignment[0].append(alignmentDict[i])
283 |     io().writeFastaFile(alignment, outputFile)
284 |     print "Input sequences:\n"
285 |     for i in sequences:
286 |         print i
287 |     print "\nAlignment:"
288 |     for i in alignmentDict:
289 |         print alignmentDict[i]
290 |     print sumOfPairs(alignment[0], weightFunction)
291 | 
292 | 
293 | if __name__ == "__main__":
294 |     # Script entry point. Errors are not caught, so they surface with a traceback.
295 |     main()
296 |     # Found a bug? Please write an email with your input parameters to
297 |     # wolffj@informatik.uni-freiburg.de.


--------------------------------------------------------------------------------
/bin/blosum62.txt:
--------------------------------------------------------------------------------
 1 | #  Matrix made by matblas from blosum62.iij
 2 | #  * column uses minimum score
 3 | #  BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
 4 | #  Blocks Database = /data/blocks_5.0/blocks.dat
 5 | #  Cluster Percentage: >= 62
 6 | #  Entropy =   0.6979, Expected =  -0.5209
 7 | 
 8 |    A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *
 9 | A  4 -1 -2 -2  0 -1 -1  0 -2 -1 -1 -1 -1 -2 -1  1  0 -3 -2  0 -4
10 | R -1  5  0 -2 -3  1  0 -2  0 -3 -2  2 -1 -3 -2 -1 -1 -3 -2 -3 -4
11 | N -2  0  6  1 -3  0  0  0  1 -3 -3  0 -2 -3 -2  1  0 -4 -2 -3 -4
12 | D -2 -2  1  6 -3  0  2 -1 -1 -3 -4 -1 -3 -3 -1  0 -1 -4 -3 -3 -4
13 | C  0 -3 -3 -3  9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -4
14 | Q -1  1  0  0 -3  5  2 -2  0 -3 -2  1  0 -3 -1  0 -1 -2 -1 -2 -4
15 | E -1  0  0  2 -4  2  5 -2  0 -3 -3  1 -2 -3 -1  0 -1 -3 -2 -2 -4
16 | G  0 -2  0 -1 -3 -2 -2  6 -2 -4 -4 -2 -3 -3 -2  0 -2 -2 -3 -3 -4
17 | H -2  0  1 -1 -3  0  0 -2  8 -3 -3 -1 -2 -1 -2 -1 -2 -2  2 -3 -4
18 | I -1 -3 -3 -3 -1 -3 -3 -4 -3  4  2 -3  1  0 -3 -2 -1 -3 -1  3 -4
19 | L -1 -2 -3 -4 -1 -2 -3 -4 -3  2  4 -2  2  0 -3 -2 -1 -2 -1  1 -4
20 | K -1  2  0 -1 -3  1  1 -2 -1 -3 -2  5 -1 -3 -1  0 -1 -3 -2 -2 -4
21 | M -1 -1 -2 -3 -1  0 -2 -3 -2  1  2 -1  5  0 -2 -1 -1 -1 -1  1 -4
22 | F -2 -3 -3 -3 -2 -3 -3 -3 -1  0  0 -3  0  6 -4 -2 -2  1  3 -1 -4
23 | P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4  7 -1 -1 -4 -3 -2 -4
24 | S  1 -1  1  0 -1  0  0  0 -1 -2 -2  0 -1 -2 -1  4  1 -3 -2 -2 -4
25 | T  0 -1  0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1  1  5 -2 -2  0 -4
26 | W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1  1 -4 -3 -2 11  2 -3 -4
27 | Y -2 -2 -2 -3 -2 -1 -2 -3  2 -1 -1 -2 -1  3 -3 -2 -2  2  7 -1 -4
28 | V  0 -3 -3 -3 -1 -2 -2 -3 -3  3  1 -2  1 -1 -2 -2  0 -3 -1  4 -4
29 | * -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4  1


--------------------------------------------------------------------------------
/bin/fengDoolittle.fas:
--------------------------------------------------------------------------------
1 | >Alignment 0 sequence 0
2 | ILDXXXMDVVEGSAARFDCKVEGXXXYPDPEVMWFKDDNPXXXXXXVKXXXXXESRHXXFQIDYXXDEXXEGXXXN
3 | >Alignment 0 sequence 1
4 | RRLXXIPAARGGEISILCQPRAXXAPKATILWSKGTEIXXXXLGXXXXXXNSTXXXXRVTVXXXXTXXXXSXXXXD
5 | >Alignment 0 sequence 2
6 | XXRDPXVKTHEGWGVMLPCNPPAHXYPGLSYRWLLNEFPXXNFIPXXXTDGXXRHFXXVSXXXXQXXTXXXXTXXX
7 | >Alignment 0 sequence 3
8 | ISDXXTEADIGSNLRWGCAAAGXXKPRPMVRWLRNGEPXXXXLAXXXXXXSQNXXXXRVEVXXXXLXXXXAXXXXXX
9 | 


--------------------------------------------------------------------------------
/report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/report.pdf


--------------------------------------------------------------------------------
/source/.idea/.name:
--------------------------------------------------------------------------------
1 | source


--------------------------------------------------------------------------------
/source/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
4 | </project>


--------------------------------------------------------------------------------
/source/.idea/misc.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="DaemonCodeAnalyzer">
 4 |     <disable_hints />
 5 |   </component>
 6 |   <component name="DependencyValidationManager">
 7 |     <option name="SKIP_IMPORT_STATEMENTS" value="false" />
 8 |   </component>
 9 |   <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
10 |   <component name="ProjectLevelVcsManager" settingsEditedManually="false">
11 |     <OptionsSetting value="true" id="Add" />
12 |     <OptionsSetting value="true" id="Remove" />
13 |     <OptionsSetting value="true" id="Checkout" />
14 |     <OptionsSetting value="true" id="Update" />
15 |     <OptionsSetting value="true" id="Status" />
16 |     <OptionsSetting value="true" id="Edit" />
17 |     <ConfirmationsSetting value="0" id="Add" />
18 |     <ConfirmationsSetting value="0" id="Remove" />
19 |   </component>
20 |   <component name="ProjectModuleManager">
21 |     <modules />
22 |   </component>
23 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.6 (/usr/bin/python)" project-jdk-type="Python SDK" />
24 |   <component name="RunManager">
25 |     <list size="0" />
26 |   </component>
27 | </project>


--------------------------------------------------------------------------------
/source/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/source.iml" filepath="$PROJECT_DIR$/.idea/source.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/source/.idea/scopes/scope_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="DependencyValidationManager">
2 |   <state>
3 |     <option name="SKIP_IMPORT_STATEMENTS" value="false" />
4 |   </state>
5 | </component>


--------------------------------------------------------------------------------
/source/.idea/source.iml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <module type="PYTHON_MODULE" version="4">
3 |   <component name="NewModuleRootManager">
4 |     <content url="file://$MODULE_DIR$" />
5 |     <orderEntry type="inheritedJdk" />
6 |     <orderEntry type="sourceFolder" forTests="false" />
7 |   </component>
8 | </module>


--------------------------------------------------------------------------------
/source/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="" vcs="" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/source/.idea/workspace.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <project version="4">
  3 |   <component name="ChangeListManager">
  4 |     <list default="true" id="3d75c479-69ff-47f9-af16-de889221d95e" name="Default" comment="" />
  5 |     <ignored path="source.iws" />
  6 |     <ignored path=".idea/workspace.xml" />
  7 |     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
  8 |     <option name="TRACKING_ENABLED" value="true" />
  9 |     <option name="SHOW_DIALOG" value="false" />
 10 |     <option name="HIGHLIGHT_CONFLICTS" value="true" />
 11 |     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
 12 |     <option name="LAST_RESOLUTION" value="IGNORE" />
 13 |   </component>
 14 |   <component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
 15 |   <component name="CreatePatchCommitExecutor">
 16 |     <option name="PATCH_PATH" value="" />
 17 |   </component>
 18 |   <component name="DaemonCodeAnalyzer">
 19 |     <disable_hints />
 20 |   </component>
 21 |   <component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
 22 |   <component name="FavoritesManager">
 23 |     <favorites_list name="source" />
 24 |   </component>
 25 |   <component name="FileEditorManager">
 26 |     <leaf />
 27 |   </component>
 28 |   <component name="IdeDocumentHistory">
 29 |     <option name="CHANGED_PATHS">
 30 |       <list>
 31 |         <option value="$PROJECT_DIR$/lib/structurePrediction/nussinov.py" />
 32 |       </list>
 33 |     </option>
 34 |   </component>
 35 |   <component name="ProjectFrameBounds">
 36 |     <option name="y" value="23" />
 37 |     <option name="width" value="1366" />
 38 |     <option name="height" value="741" />
 39 |   </component>
 40 |   <component name="ProjectLevelVcsManager" settingsEditedManually="false">
 41 |     <OptionsSetting value="true" id="Add" />
 42 |     <OptionsSetting value="true" id="Remove" />
 43 |     <OptionsSetting value="true" id="Checkout" />
 44 |     <OptionsSetting value="true" id="Update" />
 45 |     <OptionsSetting value="true" id="Status" />
 46 |     <OptionsSetting value="true" id="Edit" />
 47 |     <ConfirmationsSetting value="0" id="Add" />
 48 |     <ConfirmationsSetting value="0" id="Remove" />
 49 |   </component>
 50 |   <component name="ProjectView">
 51 |     <navigator currentView="ProjectPane" proportions="" version="1">
 52 |       <flattenPackages />
 53 |       <showMembers />
 54 |       <showModules />
 55 |       <showLibraryContents />
 56 |       <hideEmptyPackages />
 57 |       <abbreviatePackageNames />
 58 |       <autoscrollToSource />
 59 |       <autoscrollFromSource />
 60 |       <sortByType />
 61 |     </navigator>
 62 |     <panes>
 63 |       <pane id="Scope" />
 64 |       <pane id="ProjectPane">
 65 |         <subPane>
 66 |           <PATH>
 67 |             <PATH_ELEMENT>
 68 |               <option name="myItemId" value="source" />
 69 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
 70 |             </PATH_ELEMENT>
 71 |             <PATH_ELEMENT>
 72 |               <option name="myItemId" value="External Libraries" />
 73 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ExternalLibrariesNode" />
 74 |             </PATH_ELEMENT>
 75 |             <PATH_ELEMENT>
 76 |               <option name="myItemId" value="&lt; Python 2.7.6 (/usr/bin/python) &gt;" />
 77 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.NamedLibraryElementNode" />
 78 |             </PATH_ELEMENT>
 79 |           </PATH>
 80 |           <PATH>
 81 |             <PATH_ELEMENT>
 82 |               <option name="myItemId" value="source" />
 83 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
 84 |             </PATH_ELEMENT>
 85 |             <PATH_ELEMENT>
 86 |               <option name="myItemId" value="source" />
 87 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
 88 |             </PATH_ELEMENT>
 89 |           </PATH>
 90 |           <PATH>
 91 |             <PATH_ELEMENT>
 92 |               <option name="myItemId" value="source" />
 93 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
 94 |             </PATH_ELEMENT>
 95 |             <PATH_ELEMENT>
 96 |               <option name="myItemId" value="source" />
 97 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
 98 |             </PATH_ELEMENT>
 99 |             <PATH_ELEMENT>
100 |               <option name="myItemId" value="lib" />
101 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
102 |             </PATH_ELEMENT>
103 |           </PATH>
104 |           <PATH>
105 |             <PATH_ELEMENT>
106 |               <option name="myItemId" value="source" />
107 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
108 |             </PATH_ELEMENT>
109 |             <PATH_ELEMENT>
110 |               <option name="myItemId" value="source" />
111 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
112 |             </PATH_ELEMENT>
113 |             <PATH_ELEMENT>
114 |               <option name="myItemId" value="lib" />
115 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
116 |             </PATH_ELEMENT>
117 |             <PATH_ELEMENT>
118 |               <option name="myItemId" value="structurePrediction" />
119 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
120 |             </PATH_ELEMENT>
121 |           </PATH>
122 |           <PATH>
123 |             <PATH_ELEMENT>
124 |               <option name="myItemId" value="source" />
125 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
126 |             </PATH_ELEMENT>
127 |             <PATH_ELEMENT>
128 |               <option name="myItemId" value="source" />
129 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
130 |             </PATH_ELEMENT>
131 |             <PATH_ELEMENT>
132 |               <option name="myItemId" value="lib" />
133 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
134 |             </PATH_ELEMENT>
135 |             <PATH_ELEMENT>
136 |               <option name="myItemId" value="pairwise" />
137 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
138 |             </PATH_ELEMENT>
139 |           </PATH>
140 |           <PATH>
141 |             <PATH_ELEMENT>
142 |               <option name="myItemId" value="source" />
143 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
144 |             </PATH_ELEMENT>
145 |             <PATH_ELEMENT>
146 |               <option name="myItemId" value="source" />
147 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
148 |             </PATH_ELEMENT>
149 |             <PATH_ELEMENT>
150 |               <option name="myItemId" value="lib" />
151 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
152 |             </PATH_ELEMENT>
153 |             <PATH_ELEMENT>
154 |               <option name="myItemId" value="multiple" />
155 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
156 |             </PATH_ELEMENT>
157 |           </PATH>
158 |           <PATH>
159 |             <PATH_ELEMENT>
160 |               <option name="myItemId" value="source" />
161 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
162 |             </PATH_ELEMENT>
163 |             <PATH_ELEMENT>
164 |               <option name="myItemId" value="source" />
165 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
166 |             </PATH_ELEMENT>
167 |             <PATH_ELEMENT>
168 |               <option name="myItemId" value="bin" />
169 |               <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
170 |             </PATH_ELEMENT>
171 |           </PATH>
172 |         </subPane>
173 |       </pane>
174 |     </panes>
175 |   </component>
176 |   <component name="PropertiesComponent">
177 |     <property name="last_opened_file_path" value="$PROJECT_DIR$" />
178 |     <property name="options.lastSelected" value="reference.settingsdialog.IDE.editor.colors.Font" />
179 |     <property name="options.splitter.main.proportions" value="0.3" />
180 |     <property name="options.splitter.details.proportions" value="0.2" />
181 |     <property name="FullScreen" value="false" />
182 |   </component>
183 |   <component name="PyConsoleOptionsProvider">
184 |     <option name="myPythonConsoleState">
185 |       <console-settings />
186 |     </option>
187 |   </component>
188 |   <component name="RunManager">
189 |     <list size="0" />
190 |   </component>
191 |   <component name="ShelveChangesManager" show_recycled="false" />
192 |   <component name="SvnConfiguration">
193 |     <configuration />
194 |   </component>
195 |   <component name="TaskManager">
196 |     <task active="true" id="Default" summary="Default task">
197 |       <changelist id="3d75c479-69ff-47f9-af16-de889221d95e" name="Default" comment="" />
198 |       <created>1422826067293</created>
199 |       <option name="number" value="Default" />
200 |       <updated>1422826067293</updated>
201 |     </task>
202 |     <servers />
203 |   </component>
204 |   <component name="ToolWindowManager">
205 |     <frame x="0" y="23" width="1366" height="741" extended-state="0" />
206 |     <editor active="false" />
207 |     <layout>
208 |       <window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
209 |       <window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
210 |       <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
211 |       <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
212 |       <window_info id="Application Servers" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
213 |       <window_info id="Project" active="true" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.24597365" sideWeight="0.5" order="3" side_tool="false" content_ui="combo" />
214 |       <window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
215 |       <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="true" content_ui="tabs" />
216 |       <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
217 |       <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
218 |       <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
219 |       <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
220 |       <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
221 |       <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
222 |       <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
223 |       <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="SLIDING" type="SLIDING" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
224 |       <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
225 |       <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
226 |       <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
227 |     </layout>
228 |   </component>
229 |   <component name="Vcs.Log.UiProperties">
230 |     <option name="RECENTLY_FILTERED_USER_GROUPS">
231 |       <collection />
232 |     </option>
233 |     <option name="RECENTLY_FILTERED_BRANCH_GROUPS">
234 |       <collection />
235 |     </option>
236 |   </component>
237 |   <component name="VcsContentAnnotationSettings">
238 |     <option name="myLimit" value="2678400000" />
239 |   </component>
240 |   <component name="VcsManagerConfiguration">
241 |     <option name="myTodoPanelSettings">
242 |       <TodoPanelSettings />
243 |     </option>
244 |   </component>
245 |   <component name="XDebuggerManager">
246 |     <breakpoint-manager />
247 |     <watches-manager />
248 |   </component>
249 |   <component name="editorHistoryManager">
250 |     <entry file="file://$PROJECT_DIR$/lib/pairwise/gotoh.py">
251 |       <provider selected="true" editor-type-id="text-editor">
252 |         <state vertical-scroll-proportion="0.0" vertical-offset="1343" max-vertical-offset="4794">
253 |           <caret line="31" column="11" selection-start-line="31" selection-start-column="11" selection-end-line="31" selection-end-column="11" />
254 |           <folding />
255 |         </state>
256 |       </provider>
257 |     </entry>
258 |     <entry file="file://$PROJECT_DIR$/lib/multiple/fengDoolittle.py">
259 |       <provider selected="true" editor-type-id="text-editor">
260 |         <state vertical-scroll-proportion="0.0" vertical-offset="833" max-vertical-offset="3791">
261 |           <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
262 |           <folding />
263 |         </state>
264 |       </provider>
265 |     </entry>
266 |     <entry file="file://$PROJECT_DIR$/bin/algorithmsInBioinformatics.py">
267 |       <provider selected="true" editor-type-id="text-editor">
268 |         <state vertical-scroll-proportion="0.0" vertical-offset="1173" max-vertical-offset="4029">
269 |           <caret line="21" column="33" selection-start-line="21" selection-start-column="33" selection-end-line="21" selection-end-column="33" />
270 |           <folding />
271 |         </state>
272 |       </provider>
273 |     </entry>
274 |     <entry file="file://$PROJECT_DIR$/lib/structurePrediction/nussinov.py">
275 |       <provider selected="true" editor-type-id="text-editor">
276 |         <state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="1870">
277 |           <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
278 |           <folding />
279 |         </state>
280 |       </provider>
281 |     </entry>
282 |     <entry file="file://$PROJECT_DIR$/lib/pairwise/gotoh.py">
283 |       <provider selected="true" editor-type-id="text-editor">
284 |         <state vertical-scroll-proportion="0.0" vertical-offset="1343" max-vertical-offset="4794">
285 |           <caret line="31" column="11" selection-start-line="31" selection-start-column="11" selection-end-line="31" selection-end-column="11" />
286 |           <folding />
287 |         </state>
288 |       </provider>
289 |     </entry>
290 |     <entry file="file://$PROJECT_DIR$/lib/multiple/fengDoolittle.py">
291 |       <provider selected="true" editor-type-id="text-editor">
292 |         <state vertical-scroll-proportion="0.0" vertical-offset="833" max-vertical-offset="3791">
293 |           <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
294 |           <folding />
295 |         </state>
296 |       </provider>
297 |     </entry>
298 |     <entry file="file://$PROJECT_DIR$/bin/algorithmsInBioinformatics.py">
299 |       <provider selected="true" editor-type-id="text-editor">
300 |         <state vertical-scroll-proportion="0.0" vertical-offset="1173" max-vertical-offset="4029">
301 |           <caret line="21" column="33" selection-start-line="21" selection-start-column="33" selection-end-line="21" selection-end-column="33" />
302 |           <folding />
303 |         </state>
304 |       </provider>
305 |     </entry>
306 |     <entry file="file://$PROJECT_DIR$/lib/structurePrediction/nussinov.py">
307 |       <provider selected="true" editor-type-id="text-editor">
308 |         <state vertical-scroll-proportion="0.0" vertical-offset="0" max-vertical-offset="1870">
309 |           <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
310 |           <folding />
311 |         </state>
312 |       </provider>
313 |     </entry>
314 |     <entry file="file://$PROJECT_DIR$/lib/structurePrediction/nussinov.py">
315 |       <provider selected="true" editor-type-id="text-editor">
316 |         <state vertical-scroll-proportion="2.1909938" vertical-offset="0" max-vertical-offset="1870">
317 |           <caret line="83" column="42" selection-start-line="83" selection-start-column="42" selection-end-line="83" selection-end-column="42" />
318 |           <folding />
319 |         </state>
320 |       </provider>
321 |     </entry>
322 |     <entry file="file://$PROJECT_DIR$/bin/algorithmsInBioinformatics.py">
323 |       <provider selected="true" editor-type-id="text-editor">
324 |         <state vertical-scroll-proportion="-1.2934783" vertical-offset="1173" max-vertical-offset="4029">
325 |           <caret line="21" column="33" selection-start-line="21" selection-start-column="33" selection-end-line="21" selection-end-column="33" />
326 |           <folding />
327 |         </state>
328 |       </provider>
329 |     </entry>
330 |     <entry file="file://$PROJECT_DIR$/lib/multiple/fengDoolittle.py">
331 |       <provider selected="true" editor-type-id="text-editor">
332 |         <state vertical-scroll-proportion="-1.2934783" vertical-offset="833" max-vertical-offset="3791">
333 |           <caret line="0" column="0" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
334 |           <folding />
335 |         </state>
336 |       </provider>
337 |     </entry>
338 |     <entry file="file://$PROJECT_DIR$/lib/pairwise/gotoh.py">
339 |       <provider selected="true" editor-type-id="text-editor">
340 |         <state vertical-scroll-proportion="-1.3198757" vertical-offset="1343" max-vertical-offset="4794">
341 |           <caret line="31" column="11" selection-start-line="31" selection-start-column="11" selection-end-line="31" selection-end-column="11" />
342 |           <folding />
343 |         </state>
344 |       </provider>
345 |     </entry>
346 |   </component>
347 | </project>


--------------------------------------------------------------------------------
/source/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/source/lib/__init__.py


--------------------------------------------------------------------------------
/source/lib/helper/IOHelper.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2015 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | import os
class IOHelper():
    """Helper class for reading and writing files in different formats."""

    def readFastaFile(self, inputFileName):
        """Reads a fasta file and returns its sequence lines as a list.

            inputFileName:  The path (relative or absolute) to the input fasta file.

            Lines starting with ">" (headers) are skipped. Every other line
            becomes one list entry, in file order, with its newline removed.
            Returns an empty list if the file does not exist."""
        sequence = []
        if not os.path.exists(inputFileName):
            return sequence

        # The context manager closes the file even if reading raises.
        with open(inputFileName, "r") as fileToRead:
            for line in fileToRead:
                if line.startswith(">"):
                    continue
                sequence.append(line.strip("\n"))
        return sequence

    def writeFastaFile(self, sequences, outputFileName):
        """Writes the given sequences to a file in the fasta format.

            sequences:      All computed alignments.
                            A list of lists of strings: [[,],...,[,]].
            outputFileName: The path (relative or absolute) and the output file name,
                            e.g.: "/path/to/file" or "file" to write it in the local directory.
                            ".fas" is appended if the name does not already end with it.

            Each sequence is preceded by a header line
            ">Alignment <alignment index> sequence <position in the alignment>"."""
        if not outputFileName.endswith(".fas"):
            outputFileName += ".fas"
        with open(outputFileName, "w") as fileToWrite:
            for alignmentIndex, alignment in enumerate(sequences):
                # enumerate() yields the real position; list.index() would report
                # the first occurrence for every duplicate sequence in an alignment.
                for sequenceIndex, sequence in enumerate(alignment):
                    fileToWrite.write('>Alignment ' + str(alignmentIndex)
                                      + ' sequence ' + str(sequenceIndex) + '\n')
                    fileToWrite.write(sequence + '\n')

    def writeGraphMLFile(self, clusteredNodesDictionary, outputFileName):
        """Writes a tree computed by the UpgmaWpgma class in graphML-format to outputFileName.

            clusteredNodesDictionary:   Dictionary whose keys are two node ids separated by a
                                        single space ("a b") and whose values are the id of the
                                        node both are connected to.
            outputFileName:             Output path; ".graphml" is appended if missing.

            For every entry three <node> elements (both key nodes and the value node) and
            two <edge> elements (key node -> value node) are written."""
        if not outputFileName.endswith(".graphml"):
            outputFileName += ".graphml"
        with open(outputFileName, "w") as fileToWrite:
            fileToWrite.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
                +"\n<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\""
                +"\n\t\txmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
                +"\t\txsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns\n"
                +"\t\thttp://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd\">"
                +"\n\t<graph id=\"G\" edgedefault=\"undirected\">\n")
            # NOTE(review): a node id that occurs in several entries is emitted once per
            # entry; output is kept as-is here -- confirm whether consumers need unique ids.
            for key in clusteredNodesDictionary:
                nodes = key.split(" ")
                fileToWrite.write("\t\t<node id=\"" + nodes[0] + "\"/>\n")
                fileToWrite.write("\t\t<node id=\"" + nodes[1] + "\"/>\n")
                fileToWrite.write("\t\t<node id=\"" + str(clusteredNodesDictionary[key]) + "\"/>\n")
            edgeId = 0
            for key in clusteredNodesDictionary:
                nodes = key.split(" ")
                target = str(clusteredNodesDictionary[key])
                fileToWrite.write("\t\t<edge id=\"" + str(edgeId) + "\" source=\"" + nodes[0] + "\" target=\""+ target + "\"/>\n")
                edgeId += 1
                fileToWrite.write("\t\t<edge id=\"" + str(edgeId) + "\" source=\"" + nodes[1] + "\" target=\""+ target + "\"/>\n")
                edgeId += 1

            fileToWrite.write("\t</graph>\n</graphml>")

    def writeRnaDotBracketNotation(self, sequence, pairedBases, outputFileName):
        """Writes an RNA sequence and its base pairs in dot-bracket notation to outputFileName.

            sequence:       The RNA sequence; written unchanged as the first line.
            pairedBases:    Mapping from a position i to the position it is paired with.
                            Position i is written as "(" and pairedBases[i] as ")".
            outputFileName: The file to write to.

            Positions that are neither a key nor a value of pairedBases are written as "."."""
        # presumably pairedBases only maps the opening (smaller) index to the closing
        # one; a mapping containing both directions would overwrite symbols -- TODO confirm.
        stack = {}
        for i in range(0, len(sequence)):
            if i in pairedBases:
                stack[i] = "("
                stack[pairedBases[i]] = ")"
            elif i not in stack:
                stack[i] = "."
        # Previously the handle was never closed, so buffered output could be lost.
        with open(outputFileName, "w") as fileToWrite:
            fileToWrite.write(sequence+"\n")
            for i in sorted(stack):
                fileToWrite.write(stack[i])

    def writeNewickTree(self, newickTree, outputFileName):
        """Writes the string newickTree unchanged to the file outputFileName."""
        # Previously the handle was never closed, so buffered output could be lost.
        with open(outputFileName, "w") as fileToWrite:
            fileToWrite.write(newickTree)
92 | 


--------------------------------------------------------------------------------
/source/lib/helper/__init__.py:
--------------------------------------------------------------------------------
1 | from IOHelper import IOHelper
2 | from mathHelper import MathHelper
3 | from pairwiseAlignmentHelper import PairwiseAlignmentHelper
4 | from multipleAlignmentHelper import MultipleAlignmentHelper


--------------------------------------------------------------------------------
/source/lib/helper/mathHelper.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2014 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | #
12 | # A math helper class. Some constants are defined here.
class MathHelper():
    """Holder for numeric constants: positive infinity and not-a-number."""
    # Positive infinity as a float.
    Inf = float("inf")
    # Not-a-number: multiplying infinity by zero has no defined value.
    NaN = Inf * 0


--------------------------------------------------------------------------------
/source/lib/helper/multipleAlignmentHelper.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2014 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | #
12 | from helper import PairwiseAlignmentHelper as pah
class MultipleAlignmentHelper():
    """Helper for the multiple alignment algorithms: named integer constants,
    a three-way weight function and preprocessing for UPGMA/WPGMA."""

    # Integer identifiers; presumably they name which of the three aligned
    # sequences (A, B, C) carry a gap -- they are only defined here, their
    # usage lives outside this class (TODO confirm).
    noGap = 0
    gapA = 1
    gapB = 2
    gapC = 3
    gapAB = 4
    gapBC = 5
    gapAC = 6

    def weightFunctionDifference(self, a, b, c):
        """Weight function with 0 if a==b==c, 1 if a==b, a==c or b==c, 2 else."""
        if a == b and b == c:
            return 0
        if a == b or b == c or a == c:
            return 1
        return 2

    def createDataForUpgmaWpgma(self, sequences):
        """Preprocessing of the sequences for the upgma/wpgma algorithm.

            sequences:  An indexable collection of sequences (strings).

            Returns a list [differenceDictionary, sequenceToIdMapping, sequenceToLengthMapping]:
              differenceDictionary:     key "i j" (indices with i < j) -> sum of the pairwise
                                        weights over all positions. Positions past the end of
                                        the shorter sequence are compared against "-".
              sequenceToIdMapping:      sequence -> its index in sequences. Identical sequences
                                        share one entry holding the last index.
              sequenceToLengthMapping:  index -> length of the sequence at that index."""
        sequenceToIdMapping = {}
        sequenceToLengthMapping = {}
        for sequenceId, sequence in enumerate(sequences):
            sequenceToIdMapping[sequence] = sequenceId
            sequenceToLengthMapping[sequenceId] = len(sequence)

        # One helper instance is enough; it was previously re-created for
        # every compared character.
        weight = pah().weightFunctionDifference

        differenceDictionary = {}
        sequenceCount = len(sequences)
        for i in range(0, sequenceCount):
            first = sequences[i]
            for j in range(i + 1, sequenceCount):
                second = sequences[j]
                differenceScore = 0
                for k in range(0, max(len(first), len(second))):
                    # Pad whichever sequence has already ended with a gap symbol.
                    symbolFirst = first[k] if k < len(first) else "-"
                    symbolSecond = second[k] if k < len(second) else "-"
                    differenceScore += weight(symbolFirst, symbolSecond)
                differenceDictionary[str(i) + " " + str(j)] = differenceScore
        return [differenceDictionary, sequenceToIdMapping, sequenceToLengthMapping]
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/source/lib/helper/pairwiseAlignmentHelper.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2014 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | 
class PairwiseAlignmentHelper():
    """Class to support the pairwise alignment algorithms Needleman-Wunsch and Gotoh."""

    # Integer identifiers; presumably traceback directions and matrix indices
    # for the D/P/Q matrices -- they are only defined here, their usage lives
    # outside this class (TODO confirm).
    diagonalD = 0
    dotQ = 1
    dotP = 2
    upD = 3
    upP = 4
    leftD = 5
    leftQ = 6
    matrixIndexD = 0
    matrixIndexP = 1
    matrixIndexQ = 2
    left = 0
    up = 1
    diagonal = 2

    # PAM250 table, one row per amino acid in the order given by _PAM250_INDEX.
    # Built once at class creation instead of on every pam250() call.
    # Source: http://www.icp.ucl.ac.be/~opperd/private/pam250.html
    # NOTE(review): values are reproduced unchanged and were not re-checked
    # against the cited page.
    _PAM250_MATRIX = [
        [13, 6, 9, 9, 5, 8, 9, 12, 6, 8, 6, 7, 7, 4, 11, 11, 11, 2, 4, 9],
        [3, 17, 4, 3, 2, 5, 3, 2, 6, 3, 2, 9, 4, 1, 4, 4, 3, 7, 2, 2],
        [4, 4, 6, 7, 2, 5, 6, 4, 6, 3, 2, 5, 3, 2, 4, 5, 4, 2, 3, 3],
        [5, 4, 8, 11, 1, 7, 10, 5, 6, 3, 2, 5, 3, 1, 4, 5, 5, 1, 2, 3],
        [2, 1, 1, 1, 52, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 3, 2, 1, 4, 2],
        [3, 5, 5, 6, 1, 10, 7, 3, 7, 2, 3, 5, 3, 1, 4, 3, 3, 1, 2, 3],
        [5, 4, 7, 11, 1, 9, 12, 5, 6, 3, 2, 5, 3, 1, 4, 5, 5, 1, 2, 3],
        [12, 5, 10, 10, 4, 7, 9, 27, 5, 5, 4, 6, 5, 3, 8, 11, 9, 2, 3, 7],
        [2, 5, 5, 4, 2, 7, 4, 2, 15, 2, 2, 3, 2, 2, 3, 3, 2, 2, 3, 2],
        [3, 2, 2, 2, 2, 2, 2, 2, 2, 10, 6, 2, 6, 5, 2, 3, 4, 1, 3, 9],
        [6, 4, 4, 3, 2, 6, 4, 3, 5, 15, 34, 4, 20, 13, 5, 4, 6, 6, 7, 13],
        [6, 18, 10, 8, 2, 10, 8, 5, 8, 5, 4, 24, 9, 2, 6, 8, 8, 4, 3, 5],
        [1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 3, 2, 6, 2, 1, 1, 1, 1, 1, 2],
        [2, 1, 2, 1, 1, 1, 1, 1, 3, 5, 6, 1, 4, 32, 1, 2, 2, 4, 20, 3],
        [7, 5, 5, 4, 3, 5, 4, 5, 5, 3, 3, 4, 3, 2, 20, 6, 5, 1, 2, 4],
        [9, 6, 8, 7, 7, 6, 7, 9, 6, 5, 4, 7, 5, 3, 9, 10, 9, 4, 4, 6],
        [8, 5, 6, 6, 4, 5, 5, 6, 4, 6, 4, 6, 5, 3, 6, 8, 11, 2, 3, 6],
        [0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 55, 1, 0],
        [1, 1, 2, 1, 3, 1, 1, 1, 3, 2, 2, 1, 2, 15, 1, 2, 2, 3, 31, 2],
        [7, 4, 4, 4, 4, 4, 4, 4, 5, 4, 15, 10, 4, 10, 5, 5, 5, 72, 4, 17],
    ]

    # One-letter amino acid code -> row/column index in _PAM250_MATRIX.
    _PAM250_INDEX = {"A":0, "R":1, "N":2, "D":3, "C":4, "Q":5, "E":6, "G":7, "H":8, "I":9,
                     "L":10, "K":11, "M":12, "F":13, "P":14, "S":15, "T":16, "W":17, "Y":18,
                     "V":19}

    def weightFunctionDifference(self, a, b):
        """Weight function with 0 if a==b and 1 else."""
        # A plain else guarantees an integer result; the former
        # "elif a != b" could fall through and return None.
        if a == b:
            return 0
        return 1

    def gapCost(self, x):
        """Returns a gap cost of g(x) = 2 + x."""
        return 2 + x

    def pam250(self, a, b):
        """Returns the PAM250 table entry for the amino acid pair (a, b).

            a:  One-letter amino acid code selecting the row.
            b:  One-letter amino acid code selecting the column.

            If either symbol is not one of the 20 amino acid codes of the
            table (e.g. a gap "-"), 1 is returned.
            Source: http://www.icp.ucl.ac.be/~opperd/private/pam250.html"""
        index = PairwiseAlignmentHelper._PAM250_INDEX
        if a in index and b in index:
            return PairwiseAlignmentHelper._PAM250_MATRIX[index[a]][index[b]]
        return 1
70 | 
71 | 
72 | 
73 | 


--------------------------------------------------------------------------------
/source/lib/helper/test/IOHelperTest.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2014 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | 
12 | import unittest
13 | import os, sys
14 | lib_path = os.path.abspath('../../')
15 | sys.path.append(lib_path)
16 | from helper import IOHelper as io
17 | 
class IOHelperTestClass(unittest.TestCase):
    """Test class to check the correctness of the methods in IOHelper."""

    def _removeIfExists(self, fileName):
        """Delete fileName when it exists so every test starts from and leaves a clean directory."""
        if os.path.exists(fileName):
            os.remove(fileName)

    def _readLines(self, fileName):
        """Return the lines of fileName without their trailing newline.
            The file is closed even when reading fails."""
        with open(fileName) as testInputFile:
            return [line.strip("\n") for line in testInputFile.readlines()]

    def test_readFastaFile(self):
        """Test method to test the correct reading of a fasta file."""
        fileName = "testReadFasta.fas"
        self._removeIfExists(fileName)
        try:
            # first test case: two sequences
            sequenceToWrite = [["ACGT", "ACGTAATTA"]]
            expectedSequence = ["ACGT", "ACGTAATTA"]
            io().writeFastaFile(sequenceToWrite, fileName)
            readSequence = io().readFastaFile(fileName, multipleSequenceAlignment=False)
            self.assertEqual(expectedSequence, readSequence)

            # second test case: the file holds three sequences but is read in pairwise mode,
            # so the result must differ from the full list of three
            sequenceToWrite = [["ACGT", "ACGTAATTA", "AGTTG"]]
            expectedSequence = ["ACGT", "ACGTAATTA", "AGTTG"]
            io().writeFastaFile(sequenceToWrite, fileName)
            readSequence = io().readFastaFile(fileName, multipleSequenceAlignment=False)
            self.assertNotEqual(expectedSequence, readSequence)

            # third test case: multiple sequences
            readSequence = io().readFastaFile(fileName, multipleSequenceAlignment=True)
            self.assertEqual(expectedSequence, readSequence)
        finally:
            # remove the temporary file even when an assertion above failed
            self._removeIfExists(fileName)

    def test_writeFastaFile(self):
        """Test method to test the correct writing of a fasta file."""
        fileName = "testWriteFasta.fas"
        self._removeIfExists(fileName)
        sequence = [["ACGT", "ACGTAATTA"]]
        expectedReadSequence = [">Alignment 0 sequence 0", "ACGT", ">Alignment 0 sequence 1", "ACGTAATTA"]
        try:
            # first test case, filename with extension
            io().writeFastaFile(sequence, "testWriteFasta.fas")
            self.assertEqual(expectedReadSequence, self._readLines(fileName))
            # delete the file so the second case proves that it is created again
            os.remove(fileName)

            # second test case, filename without extension
            io().writeFastaFile(sequence, "testWriteFasta")
            self.assertEqual(expectedReadSequence, self._readLines(fileName))
        finally:
            # remove the temporary file even when an assertion above failed
            self._removeIfExists(fileName)
71 | 
# Start the unittest runner only when this file is executed as a script.
if __name__ == "__main__":
    unittest.main() # run all tests  


--------------------------------------------------------------------------------
/source/lib/multiple/__init__.py:
--------------------------------------------------------------------------------
1 | from needlemanWunschN3 import NeedlemanWunschN3
2 | from upgmaWpgma import UpgmaWpgma
3 | from fengDoolittle import FengDoolittle
4 | from sumOfPairs import SumOfPairs


--------------------------------------------------------------------------------
/source/lib/multiple/fengDoolittle.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | # Copyright 2015 Joachim Wolff
  3 | # Programming Course: Algorithms in Bioinformatics
  4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
  5 | # Winter semester 2014/2015
  6 | #
  7 | # Chair of Bioinformatics
  8 | # Department of Computer Science
  9 | # Faculty of Engineering
 10 | # Albert-Ludwig-University Freiburg im Breisgau
 11 | import sys
 12 | from pairwise import NeedlemanWunsch
 13 | from math import log10
 14 | from helper import PairwiseAlignmentHelper as pah
 15 | from multiple import UpgmaWpgma
 16 | import random
 17 | 
 18 | 
 19 | class FengDoolittle():
 20 |     """This class computes the Feng-Doolittle algorithm by Da-Fei Feng and Russell F. Doolittle:
 21 |             Feng, Da-Fei, and Russell F. Doolittle.
 22 |             "Progressive sequence alignment as a prerequisitetto correct phylogenetic trees."
 23 |             Journal of molecular evolution 25.4 (1987): 351-360.
 24 |             http://dna.bio.puc.cl/cardex/papersbio252/Grupo06-2013.pdf"""
 25 |     def __init__(self, sequences, weightFunction, similarityScore):
 26 |         """To initialize an object of class FengDoolittle you have to define:
 27 |                 sequences:          A list of sequences for the multiple alignment.
 28 |                 weightFunction:     A string containing the name of your preferred weight function.
 29 |                                     The weight function have to be defined in package helper, class PairwiseAlignmentHelper.
 30 |                 similarityScore:    A string containing the name of your preferred similarity score like pam250.
 31 |                                     The similarity score have to be defined in package helper, class PairwiseAlignmentHelper."""
 32 |         self.sequences = sequences
 33 |         self.alignments = []
 34 |         self.weightFunction = weightFunction
 35 |         if similarityScore in dir(pah) and callable(getattr(pah, similarityScore)):
 36 |             similarityScoreObj = eval('pah().' + similarityScore)
 37 |         else:
 38 |             print "Score function not found!"
 39 |             sys.exit()
 40 |         self.similarityScore = similarityScoreObj
 41 |         self.alignmentToIndexMapping = {}
 42 |         self.sequenceToIndexMapping = {}
 43 |         self.distanceDictionary = {}
 44 |         self.newickTree = ""
 45 |         self.orderToAlign = []
 46 | 
 47 |     def computeAlignments(self):
 48 |         """This function computes all pairwise alignments between every sequence with the Needleman-Wunsch algorithm."""
 49 |         nw = NeedlemanWunsch()
 50 |         alignmentsAppend = self.alignments.append
 51 |         for i in range(0, len(self.sequences)):
 52 |             for j in range(i + 1, len(self.sequences)):
 53 |                 alignmentsAppend([nw.compute([self.sequences[i], self.sequences[j]], self.weightFunction,1)[0], i, j])
 54 | 
 55 |     def computeDistanceDictionary(self):
 56 |         """This function computes the distance between every alignment. The distances are used to generate a phylogenetic tree."""
 57 |         for i in range(0, len(self.alignments)):
 58 |             index = str(self.alignments[i][1]) + " " + str(self.alignments[i][2])
 59 |             self.distanceDictionary[index] = self.similarityToDistance(self.alignments[i][0])
 60 | 
 61 |     def similarityToDistance(self, alignment):
 62 |         """Computes from the given similarity the distance measure."""
 63 |         sMax = self.similarity(alignment[0], alignment[0]) + self.similarity(alignment[1], alignment[1])
 64 |         sMax /= 2
 65 |         alignmentAsList = list(alignment[0])
 66 |         alignmentAsList1 = list(alignment[1])
 67 |         random.shuffle(alignmentAsList)
 68 |         random.shuffle(alignmentAsList1)
 69 |         alignmentShuffel0 = "".join(alignmentAsList)
 70 |         alignmentShuffel1 = "".join(alignmentAsList1)
 71 | 
 72 |         sRand = self.similarity(alignmentShuffel0, alignmentShuffel1)
 73 |         if sMax == sRand:
 74 |             sRand = sRand - 0.0001
 75 |         else:
 76 |             sEff = (self.similarity(alignment[0], alignment[1]) - sRand) / float(sMax - sRand)
 77 |         if sEff <= 0.0:
 78 |             return 1
 79 |         distance = -log10(sEff)
 80 |         return distance
 81 | 
 82 |     def similarity(self, a, b):
 83 |         """Returns the similarity of two sequences a and b with the similarity score defined at the initialization."""
 84 |         similarity = 0
 85 |         for i in range(0, len(a)):
 86 |             similarity += self.similarityScore(a[i], b[i])
 87 |         return similarity
 88 | 
 89 |     def buildTree(self):
 90 |         """This function computes the phylogenetic tree with UPGMA and stores it in the Newick-Tree format."""
 91 |         upgma = UpgmaWpgma(self.distanceDictionary, len(self.sequences))
 92 |         upgma.compute_clustering()
 93 |         self.newickTree = upgma.get_newick_tree()
 94 | 
 95 |     def buildMultipleAlignment(self, group0, group1):
 96 |         """This function returns which is the best pairwise alignment out of all alignments of group0 and group1."""
 97 |         highestScore = 0
 98 |         optimalAlignment = []
 99 |         for i in group0:
100 |             for j in group1:
101 |                 nw = NeedlemanWunsch()
102 |                 alignment = nw.compute([i[0], j[0]], self.weightFunction, 1)
103 |                 score = self.similarity(alignment[0][0], alignment[0][1])
104 |                 if highestScore < score:
105 |                     highestScore = score
106 |                     optimalAlignment = [alignment[0][0], alignment[0][1], i[1], j[1]]
107 |         return optimalAlignment
108 | 
109 | 
110 |     def computeOrderOfSequencesToAlign(self):
111 |         """This function computes out of the phylogenetic tree in which order the sequences are aligned."""
112 |         indexBegin = 0
113 |         indexEnd = len(self.newickTree)
114 |         while indexEnd != -1:
115 |             indexBegin = self.newickTree.rfind("(", indexBegin, indexEnd)
116 |             if indexBegin == -1:
117 |                 break
118 |             i = indexBegin + 1
119 |             stack = 0
120 |             while stack >= 0 and i < len(self.newickTree):
121 |                 if self.newickTree[i] == "(":
122 |                     stack += 1
123 |                 elif self.newickTree[i] == ")":
124 |                     stack -= 1
125 |                 i += 1
126 |             indexEnd = i
127 | 
128 |             group0 = ""
129 |             group1 = ""
130 |             substring = self.newickTree[indexBegin:indexEnd]
131 |             if substring[1] != "(":
132 |                 indexGroup0 = substring.find(",")
133 |                 group0 = substring[0:indexGroup0].strip(",")
134 |                 group1 = substring[indexGroup0:-1].strip(",")
135 |             else:
136 |                 k = 1
137 |                 stack = 0
138 |                 while k < len(substring):
139 |                     if substring[k] == "(":
140 |                         stack += 1
141 |                     elif substring[k] == ")":
142 |                         stack -= 1
143 |                     k += 1
144 |                     if stack <= 0:
145 |                         break
146 |                 group0 = substring[0:k].strip(",")
147 |                 group1 = substring[k:-1].strip(",")
148 |             group0List = group0.split(",")
149 |             group1List = group1.split(",")
150 |             list0 = []
151 |             list1 = []
152 |             for j in group0List:
153 |                 list0.append(int(j.strip("(").strip(")").strip(",")))
154 |             for j in group1List:
155 |                 list1.append(int(j.strip("(").strip(")").strip(",")))
156 | 
157 |             self.orderToAlign.append(sorted([sorted(list0), sorted(list1)]))
158 |             indexEnd = indexBegin
159 |             indexBegin = 0
160 | 
161 |     def computeMultipleAlignment(self):
162 |         """This function returns the multiple sequence alignment."""
163 |         self.computeAlignments()
164 |         self.computeDistanceDictionary()
165 |         self.buildTree()
166 |         self.computeOrderOfSequencesToAlign()
167 |         i = 0
168 |         indexAlignments = {}
169 |         # create index to algnment realation
170 |         while i < len(self.orderToAlign):
171 |             if len(self.orderToAlign[i][0]) == 1 and len(self.orderToAlign[i][1]):
172 |                 for j in self.alignments:
173 |                     if (j[1] == self.orderToAlign[i][0][0] and j[2] == self.orderToAlign[i][1][0]):
174 |                         indexAlignments[self.orderToAlign[i][0][0]] = j[0][0]
175 |                         indexAlignments[self.orderToAlign[i][1][0]] = j[0][1]
176 |                         break
177 |                     elif(j[1] == self.orderToAlign[i][1][0] and j[2] == self.orderToAlign[i][0][0]):
178 |                         indexAlignments[self.orderToAlign[i][0][0]] = j[0][1]
179 |                         indexAlignments[self.orderToAlign[i][1][0]] = j[0][0]
180 |                         break
181 |             elif len(self.orderToAlign[i][0]) == 1:
182 |                 indexAlignments[self.orderToAlign[i][0][0]] = self.sequences[self.orderToAlign[i][0][0]]
183 |             elif len(self.orderToAlign[i][1]) == 1:
184 |                 try:
185 |                     indexAlignments[self.orderToAlign[i][1][0]] = self.sequences[self.orderToAlign[i][1][0]]
186 |                 except:
187 |                     print "Exception!"
188 |                     print "i: ", i
189 |                     print "OrderToAlign: ", self.orderToAlign
190 |                     print "orderAlign:", self.orderToAlign[i][1][0]
191 |                     print self.sequences
192 |             i += 1
193 | 
194 |         for i in self.orderToAlign:
195 |             # one sequence with one sequence
196 |             if len(i[0]) == 1 and len(i[1]):
197 |                 indexAlignments[i[0][0]] = indexAlignments[i[0][0]].replace("-", "X")
198 |                 indexAlignments[i[1][0]] = indexAlignments[i[1][0]].replace("-", "X")
199 |             # one sequence with one group
200 |             # two groups
201 |             else:
202 |                 group0 = []
203 |                 group1 = []
204 |                 for j in i[0]:
205 |                     group0.append([indexAlignments[j], j])
206 |                 for j in i[1]:
207 |                     group1.append([indexAlignments[j],j])
208 |                 pairwiseAlignment = self.buildMultipleAlignment(group0, group1)
209 |                 indexAlignments[pairwiseAlignment[2]] = pairwiseAlignment[0].replace("-", "X")
210 |                 indexAlignments[pairwiseAlignment[3]] = pairwiseAlignment[1].replace("-", "X")
211 | 
212 |                 for j in i[0]:
213 |                     nw = NeedlemanWunsch()
214 |                     alignment = nw.compute([pairwiseAlignment[0], indexAlignments[j]], self.weightFunction, 1)
215 |                     indexAlignments[j] = alignment[0][1]
216 |                 for j in i[1]:
217 |                     nw = NeedlemanWunsch()
218 |                     alignment = nw.compute([pairwiseAlignment[1], indexAlignments[j]], self.weightFunction, 1)
219 |                     indexAlignments[j] = alignment[0][1]
220 |                 for j in indexAlignments:
221 |                     indexAlignments[j] = indexAlignments[j].replace("-", "X")
222 |         return indexAlignments
223 | 


--------------------------------------------------------------------------------
/source/lib/multiple/needlemanWunschN3.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | # Copyright 2015 Joachim Wolff
  3 | # Programming Course: Algorithms in Bioinformatics
  4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
  5 | # Winter semester 2014/2015
  6 | #
  7 | # Chair of Bioinformatics
  8 | # Department of Computer Science
  9 | # Faculty of Engineering
 10 | # Albert-Ludwig-University Freiburg im Breisgau
 11 | import sys
 12 | from helper import MultipleAlignmentHelper as mah
 13 | 
 14 | 
 15 | class NeedlemanWunschN3():
 16 |     """This class computes the Needleman-Wunsch algorithm with three sequences."""
 17 | 
 18 |     def __init__(self, sequence_a, sequence_b, sequence_c, score_function):
 19 |         """Initalize all variables and methods needed to compute the Needleman-Wunsch algorithm with three sequences.
 20 |             sequenceA:      A string with the first DNA sequence.
 21 |             sequenceB:      A string with the second DNA sequence.
 22 |             sequenceC:      A string with the third DNA sequence.
 23 |             scoreFunction:  The name of a weight function as a String which is defined 
 24 |                             in the pairwiseAlignmentHelper-class.
 25 |         """
 26 |         if score_function in dir(mah) and callable(getattr(mah, score_function)):
 27 |             score_function_obj = eval('mah().' + score_function)
 28 |         else:
 29 |             print "Score function not found!"
 30 |             sys.exit()
 31 | 
 32 |         self.computation_matrix = [[[]]]
 33 |         self.sequence_a = sequence_a
 34 |         self.sequence_b = sequence_b
 35 |         self.sequence_c = sequence_c
 36 |         self.score_function = score_function_obj
 37 |         self.i = 0
 38 |         self.j = 0
 39 |         self.traceback_stack = [[]]
 40 |         self.traceback_stack_index = 0
 41 |         self.indices_stack = [[]]
 42 |         self.computed_alignment = []
 43 | 
 44 |     def compute_matrix(self):
 45 |         """Computes the matrix which is needed by the Needleman-Wunsch algorithm for three sequences."""
 46 |         self.computation_matrix = [
 47 |             [[0 for i in range(len(self.sequence_c) + 1)] for j in range(len(self.sequence_b) + 1)] \
 48 |             for k in range(len(self.sequence_a) + 1)]
 49 |         # initalize matrix
 50 |         for i in range(1, len(self.sequence_a) + 1):
 51 |             self.computation_matrix[i][0][0] = self.computation_matrix[i - 1][0][0] \
 52 |                                                + self.score_function("", "", self.sequence_a[i - 1])
 53 |         for i in range(1, len(self.sequence_b) + 1):
 54 |             self.computation_matrix[0][i][0] = self.computation_matrix[0][i - 1][0] \
 55 |                                                + self.score_function("", "", self.sequence_b[i - 1])
 56 |         for i in range(1, len(self.sequence_c) + 1):
 57 |             self.computation_matrix[0][0][i] = self.computation_matrix[0][0][i - 1] \
 58 |                                                + self.score_function("", "", self.sequence_c[i - 1])
 59 |         for i in range(1, len(self.sequence_a) + 1):
 60 |             for j in range(1, len(self.sequence_b) + 1):
 61 |                 self.computation_matrix[i][j][0] = self.computation_matrix[i - 1][j - 1][0] \
 62 |                                                    + self.score_function(self.sequence_a[i - 1], self.sequence_b[j - 1],
 63 |                                                                          "")
 64 |         for i in range(1, len(self.sequence_a) + 1):
 65 |             for k in range(1, len(self.sequence_c) + 1):
 66 |                 self.computation_matrix[i][0][k] = self.computation_matrix[i - 1][0][k - 1] \
 67 |                                                    + self.score_function(self.sequence_a[i - 1], "",
 68 |                                                                          self.sequence_c[k - 1])
 69 |         for j in range(1, len(self.sequence_b) + 1):
 70 |             for k in range(1, len(self.sequence_c) + 1):
 71 |                 self.computation_matrix[0][j][k] = self.computation_matrix[0][j - 1][k - 1] \
 72 |                                                    + self.score_function("", self.sequence_b[j - 1],
 73 |                                                                          self.sequence_c[k - 1])
 74 | 
 75 |         for i in range(1, len(self.sequence_a) + 1):
 76 |             for j in range(1, len(self.sequence_b) + 1):
 77 |                 for k in range(1, len(self.sequence_c) + 1):
 78 |                     self.computation_matrix[i][j][k] = self.compute_minimum(i, j, k)
 79 | 
 80 |     def compute_minimum(self, i, j, k):
 81 |         """Compute the minimal value for a given cell of the matrix.
 82 |             The minimum is choosen of the following values:
 83 |                 D(i-1, j-1, k-1) + w(a_i-1, b_j-1, c_k-1)
 84 |                 D(i, j-1, k-1) + w(a_i, b_j-1, c_k-1)
 85 |                 D(i-1, j, k-1) + w(a_i-1, b_j, c_k-1)
 86 |                 D(i-1, j-1, k) + w(a_i-1, b_j-1, c_k)
 87 |                 D(i, j, k-1) + w(a_i, b_j, c_k-1)
 88 |                 D(i-1, j, k) + w(a_i-1, b_j, c_k)
 89 |                 D(i, j-1, k) + w(a_i, b_j-1, c_k)
 90 |             i: index of sequence A
 91 |             j: index of sequence B
 92 |             k: index of sequence C
 93 |         """
 94 |         # no gap
 95 |         no_gap = self.computation_matrix[i - 1][j - 1][k - 1] \
 96 |                  + self.score_function(self.sequence_a[i - 1], self.sequence_b[j - 1], self.sequence_c[k - 1])
 97 |         # one gap
 98 |         gap_a = self.computation_matrix[i][j - 1][k - 1] \
 99 |                 + self.score_function("", self.sequence_b[j - 1], self.sequence_c[k - 1])
100 |         gap_b = self.computation_matrix[i - 1][j][k - 1] \
101 |                 + self.score_function(self.sequence_a[i - 1], "", self.sequence_c[k - 1])
102 |         gap_c = self.computation_matrix[i - 1][j - 1][k] \
103 |                 + self.score_function(self.sequence_a[i - 1], self.sequence_b[j - 1], "")
104 |         # two gaps
105 |         gap_ab = self.computation_matrix[i][j][k - 1] + self.score_function("", "", self.sequence_c[k - 1])
106 |         gap_bc = self.computation_matrix[i - 1][j][k] + self.score_function(self.sequence_a[i - 1], "", "")
107 |         gap_ac = self.computation_matrix[i][j - 1][k] + self.score_function("", self.sequence_b[j - 1], "")
108 |         possible_values = [no_gap, gap_a, gap_b, gap_c, gap_ab, gap_bc, gap_ac]
109 |         return min(possible_values)
110 | 
111 |     def traceback(self, maximal_optimal_solutions=-1):
112 |         """Computes the traceback for the Needleman-Wunsch n=3 matrix."""
113 |         self.traceback_stack = [[]]
114 |         self.indices_stack = [[len(self.computation_matrix) - 1, len(self.computation_matrix[0]) - 1,
115 |                                len(self.computation_matrix[0][0]) - 1]]
116 |         self.traceback_stack_index = 0
117 |         traceback_done = False
118 |         optimal_solutions_count = 0
119 |         while not traceback_done:
120 | 
121 |             i = self.indices_stack[self.traceback_stack_index][0]
122 |             j = self.indices_stack[self.traceback_stack_index][1]
123 |             k = self.indices_stack[self.traceback_stack_index][2]
124 |             optimal_solutions_count += 1
125 |             split = False
126 |             while i > 0 or j > 0 or k > 0:
127 |                 path_variable_i = i
128 |                 path_variable_j = j
129 |                 path_variable_k = k
130 |                 # no gap
131 |                 if i > 0 and j > 0 and k > 0:
132 |                     if self.computation_matrix[i][j][k] == self.computation_matrix[i - 1][j - 1][k - 1] \
133 |                             + self.score_function(self.sequence_a[i - 1], self.sequence_b[j - 1],
134 |                                                   self.sequence_c[k - 1]):
135 |                         self.traceback_stack[self.traceback_stack_index].append(mah.noGap)
136 |                         path_variable_i -= 1  # change i
137 |                         path_variable_j -= 1  # change j
138 |                         path_variable_k -= 1  # change k
139 |                         split = True
140 | 
141 |                 # a gap in sequence a
142 |                 if j > 0 and k > 0:
143 |                     if self.computation_matrix[i][j][k] == self.computation_matrix[i][j - 1][k - 1] \
144 |                             + self.score_function("", self.sequence_b[j - 1], self.sequence_c[k - 1]):
145 |                         if split == False:
146 |                             self.traceback_stack[self.traceback_stack_index].append(mah.gapA)
147 |                             path_variable_j -= 1
148 |                             path_variable_k -= 1
149 |                             split = True
150 |                         else:
151 |                             self.split([i, j - 1, k - 1], mah.gapA)
152 |                 # a gap in sequence b
153 |                 if i > 0 and k > 0:
154 |                     if self.computation_matrix[i][j][k] == self.computation_matrix[i - 1][j][k - 1] \
155 |                             + self.score_function(self.sequence_a[i - 1], "", self.sequence_c[k - 1]):
156 |                         if split == False:
157 |                             self.traceback_stack[self.traceback_stack_index].append(mah.gapB)
158 |                             path_variable_i -= 1
159 |                             path_variable_k -= 1
160 |                         elif split == True:
161 |                             self.split([i - 1, j, k - 1], mah.gapB)
162 |                 # a gap in sequence c
163 |                 if i > 0 and j > 0:
164 |                     if self.computation_matrix[i][j][k] == self.computation_matrix[i - 1][j - 1][k] \
165 |                             + self.score_function(self.sequence_a[i - 1], self.sequence_b[j - 1], ""):
166 |                         if split == False:
167 |                             self.traceback_stack[self.traceback_stack_index].append(mah.gapC)
168 |                             path_variable_i -= 1
169 |                             path_variable_j -= 1
170 |                         elif split == True:
171 |                             self.split([i - 1, j - 1, k], mah.gapC)
172 |                 # a gap in sequence a and b
173 |                 if k > 0:
174 |                     if self.computation_matrix[i][j][k] == self.computation_matrix[i][j][k - 1] \
175 |                             + self.score_function("", "", self.sequence_c[k - 1]):
176 |                         if split == False:
177 |                             self.traceback_stack[self.traceback_stack_index].append(mah.gapAB)
178 |                             path_variable_k -= 1
179 |                         elif split == True:
180 |                             self.split([i, j, k - 1], mah.gapAB)
181 |                 # a gap in sequence a and c
182 |                 if j > 0:
183 |                     if self.computation_matrix[i][j][k] == self.computation_matrix[i][j - 1][k] \
184 |                             + self.score_function("", self.sequence_b[j - 1], ""):
185 |                         if split == False:
186 |                             self.traceback_stack[self.traceback_stack_index].append(mah.gapAC)
187 |                             path_variable_j -= 1
188 |                         elif split == True:
189 |                             self.split([i, j - 1, k], mah.gapAC)
190 |                 # a gap in sequence b and c
191 |                 if i > 0:
192 |                     if self.computation_matrix[i][j][k] == self.computation_matrix[i - 1][j][k] \
193 |                             + self.score_function(self.sequence_a[i - 1], "", ""):
194 |                         if split == False:
195 |                             self.traceback_stack[self.traceback_stack_index].append(mah.gapBC)
196 |                             path_variable_i -= 1
197 |                         elif split == True:
198 |                             self.split([i - 1, j, k], mah.gapBC)
199 |                 split = False
200 |                 i = path_variable_i
201 |                 j = path_variable_j
202 |                 k = path_variable_k
203 |             if maximal_optimal_solutions != -1 and optimal_solutions_count >= maximal_optimal_solutions:
204 |                 break
205 |             self.indices_stack[self.traceback_stack_index][0] = i
206 |             self.indices_stack[self.traceback_stack_index][1] = j
207 |             self.indices_stack[self.traceback_stack_index][2] = k
208 |             l = 0
209 |             all_tracebacks_computed = 0
210 |             while l < len(self.indices_stack):
211 |                 if self.indices_stack[l][0] == 0 and self.indices_stack[l][1] == 0 and self.indices_stack[l][2] == 0:
212 |                     all_tracebacks_computed += 1
213 |                 else:
214 |                     self.traceback_stack_index = l
215 |                     l = len(self.indices_stack)
216 |                 l += 1
217 |             if all_tracebacks_computed >= len(self.indices_stack):
218 |                 traceback_done = True
219 |                 # all_tracebacks_computed = 0
220 |         if maximal_optimal_solutions != -1 and optimal_solutions_count >= maximal_optimal_solutions:
221 |             for i in range(0, maximal_optimal_solutions):
222 |                 self.computed_alignment.append(self.build_alignment(self.traceback_stack[i]))
223 |         else:
224 |             for i in range(0, len(self.traceback_stack)):
225 |                 self.computed_alignment.append(self.build_alignment(self.traceback_stack[i]))
226 | 
227 |     def split(self, index, gapSymbol):
228 |         """Splits the actual traceback path into two paths.
229 |             index:      The index values for the next cell of the path.
230 |             gapSymbol:  A symbol for the computed step for the path."""
231 |         self.traceback_stack.append(self.traceback_stack[self.traceback_stack_index][0:-1])
232 |         self.traceback_stack[len(self.traceback_stack) - 1].append(gapSymbol)
233 |         self.indices_stack.append(index)
234 | 
235 |     def build_alignment(self, tracebackStack):
236 |         """Builds the alignment for one traceback path.
237 |                 tracebackStack: The computed tracebackpath as a list = []
238 |             """
239 |         i = 0
240 |         j = 0
241 |         k = 0
242 |         l = len(tracebackStack) - 1
243 |         alignment_of_a = ""
244 |         alignment_of_b = ""
245 |         alignment_of_c = ""
246 | 
247 |         while len(tracebackStack) > 0:
248 |             try:
249 |                 tracebackElement = tracebackStack.pop(l)
250 |                 if mah.noGap == tracebackElement:
251 |                     alignment_of_a += self.sequence_a[i]
252 |                     alignment_of_b += self.sequence_b[j]
253 |                     alignment_of_c += self.sequence_c[k]
254 |                     i += 1
255 |                     j += 1
256 |                     k += 1
257 |                 elif mah.gapA == tracebackElement:
258 |                     alignment_of_a += "-"
259 |                     alignment_of_b += self.sequence_b[j]
260 |                     alignment_of_c += self.sequence_c[k]
261 |                     j += 1
262 |                     k += 1
263 |                 elif mah.gapB == tracebackElement:
264 |                     alignment_of_a += self.sequence_a[i]
265 |                     alignment_of_b += "-"
266 |                     alignment_of_c += self.sequence_c[k]
267 |                     i += 1
268 |                     k += 1
269 |                 elif mah.gapC == tracebackElement:
270 |                     alignment_of_a += self.sequence_a[i]
271 |                     alignment_of_b += self.sequence_b[j]
272 |                     alignment_of_c += "-"
273 |                     i += 1
274 |                     j += 1
275 |                 elif mah.gapAB == tracebackElement:
276 |                     alignment_of_a += "-"
277 |                     alignment_of_b += "-"
278 |                     alignment_of_c += self.sequence_c[k]
279 |                     k += 1
280 |                 elif mah.gapAC == tracebackElement:
281 |                     alignment_of_a += "-"
282 |                     alignment_of_b += self.sequence_b[j]
283 |                     alignment_of_c += "-"
284 |                     j += 1
285 |                 elif mah.gapBC == tracebackElement:
286 |                     alignment_of_a += self.sequence_a[i]
287 |                     alignment_of_b += "-"
288 |                     alignment_of_c += "-"
289 |                     i += 1
290 |                 l -= 1
291 |             except:
292 |                 print "An error occured."
293 |                 sys.exit()
294 |         while i < len(self.sequence_a):
295 |             alignment_of_a += self.sequence_a[i]
296 |             i += 1
297 |         while j < len(self.sequence_b):
298 |             alignment_of_b += self.sequence_b[j]
299 |             j += 1
300 |         while k < len(self.sequence_c):
301 |             alignment_of_b += self.sequence_c[k]
302 |             k += 1
303 |         alignment = [alignment_of_a, alignment_of_b, alignment_of_c]
304 |         return alignment
305 | 
306 |     def execute(self, maximalOptimalSolutions=-1):
307 |         """Method to start the computation of the Needleman-Wunsch algorithm with three sequences. It returns the computed alignment.
308 |         [maximalOptimalSolutions]: Define how many optimal solutions should be computed. If not defined, all optimal solutions are computed."""
309 |         self.compute_matrix()
310 |         if maximalOptimalSolutions == -1:
311 |             self.traceback()
312 |         else:
313 |             self.traceback(maximalOptimalSolutions)
314 |         return self.computed_alignment
315 | 


--------------------------------------------------------------------------------
/source/lib/multiple/sumOfPairs.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2015 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | #
12 | # Sum of pairs algorithm
13 | from helper import PairwiseAlignmentHelper as pah
14 | import sys
15 | 
16 | 
17 | class SumOfPairs():
18 |     """This class computes the Sum-of-pairs algorithm by Carillo and Lipman:
19 |             Carrillo, Humberto, and David Lipman.
20 |             "The multiple sequence alignment problem in biology."
21 |             SIAM Journal on Applied Mathematics 48.5 (1988): 1073-1082.
22 |             http://www.academia.edu/download/30855770/Articulo03.pdf"""
23 |     def __init__(self, sequences, similarity_score):
24 |         """To initialize a object of the SumOfPairs class please define a list with the multiple sequence alignment and
25 |          a similarity score method which is defined in class PairwiseAlignmentHelper of package helper.
26 |          sequences: The multiple alginment as a list.
27 |          similarity_score: The scoring functions name as a string."""
28 |         self.sequences = sequences
29 |         if similarity_score in dir(pah) and callable(getattr(pah, similarity_score)):
30 |             similarity_score_obj = eval('pah().' + similarity_score)
31 |         else:
32 |             print "Score function not found!"
33 |             sys.exit()
34 |         self.score_function = similarity_score_obj
35 | 
36 |     def execute(self):
37 |         """Run this method to compute the sum of pairs scoring for multiple alignment."""
38 |         score_value = 0
39 |         for i in range(0, len(self.sequences)):
40 |             for j in range(i+1, len(self.sequences)):
41 |                 score_value += self.score(self.sequences[i], self.sequences[j])
42 |         return score_value
43 | 
44 |     def score(self, sequence_a, sequence_b):
45 |         """Returns the pairwise alignment for sequence_a and sequence_b."""
46 |         score_value = 0
47 |         for i in range(0, max(len(sequence_a), len(sequence_b))):
48 |             if i < len(sequence_a) and i < len(sequence_b):
49 |                 score_value += self.score_function(sequence_a[i], sequence_b[i])
50 |             elif i < len(sequence_a):
51 |                 score_value += self.score_function(sequence_a[i], "")
52 |             elif i < len(sequence_b):
53 |                 score_value += self.score_function("", sequence_b[i])
54 |             i += 1
55 |         return score_value
56 | 


--------------------------------------------------------------------------------
/source/lib/multiple/test/fengDoolittleTest.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2014 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | #
12 | # Feng-Doolittle test class
13 | import unittest
14 | import os, sys
15 | lib_path = os.path.abspath('../../')
16 | sys.path.append(lib_path)
17 | 
18 | from multiple import FengDoolittle
19 | 
20 | class FengDoolittleTestClass(unittest.TestCase):
21 |     """Test class to test the correct computation of the Needleman-Wunsch n=3 algorithm."""
22 |     def test_computeAlignments(self):
23 |     	sequences = ["ACTG", "AT", "ACG"]
24 |         expectedAlignments = [[["ACTG", "A-T-"],0,1], [["ACTG", "AC-G"],0,2], [["AT-", "ACG"],1,2]]
25 |         fd = FengDoolittle(sequences, "weightFunctionDifference", "pam250")
26 |         fd.computeAlignments()
27 |         self.assertEqual(expectedAlignments, fd.alignments)
28 |     def test_computeDistanceDictionary(self):
29 |         sequences = ["ACCCAT", "ACGGAT", "AACCT"]
30 |         expectedAlignments = [["AC-CAT", "ACGGAT"], ["ACGGAT", "AACCAT"], ["-ACCAT", "AACCAT"]]
31 |         fd = FengDoolittle(sequences, "weightFunctionDifference", "pam250")
32 |         fd.computeAlignments()
33 |         fd.computeDistanceDictionary()
34 |     def test_computeOrderOfSequencesToAlign(self):
35 |         sequences = ["ACTG", "AT", "ACG"]
36 |         fd = FengDoolittle(sequences, "weightFunctionDifference", "pam250")
37 |         fd.computeAlignments()
38 |         fd.computeDistanceDictionary()
39 |         fd.buildTree()
40 |         # print "NewickTree: ",fd.newickTree
41 |         expectedResult = [[[0],[2]], [[0,2],[1]]]
42 |         # print "asd"
43 |         fd.computeOrderOfSequencesToAlign()
44 |         # print "asd"
45 |         self.assertEqual(expectedResult, fd.orderToAlign)
46 |     def test_computeMultipleAlignment(self):
47 |         sequences = ["ACTG", "AT", "ACG"]
48 |         expectedResult = {0: 'ACTG', 1: 'AXTX', 2: 'ACXG'}
49 |         fd = FengDoolittle(sequences, "weightFunctionDifference", "pam250")
50 |         fd.computeMultipleAlignment()
51 |         self.assertEqual(expectedResult, fd.computeMultipleAlignment())
52 | 
53 |         sequences = ["ACCAT", "ACGGAT", "AACCAT"]
54 |         expectedResult = {0: 'AXCCXAT', 1: 'AXCGGAT', 2: 'AACCXAT'}
55 |         fd2 = FengDoolittle(sequences, "weightFunctionDifference", "pam250")
56 |         fd2.computeMultipleAlignment()
57 |         self.assertEqual(expectedResult, fd2.computeMultipleAlignment())
58 | if __name__ == "__main__":
59 |     unittest.main() # run all tests     
60 | 


--------------------------------------------------------------------------------
/source/lib/multiple/test/needlemanWunschN3Test.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2014 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | #
12 | # Needleman-Wunsch with n=3 test class
13 | import unittest
14 | import os, sys
15 | lib_path = os.path.abspath('../../')
16 | sys.path.append(lib_path)
17 | 
18 | from multiple import NeedlemanWunschN3 as nw
19 | from helper import MultipleAlignmentHelper as mah
20 | from helper import MathHelper as mathHelper
21 | class NeelemanWunschN3TestClass(unittest.TestCase):
22 |     """Test class to test the correct computation of the Needleman-Wunsch n=3 algorithm."""
23 |     def test_computeMatrix(self):
24 |         sequenceA = "AC"
25 |         sequenceB = "AGT"
26 |         sequenceC = "AGT"
27 |         expectedMatrix = [[[0 for i in range(len(sequenceC)+1) ] for j in range(len(sequenceB)+1)] for k in range(len(sequenceA)+1 )]
28 |         for i in range(1, len(sequenceA)+1):
29 |             expectedMatrix[i][0][0] = expectedMatrix[i-1][0][0] + mah().weightFunctionDifference("", "", sequenceA[i-1])
30 |         for i in range(1, len(sequenceB)+1):
31 |             expectedMatrix[0][i][0] = expectedMatrix[0][i-1][0] + mah().weightFunctionDifference("", "", sequenceB[i-1])
32 |         for i in range(1, len(sequenceC)+1):
33 |             expectedMatrix[0][0][i] = expectedMatrix[0][0][i-1] + mah().weightFunctionDifference("", "", sequenceC[i-1])
34 |         for i in range(1, len(sequenceA)+1):
35 |             for j in range(1, len(sequenceB)+1):
36 |                 expectedMatrix[i][j][0] = expectedMatrix[i-1][j-1][0] + mah().weightFunctionDifference(sequenceA[i-1], sequenceB[j-1], "")
37 |         for i in range(1, len(sequenceA)+1):
38 |             for k in range(1, len(sequenceC)+1):
39 |                 expectedMatrix[i][0][k] = expectedMatrix[i-1][0][k-1] + mah().weightFunctionDifference(sequenceA[i-1], "", sequenceC[k-1])
40 |         for j in range(1, len(sequenceB)+1):
41 |             for k in range(1, len(sequenceC)+1):
42 |                 expectedMatrix[0][j][k] = expectedMatrix[0][j-1][k-1] + mah().weightFunctionDifference("", sequenceB[j-1], sequenceC[k-1])
43 | 
44 |         assertEqual()
45 | 
46 | if __name__ == "__main__":
47 |     unittest.main() # run all tests     
48 | 


--------------------------------------------------------------------------------
/source/lib/multiple/test/sumOfPairsTest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/joachimwolff/algorithmsInBioinformatics/0d3d91b7cb2370426617c09d98796998b7c5d1d7/source/lib/multiple/test/sumOfPairsTest.py


--------------------------------------------------------------------------------
/source/lib/multiple/test/upgmaWpgmaTest.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2015 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | #
12 | # UPGMA/WPGMA test class
13 | import unittest
14 | import os, sys
15 | 
16 | lib_path = os.path.abspath('../../')
17 | sys.path.append(lib_path)
18 | 
19 | from multiple import UpgmaWpgma
20 | 
21 | 
22 | class UpgmaWpgmaTestClass(unittest.TestCase):
23 |     """Test class to test the correct computation of the UPGMA/WPGMA algorithm."""
24 | 
25 |     def test_computeMinimalDistance(self):
26 |         distanceDictionary = {"0 1": 1, "0 2": 2, "0 3": 3, "1 2": 2, "1 3": 3, "1 4": 3}
27 |         upgma = UpgmaWpgma(distanceDictionary, 4)
28 |         expectedValue = ["0 1", 1]
29 |         self.assertEqual(expectedValue, upgma.compute_minimal_distance())
30 | 
31 |     def test_computeClustering(self):
32 |         distanceDictionary = {"0 1": 1, "0 2": 2, "0 3": 3, "1 2": 2, "1 3": 3, "2 3": 3}
33 |         upgma = UpgmaWpgma(distanceDictionary, 4)
34 |         expectedValue = {"0 1": 4, "2 4": 5, "3 5": 6}
35 |         upgma.compute_clustering()
36 |         print upgma.get_newick_tree()
37 |         self.assertEqual(expectedValue, upgma.mapping)
38 | 
39 |         print upgma.get_newick_tree(with_edge_weights=True)
40 |         distanceDictionary = {"0 1": 6, "0 2": 10, "0 3": 10, "0 4": 10, "1 2": 10, "1 3": 10, "1 4": 10, "2 3": 2,
41 |                               "2 4": 6, "3 4": 6}
42 |         upgma2 = UpgmaWpgma(distanceDictionary, 5)
43 |         expectedValue = {"2 3": 5, "0 1": 7, "4 5": 6, "6 7": 8}
44 |         upgma2.compute_clustering()
45 |         print upgma2.get_newick_tree(with_edge_weights=False)
46 |         self.assertEqual(expectedValue, upgma2.mapping)
47 |         print upgma2.get_newick_tree(with_edge_weights=True)
48 | 
49 | 
50 | 
51 | 
52 |     def test_getNewickTree(self):
53 |         mapping = {'1 3': 5, '4 6': 7, '5 7': 8, '0 2': 6}
54 |         distanceDictionary = {}
55 |         upgma = UpgmaWpgma(distanceDictionary, 5)
56 |         upgma.mapping = mapping
57 |         upgma.get_newick_tree()
58 | 
59 | 
60 | if __name__ == "__main__":
61 |     unittest.main()  # run all tests
62 | 


--------------------------------------------------------------------------------
/source/lib/multiple/upgmaWpgma.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | # Copyright 2015 Joachim Wolff
  3 | # Programming Course: Algorithms in Bioinformatics
  4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
  5 | # Winter semester 2014/2015
  6 | #
  7 | # Chair of Bioinformatics
  8 | # Department of Computer Science
  9 | # Faculty of Engineering
 10 | # Albert-Ludwig-University Freiburg im Breisgau
 11 | 
 12 | from helper import MathHelper
 13 | 
 14 | 
 15 | class UpgmaWpgma():
 16 |     """Upgma/Wpgma is a clustering method to generate phylogenetic trees. """
 17 | 
 18 |     def __init__(self, distance_dictionary, node_count, upgma_wpgma=True, sequence_size_mapping={}):
 19 |         """To initalize a object of this class, please define the following:
 20 |                 distance_dictionary:    A dictionary with the distance between two sequences.
 21 |                                         Should have the form \"Key0 key1\":distance. The key0 and key1 have to be integers.
 22 |                 node_count:             The number of sequences.
 23 |                 upgma_wpgma:            If True, the upgma weighting is used, if False, wpgma.
 24 |                 sequence_size_mapping:  Only necessary if wpgma is executed. It defines the size of each sequence.
 25 |                                         Should have the form: \"Key:len(sequence)\""""
 26 |         self.distance_dictionary = distance_dictionary
 27 |         self.mapping = {}
 28 |         self.node_count = node_count
 29 |         self.number_of_nodes = node_count
 30 |         self.upgma_wpgma = upgma_wpgma
 31 |         self.sequence_size_mapping = sequence_size_mapping
 32 |         self.edge_weight = {}
 33 | 
 34 |     def compute_clustering(self):
 35 |         """This function computes the clustering to get the phylogenetic tree."""
 36 |         computation_is_done = False
 37 |         j = 0
 38 |         while not computation_is_done:
 39 |             j += 1
 40 |             minimum_cluster = self.compute_minimal_distance()
 41 |             nodes = minimum_cluster[0].split(" ")
 42 |             if len(nodes) > 1:
 43 |                 self.mapping[minimum_cluster[0]] = self.node_count
 44 |                 self.compute_edge_weight(minimum_cluster[1], nodes)
 45 | 
 46 |                 if minimum_cluster[0] in self.distance_dictionary:
 47 |                     del self.distance_dictionary[minimum_cluster[0]]
 48 | 
 49 |                 for i in range(0, self.node_count + 1):
 50 |                     key_value_0 = nodes[0] + " " + str(i)
 51 |                     key_value_1 = nodes[1] + " " + str(i)
 52 |                     key_value = self.key_in_dictionary(key_value_0, key_value_1)
 53 |                     if key_value[0] != "":
 54 |                         key_for_new_cluster_distance = str(i) + " " + str(self.node_count)
 55 |                         self.distance_dictionary[key_for_new_cluster_distance] = self.compute_new_distance(
 56 |                             self.distance_dictionary[key_value[0]], self.distance_dictionary[key_value[1]], nodes[0],
 57 |                             nodes[1])
 58 |                         # try:
 59 |                         # except:
 60 |                         #     "something wring"
 61 |                             # "something wring"
 62 |                         if not self.upgma_wpgma:
 63 |                             self.sequence_size_mapping[self.node_count] = self.sequence_size_mapping[int(nodes[0])] + \
 64 |                                                                           self.sequence_size_mapping[int(nodes[1])]
 65 |                         del self.distance_dictionary[key_value[0]]
 66 |                         del self.distance_dictionary[key_value[1]]
 67 |                 self.node_count += 1
 68 |             else:
 69 |                 computation_is_done = True
 70 | 
 71 |     def key_in_dictionary(self, key_value_0, key_value_1):
 72 |         """Returns True if the given keys are in the distance dictionary, False otherwise.
 73 |             key_value_0: The first key value.
 74 |             key_value_1: The second key value."""
 75 |         for i in range(0, 4):
 76 |             if key_value_0 in self.distance_dictionary and key_value_1 in self.distance_dictionary:
 77 |                 return [key_value_0, key_value_1]
 78 |             elif key_value_0[::-1] in self.distance_dictionary and key_value_1 in self.distance_dictionary:
 79 |                 return [key_value_0[::-1], key_value_1]
 80 |             elif key_value_0 in self.distance_dictionary and key_value_1[::-1] in self.distance_dictionary:
 81 |                 return [key_value_0, key_value_1[::-1]]
 82 |             elif key_value_0[::-1] in self.distance_dictionary and key_value_1[::-1] in self.distance_dictionary:
 83 |                 return [key_value_0[::-1], key_value_1[::-1]]
 84 |             else:
 85 |                 return ["", ""]
 86 | 
 87 | 
 88 |     def compute_minimal_distance(self):
 89 |         """Returns the next two clusters for merging."""
 90 |         minimum = ["", MathHelper.Inf]
 91 |         for i in self.distance_dictionary:
 92 |             if minimum[1] > self.distance_dictionary[i]:
 93 |                 minimum[0] = i
 94 |                 minimum[1] = self.distance_dictionary[i]
 95 |         return minimum
 96 | 
 97 |     def compute_new_distance(self, distance_a_x, distance_b_x, index_a, index_b):
 98 |         """Returns the new distance between the new merged cluster and an other cluster.
 99 |             distance_a_x:   The old distance between cluster a and x.
100 |             distance_b_x:   The old distance between cluster b and x.
101 |             index_a:        The index of a.
102 |             index_b:        The index of b."""
103 |         if self.upgma_wpgma:
104 |             return self.upgma_distance(distance_a_x, distance_b_x)
105 |         else:
106 |             return self.wpgma_distance(distance_a_x, distance_b_x, self.sequence_size_mapping[int(index_a)],
107 |                                        self.sequence_size_mapping[int(index_b)])
108 | 
109 |     def upgma_distance(self, distance_a_x, distance_b_x):
110 |         """Returns the upgma-distance between the new merged cluster a and an other cluster x.
111 |             distance_a_x:   The old distance between cluster a and x.
112 |             distance_b_x:   The old distance between cluster b and x."""
113 |         return (distance_a_x + distance_b_x) / 2
114 | 
115 |     def wpgma_distance(self, distance_a_x, distance_b_x, length_of_a, length_of_b):
116 |         """Returns the wpgma-distance between the new merged cluster a and an other cluster x.
117 |             distance_a_x:   The old distance between cluster a and x.
118 |             distance_b_x:   The old distance between cluster b and x.
119 |             length_of_a:        The index of a.
120 |             length_of_b:        The index of b."""
121 |         return (length_of_a * distance_a_x + length_of_b * distance_b_x) / (length_of_a + length_of_b)
122 | 
123 |     def compute_edge_weight(self, weight, nodes):
124 |         """This method computes the new edge weight for a new cluster.
125 |             weight: The edge weight equal to the distance of the to merged clusters.
126 |             nodes:  A list containing the indices of the two merged clusters."""
127 |         node0= int(nodes[0])
128 |         node1 = int(nodes[1])
129 |         if node0 < self.number_of_nodes and node1 < self.number_of_nodes:
130 |             # self.edge_weight[self.node_count] = 1
131 |             self.edge_weight[self.node_count] = [weight / float(2), weight / float(2)]
132 |         elif node0 < self.number_of_nodes:
133 |             weightToLeafs = self.edge_weight[node1][1]
134 |             self.edge_weight[self.node_count] = [weight / float(2) - weightToLeafs, weight / float(2)]
135 |         elif node1 < self.number_of_nodes:
136 |             weightToLeafs = self.edge_weight[node0][1]
137 |             self.edge_weight[self.node_count] = [weight / float(2), weight / float(2) - weightToLeafs]
138 |         else:
139 |             weightToLeafs = self.edge_weight[node0][1]
140 |             weightToLeafs1 = self.edge_weight[node1][1]
141 |             self.edge_weight[self.node_count] = [weight / float(2) - weightToLeafs, weight / float(2) - weightToLeafs1]
142 | 
143 | 
144 |     def get_newick_tree(self, with_edge_weights=False):
145 |         """Returns the computed cluster in the Newick tree format.
146 |             with_edge_weights:  If True, edge weights are part of the output, if False, not."""
147 |         # expectedValue = {"2 3": 5, "0 1": 7, "4 5": 6, "6 7": 8}
148 |         newick_dictionary = dict([[v, k] for k, v in self.mapping.items()])
149 |         if with_edge_weights:
150 |             for i in newick_dictionary:
151 |                 if i in self.edge_weight:
152 |                     nodesWithWeights = newick_dictionary[i].split(" ")
153 |                     nodesWithWeights[0] = nodesWithWeights[0].strip(" ")
154 |                     nodesWithWeights[0] += ":" + str(self.edge_weight[i][1])
155 |                     nodesWithWeights[1] = nodesWithWeights[1].strip(" ")
156 |                     nodesWithWeights[1] += ":" + str(self.edge_weight[i][0])
157 |                     newick_dictionary[i] = nodesWithWeights[0] + " " + nodesWithWeights[1]
158 |             self.mapping = dict([[v, k] for k, v in newick_dictionary.items()])
159 |         for i in self.mapping:
160 |             index = -1
161 |             leading_sequence = True
162 |             for j in newick_dictionary:
163 |                 string_to_find = " " + str(self.mapping[i]) + ""
164 |                 if newick_dictionary[j].find(string_to_find) != -1:
165 |                     index = j
166 |                     leading_sequence = False
167 |                     break
168 |                 string_to_find = str(self.mapping[i]) + " "
169 |                 if newick_dictionary[j].find(string_to_find) != -1:
170 |                     index = j
171 |                     leading_sequence = True
172 |                     break
173 |                 if with_edge_weights:
174 |                     string_to_find = str(self.mapping[i]) + ":"
175 |                 else:
176 |                     string_to_find = str(self.mapping[i]) + ","
177 |                 if newick_dictionary[j].find(string_to_find) != -1:
178 |                     index = j
179 |                     leading_sequence = True
180 |                     break
181 |                 string_to_find = "," + str(self.mapping[i])
182 |                 if newick_dictionary[j].find(string_to_find) != -1:
183 |                     index = j
184 |                     leading_sequence = False
185 |                     break
186 | 
187 |             if index != -1:
188 |                 if leading_sequence:
189 |                     stringToReplace = "(" + newick_dictionary[int(string_to_find.strip().strip(",").strip(":"))].replace(" ",
190 |                                                                                                               ",") + "):"
191 |                 else:
192 |                     stringToReplace = ",(" + newick_dictionary[int(string_to_find.strip().strip(",").strip(":"))].replace(" ",
193 |                                                                                                                ",") + ")"
194 |                 newick_dictionary[index] = newick_dictionary[index].replace(string_to_find, stringToReplace).replace(
195 |                     ",,", ",")
196 |                 del newick_dictionary[int(string_to_find.strip().strip(",").strip(":"))]
197 | 
198 |         for i in newick_dictionary:
199 |             return "(" + newick_dictionary[i] + ")"
200 | 


--------------------------------------------------------------------------------
/source/lib/pairwise/__init__.py:
--------------------------------------------------------------------------------
1 | from gotoh import Gotoh
2 | from needlemanWunsch import NeedlemanWunsch
3 | 


--------------------------------------------------------------------------------
/source/lib/pairwise/gotoh.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | # Copyright 2014 Joachim Wolff
  3 | # Programming Course: Algorithms in Bioinformatics
  4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
  5 | # Winter semester 2014/2015
  6 | #
  7 | # Chair of Bioinformatics
  8 | # Department of Computer Science
  9 | # Faculty of Engineering
 10 | # Albert-Ludwig-University Freiburg im Breisgau
 11 | #
 12 | # Gotoh algorithm
 13 | from helper import PairwiseAlignmentHelper as pah
 14 | from helper import MathHelper as mathHelper
 15 | import sys
 16 | 
 17 | class Gotoh():
 18 |     """This class holds methods which are needed to compute the pairwise 
 19 |     alignment algorithm from Osamu Gotoh, published in 1982:
 20 |         Osamu Gotoh (1982). "An improved algorithm for matching biological sequences".
 21 |         Journal of molecular biology 162: 705.
 22 |         https://www.cs.umd.edu/class/spring2003/cmsc838t/papers/gotoh1982.pdf
 23 |     """
 24 |     def __init__(self, sequenceA, sequenceB, scoreFunction, costFunction):
 25 |         """Initalize all variables and methods needed to compute the Gotoh algorithm.
 26 |             sequenceA:      A string with the first DNA sequence.
 27 |             sequenceB:      A string with the second DNA sequence.
 28 |             scoreFunction:  The name of a weight function as a String which is defined 
 29 |                             in the pairwiseAlignmentHelper-class.
 30 |             costFunction:   The name of a gap cost function as a String which is defined 
 31 |                             in the pairwiseAlignmentHelper-class.
 32 |         """
 33 |         if scoreFunction in dir(pah) and callable(getattr(pah, scoreFunction)):
 34 |             scoreFunctionObj = eval('pah().' + scoreFunction)
 35 |         else:
 36 |             print "Score function not found!"
 37 |             sys.exit()       
 38 |         if costFunction in dir(pah) and callable(getattr(pah, costFunction)):
 39 |             costFunctionObj = eval('pah().' + costFunction)
 40 |         else:
 41 |             print "Gap cost function not found!"
 42 |             sys.exit() 
 43 |         self.computationMatrix = [[],[],[]]
 44 |         self.sequenceA = sequenceA
 45 |         self.sequenceB = sequenceB
 46 |         self.scoreFunction = scoreFunctionObj
 47 |         self.costFunction = costFunctionObj
 48 |         self.beta = self.costFunction(1) - self.costFunction(0)
 49 |         self.i = 0
 50 |         self.j = 0
 51 |         self.tracebackStack = [[]]
 52 |         self.tracebackStackIndex = 0
 53 |         self.indiciesStack = [[]]
 54 |         self.computedAlignment = []
 55 | 
 56 | 
 57 |     def computeMatrix(self):
 58 |         """Initalize the three matricies needed for the Gotoh-Algorithm.
 59 |         The sequences A and B, the weight function and the gap costs have to be defined 
 60 |         by the creation of the object of this class."""
 61 |         computationMatrixD = [[0 for i in range(len(self.sequenceB)+1) ] for j in range(len(self.sequenceA)+1)]
 62 |         computationMatrixP = [[0 for i in range(len(self.sequenceB)+1) ] for j in range(len(self.sequenceA)+1)]
 63 |         computationMatrixQ = [[0 for i in range(len(self.sequenceB)+1) ] for j in range(len(self.sequenceA)+1)]
 64 |         # initalize matrix
 65 |         for i in range(1, len(self.sequenceA)+1):
 66 |             computationMatrixD[i][0] = self.costFunction(i)
 67 |             computationMatrixP[i][0] = mathHelper.NaN
 68 |             computationMatrixQ[i][0] = mathHelper.Inf
 69 |         for i in range(1, len(self.sequenceB)+1):
 70 |             computationMatrixD[0][i] = self.costFunction(i)
 71 |             computationMatrixP[0][i] = mathHelper.Inf
 72 |             computationMatrixQ[0][i] = mathHelper.NaN
 73 | 
 74 |         for i in range(1, len(self.sequenceA)+1):
 75 |             for j in range(1, len(self.sequenceB)+1):
 76 |                 computationMatrixP[i][j] = self.computeP(computationMatrixD[i-1][j], computationMatrixP[i-1][j], self.costFunction, self.beta)  
 77 |                 computationMatrixQ[i][j] = self.computeQ(computationMatrixD[i][j-1], computationMatrixQ[i][j-1], self.costFunction, self.beta)
 78 |                 computationMatrixD[i][j] = self.computeD(computationMatrixD[i-1][j-1], computationMatrixP[i][j], computationMatrixQ[i][j], self.sequenceA[i-1], self.sequenceB[j-1], self.scoreFunction)
 79 |         self.computationMatrix = [computationMatrixD, computationMatrixP, computationMatrixQ]
 80 | 
 81 |     def computeP(self, valueOfD, valueOfP, costFunction, beta):
 82 |         """Compute the values for matrix P.
 83 |             This is the minimum value of:
 84 |                 matrix D of cell (i-1, j) + gap costs
 85 |                 and 
 86 |                 matrix P of cell (i-1, j) + 1
 87 |             valueOfD:       The value from matrix D of cell i-1, j.
 88 |             valueOfP:       The value from matrix P of cell i-1, j.
 89 |             costFunction:   The gap cost function defined at the object creation.
 90 |             beta:           The beta value from the gap costs."""
 91 |         return min(valueOfD + costFunction(1), valueOfP + beta)
 92 |       
 93 |     def computeQ(self, valueOfD, valueOfQ, costFunction, beta):
 94 |         """Compute the values for matrix Q.
 95 |             This is the minimum value of:
 96 |                 matrix D of cell (i, j-1) + gap costs
 97 |                 and 
 98 |                 matrix Q of cell (i, j-1) + 1
 99 |             valueOfD:       The value from matrix D of cell i, j-1.
100 |             valueOfQ:       The value from matrix Q of cell i, j-1.
101 |             costFunction:   The gap cost function defined at the object creation.
102 |             beta:           The beta value from the gap costs."""
103 |         return min(valueOfD + costFunction(1), valueOfQ + beta)
104 | 
105 |     def computeD(self, valueOfD, valueOfP, valueOfQ, characterA, characterB, scoreFunction):
106 |         """Compute the values for matrix D.
107 |             This is the minimum value of:
108 |                 matrix D of cell (i-1, j-1) + w(a,b)
109 |                 and 
110 |                 matrix P of cell (i, j)
111 |                 and 
112 |                 matrix Q of cell (i, j)
113 |             valueOfD:       The value from matrix D of cell i-1, j-1.
114 |             valueOfP:       The value from matrix P of cell i, j.
115 |             valueOfQ:       The value from matrix Q of cell i, j.
116 |             characterA:     The character in sequence A at position i.
117 |             characterB:     The character in sequence B at position j.
118 |             scoreFunction:  The weight cost function defined at the object creation."""
119 |         return min(valueOfP, min(valueOfQ, valueOfD + scoreFunction(characterA, characterB)))
120 | 
121 |     def traceback(self):
122 |         """Computes the traceback for the Gotoh algorithm."""
123 |         self.j = len(self.computationMatrix[0][0]) - 1
124 |         self.i = len(self.computationMatrix[0]) - 1
125 |         self.tracebackStackIndex = 0
126 |         self.indiciesStack[self.tracebackStackIndex] = [self.i, self.j, pah.matrixIndexD]
127 |         tracebackDone = False
128 |         while not tracebackDone:
129 |             while self.i > 0 or self.j > 0:
130 |                 if self.indiciesStack[self.tracebackStackIndex][2] == pah.matrixIndexD:
131 |                     self.tracebackD()
132 |                 elif self.indiciesStack[self.tracebackStackIndex][2] == pah.matrixIndexP:
133 |                     self.tracebackP()
134 |                 elif self.indiciesStack[self.tracebackStackIndex][2] == pah.matrixIndexQ:
135 |                     self.tracebackQ()
136 |                 self.i = self.indiciesStack[self.tracebackStackIndex][0]
137 |                 self.j = self.indiciesStack[self.tracebackStackIndex][1]
138 |             tracebackDone = True
139 |             for i in range(0, len(self.indiciesStack)):
140 |                 if self.indiciesStack[i][0] > 0 or self.indiciesStack[i][1] > 0:
141 |                     self.tracebackStackIndex = i
142 |                     tracebackDone = False
143 |                     break
144 |             self.i = self.indiciesStack[self.tracebackStackIndex][0]
145 |             self.j = self.indiciesStack[self.tracebackStackIndex][1]
146 |         for i in range(0, len(self.tracebackStack)):
147 |             self.computedAlignment.append(self.buildAlignment(self.tracebackStack[i]))
148 | 
149 |     
150 |     def tracebackD(self):
151 |         """Computes the traceback for a cell of the matrix D."""
152 |         a = self.sequenceA[self.i - 1]
153 |         b = self.sequenceB[self.j - 1]
154 |         split = 0
155 |         pathVariableI = self.i
156 |         pathVariableJ = self.j
157 |         if self.j > 0 and self.i > 0:
158 |             if self.computationMatrix[pah.matrixIndexD][self.i][self.j] == self.computationMatrix[pah.matrixIndexD][self.i-1][self.j-1] + self.scoreFunction(a,b):
159 |                 self.tracebackStack[self.tracebackStackIndex].append(pah.diagonalD)
160 |                 pathVariableI -= 1
161 |                 pathVariableJ -= 1
162 |                 split = 1
163 |             if self.computationMatrix[pah.matrixIndexD][self.i][self.j] == self.computationMatrix[pah.matrixIndexQ][self.i][self.j]:
164 |                 if split == 0:
165 |                     self.tracebackStack[self.tracebackStackIndex].append(pah.dotQ)
166 |                     self.indiciesStack[self.tracebackStackIndex][2] = pah.matrixIndexQ
167 |                     split = 1
168 |                 else:
169 |                     self.tracebackStack.append(self.tracebackStack[self.tracebackStackIndex][0:-1])
170 |                     self.tracebackStack[len(self.tracebackStack)-1].append(pah.dotQ)
171 |                     self.indiciesStack.append([self.i,self.j, pah.matrixIndexQ])
172 |             if self.computationMatrix[pah.matrixIndexD][self.i][self.j] == self.computationMatrix[pah.matrixIndexP][self.i][self.j]:
173 |                 if split == 0: 
174 |                     self.tracebackStack[self.tracebackStackIndex].append(pah.dotP)
175 |                     self.indiciesStack[self.tracebackStackIndex][2] = pah.matrixIndexP
176 |                 else:
177 |                     self.tracebackStack.append(self.tracebackStack[self.tracebackStackIndex][0:-1])
178 |                     self.tracebackStack[len(self.tracebackStack)-1].append(pah.dotP)
179 |                     self.indiciesStack.append([self.i, self.j, pah.matrixIndexP])
180 | 
181 |         if self.i == 0:
182 |             self.tracebackStack[self.tracebackStackIndex].append(pah.leftD)
183 |             pathVariableJ -= 1
184 |         if self.j == 0:
185 |             self.tracebackStack[self.tracebackStackIndex].append(pah.upD)
186 |             pathVariableI -= 1
187 |         if self.i <= 0 or pathVariableI <= 0:
188 |             pathVariableI = 0
189 |         if self.j <= 0 or pathVariableJ <= 0:
190 |             pathVariableJ = 0
191 |         self.indiciesStack[self.tracebackStackIndex][0] = pathVariableI
192 |         self.indiciesStack[self.tracebackStackIndex][1] = pathVariableJ
193 | 
194 | 
195 |     def tracebackP(self):
196 |         """Computes the traceback for a cell of the matrix P"""
197 |         split = False
198 |         if self.i > 0:
199 |             if self.computationMatrix[pah.matrixIndexP][self.i][self.j] == self.computationMatrix[pah.matrixIndexD][self.i-1][self.j] + self.costFunction(1):
200 |                 self.tracebackStack[self.tracebackStackIndex].append(pah.upD)
201 |                 self.indiciesStack[self.tracebackStackIndex][0] -= 1
202 |                 self.indiciesStack[self.tracebackStackIndex][2] = pah.matrixIndexD
203 |                 split = True
204 |             if self.computationMatrix[pah.matrixIndexP][self.i][self.j] == self.computationMatrix[pah.matrixIndexP][self.i-1][self.j] + self.beta:
205 |                 if split:
206 |                     self.tracebackStack.append(self.tracebackStack[self.tracebackStackIndex][0:-1])
207 |                     self.tracebackStack[len(self.tracebackStack)-1].append(pah.upP)
208 |                     self.indiciesStack.append([self.i - 1, self.j, pah.matrixIndexP])
209 |                 else:
210 |                     self.tracebackStack[self.tracebackStackIndex].append(pah.upP)
211 |                     self.indiciesStack[self.tracebackStackIndex][0] -= 1
212 |                     self.indiciesStack[self.tracebackStackIndex][2] = pah.matrixIndexP
213 | 
214 |     def tracebackQ(self):
215 |         """Computes the traceback for a cell of the matrix Q"""
216 |         split = False
217 |         if self.j > 0:
218 |             if self.computationMatrix[pah.matrixIndexQ][self.i][self.j] == self.computationMatrix[pah.matrixIndexD][self.i][self.j-1] + self.costFunction(1):
219 |                 self.tracebackStack[self.tracebackStackIndex].append(pah.leftD)
220 |                 self.indiciesStack[self.tracebackStackIndex][1] -= 1
221 |                 self.indiciesStack[self.tracebackStackIndex][2] = pah.matrixIndexD
222 |                 split = True
223 | 
224 |             if self.computationMatrix[pah.matrixIndexQ][self.i][self.j] == self.computationMatrix[pah.matrixIndexQ][self.i][self.j-1] + self.beta:
225 |                 if split:
226 |                     self.tracebackStack.append(self.tracebackStack[self.tracebackStackIndex][0:-1])
227 |                     self.tracebackStack[len(self.tracebackStack)-1].append(pah.leftQ)
228 |                     self.indiciesStack.append([self.i , self.j - 1, pah.matrixIndexQ])
229 |                 else:
230 |                     self.tracebackStack[self.tracebackStackIndex].append(pah.leftQ)
231 |                     self.indiciesStack[self.tracebackStackIndex][1] -= 1
232 |                     self.indiciesStack[self.tracebackStackIndex][2] = pah.matrixIndexQ
233 | 
234 |     def buildAlignment(self, tracebackStack):
235 |         """A method to compute the alignment of a given traceback of the Gotoh algorithm.
236 |             tracebackStack: The computed traceback path for one alignment as a list."""
237 |         i = 0
238 |         j = 0
239 |         k = len(tracebackStack)-1
240 |         alignmentOfA = ""
241 |         alignmentOfB = ""
242 |         while len(tracebackStack) > 0:
243 |             try:
244 |                 tracebackElement = tracebackStack.pop(k)
245 |                 if pah.leftQ == tracebackElement or pah.leftD == tracebackElement:
246 |                     alignmentOfA += "-"
247 |                     alignmentOfB += self.sequenceB[j]
248 |                     j += 1
249 |                 elif pah.upP == tracebackElement or pah.upD == tracebackElement:
250 |                     alignmentOfA += self.sequenceA[i]
251 |                     alignmentOfB += "-"
252 |                     i += 1
253 |                 elif pah.diagonalD == tracebackElement:
254 |                     alignmentOfA += self.sequenceA[i]
255 |                     alignmentOfB += self.sequenceB[j]
256 |                     i += 1
257 |                     j += 1
258 |                 k -= 1
259 | 
260 |             except:
261 |                 print "An error occured."
262 |                 sys.exit()
263 | 
264 |         while i < len(self.sequenceA):
265 |             alignmentOfA += self.sequenceA[i]
266 |             i += 1
267 |         while j < len(self.sequenceB):
268 |             alignmentOfB += self.sequenceB[j]
269 |             j += 1   
270 |         alignment = [alignmentOfA, alignmentOfB]
271 |         return alignment
272 | 
273 |     def compute(self):
274 |         """Method to start the computation of the Gotoh algorithm."""
275 |         self.computeMatrix()
276 |         self.traceback()
277 |         return self.computedAlignment
278 | 
279 | 


--------------------------------------------------------------------------------
/source/lib/pairwise/needlemanWunsch.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | # Copyright 2014 Joachim Wolff
  3 | # Programming Course: Algorithms in Bioinformatics
  4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
  5 | # Winter semester 2014/2015
  6 | #
  7 | # Chair of Bioinformatics
  8 | # Department of Computer Science
  9 | # Faculty of Engineering
 10 | # Albert-Ludwig-University Freiburg im Breisgau
 11 | #
 12 | # Needleman-Wunsch algorithm
 13 | import sys
 14 | from helper import PairwiseAlignmentHelper as pah
 15 | 
 16 | 
 17 | class NeedlemanWunsch():
 18 |     """This class holds methods which are needed to compute the pairwise
 19 |     alignment algorithm from Saul Needleman and Christian Wunsch, published in 1970:
 20 |         Needleman, Saul B.; and Wunsch, Christian D. (1070).
 21 |         A general method applicable to search for similarities in the aminoacid
 22 |         sequence of two proteins. Journal of Molecular Biology 48 (3): 443-53
 23 |         http://www.cise.ufl.edu/class/cis4930sp09rab/00052.pdf"""
 24 | 
 25 |     def computeMatrix(self, sequenceA, sequenceB, scoreFunction):
 26 |         """Initalize and computes the values for the Needleman-Wunsch matrix.
 27 |             sequenceA:      A string with the first DNA sequence.
 28 |             sequenceB:      A string with the second DNA sequence.
 29 |             scoreFunction:  The name of a weight function as a String which is defined 
 30 |                             in the pairwiseAlignmentHelper-class."""
 31 |         computationMatrix = [[0 for i in range(len(sequenceB) + 1)] for j in range(len(sequenceA) + 1)]
 32 | 
 33 |         # initalize matrix
 34 |         for i in range(1, len(sequenceA) + 1):
 35 |             computationMatrix[i][0] = computationMatrix[i - 1][0] + scoreFunction("", sequenceA[i - 1])
 36 |         for i in range(1, len(sequenceB) + 1):
 37 |             computationMatrix[0][i] = computationMatrix[0][i - 1] + scoreFunction("", sequenceB[i - 1])
 38 | 
 39 |         for i in range(1, len(sequenceA) + 1):
 40 |             for j in range(1, len(sequenceB) + 1):
 41 |                 computationMatrix[i][j] = self.computeMinimum(sequenceA[i - 1], sequenceB[j - 1],
 42 |                                                               computationMatrix[i][j - 1], computationMatrix[i - 1][j],
 43 |                                                               computationMatrix[i - 1][j - 1], scoreFunction)
 44 |         return computationMatrix
 45 | 
 46 |     def computeMinimum(self, characterOfA, characterOfB, predecessorLeft, predecessorUp, predecessorDiagonal,
 47 |                        scoreFunction):
 48 |         """Computes the minimum of a given cell for the Needleman-Wunsch matrix.
 49 |                 characterA:             The character in sequence A at position i.
 50 |                 characterB:             The character in sequence B at position j.
 51 |                 predecessorLeft:        The value i, j-1 in the matrix.
 52 |                 predecessorUp:          The value i-1, j in the matrix.
 53 |                 predecessorDiagonal:    The value i-1, j-1 in the matrix.
 54 |                 scoreFunction:          The weight function defined in 
 55 |                                         class pairwiseAlignmentHelper."""
 56 |         costUp = predecessorUp + scoreFunction(characterOfA, "")
 57 |         costDiagonal = predecessorDiagonal + scoreFunction(characterOfA, characterOfB)
 58 |         costLeft = predecessorLeft + scoreFunction("", characterOfB)
 59 |         return min(costUp, costDiagonal, costLeft)
 60 | 
 61 |     def traceback(self, sequenceA, sequenceB, computationMatrix, scoreFunction, maxOptimalSolutions=-1):
 62 |         """Computes the traceback for the Needleman-Wunsch matrix.
 63 |                 sequenceA:          A string with the first DNA sequence.
 64 |                 sequenceB:          A string with the second DNA sequence.
 65 |                 computationMatrix:  The computed matrix for the two sequences.
 66 |                 scoreFunction:      The name of a weight function as a String which is defined 
 67 |                                     in the pairwiseAlignmentHelper-class.
 68 |             """
 69 |         tracebackStack = [[]]
 70 |         indiciesStack = [[len(computationMatrix) - 1, len(computationMatrix[0]) - 1]]
 71 |         tracebackCount = 0
 72 |         tracebackDone = False
 73 |         optimalSolutionsCount = 0
 74 |         l = 0
 75 |         allTracebacksComputed = 0
 76 |         appendTracebackStack = tracebackStack.append
 77 |         appendIndices = indiciesStack.append
 78 |         while not tracebackDone:
 79 | 
 80 |             optimalSolutionsCount += 1
 81 |             i = indiciesStack[tracebackCount][0]
 82 |             j = indiciesStack[tracebackCount][1]
 83 |             split = False
 84 |             appendTraceback = tracebackStack[tracebackCount].append
 85 | 
 86 |             while i > 0 or j > 0:
 87 |                 pathVariableI = i
 88 |                 pathVariableJ = j
 89 |                 # left arrow
 90 |                 if j > 0:
 91 |                     if computationMatrix[i][j] == computationMatrix[i][j - 1] + scoreFunction("", sequenceB[j - 1]):
 92 |                         # tracebackStack[tracebackCount].append(pah.left)
 93 |                         appendTraceback(pah.left)
 94 |                         pathVariableJ -= 1  # change j
 95 |                         split = True
 96 | 
 97 |                 # up arrow
 98 |                 if i > 0:
 99 |                     if computationMatrix[i][j] == computationMatrix[i - 1][j] + scoreFunction(sequenceA[i - 1], ""):
100 |                         if split == False:
101 |                             appendTraceback(pah.up)
102 |                             # tracebackStack[tracebackCount].append(pah.up)
103 |                             pathVariableI -= 1
104 |                             split = True
105 |                         else:
106 |                             appendTracebackStack(tracebackStack[tracebackCount][0:-1])
107 |                             tracebackStack[len(tracebackStack) - 1].append(pah.up)
108 |                             appendIndices([i - 1, j])
109 | 
110 |                 # diagonal arrow
111 |                 if i > 0 and j > 0:
112 |                     if computationMatrix[i][j] == computationMatrix[i - 1][j - 1] + scoreFunction(sequenceA[i - 1],
113 |                                                                                                   sequenceB[j - 1]):
114 |                         if split == False:
115 |                             appendTraceback(pah.diagonal)
116 |                             # tracebackStack[tracebackCount].append(pah.diagonal)
117 |                             pathVariableI -= 1
118 |                             pathVariableJ -= 1
119 |                         elif split == True:
120 |                             appendTracebackStack(tracebackStack[tracebackCount][0:-1])
121 |                             tracebackStack[len(tracebackStack) - 1].append(pah.diagonal)
122 |                             appendIndices([i - 1, j - 1])
123 |                 split = 0
124 |                 i = pathVariableI
125 |                 j = pathVariableJ
126 | 
127 |             indiciesStack[tracebackCount][0] = i
128 |             indiciesStack[tracebackCount][1] = j
129 |             l = tracebackCount
130 |             while l < len(indiciesStack):
131 |                 if indiciesStack[l][0] == 0 and indiciesStack[l][1] == 0:
132 |                     allTracebacksComputed += 1
133 |                 else:
134 |                     tracebackCount = l
135 |                     l = len(indiciesStack)
136 |                 l += 1
137 |             if allTracebacksComputed >= len(indiciesStack):
138 |                 tracebackDone = True
139 |             if maxOptimalSolutions != -1 and optimalSolutionsCount >= maxOptimalSolutions:
140 |                 tracebackDone = True
141 |                 # allTracebacksComputed = 0
142 | 
143 |         computedAlignment = []
144 |         if maxOptimalSolutions == -1:
145 |             for i in range(0, len(tracebackStack)):
146 |                 computedAlignment.append(self.buildAlignment(tracebackStack[i], sequenceA, sequenceB))
147 |         else:
148 |             for i in range(0, maxOptimalSolutions):
149 |                 computedAlignment.append(self.buildAlignment(tracebackStack[i], sequenceA, sequenceB))
150 |         return computedAlignment
151 | 
152 |     def buildAlignment(self, tracebackStack, sequenceA, sequenceB):
153 |         """Builds the alignment for one traceback path.
154 |                 tracebackStack: The computed tracebackpath as a list = []
155 |                 sequenceA:      A string with the first DNA sequence.
156 |                 sequenceB:      A string with the second DNA sequence.
157 |             """
158 |         i = 0
159 |         j = 0
160 |         k = len(tracebackStack) - 1
161 |         alignmentOfA = ""
162 |         alignmentOfB = ""
163 | 
164 |         while len(tracebackStack) > 0:
165 |             try:
166 |                 tracebackElement = tracebackStack.pop(k)
167 |                 if pah.left == tracebackElement:
168 |                     alignmentOfA += "-"
169 |                     alignmentOfB += sequenceB[j]
170 |                     j += 1
171 |                 elif pah.up == tracebackElement:
172 |                     alignmentOfA += sequenceA[i]
173 |                     alignmentOfB += "-"
174 |                     i += 1
175 |                 elif pah.diagonal == tracebackElement:
176 |                     alignmentOfA += sequenceA[i]
177 |                     alignmentOfB += sequenceB[j]
178 |                     i += 1
179 |                     j += 1
180 |                 k -= 1
181 |             except:
182 |                 print "An error occured."
183 |                 sys.exit()
184 |         while i < len(sequenceA):
185 |             alignmentOfA += sequenceA[i]
186 |             i += 1
187 |         while j < len(sequenceB):
188 |             alignmentOfB += sequenceB[j]
189 |             j += 1
190 |         alignment = [alignmentOfA, alignmentOfB]
191 |         return alignment
192 | 
193 |     def compute(self, sequences, scoreFunction, maxOptimalSolutions=-1):
194 |         """Method to execute the Needleman-Wunsch algorithm.
195 |             sequences:      A list with two strings which represents the DNA sequences.
196 |             scoreFunction:  The name of the weight function defined in 
197 |                             class pairwiseAlignmentHelper."""
198 |         if scoreFunction in dir(pah) and callable(getattr(pah, scoreFunction)):
199 |             scoreFunctionObj = eval('pah().' + scoreFunction)
200 |         else:
201 |             print "Score function not found!"
202 |             sys.exit()
203 |         if maxOptimalSolutions == -1:
204 |             return self.traceback(sequences[0], sequences[1],
205 |                                   self.computeMatrix(sequences[0], sequences[1], scoreFunctionObj), scoreFunctionObj)
206 |         else:
207 |             return self.traceback(sequences[0], sequences[1],
208 |                                   self.computeMatrix(sequences[0], sequences[1], scoreFunctionObj), scoreFunctionObj,
209 |                                   maxOptimalSolutions)
210 | 
211 | 


--------------------------------------------------------------------------------
/source/lib/pairwise/test/__init__.py:
--------------------------------------------------------------------------------
1 | from gotohTest import GotohTestClass
2 | from needlemanWunschTest import NeedlemanWunschTestClass
3 | 


--------------------------------------------------------------------------------
/source/lib/pairwise/test/gotohTest.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2014 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | #
12 | # Gotoh test class
13 | import unittest
14 | import os, sys
15 | lib_path = os.path.abspath('../../')
16 | sys.path.append(lib_path)
17 | 
18 | from pairwise import Gotoh 
19 | from helper import PairwiseAlignmentHelper as pah
20 | from helper import MathHelper as mathHelper
class GotohTestClass(unittest.TestCase):
    """Test class to test the correct computation of the Gotoh algorithm."""
    def test_computeMatrix(self):
        """Test method to test the correct computation of the matrix."""
        a = "AGC"
        b = "AC"
        computedMatrixD = [[0 for i in range(len(b)+1) ] for j in range(len(a)+1)]
        computedMatrixP = [[0 for i in range(len(b)+1) ] for j in range(len(a)+1)]
        computedMatrixQ = [[0 for i in range(len(b)+1) ] for j in range(len(a)+1)]
        # initalize matrix
        for i in range(1, len(a)+1):
            computedMatrixD[i][0] = pah().gapCost(i)
            computedMatrixP[i][0] = mathHelper.NaN
            computedMatrixQ[i][0] = mathHelper.Inf
        for i in range(1, len(b)+1):
            computedMatrixD[0][i] = pah().gapCost(i)
            computedMatrixP[0][i] = mathHelper.Inf
            computedMatrixQ[0][i] = mathHelper.NaN

        # define values that should be computed by Gotoh algorithm
        # matrix D
        computedMatrixD[1][1] = 0
        computedMatrixD[2][1] = 3
        computedMatrixD[3][1] = 4

        computedMatrixD[1][2] = 3
        computedMatrixD[2][2] = 1
        computedMatrixD[3][2] = 3

        # matrix P
        computedMatrixP[1][1] = 6
        computedMatrixP[2][1] = 3
        computedMatrixP[3][1] = 4

        computedMatrixP[1][2] = 7
        computedMatrixP[2][2] = 6
        computedMatrixP[3][2] = 4

        # matrix Q
        computedMatrixQ[1][1] = 6
        computedMatrixQ[2][1] = 7
        computedMatrixQ[3][1] = 8

        computedMatrixQ[1][2] = 3
        computedMatrixQ[2][2] = 6
        computedMatrixQ[3][2] = 7

        computedMatrix = [computedMatrixD, computedMatrixP, computedMatrixQ]
        # check if the values computed by Gotoh are correct
        gotoh = Gotoh(a, b, "weightFunctionDifference", "gapCost")
        # the method of the Gotoh class is named computeMatrix
        gotoh.computeMatrix()
        self.assertEqual(computedMatrix, gotoh.computationMatrix)

    def test_traceback(self):
        """Test method to test the correct computation of the traceback."""
        # test case with a single traceback
        a = "AGC"
        b = "AC"
        gotoh = Gotoh(a, b, "weightFunctionDifference", "gapCost")
        computedAlignment = [["AGC", "A-C"]]
        gotoh.computeMatrix()
        gotoh.traceback()
        self.assertEqual(computedAlignment, gotoh.computedAlignment)

        # test case with a multiple traceback
        a = "CC"
        b = "ACCT"
        gotoh2 = Gotoh(a, b, "weightFunctionDifference", "gapCost")
        gotoh2.computeMatrix()
        computedAlignment = [["--CC","ACCT"], ["CC--","ACCT"]]
        gotoh2.traceback()
        self.assertEqual(computedAlignment, gotoh2.computedAlignment)

if __name__ == "__main__":
    unittest.main() # run all tests
99 | 


--------------------------------------------------------------------------------
/source/lib/pairwise/test/needlemanWunschTest.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2014 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | 
12 | # Test class for the Needleman-Wunsch algorithm
13 | # All test cases are written with PyUnit: http://pyunit.sourceforge.net/
14 | 
15 | import unittest
16 | import os, sys
17 | lib_path = os.path.abspath('../../')
18 | sys.path.append(lib_path)
19 | 
20 | from pairwise import NeedlemanWunsch as nw
21 | from helper import PairwiseAlignmentHelper as pah
22 | 
class NeedlemanWunschTestClass(unittest.TestCase):
    """Class to test the correctness of the computation for the class NeedlemanWunsch."""
    def test_computeMatrix(self):
        """Test of the computation of the matrix."""
        a = "AGC"
        b = "AC"
        computedMatrix = [[0 for i in range(len(b)+1) ] for j in range(len(a)+1)]

        # initalize matrix
        for i in range(1, len(a)+1):
            computedMatrix[i][0] = computedMatrix[i-1][0] + pah().weightFunctionDifference("", a[i-1])
        for i in range(1, len(b)+1):
            computedMatrix[0][i] = computedMatrix[0][i-1] + pah().weightFunctionDifference("", b[i-1])

        # define values that should be computed by Needleman-Wunsch algorithm
        computedMatrix[1][1] = 0
        computedMatrix[2][1] = 1
        computedMatrix[3][1] = 2

        computedMatrix[1][2] = 1
        computedMatrix[2][2] = 1
        computedMatrix[3][2] = 1

        # check if the values computed by Needleman-Wunsch are correct
        # (the method of the NeedlemanWunsch class is named computeMatrix)
        self.assertEqual(computedMatrix, nw().computeMatrix(a, b, pah().weightFunctionDifference))

    def test_traceback(self):
        """Test of the traceback computation."""
        # test case with a single traceback
        a = "AGC"
        b = "AC"
        computedAlignment = [["AGC", "A-C"]]
        computedMatrix = nw().computeMatrix(a, b, pah().weightFunctionDifference)
        self.assertEqual(computedAlignment,
                nw().traceback(a, b, computedMatrix,pah().weightFunctionDifference))

        # test case with a multiple traceback
        a = "AT"
        b = "AAGT"
        computedMatrix = nw().computeMatrix(a, b, pah().weightFunctionDifference)
        computedAlignment = [["A--T","AAGT"], ["-A-T","AAGT"]]
        self.assertEqual(computedAlignment,
                nw().traceback(a, b, computedMatrix,pah().weightFunctionDifference))

if __name__ == "__main__":
    unittest.main() # run all tests
69 | 


--------------------------------------------------------------------------------
/source/lib/structurePrediction/__init__.py:
--------------------------------------------------------------------------------
1 | from nussinov import Nussinov


--------------------------------------------------------------------------------
/source/lib/structurePrediction/nussinov.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/python
  2 | # Copyright 2015 Joachim Wolff
  3 | # Programming Course: Algorithms in Bioinformatics
  4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
  5 | # Winter semester 2014/2015
  6 | #
  7 | # Chair of Bioinformatics
  8 | # Department of Computer Science
  9 | # Faculty of Engineering
 10 | # Albert-Ludwig-University Freiburg im Breisgau
 11 | #
 12 | # Nussinov algorithm
 13 | 
 14 | class Nussinov():
 15 |     """The algorithm of Nussinov is a RNA secondary structure folding algorithm. It was developed by Ruth Nussinov et al.
 16 |     and was published in 1978:
 17 |             Nussinov, Ruth, et al. "Algorithms for loop matchings."
 18 |             SIAM Journal on Applied mathematics 35.1 (1978): 68-82.
 19 |             http://rci.rutgers.edu/~piecze/GriggsNussinovKleitmanPieczenik.pdf
 20 |         """
 21 |     def __init__(self, rnaSequence):
 22 |         """rnaSequence: The RNA sequence for which the folding should be computed."""
 23 |         self.sequence = rnaSequence
 24 |         self.pairedBases = {}
 25 |         self.computationMatrix = [[]]
 26 | 
 27 |     def computeMatrix(self):
 28 |         """This function computes the matrix which the Nussinov-algorithm is based on."""
 29 |         self.computationMatrix = [[0 for i in range(len(self.sequence)+1) ] for j in range(len(self.sequence))]
 30 |         i = 2
 31 |         while i <= len(self.sequence):
 32 |             k = i
 33 |             j = 0
 34 |             while j <= (len(self.sequence)-2) and k <= (len(self.sequence)):
 35 |                 self.computeMatrixCell(j, k)
 36 |                 j += 1
 37 |                 k += 1
 38 |             i += 1
 39 | 
 40 |     def computeMatrixCell(self, i, j):
 41 |         """This function computes the value for every cell of the matrix for the Nussinov-algorithm.
 42 |             i:  First index of cell of the Nussinov-matrix
 43 |             j:  Second index of cell of the Nussinov-matrix
 44 |         Every cell is the maximum of:
 45 |                             |       N_(i, j-1)
 46 |             N_(i,j) = max   |max i <= k < j N_(i, k-1) + N_(k+1, j-1) + 1
 47 |                             |       S_k and S_j are complementary
 48 |         """
 49 |         self.computationMatrix[i][j-1]
 50 |         maximumValue = [0,0,0]
 51 |         k = i
 52 |         while i <= k and k < j:
 53 |             if self.complementary(self.sequence[k], self.sequence[j-1]):
 54 |                 pairingValue = self.computationMatrix[i][k-1] + self.computationMatrix[k+1][j-1] + 1
 55 |                 if maximumValue[2] < pairingValue:
 56 |                     maximumValue[0] = k
 57 |                     maximumValue[1] = j
 58 |                     maximumValue[2] = pairingValue
 59 |             k += 1
 60 |         self.computationMatrix[i][j] = max(self.computationMatrix[i][j-1], maximumValue[2])
 61 | 
 62 |     def complementary(self, characterA, characterB):
 63 |         """Returns True if two RNA nucleotides are complementary, False otherwise.
 64 |         Nucleotides are complemetary if there are "A" and "U" or "C" and "G".
 65 |             characterA: First nucleotide
 66 |             characterB: Second nucleotide"""
 67 |         if characterA == "A" and characterB == "U":
 68 |             return True
 69 |         elif characterA == "U" and characterB == "A":
 70 |             return True
 71 |         elif characterA == "C" and characterB == "G":
 72 |             return True
 73 |         elif characterA == "G" and characterB == "C":
 74 |             return True
 75 |         return False
 76 | 
 77 |     def traceback(self, i, j):
 78 |         """Computes the traceback for the Nussinov-algorithm.
 79 |             i:  First index of cell of the Nussinov-matrix
 80 |             j:  Second index of cell of the Nussinov-matrix
 81 |             """
 82 |         if j <= i:
 83 |             return
 84 |         elif self.computationMatrix[i][j] == self.computationMatrix[i][j-1]:
 85 |             self.traceback(i, j-1)
 86 |             return
 87 |         else:
 88 |             k = i
 89 |             while i <= k and k < j:
 90 |                 if self.complementary(self.sequence[k-1], self.sequence[j-1]):
 91 | 
 92 |                     if self.computationMatrix[i][j] == self.computationMatrix[i][k-1] + self.computationMatrix[k][j-1] + 1:
 93 |                         self.pairedBases[k] = j
 94 |                         self.traceback(i, k-1)
 95 |                         self.traceback(k, j -1)
 96 |                         return
 97 |                 k += 1
 98 | 
 99 |     def execute(self):
100 |         """To compute the Nussinov-algorithm execute this method. It returns a dictionary with the paired bases."""
101 |         self.computeMatrix()
102 |         self.traceback(0, len(self.sequence))
103 |         print self.pairedBases
104 |         print len(self.pairedBases)
105 |         return self.pairedBases


--------------------------------------------------------------------------------
/source/lib/structurePrediction/test/nussinovTest.py:
--------------------------------------------------------------------------------
 1 | #! /usr/bin/python
 2 | # Copyright 2014 Joachim Wolff
 3 | # Programming Course: Algorithms in Bioinformatics
 4 | # Tutors: Robert Kleinkauf, Omer Alkhnbashi
 5 | # Winter semester 2014/2015
 6 | #
 7 | # Chair of Bioinformatics
 8 | # Department of Computer Science
 9 | # Faculty of Engineering
10 | # Albert-Ludwig-University Freiburg im Breisgau
11 | #
 12 | # Nussinov test class
13 | import unittest
14 | import os, sys
15 | lib_path = os.path.abspath('../../')
16 | sys.path.append(lib_path)
17 | 
18 | from structurePrediction import Nussinov
19 | 
class NussinovTestClass(unittest.TestCase):
    """Tests the matrix computation and the traceback of the Nussinov class.

    Both tests fold the sequence "GCACGACG", an example from the slides of
    Prof. Backofen.
    """

    def test_computeMatrix(self):
        # example from the slides of Prof. Backofen
        expectedMatrix = [
            [0, 0, 1, 1, 1, 2, 2, 2, 3],
            [0, 0, 0, 0, 0, 1, 1, 1, 2],
            [0, 0, 0, 0, 0, 1, 1, 1, 2],
            [0, 0, 0, 0, 0, 1, 1, 1, 2],
            [0, 0, 0, 0, 0, 0, 0, 1, 1],
            [0, 0, 0, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
        ]
        rnaSequence = "GCACGACG"
        nussinov = Nussinov(rnaSequence)
        # The method of the class is called computeMatrix, not compute_matrix.
        nussinov.computeMatrix()
        self.assertEqual(expectedMatrix, nussinov.computationMatrix)

    def test_traceback(self):
        # example from the slides of Prof. Backofen
        expectedPairs = {1: 2, 4: 8, 5: 7}
        rnaSequence = "GCACGACG"
        nussinov = Nussinov(rnaSequence)
        # The traceback reads the matrix, so it has to be computed first.
        nussinov.computeMatrix()
        nussinov.traceback(0, len(rnaSequence))
        self.assertEqual(expectedPairs, nussinov.pairedBases)
37 | 
if __name__ == "__main__":
    # Started as a script: hand control to unittest, which runs all tests of this module.
    unittest.main()


--------------------------------------------------------------------------------
/source/sequences:
--------------------------------------------------------------------------------
1 | >sequence 0
2 | UUUGGUCCUCGGUAGUGGUUUCCGGAAAACGAUUUUCCGUGAACUUCGAUCGAAGAUCCAU
3 | 


--------------------------------------------------------------------------------