# Repository layout:
#   ├── README.md
#   └── main.py
#
# /README.md
#   # algorithm
#   This is a project of algorithm
#
# /main.py
"""Assemble contigs from short reads with a k-mer graph.

Reads are cut into overlapping k-mers of length ``KLEN``.  Every distinct
k-mer is a node of ``Graph``; an edge links two k-mers that appear in
consecutive windows of a read, weighted by how often that happens.  The
graph can be saved/loaded as text, simplified (circles, bubbles, tips,
crosses) and finally walked to emit contigs.

All state lives in module-level containers (``Graph``, ``ReadsList``,
``STARTKEY``, ``CHECKKEY``, ``CONTIGS``) that the functions below mutate in
place.
"""

ReadsList = []          # read sequences collected by loadData()

ReadsLen = 100          # nominal read length
#ReadsLen = 8           # alternative value kept from the original source

Graph = {}              # k-mer string -> KMERNODE (or 0 once a node is removed)

KLEN = 19               # k-mer length
#KLEN = 5               # alternative value kept from the original source

STARTKEY = []           # keys of nodes with inDegree == 0 (see fixGraph)
CHECKKEY = []           # keys still to be inspected by the junction pass

TIPSLENLIMIT = ReadsLen - KLEN   # branches shorter than this count as tips

CONTIGS = []            # contigs produced by DFSonce()

DATA4CNT = 0            # unused counter kept for backward compatibility

# Length limits applied by DFSonce() when walking and storing contigs.
_CONTIG_WALK_LIMIT = 99999
_CONTIG_MIN_STORED = 50000
_CONTIG_SPLIT_AT = 100000

# Translation table used by complement(); other characters pass through.
_COMPLEMENT_TABLE = str.maketrans('ATCG', 'TAGC')


class diyError(Exception):
    """Error raised when a graph simplification step fails.

    The message is available both as ``str(err)`` and as ``err.errorinfo``.
    """

    def __init__(self, errorinfo):
        # The original passed ``self`` here, which made str(err) recurse.
        super().__init__(errorinfo)
        self.errorinfo = errorinfo


class KMERNODE():
    """One node of the k-mer graph.

    ``next`` / ``last`` map a neighbouring node's key to the weight of the
    outgoing / incoming edge; ``outDegree`` / ``inDegree`` count the distinct
    keys stored in them.  ``coverage`` starts at 1 and is incremented by
    getNode() each time the k-mer is seen again.
    """

    def __init__(self):
        self.outDegree = 0
        self.inDegree = 0
        self.coverage = 1
        self.next = {}   # next node's key -> edge weight
        self.last = {}   # previous node's key -> edge weight

    def addOutEdge(self, nextKey, edgeWeight):
        """Add ``edgeWeight`` to the edge towards ``nextKey``, creating it if needed."""
        if nextKey in self.next:
            self.next[nextKey] += edgeWeight
            return
        self.next[nextKey] = edgeWeight
        self.outDegree += 1

    def addInEdge(self, lastKey, edgeWeight):
        """Add ``edgeWeight`` to the edge coming from ``lastKey``, creating it if needed."""
        if lastKey in self.last:
            self.last[lastKey] += edgeWeight
            return
        self.last[lastKey] = edgeWeight
        self.inDegree += 1

    def delOutEdge(self, nextKey):
        """Remove the edge towards ``nextKey``; return 0 on success, -1 if absent."""
        if nextKey in self.next:
            del self.next[nextKey]
            self.outDegree -= 1
            return 0
        return -1

    def delInEdge(self, lastKey):
        """Remove the edge coming from ``lastKey``; return 0 on success, -1 if absent."""
        if lastKey in self.last:
            del self.last[lastKey]
            self.inDegree -= 1
            return 0
        return -1

    def updateEdge(self, oldKey, newKey, flag):
        """Rename the neighbour ``oldKey`` to ``newKey``, keeping the edge weight.

        flag 0 renames the key among the incoming edges (``last``),
        flag 1 renames it among the outgoing edges (``next``).
        Returns 0 on success and -1 when ``oldKey`` is not present.

        NOTE(review): an existing ``newKey`` entry is overwritten and the
        degree counter is not adjusted in that case.
        """
        if flag == 0 and oldKey in self.last:
            self.last[newKey] = self.last.pop(oldKey)
            return 0
        if flag == 1 and oldKey in self.next:
            self.next[newKey] = self.next.pop(oldKey)
            return 0
        return -1


def _firstNextKey(node):
    """Return the first key stored in ``node.next`` (IndexError when empty)."""
    return list(node.next)[0]


def complement(originStr):
    """Return ``originStr`` with A<->T and C<->G swapped; other characters are kept."""
    return originStr.translate(_COMPLEMENT_TABLE)


def reverse(originStr):
    """Return ``originStr`` reversed."""
    return originStr[::-1]


def readsOrientation(reads):
    """Return the index of the sequence in ``reads`` with the most k-mers already in Graph.

    Ties resolve to the lowest index.  Raises ValueError when ``reads`` is empty.
    """
    hitTimes = []
    for read in reads:
        # Only full-length windows are counted, even for short reads.
        windows = min(ReadsLen, len(read)) - KLEN + 1
        hitTimes.append(sum(1 for k in range(windows) if read[k:k + KLEN] in Graph))
    return hitTimes.index(max(hitTimes))


def getNode(key):
    """Return the node for ``key``.

    An existing node gets its coverage incremented; otherwise a new node
    (coverage 1) is created and registered in Graph.
    """
    node = Graph.get(key)
    if node is None:
        node = KMERNODE()
        Graph[key] = node
    else:
        node.coverage += 1
    return node


def loadData(fastaFile):
    """Append every second line (2nd, 4th, ...) of ``fastaFile`` to ReadsList.

    The odd lines are skipped, which matches a FASTA file holding one header
    line followed by one sequence line per record.
    """
    with open(fastaFile, 'r') as f:
        for lineNo, line in enumerate(f, start=1):
            if lineNo % 2 == 0:
                ReadsList.append(line.rstrip('\n'))


def saveData(fastaFile):
    """Write every contig of at least 100 characters from CONTIGS to ``fastaFile``.

    Each header is ``>contig_<index>/<total>`` where ``<total>`` is the number
    of contigs before the length filter is applied.
    """
    with open(fastaFile, 'w') as f:
        total = len(CONTIGS)
        print ("Total contigs: ", total)
        written = 1
        for contig in CONTIGS:
            if len(contig) < 100:
                continue
            f.write(">contig_" + str(written) + "/" + str(total) + "\n")
            f.write(str(contig) + '\n')
            written += 1


def constructGraph():
    """Build Graph from ReadsList, then empty ReadsList.

    For every read, four orientations are inserted: the read itself, its
    complement, its reverse and its reverse complement.  Each window of
    length KLEN becomes (or revisits) a node, and consecutive windows are
    linked by an edge of weight 1.
    """
    total = len(ReadsList)
    for i, forward in enumerate(ReadsList):
        backward = reverse(forward)
        orientations = (forward, complement(forward), backward, complement(backward))

        for read in orientations:
            # Bound by the real read length so a short read never yields
            # truncated k-mers.
            windows = min(ReadsLen, len(read)) - KLEN + 1
            prevKmer = None
            prevNode = None
            for k in range(windows):
                kmer = read[k:k + KLEN]
                node = getNode(kmer)
                if prevNode is not None:
                    prevNode.addOutEdge(kmer, 1)
                    node.addInEdge(prevKmer, 1)
                prevKmer, prevNode = kmer, node
        print ("Constructing graph. ", i, "/", total, " complete")

    # Release the reads but keep the module-level name usable by loadData().
    ReadsList.clear()


def reduceLowCoverage(threshold):
    """Remove every node whose coverage is <= ``threshold``.

    The node is detached from its neighbours and its Graph entry is replaced
    by the tombstone value 0 (keys are kept so the dict can be iterated).
    """
    for key in Graph:
        node = Graph[key]
        if node == 0:
            # already removed by an earlier pass
            continue
        if node.coverage <= threshold:
            # Copies guard against a self-loop mutating the dict being walked.
            for k in list(node.last):
                Graph[k].delOutEdge(key)
            for k in list(node.next):
                Graph[k].delInEdge(key)
            Graph[key] = 0


def saveGraph(graphFile):
    """Write Graph to ``graphFile`` using three lines per live node.

    Line 1: ``<key> <coverage>``; line 2: incoming edges as ``<key>,<weight> ``
    items; line 3: outgoing edges in the same form.  Tombstones (0) are skipped.
    """
    with open(graphFile, 'w') as f:
        for key, node in Graph.items():
            if node == 0:
                continue
            f.write(str(key) + ' ' + str(node.coverage) + '\n')
            for k in node.last:
                f.write(str(k) + ',' + str(node.last[k]) + ' ')
            f.write('\n')
            for k in node.next:
                f.write(str(k) + ',' + str(node.next[k]) + ' ')
            f.write('\n')


def loadGraph(graphFile):
    """Read a file produced by saveGraph() and add its nodes to Graph."""
    with open(graphFile, 'r') as f:
        nodeKey = None
        for lineNo, line in enumerate(f, start=1):
            phase = lineNo % 3
            if phase == 1:
                # node header: "<key> <coverage>"
                fields = line.rstrip('\n').split(' ')
                nodeKey = fields[0]
                Graph[nodeKey] = KMERNODE()
                Graph[nodeKey].coverage = int(fields[1])
                continue
            # phase 2 holds incoming edges, phase 0 outgoing edges
            node = Graph[nodeKey]
            addEdge = node.addInEdge if phase == 2 else node.addOutEdge
            for item in line.split():
                neighbour, weight = item.split(',')[:2]
                addEdge(neighbour, int(weight))


def currentTerminal(flag):
    """Return the keys of the terminal nodes currently in Graph.

    flag 1 selects start points (inDegree is 0); any other value selects
    end points (outDegree is 0).  Tombstones (0) are ignored.
    """
    keyList = []
    for key, node in Graph.items():
        if node == 0:
            continue
        degree = node.inDegree if flag == 1 else node.outDegree
        if degree == 0:
            keyList.append(key)
    return keyList


def mergeNext(thisKey, nextKey):
    """Merge node ``nextKey`` into its predecessor ``thisKey``.

    The merged node is stored under ``thisKey + nextKey[KLEN-1:]``; it takes
    over the outgoing edges of the next node, and neighbours are re-pointed
    to the new key.  The next node itself stays in Graph.  CHECKKEY and
    STARTKEY entries equal to ``thisKey`` are renamed as well.

    Returns the new key, or -1 when ``nextKey`` is not a successor of ``thisKey``.
    """
    thisNode = Graph[thisKey]
    nextNode = Graph[nextKey]

    # judge whether the merge is possible
    if nextKey not in thisNode.next:
        return -1

    # re-register this node under the merged key
    thisNewKey = thisKey + nextKey[KLEN - 1:]
    thisNewNode = Graph.pop(thisKey)
    Graph[thisNewKey] = thisNewNode
    # update the nodes that point to the merged node
    for key in thisNewNode.last:
        Graph[key].updateEdge(thisKey, thisNewKey, 1)

    # drop the merged edge, then inherit the next node's outgoing edges
    thisNewNode.delOutEdge(nextKey)
    nextNode.delInEdge(thisKey)
    for key in nextNode.next:
        thisNewNode.addOutEdge(key, nextNode.next[key])
        Graph[key].updateEdge(nextKey, thisNewKey, 0)

    # keep the bookkeeping lists in sync with the renamed key
    if thisKey in CHECKKEY:
        CHECKKEY[CHECKKEY.index(thisKey)] = thisNewKey
    if thisKey in STARTKEY:
        STARTKEY[STARTKEY.index(thisKey)] = thisNewKey

    return thisNewKey


def solveCircle(startKey, nextKey):
    """Dissolve a circle by merging all of its nodes into one node.

    step1: merge all nodes into node B except the start node A
    step2: merge B with A --> BA, re-point BA's successors to A and drop
           all outgoing edges of BA
    step3: merge A with BA

    Returns 0 when the resulting node has exactly one incoming and one
    outgoing edge; raises diyError otherwise.
    """
    # step1
    bPtrKey = nextKey
    fPtrKey = _firstNextKey(Graph[bPtrKey])
    while fPtrKey != startKey:
        bPtrKey = mergeNext(bPtrKey, fPtrKey)
        fPtrKey = _firstNextKey(Graph[bPtrKey])

    # step2
    bPtrKey = mergeNext(bPtrKey, fPtrKey)
    bPtrNode = Graph[bPtrKey]
    for k in list(bPtrNode.next):
        Graph[k].updateEdge(bPtrKey, startKey, 0)
        bPtrNode.delOutEdge(k)

    # step3
    circleKey = mergeNext(startKey, bPtrKey)

    circleNode = Graph[circleKey]
    if circleNode.outDegree != 1 or circleNode.inDegree != 1:
        raise diyError('SolveCircle Error')
    # This message sat after the return/raise in the original and never ran.
    print ("call solveCircle.")
    return 0


def solveBubble(startKey, endKey):
    """Dissolve a bubble by cutting the branch with the lowest edge weight.

    The lightest edge leaving ``startKey`` is removed, the branch is followed
    up to ``endKey`` and its last edge (into ``endKey``) is removed too.

    NOTE(review): the source this was reconstructed from had lost its
    indentation; the final deletion is placed after the walk because ``bKey``
    is initialised before the loop -- confirm against the upstream file.
    """
    startNode = Graph[startKey]
    delKey = min(startNode.next.items(), key=lambda item: item[1])[0]

    # detach the lightest branch from the start node
    startNode.delOutEdge(delKey)
    Graph[delKey].delInEdge(startKey)

    # walk along the branch until the bubble end is reached
    bKey = startKey
    while delKey != endKey:
        bKey = delKey
        delKey = _firstNextKey(Graph[delKey])

    Graph[bKey].delOutEdge(delKey)
    Graph[delKey].delInEdge(bKey)
    print ("call solveBubble.")


def solveTips(juncKey, tipsKey):
    """Detach a tip branch by removing the edge ``juncKey`` -> ``tipsKey``."""
    juncNode = Graph[juncKey]
    tipsNode = Graph[tipsKey]
    juncNode.delOutEdge(tipsKey)
    tipsNode.delInEdge(juncKey)

    print ("call solveTips.")


def solveCross(crossKey):
    """Dispose of a cross node by pairing incoming and outgoing branches.

    Every predecessor is merged with the cross node, then each merged
    predecessor is matched with the successor whose edge weight is closest
    to its own and keeps only that outgoing edge.
    """
    crossNode = Graph[crossKey]
    crossLastKeys = list(crossNode.last)
    crossNextKeys = list(crossNode.next)
    lastWeights = [crossNode.last[key] for key in crossLastKeys]
    nextWeights = [crossNode.next[key] for key in crossNextKeys]

    # merge each predecessor with the cross node
    for idx, key in enumerate(crossLastKeys):
        newKey = mergeNext(key, crossKey)
        crossLastKeys[idx] = newKey
        # successors must keep pointing at the cross node until matching
        for k in crossNextKeys:
            Graph[k].updateEdge(newKey, crossKey, 0)

    branchNum = min(len(crossLastKeys), len(crossNextKeys))
    for i in range(branchNum):
        key = crossLastKeys[i]

        weightsDist = [abs(lastWeights[i] - nextWeights[j]) for j in range(branchNum)]
        minDistIdx = weightsDist.index(min(weightsDist))
        # sentinel weight: keeps this successor from being matched again
        nextWeights[minDistIdx] = -10000

        matchKey = crossNextKeys[minDistIdx]
        Graph[matchKey].updateEdge(crossKey, key, 0)

        for k in crossNextKeys:
            if k != matchKey:
                Graph[key].delOutEdge(k)

    print ("call solveCross")


def junctionNodes_back(key):
    """Earlier junction handler: resolves circles, bubbles, crosses and tips.

    in degree > 1: cross, circle
    out degree > 1: tips, multi contigs and bubbles
    Raises diyError when a merge along the route fails.
    """
    node = Graph[key]
    if node.inDegree > 1:
        # By merging all nodes in route, put all probable branches under
        # the out degree > 1 situation
        while node.outDegree == 1:
            key = mergeNext(key, _firstNextKey(node))
            if key == -1:
                raise diyError("Merge error!")
            node = Graph[key]

    if node.outDegree > 1:
        # tour each branch
        routeCnt = []    # length of each branch route
        branchKey = []
        bubbleEndKey = 0
        for juncNextKey in list(node.next):
            branchKey.append(juncNextKey)
            cnt = 1
            tourKey = juncNextKey
            tourNode = Graph[juncNextKey]
            while tourNode.outDegree == 1 and tourNode.inDegree == 1:
                tourKey = _firstNextKey(tourNode)
                tourNode = Graph[tourKey]
                cnt += 1
            routeCnt.append(cnt)

            # the walk stopped on a node with several incoming edges:
            # either a circle or a bubble
            if tourNode.inDegree > 1:
                if tourNode is node:
                    # circle
                    solveCircle(key, juncNextKey)
                    routeCnt[-1] = -1   # mark this branch as solved
                    return
                # bubble: solved once two branches end on the same node
                if not bubbleEndKey:
                    bubbleEndKey = tourKey
                elif bubbleEndKey == tourKey:
                    solveBubble(key, tourKey)
                    routeCnt[-1] = -1
                    return

        shortestLen = min(routeCnt)
        if shortestLen > 0:
            if node.inDegree > 1 and node.inDegree == node.outDegree:
                # cross point
                solveCross(key)
                return
            if shortestLen < TIPSLENLIMIT:
                # tips
                tipsKey = branchKey[routeCnt.index(shortestLen)]
                solveTips(key, tipsKey)
                return
            # multi contigs: every branch becomes a new check point
            for k in node.next:
                CHECKKEY.append(k)
            return


def junctionNodes(key):
    """Resolve a junction node; only circles and cross nodes are handled.

    Nodes with a single incoming edge are left untouched.  Raises diyError
    when a merge along the route fails.
    """
    node = Graph[key]
    # in degree > 1: cross, circle or bubble end
    if node.inDegree <= 1:
        return

    # By merging all nodes in route, put all probable branches under the
    # out degree > 1 situation
    while node.outDegree == 1:
        key = mergeNext(key, _firstNextKey(node))
        if key == -1:
            raise diyError("Merge error!")
        node = Graph[key]

    if node.outDegree <= 1:
        return

    # tour each branch looking for one that leads back to this node
    for juncNextKey in list(node.next):
        tourNode = Graph[juncNextKey]
        while tourNode.outDegree == 1 and tourNode.inDegree == 1:
            tourNode = Graph[_firstNextKey(tourNode)]

        if tourNode.inDegree > 1 and tourNode is node:
            # circle
            solveCircle(key, juncNextKey)
            return

    # every branch toured and no circle found: cross
    solveCross(key)


def _resolveCheckKeys():
    """Junction-resolution pass over CHECKKEY -- currently NOT called.

    In the original source this code followed an unconditional ``return``
    inside fixGraph() and therefore never executed.  It is kept here,
    unchanged in logic, so that it can be re-enabled deliberately.
    """
    checkPtNum = len(CHECKKEY)
    print ("origin checknum: ", checkPtNum)
    presentNum = 0

    while presentNum != checkPtNum:
        for i in range(presentNum, checkPtNum):
            fPtrKey = CHECKKEY[i]
            fPtrNode = Graph[fPtrKey]
            while fPtrNode.outDegree != 0:
                if fPtrNode.outDegree > 1 or fPtrNode.inDegree > 1:
                    junctionNodes(fPtrKey)
                    if fPtrKey in Graph:
                        if Graph[fPtrKey].outDegree > 1:
                            break
                    try:
                        # restart from the (possibly renamed) check point
                        fPtrKey = CHECKKEY[i]
                        fPtrNode = Graph[fPtrKey]
                    except KeyError:
                        print ("-----------------------start point {:}, ------------------------".format(fPtrKey))
                else:
                    fPtrKey = _firstNextKey(fPtrNode)
                    fPtrNode = Graph[fPtrKey]
            presentNum += 1
        checkPtNum = len(CHECKKEY)
        print ("update checkptnum: ", checkPtNum)


def fixGraph():
    """Record the start nodes of Graph in STARTKEY and copy them to CHECKKEY.

    Both lists are updated in place so references held elsewhere stay valid.
    The junction-resolution pass (_resolveCheckKeys) is intentionally not
    run: the original implementation returned before reaching it.
    """
    STARTKEY[:] = currentTerminal(1)
    CHECKKEY[:] = STARTKEY


def tour(key, contig, outFile='./contig/data4/version3.0Contig_1_10klen.fasta'):
    """Walk Graph depth-first from ``key`` and append finished contigs to ``outFile``.

    ``contig`` is the sequence accumulated so far.  A node is expanded only
    once.  When a node without successors is reached the contig length is
    printed, and contigs longer than 5000 and shorter than 100000 characters
    are appended to ``outFile``.
    """
    visited = set()
    stack = [(key, contig)]
    # outdegree >= 1: push successors; outdegree == 0: the contig is complete
    while stack:
        key, contig = stack.pop()
        if key in visited:
            continue
        visited.add(key)
        node = Graph[key]
        contig += key[KLEN - 1:]
        if node.outDegree == 1:
            stack.append((_firstNextKey(node), contig))
        elif node.outDegree == 0:
            print (len(contig))
            if 5000 < len(contig) < 100000:
                with open(outFile, 'a') as fw:
                    fw.write(">contig\n")
                    fw.write(contig + '\n')
        else:
            for k in node.next:
                stack.append((k, contig))


def getContigs():
    """Run tour() from every key in STARTKEY."""
    for key in STARTKEY:
        tour(key, key[:KLEN - 1])


def data4GetContigs():
    """Run DFSonce() from every key in STARTKEY."""
    for key in STARTKEY:
        DFSonce(key, key[:KLEN - 1])


def DFSonce(key, contig):
    """Follow a single path from ``key`` and store the resulting contig in CONTIGS.

    At a fork the successor with the heaviest edge is taken (first one on
    ties).  The walk stops at a node without successors or once the contig
    reaches 99999 characters.  Contigs between 50000 and 100000 characters
    (exclusive) are stored whole; longer ones are stored as their first 99999
    characters plus, when it exceeds 50000 characters, the remainder.
    The final contig length is printed.
    """
    parts = [contig]
    length = len(contig)
    node = Graph[key]
    while node.outDegree != 0 and length < _CONTIG_WALK_LIMIT:
        tail = key[KLEN - 1:]
        parts.append(tail)
        length += len(tail)
        if node.outDegree == 1:
            key = _firstNextKey(node)
        else:
            key = max(node.next, key=node.next.get)
        node = Graph[key]
    parts.append(key[KLEN - 1:])
    contig = ''.join(parts)

    if _CONTIG_MIN_STORED < len(contig) < _CONTIG_SPLIT_AT:
        CONTIGS.append(contig)
    if len(contig) >= _CONTIG_SPLIT_AT:
        CONTIGS.append(contig[:_CONTIG_WALK_LIMIT])
        if len(contig) - _CONTIG_WALK_LIMIT > _CONTIG_MIN_STORED:
            # The original sliced from 9999, duplicating most of the first piece.
            CONTIGS.append(contig[_CONTIG_WALK_LIMIT:])
    print (len(contig))


def main():
    """Load a saved graph, collect its start nodes and write the contigs."""
    # Stages used to produce the saved graph (disabled in the original script):
    #   loadData('./data/data4/short_1.fasta')
    #   loadData('./data/data4/short_2.fasta')
    #   constructGraph()
    #   reduceLowCoverage(1)
    #   saveGraph('./graph/data4/graphData_version3.0_19klen_data2.txt')
    loadGraph('./graph/data4/graphData_version3.0_19klen_data4.txt')
    print (len(Graph))
    fixGraph()
    data4GetContigs()

    saveData('./contig/data4/version3.0Contig_1_19klen.fasta')


if __name__ == '__main__':
    main()