├── .gitignore ├── test ├── final_g_clusters_to_obtain.txt ├── clusters.list ├── test.input └── two_clusters.fasta ├── makefile ├── License.txt ├── main.cpp ├── findArticulationPoints.hpp ├── scripts ├── paf_to_CARNAC.py ├── CARNAC_to_fasta.cpp └── CARNAC_to_fasta.py ├── README.md ├── clustering_cliqueness.hpp ├── findArticulationPoints.cpp └── clustering_cliqueness.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | CARNAC-LR 2 | *.o 3 | -------------------------------------------------------------------------------- /test/final_g_clusters_to_obtain.txt: -------------------------------------------------------------------------------- 1 | 0 1 5 6 11 15 16 17 19 2 | 2 3 4 7 8 9 10 12 13 14 18 3 | -------------------------------------------------------------------------------- /test/clusters.list: -------------------------------------------------------------------------------- 1 | ch482_read5787 ch123_read6165 ch497_read5679 ch488_read2761 ch182_read5053 ch58_read5008 ch206_read1352 ch114_read985 ch487_read8370 ch428_read2403 2 | ch249_read4487 ch46_read337 ch52_read6550 ch435_read29295 ch165_read1933 ch162_read1733 ch133_read3235 ch322_read2883 ch384_read10549 ch275_read3171 3 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | #CC=/usr/bin/g++ 2 | CC=g++ 3 | #CC=clang++ 4 | CFLAGS= -Wall -O3 -std=c++11 -march=native -pthread -fopenmp 5 | CFLAGS_SIMPLE=-std=c++11 6 | 7 | LDFLAGS= -pthread -fopenmp 8 | 9 | 10 | ifeq ($(gprof),1) 11 | CFLAGS=-std=c++0x -pg -O4 -march=native 12 | LDFLAGS=-pg 13 | endif 14 | 15 | ifeq ($(valgrind),1) 16 | CFLAGS=-std=c++0x -O4 -g 17 | LDFLAGS=-g 18 | endif 19 | 20 | 21 | 22 | EXEC=CARNAC-LR scripts/CARNAC_to_fasta 23 | 24 | all: $(EXEC) 25 | 26 | scripts/CARNAC_to_fasta: scripts/CARNAC_to_fasta.cpp 27 | $(CC) -o $@ -c $^ $(CFLAGS_SIMPLE) 28 | 29 | CARNAC-LR: main.o clustering_cliqueness.o preprocessing.o 30 | $(CC) -o $@ $^ $(LDFLAGS) 31 | 32 | main.o: main.cpp clustering_cliqueness.hpp 33 | $(CC) -o $@ -c $< $(CFLAGS) 34 | 35 | clustering_cliqueness.o: clustering_cliqueness.cpp clustering_cliqueness.hpp findArticulationPoints.hpp 36 | $(CC) -o $@ -c $< $(CFLAGS) 37 | 38 | preprocessing.o: findArticulationPoints.cpp findArticulationPoints.hpp 39 | $(CC) -o $@ -c $< $(CFLAGS) 40 | 41 | 42 | 43 | 44 | clean: 45 | rm -rf *.o 46 | rm -rf $(EXEC) 47 | 48 | 49 | rebuild: clean $(EXEC) 50 | -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * * * * CARNAC: Clustering coefficient-based Acquisition of RNA Communities 3 | * * * * 4 | * * * * Authors: Camille Marchet 5 | * * * * Contact: camille.marchet@irisaa.fr, INRIA/IRISA/GenScale, Campus de Beaulieu, 35042 Rennes Cedex, France 6 | * * * * Source: https://github.com/Kamimrcht/CARNAC 7 | * * * * 8 | * * * * 9 | * * * * This program is free software: you can redistribute it and/or modify 10 | * * * * it under the terms of the GNU Affero General Public License as 11 | * * * * published by the Free Software Foundation, either version 3 of the 12 | * * * * License, or (at your option) any later version. 13 | * * * * 14 | * * * * This program is distributed in the hope that it will be useful, 15 | * * * * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * * * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * * * * GNU Affero General Public License for more details. 18 | * * * * 19 | * * * * You should have received a copy of the GNU Affero General Public License 20 | * * * * along with this program. If not, see . 21 | * * * *****************************************************************************/ 22 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * * * * * CARNAC: Clustering coefficient-based Acquisition of RNA Communities 3 | * * * * * 4 | * * * * * Authors: Camille Marchet 5 | * * * * * Contact: camille.marchet@irisaa.fr, INRIA/IRISA/GenScale, Campus de Beaulieu, 35042 Rennes Cedex, France 6 | * * * * * Source: https://github.com/Kamimrcht/CARNAC 7 | * * * * * 8 | * * * * * 9 | * * * * * This program is free software: you can redistribute it and/or modify 10 | * * * * * it under the terms of the GNU Affero General Public License as 11 | * * * * * published by the Free Software Foundation, either version 3 of the 12 | * * * * * License, or (at your option) any later version. 13 | * * * * * 14 | * * * * * This program is distributed in the hope that it will be useful, 15 | * * * * * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * * * * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * * * * * GNU Affero General Public License for more details. 18 | * * * * * 19 | * * * * * You should have received a copy of the GNU Affero General Public License 20 | * * * * * along with this program. If not, see . 21 | * * * * * *****************************************************************************/ 22 | 23 | 24 | 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include "clustering_cliqueness.hpp" 49 | 50 | int main(int argc, char** argv){ 51 | bool cmd(true); 52 | if (argc > 1){ 53 | cmd = execute(argc, argv); 54 | } 55 | printHelpCmd(cmd); 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /findArticulationPoints.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * * * * * CARNAC: Clustering coefficient-based Acquisition of RNA Communities 3 | * * * * * 4 | * * * * * Authors: Camille Marchet 5 | * * * * * Contact: camille.marchet@irisaa.fr, INRIA/IRISA/GenScale, Campus de Beaulieu, 35042 Rennes Cedex, France 6 | * * * * * Source: https://github.com/Kamimrcht/CARNAC 7 | * * * * * 8 | * * * * * 9 | * * * * * This program is free software: you can redistribute it and/or modify 10 | * * * * * it under the terms of the GNU Affero General Public License as 11 | * * * * * published by the Free Software Foundation, either version 3 of the 12 | * * * * * License, or (at your option) any later version. 13 | * * * * * 14 | * * * * * This program is distributed in the hope that it will be useful, 15 | * * * * * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * * * * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * * * * * GNU Affero General Public License for more details. 18 | * * * * * 19 | * * * * * You should have received a copy of the GNU Affero General Public License 20 | * * * * * along with this program. If not, see . 21 | * * * * * *****************************************************************************/ 22 | 23 | 24 | 25 | #include 26 | #include 27 | 28 | #ifndef PREPROC 29 | #define PREPROC 30 | 31 | 32 | using namespace std; 33 | 34 | // A class that represents an undirected graph of reads 35 | class Graph 36 | { 37 | 38 | public: 39 | uint nbNodes; 40 | vector *edges; 41 | //~ void APUtil(uint v, bool visited[], uint disc[], uint low[], uint parent[], bool ap[]); 42 | void APUtil(int v, bool visited[], int disc[], int low[], int parent[], vector& ap); 43 | bool APUtilBool(int v, bool visited[], int disc[], int low[], int parent[], vector& ap, set& interC); 44 | 45 | Graph(uint nbNodes); // Constructor 46 | ~Graph(); 47 | void addEdge(int v, int w); // function to add an edge to graph 48 | //~ void AP(bool* ap); // get articulation points 49 | void AP(vector& ap); // get articulation points 50 | bool APBool(vector& ap, set& interC); // get articulation points 51 | }; 52 | 53 | 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /scripts/paf_to_CARNAC.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import os 4 | 5 | # example of paf format 6 | #ch114_read985_template_fail_BYK_CB_ONT_1_FAF05104_A 1692 1 1686 + ch114_read985_template_fail_BYK_CB_ONT_1_FAF05104_A 1692 1 1686 1685 1685 255 cm:i:303 7 | #ch114_read985_template_fail_BYK_CB_ONT_1_FAF05104_A 1692 59 1598 + ch487_read8370_template_fail_BYK_CB_ONT_1_FAF05104_A 3100 1194 2784 434 1590 255 cm:i:39 8 | # 0:50-25-50.000000 27-32-64.000000, ignores the positions information 9 | 10 | 11 | 12 | def index_bank_offsets(bank_file_name, namesToOffset): 13 | read_offsets= [] 14 | 15 | if "gz" in bank_file_name: 16 | sequencefile=gzip.open(bank_file_name,"r") 17 | else: 18 | sequencefile=open(bank_file_name,"r") 19 | # i=1 20 | line=sequencefile.readline() 21 | if not line: 22 | print("Can't open file", bank_file_name) 23 | exit(1) 24 | if line[0]!='@' and line[0]!='>': 25 | print("File", bank_file_name, "not correctly formatted") 26 | exit(1) 27 | 28 | linesperread=2 #fasta by default 29 | if line[0]=='@': linesperread=4 # fastq 30 | 31 | sequencefile.seek(0) 32 | while True: 33 | offset=sequencefile.tell() 34 | line=sequencefile.readline() 35 | index = line.rstrip()[1:].split(" ")[0] # minimap splits the name if it contains a space 36 | if not line: break 37 | read_offsets.append(offset) 38 | namesToOffset[index] = len(read_offsets) - 1 39 | for i in range(linesperread-1): line=sequencefile.readline() 40 | sequencefile.close() 41 | return read_offsets 42 | 43 | 44 | if (len(sys.argv) > 3): 45 | readToRecruited = dict() 46 | namesToOffset = dict() 47 | index_bank_offsets(sys.argv[2], namesToOffset) 48 | with open(sys.argv[1]) as infile: # paf file 49 | for rline in infile: 50 | line = rline.rstrip() 51 | read = line.split("\t")[0] 52 | index = namesToOffset[read] 53 | readRecruited = line.split("\t")[5] 54 | indexRecruited = namesToOffset[readRecruited] 55 | if index in readToRecruited: 56 | if index != indexRecruited: 57 | readToRecruited[index].add(indexRecruited) 58 | else: 59 | if index != indexRecruited: 60 | readToRecruited.setdefault(index, set()) 61 | readToRecruited[index].add(indexRecruited) 62 | out = open(sys.argv[3], 'w') 63 | out.write("#\n#\n#\n") 64 | for read, recruited in readToRecruited.items(): 65 | toWrite = str(read) + ":" 66 | for r in recruited: 67 | toWrite += str(r) + "-0-0.0 " 68 | toWrite += "\n" 69 | out.write(toWrite) 70 | else: 71 | print("Usage: python3 paf_to_CARNAC.py file.paf reads.fasta/q(or gz) output.txt") 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CARNAC-LR (Clustering coefficient-based Acquisition of RNA Communities in Long Reads) 2 | ==================================================================== 3 | 4 | # Get CARNAC: 5 | * Using bioconda: 6 | Thanks to @npavlovikj, CARNAC-LR is now available on bioconda: https://anaconda.org/bioconda/carnac-lr, for both Linux and OSX. 7 | 8 | * Using git clone: 9 | git clone https://github.com/kamimrcht/CARNAC.git 10 | then compile and install: 11 | cd CARNAC; 12 | ./install; 13 | 14 | # Requirements 15 | * C++11 and GCC version from 4.9 / CLANG from 3.9 16 | * Python3 for scripts 17 | 18 | ## Warning: 19 | 20 | For MacOS users (clang version < ), the flag -fopenmp must be removed from Makefile before the compilation. In this configuration CARNAC cannot be run with several threads. 21 | 22 | 23 | 24 | ## Usage: 25 | First compute overlaps between reads using [minimap2](https://github.com/lh3/minimap2). 26 | It is very likely that minimap2 will return as primary alignments the reads mapped on themselves. To prevent this, and obtain more read connections in minimap2 output, I recommend to use -X option: 27 | 28 | minimap2 reads.fq reads.fq -X > minimap_output.paf 29 | 30 | Then convert to CARNAC-LR format: 31 | 32 | python CARNAC-LR/scripts/paf_to_CARNAC.py minimap_output.paf reads.fq input_carnac.txt 33 | 34 | Before running CARNAC-LR I recommend to increase the stack size 35 | 36 | ulimit -s unlimited 37 | 38 | And then, launch CARNAC-LR: 39 | 40 | ./CARNAC-LR -f input_carnac.txt (-o output_file -t nb_cores) 41 | 42 | # Options: 43 | 44 | ./CARNAC-LR 45 | to output the options 46 | * -f is mandatory 47 | * -t gets the number of threads (default 2) 48 | * Output is written in final_g_clusters.txt by default (-o to change output name) 49 | 50 | 51 | # I/O: 52 | 53 | ## Input format: 54 | * Pairwise mApping Format (PAF) (see for instance https://github.com/lh3/minimap/blob/master/minimap.1) can be converted in CARNAC-LR input format using ./scripts/paf_to_CARNAC.py. The fastq/a file (also .gz) used for the run is also mandatory. 55 | * CARNAC-LR can directly read Short Read Connector Linker (see https://github.com/GATB/short_read_connector) output format 56 | 57 | ## Output format: 58 | 59 | Reads are attributed indices from 0 to #reads-1 in order of appearance in input file. 60 | The output is a .txt file with one line per cluster, with the indices of its read members separated by spaces. 61 | For instance with 6 reads in the input in the previous example: 62 | 63 | less output_CARNAC.txt 64 | 0 1 2 3 65 | 4 5 66 | 67 | The first four reads are in one cluster, the two last reads are in a second cluster. 68 | Transform clusters file to separated Fasta files: 69 | 70 | 71 | ./scripts/CARNAC_to_fasta [cluster_min_size] 72 | 73 | Mandatory arguments are the output of CARNAC followed by the read file. Clusters are output in fasta format, with a file name that correspond to their order of appearance in CARNAC's output. A A minimum size of the clusters to be written can be set. 74 | 75 | 76 | # Contact: 77 | 78 | camille.marchet@univ-lille.fr 79 | -------------------------------------------------------------------------------- /scripts/CARNAC_to_fasta.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | 12 | using namespace std; 13 | 14 | 15 | 16 | char revCompChar(char c) { 17 | switch (c) { 18 | case 'A': return 'T'; 19 | case 'C': return 'G'; 20 | case 'G': return 'C'; 21 | } 22 | return 'A'; 23 | } 24 | 25 | 26 | 27 | string revComp(const string& s){ 28 | string rc(s.size(),0); 29 | for (int i((int)s.length() - 1); i >= 0; i--){ 30 | rc[s.size()-1-i] = revCompChar(s[i]); 31 | } 32 | return rc; 33 | } 34 | 35 | 36 | 37 | string getCanonical(const string& str){ 38 | return (min(str,revComp(str))); 39 | } 40 | 41 | uint count_str(const string& str,char c){ 42 | uint res(0); 43 | for(uint i(0);i reads; 64 | string readFile(argv[1]); 65 | string line,number; 66 | string clusterFile(argv[2]); 67 | uint minSizeCluster(1); 68 | if (argc > 2){ 69 | minSizeCluster = stoi(argv[3]); 70 | } 71 | string unitig,useless,useless2,useless3,msp; 72 | ifstream readStream(readFile); 73 | ifstream clusterStream(clusterFile); 74 | if(not clusterStream.good()){ 75 | cerr<<"Problem with cluster file"<'){ 86 | fastaMode=true; 87 | } 88 | if(fastaMode){ 89 | while(not readStream.eof()){ 90 | getline(readStream,useless); 91 | getline(readStream,line); 92 | if(line.size()>2){ 93 | reads.push_back(useless+"\n"+line); 94 | } 95 | } 96 | }else{ 97 | while(not readStream.eof()){ 98 | getline(readStream,useless); 99 | getline(readStream,useless2); 100 | getline(readStream,useless3); 101 | getline(readStream,line); 102 | if(line.size()>2){ 103 | reads.push_back(useless+"\n"+useless2+"\n"+useless3+"\n"+line); 104 | } 105 | } 106 | } 107 | uint clusterNumber(0); 108 | while(not clusterStream.eof()){ 109 | getline(clusterStream,line); 110 | if(count_str(line,' ')>=minSizeCluster){ 111 | ofstream out(("cluster"+to_string(clusterNumber)+".fasta").c_str(),ofstream::out); 112 | uint64_t i(1),lasti(0); 113 | while(i0){ 124 | uint uNumber=stoi(number); 125 | out << reads[uNumber]<<"\n"; 126 | } 127 | } 128 | clusterNumber++; 129 | } 130 | 131 | return 0; 132 | } 133 | -------------------------------------------------------------------------------- /test/test.input: -------------------------------------------------------------------------------- 1 | #query_read_id [target_read_id-kmer_span (k=15)-kmer_span query percentage]* or U (unvalid read, containing not only ACGT characters or low complexity read) 2 | #Target read set: two_clusters.fasta 3 | #Query read set number ./two_clusters.fasta 4 | 0:15-25-50.000000 11-49-98.000000 0-50-100.000000 16-50-100.000000 17-44-88.000000 6-50-100.000000 19-23-46.000000 8-23-46.000000 5 | 1:14-41-82.000000 6-44-88.000000 5-35-70.000000 2-31-62.000000 15-48-96.000000 3-38-76.000000 17-40-80.000000 1-50-100.000000 6 | 2:7-50-100.000000 1-31-62.000000 10-50-100.000000 4-50-100.000000 13-50-100.000000 18-50-100.000000 12-50-100.000000 8-47-94.000000 9-50-100.000000 3-50-100.000000 2-50-100.000000 7 | 3:15-38-76.000000 1-38-76.000000 7-49-98.000000 8-50-100.000000 2-50-100.000000 9-50-100.000000 4-50-100.000000 10-50-100.000000 18-50-100.000000 13-50-100.000000 12-50-100.000000 3-50-100.000000 8 | 4:14-24-48.000000 7-49-98.000000 10-50-100.000000 9-50-100.000000 8-46-92.000000 2-50-100.000000 3-50-100.000000 18-50-100.000000 13-50-100.000000 12-50-100.000000 4-50-100.000000 9 | 5:14-23-46.000000 19-27-54.000000 1-36-72.000000 15-41-82.000000 6-42-84.000000 17-42-84.000000 5-50-100.000000 10 | 6:1-46-92.000000 15-50-100.000000 5-44-88.000000 19-38-76.000000 17-48-96.000000 16-50-100.000000 11-50-100.000000 6-50-100.000000 0-50-100.000000 11 | 7:8-41-82.000000 12-50-100.000000 9-50-100.000000 2-50-100.000000 4-50-100.000000 13-46-92.000000 3-49-98.000000 10-48-96.000000 18-50-100.000000 7-50-100.000000 12 | 8:0-23-46.000000 12-49-98.000000 18-50-100.000000 13-50-100.000000 7-42-84.000000 10-50-100.000000 9-49-98.000000 8-50-100.000000 4-45-90.000000 3-50-100.000000 2-48-96.000000 13 | 9:14-23-46.000000 7-50-100.000000 4-50-100.000000 10-49-98.000000 13-50-100.000000 18-50-100.000000 2-50-100.000000 8-49-98.000000 3-50-100.000000 12-50-100.000000 9-50-100.000000 14 | 10:8-50-100.000000 2-48-96.000000 9-48-96.000000 4-50-100.000000 13-50-100.000000 12-50-100.000000 3-50-100.000000 18-50-100.000000 10-50-100.000000 7-48-96.000000 15 | 11:19-23-46.000000 17-34-68.000000 16-50-100.000000 6-50-100.000000 11-50-100.000000 0-49-98.000000 16 | 12:16-46-92.000000 7-49-98.000000 10-50-100.000000 4-50-100.000000 13-50-100.000000 18-50-100.000000 2-50-100.000000 8-49-98.000000 3-50-100.000000 12-50-100.000000 9-50-100.000000 17 | 13:4-50-100.000000 10-50-100.000000 12-50-100.000000 2-49-98.000000 3-50-100.000000 8-50-100.000000 18-50-100.000000 9-50-100.000000 14-23-46.000000 13-50-100.000000 7-49-98.000000 18 | 14:5-23-46.000000 18-28-56.000000 16-35-70.000000 13-39-78.000000 9-23-46.000000 4-24-48.000000 14-50-100.000000 1-40-80.000000 19 | 15:3-37-74.000000 0-25-50.000000 1-50-100.000000 6-50-100.000000 5-41-82.000000 17-48-96.000000 15-50-100.000000 20 | 16:14-34-68.000000 12-39-78.000000 6-50-100.000000 19-23-46.000000 17-48-96.000000 0-50-100.000000 16-50-100.000000 11-50-100.000000 21 | 17:0-44-88.000000 11-34-68.000000 16-50-100.000000 19-36-72.000000 5-42-84.000000 6-50-100.000000 1-41-82.000000 17-50-100.000000 15-50-100.000000 22 | 18:14-28-56.000000 4-50-100.000000 13-50-100.000000 9-50-100.000000 2-48-96.000000 12-50-100.000000 8-50-100.000000 3-50-100.000000 10-50-100.000000 18-50-100.000000 7-50-100.000000 23 | 19:11-23-46.000000 5-27-54.000000 17-34-68.000000 15-24-48.000000 6-32-64.000000 19-40-80.000000 24 | -------------------------------------------------------------------------------- /scripts/CARNAC_to_fasta.py: -------------------------------------------------------------------------------- 1 | 2 | #/local/python/3.3.2/bin/python 3 | import sys 4 | import os 5 | import shlex, subprocess 6 | from subprocess import Popen, PIPE, STDOUT,call 7 | 8 | 9 | 10 | def get_read(sequencefile,offset, fasta): 11 | if fasta: 12 | offset*=2 13 | else: 14 | offset*=4 15 | sequencefile.seek(offset) 16 | read="" 17 | line=sequencefile.readline() 18 | if not line: 19 | print("cannot read read at offset", offset) 20 | exit(1) 21 | read+=line#include header 22 | read+=sequencefile.readline()#include sequence 23 | if fasta: return read 24 | read+=sequencefile.readline()#include header2 25 | read+=sequencefile.readline()#include quality 26 | return read 27 | 28 | def index_bank_offsets(bank_file_name, namesToOffset): 29 | read_offsets= [] 30 | 31 | if "gz" in bank_file_name: 32 | sequencefile=gzip.open(bank_file_name,"r") 33 | else: 34 | sequencefile=open(bank_file_name,"r") 35 | line=sequencefile.readline() 36 | if not line: 37 | print("Can't open file", bank_file_name) 38 | exit(1) 39 | if line[0]!='@' and line[0]!='>': 40 | print("File", bank_file_name, "not correctly formatted") 41 | exit(1) 42 | 43 | linesperread=2 #fasta by default 44 | if line[0]=='@': linesperread=4 # fastq 45 | 46 | sequencefile.seek(0) 47 | while True: 48 | offset=sequencefile.tell() 49 | line=sequencefile.readline() 50 | index = "_".join(line.rstrip().split('_')[:2])[1:] 51 | if not line: break 52 | read_offsets.append(offset) 53 | offsetToName[index] = len(read_offsets) - 1 54 | for i in range(linesperread-1): line=sequencefile.readline() 55 | sequencefile.close() 56 | return read_offsets 57 | 58 | if len(sys.argv) > 2: 59 | clustersFileName = sys.argv[1] 60 | readsFileName = sys.argv[2] 61 | path="" 62 | minSize = 1 63 | if len(sys.argv) > 3: 64 | path = sys.argv[3] + "/" 65 | if not os.path.isdir(path): 66 | cmdMkdir = "mkdir " + path 67 | subprocess.check_output(['bash','-c', cmdMkdir]) 68 | else: 69 | cmdRm = "rm " + path + "cluster_*.fasta" 70 | try: 71 | DEVNULL = open(os.devnull, 'r+b', 0) 72 | p = subprocess.call(['bash','-c', cmdRm], stderr=DEVNULL) 73 | except subprocess.CalledProcessError: 74 | pass 75 | if len(sys.argv) > 4: 76 | minSize = int(sys.argv[4]) 77 | clustersFile = open(clustersFileName, 'r') 78 | readsFile = open(readsFileName, 'r') 79 | readsFile.seek(0) 80 | line=readsFile.readline() 81 | fasta = True 82 | if not line: 83 | print("cannot read read file") 84 | exit(1) 85 | if line[0]!='>': 86 | if line[0]=='@': 87 | fasta = False 88 | else: 89 | print("reads in wrong format") 90 | exit(1) 91 | offsetToName = dict() 92 | nameToRead = dict() 93 | # indexing read file 94 | reads_offsets = index_bank_offsets(readsFileName, offsetToName) 95 | 96 | numCluster = 0 97 | for line in clustersFile: # 1 line = 1 read and its cluster 98 | readsIndex = line.rstrip().split(' ') 99 | if len(readsIndex) >= minSize: 100 | out = open(path + "cluster_" + str(numCluster) + ".fasta", 'w') 101 | for r in readsIndex: 102 | read = get_read(readsFile, int(r), fasta) 103 | out.write(read) 104 | out.close() 105 | numCluster += 1 106 | 107 | else: 108 | print("Usage : python3 CARNAC_to_fasta.py [/path/to/write/files] [cluster_min_size]") 109 | -------------------------------------------------------------------------------- /clustering_cliqueness.hpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * * * * CARNAC: Clustering coefficient-based Acquisition of RNA Communities 3 | * * * * 4 | * * * * Authors: Camille Marchet 5 | * * * * Contact: camille.marchet@irisaa.fr, INRIA/IRISA/GenScale, Campus de Beaulieu, 35042 Rennes Cedex, France 6 | * * * * Source: https://github.com/Kamimrcht/CARNAC 7 | * * * * 8 | * * * * 9 | * * * * This program is free software: you can redistribute it and/or modify 10 | * * * * it under the terms of the GNU Affero General Public License as 11 | * * * * published by the Free Software Foundation, either version 3 of the 12 | * * * * License, or (at your option) any later version. 13 | * * * * 14 | * * * * This program is distributed in the hope that it will be useful, 15 | * * * * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * * * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * * * * GNU Affero General Public License for more details. 18 | * * * * 19 | * * * * You should have received a copy of the GNU Affero General Public License 20 | * * * * along with this program. If not, see . 21 | * * * * *****************************************************************************/ 22 | 23 | 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include "findArticulationPoints.hpp" 48 | 49 | #ifndef CLUST 50 | #define CLUST 51 | 52 | using namespace std; 53 | 54 | struct Node{ 55 | uint index; 56 | uint degree; 57 | double CC; 58 | vector> cluster; 59 | vector neighbors; 60 | unordered_map neighbToWeight; 61 | bool operator <(const Node&n) const 62 | { 63 | // return (degree < n.degree); 64 | 65 | 66 | if (degree < n.degree) return true; 67 | if (n.degree < degree) return false; 68 | if (CC < n.CC) return true; 69 | if (n.CC < CC) return false; 70 | 71 | return false; 72 | } 73 | }; 74 | 75 | 76 | vector removeDuplicates(vector& vec); 77 | 78 | vector removeDuplicatesCC(vector& vec); 79 | 80 | bool findBridge(vector& vecNodes, set& cluster, set& toRemove); 81 | 82 | void DFS(uint n, vector& vecNodes, unordered_set& visited, set& nodesInConnexComp, bool& above, double cutoff); 83 | 84 | vector split(const string &s, char delim); 85 | 86 | 87 | void parsingSRC(ifstream & refFile, vector& vecNodes); 88 | 89 | 90 | double getCC(unordered_set& neighbors, vector& vecNodes); 91 | 92 | 93 | int getDeltaCC(set& toRemove, set& clust1, vector& vecNodes, double cutoff); 94 | 95 | void computeCCandDeg(vector& vecNodes, vector& ClCo, vector& degrees, float& lowerCC); 96 | 97 | void sortVecNodes(vector& vecNodes); 98 | 99 | //~ void computePseudoCliques(vector& cutoffs, vector& vecNodes, uint nbThreads, vector& nodesInOrderOfCC); 100 | void computePseudoCliques(vector& cutoffs, vector& vecNodes, uint nbThreads, vector& nodesInOrderOfCC, uint higherDegree, float lowerCC); 101 | 102 | double computeUnionCC(set& unionC, vector& vecNodes); 103 | 104 | void transfer(uint tf, uint te, set& toFill, set& toEmpty, vector& vecNodes, vector>& clusters, uint ind); 105 | 106 | void merge(uint i1, uint i2, set& clust1, set& clust2, vector>& clusters, vector& vecNodes, uint ind); 107 | 108 | vector> assignNewClusters(set& clust, vector& vecNodes, double cutoff); 109 | 110 | void removeSplittedElements(uint index, vector>& clusters, set& interC); 111 | 112 | //~ uint splitClust(uint i1, uint i2, set& clust1, set& clust2, vector>& clusters, vector& vecNodes, set& interC, uint cutoff, uint ind); 113 | double splitClust(uint i1, uint i2, set& clust1, set& clust2, vector>& clusters, vector& vecNodes, set& interC, uint cutoff, uint ind); 114 | 115 | //~ uint computeClustersAndCut(float cutoff, vector& vecNodes, vector>& clusters, uint ind, uint prevCut, vector& nodesInOrderOfCC); 116 | double computeClustersAndCut(double cutoff, vector& vecNodes, vector>& clusters, uint ind, double prevCut, vector& nodesInOrderOfCC); 117 | 118 | void getVecNodes(vector& vecNodes, vector& vecNodesGlobal, set& nodesInConnexComp); 119 | 120 | void cutBrigdesInConnectedComp(vector& vecNodes, uint val); 121 | 122 | bool findArticulPoint(set& cluster, vector& vecNodes, set& interC); 123 | 124 | void preProcessGraph(vector& vecNodes, double cutoff); 125 | 126 | void mergeOrSplitProcedures(double cutoff, vector& vecNodes, vector>& clusters, uint ind, double prevCut, vector& nodesInOrderOfCC, set& clust1, set& clust2, set& unionC, set& interC, uint& i1, uint& i2, double& cut, uint i); 127 | 128 | uint quantileEdges(vector°rees, uint no, uint q); 129 | 130 | double quantileCC(vector&CC, uint no, uint q); 131 | 132 | bool execute(int argc, char** argv); 133 | 134 | void printHelpCmd(bool help); 135 | 136 | #endif 137 | -------------------------------------------------------------------------------- /findArticulationPoints.cpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * * * * * CARNAC: Clustering coefficient-based Acquisition of RNA Communities 3 | * * * * * 4 | * * * * * Authors: Camille Marchet 5 | * * * * * Contact: camille.marchet@irisaa.fr, INRIA/IRISA/GenScale, Campus de Beaulieu, 35042 Rennes Cedex, France 6 | * * * * * Source: https://github.com/Kamimrcht/CARNAC 7 | * * * * * 8 | * * * * * 9 | * * * * * This program is free software: you can redistribute it and/or modify 10 | * * * * * it under the terms of the GNU Affero General Public License as 11 | * * * * * published by the Free Software Foundation, either version 3 of the 12 | * * * * * License, or (at your option) any later version. 13 | * * * * * 14 | * * * * * This program is distributed in the hope that it will be useful, 15 | * * * * * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * * * * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * * * * * GNU Affero General Public License for more details. 18 | * * * * * 19 | * * * * * You should have received a copy of the GNU Affero General Public License 20 | * * * * * along with this program. If not, see . 21 | * * * * * *****************************************************************************/ 22 | 23 | 24 | 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "findArticulationPoints.hpp" 33 | 34 | 35 | using namespace std; 36 | 37 | // A class that represents an undirected graph of reads 38 | //~ class Graph 39 | //~ { 40 | 41 | //~ public: 42 | //~ int nbNodes; 43 | //~ list *edges; 44 | //~ void APUtil(uint v, bool visited[], uint disc[], uint low[], 45 | //~ uint parent[], bool ap[]); 46 | 47 | //~ Graph(uint nbNodes); // Constructor 48 | //~ void addEdge(uint v, uint w); // function to add an edge to graph 49 | //~ void AP(bool* ap); // get articulation points 50 | //~ }; 51 | 52 | Graph::Graph(uint nbNodes){ 53 | this->nbNodes = nbNodes; 54 | edges = new vector[nbNodes]; 55 | } 56 | 57 | void Graph::addEdge(int v, int w) 58 | { 59 | edges[v].push_back(w); 60 | edges[w].push_back(v); 61 | } 62 | 63 | Graph::~Graph(){ 64 | delete [] edges; 65 | } 66 | 67 | //~ // A recursive function that find articulation points using DFS traversal 68 | //~ // u --> The vertex to be visited next 69 | //~ // visited[] --> keeps tract of visited vertices 70 | //~ // disc[] --> Stores discovery times of visited vertices 71 | //~ // parent[] --> Stores parent vertices in DFS tree 72 | //~ // ap[] --> Store articulation points 73 | 74 | void Graph::APUtil(int U, bool visited[], int disc[], int low[], int parent[], vector& ap){ 75 | vector children(ap.size(),0); 76 | vector> mem; 77 | mem.push_back({U,0}); 78 | static int time = 0; 79 | while(not mem.empty()){ 80 | pair balls(mem[mem.size()-1]); 81 | int u=balls.first; 82 | mem.pop_back(); 83 | if(balls.second>=0){ 84 | if(not visited[u]){ 85 | visited[u] = true; 86 | disc[u] = low[u] = ++time; 87 | } 88 | int i=balls.second; 89 | if( edges[u].empty()){ 90 | continue; 91 | } 92 | int v = edges[u][i]; // v is current adjacent of u 93 | if((uint)i+11) 110 | ap[u] = true; 111 | if (parent[u] != -1 && low[v] >= disc[u]) 112 | ap[u] = true; 113 | } 114 | } 115 | } 116 | 117 | 118 | 119 | //~ void Graph::APUtil(int u, bool visited[], int disc[], int low[], int parent[], vector& ap){ 120 | //~ static int time = 0; 121 | //~ int children = 0; 122 | //~ visited[u] = true; 123 | //~ disc[u] = low[u] = ++time; 124 | //~ for (int i = 0; i != edges[u].size(); ++i){ 125 | //~ int v = edges[u][i]; // v is current adjacent of u 126 | //~ if (!visited[v]) 127 | //~ { 128 | //~ children++; 129 | //~ parent[v] = u; 130 | //~ APUtil(v, visited, disc, low, parent, ap); 131 | //~ low[u] = min(low[u], low[v]); 132 | //~ if (parent[u] == -1 && children > 1) 133 | //~ ap[u] = true; 134 | //~ if (parent[u] != -1 && low[v] >= disc[u]) 135 | //~ ap[u] = true; 136 | //~ } 137 | //~ else if (v != parent[u]) 138 | //~ low[u] = min(low[u], disc[v]); 139 | //~ } 140 | //~ } 141 | 142 | 143 | //~ bool Graph::APUtilBool(int u, bool visited[], int disc[], int low[], int parent[], vector& ap, set& interC){ 144 | //~ bool found(false); 145 | //~ static int time = 0; 146 | //~ int children = 0; 147 | //~ visited[u] = true; 148 | //~ disc[u] = low[u] = ++time; 149 | //~ for (uint i = 0; i < edges[u].size(); ++i){ 150 | //~ int v = edges[u][i]; 151 | //~ if (!visited[v]) 152 | //~ { 153 | //~ children++; 154 | //~ parent[v] = u; 155 | //~ APUtilBool(v, visited, disc, low, parent, ap, interC); 156 | //~ low[u] = min(low[u], low[v]); 157 | //~ if (parent[u] == -1 && children > 1) 158 | //~ ap[u] = true; 159 | //~ if (parent[u] != -1 && low[v] >= disc[u]) 160 | //~ ap[u] = true; 161 | //~ }else if (v != parent[u]){ 162 | //~ low[u] = min(low[u], disc[v]); 163 | //~ } 164 | //~ if (ap[u] and interC.count((uint)u)){ 165 | //~ return true; 166 | //~ } 167 | 168 | //~ } 169 | //~ return found; 170 | //~ } 171 | 172 | 173 | bool Graph::APUtilBool(int U, bool visited[], int disc[], int low[], int parent[], vector& ap, set& interC){ 174 | vector children(ap.size(),0); 175 | vector> mem; 176 | mem.push_back({U,0}); 177 | static int time = 0; 178 | while(not mem.empty()){ 179 | pair balls(mem[mem.size()-1]); 180 | int u=balls.first; 181 | mem.pop_back(); 182 | if(balls.second>=0){ 183 | if(not visited[u]){ 184 | visited[u] = true; 185 | disc[u] = low[u] = ++time; 186 | } 187 | int i=balls.second; 188 | if( edges[u].empty()){ 189 | continue; 190 | } 191 | int v = edges[u][i]; // v is current adjacent of u 192 | if((uint)i+11) 216 | ap[u] = true; 217 | if (parent[u] != -1 && low[v] >= disc[u]) 218 | ap[u] = true; 219 | 220 | if (ap[u] and interC.count((uint)u)){ 221 | return true; 222 | } 223 | if (v != parent[u]){// Update low value of u for parent function calls. 224 | low[u] = min(low[u], disc[v]); 225 | } 226 | 227 | } 228 | } 229 | return false; 230 | } 231 | 232 | 233 | // The function to do DFS traversal. It uses recursive function APUtil() 234 | //~ void Graph::AP(bool* ap) 235 | void Graph::AP(vector& ap){ 236 | // Mark all the vertices as not visited 237 | bool *visited = new bool[nbNodes]; 238 | int *disc = new int[nbNodes]; 239 | int *low = new int[nbNodes]; 240 | int *parent = new int[nbNodes]; 241 | 242 | // Initialize parent and visited, and ap(articulation point) arrays 243 | for (uint i = 0; i < (uint)nbNodes; i++) 244 | { 245 | parent[i] = -1; 246 | visited[i] = false; 247 | ap.push_back(false); 248 | //~ ap[i] = false; 249 | } 250 | 251 | // Call the recursive helper function to find articulation points 252 | // in DFS tree rooted with vertex 'i' 253 | for (uint i = 0; i < (uint)nbNodes; i++) 254 | if (visited[i] == false){ 255 | //~ cout<<"go"<& ap, set& interC){ 265 | bool found(false); 266 | // Mark all the vertices as not visited 267 | bool *visited = new bool[nbNodes]; 268 | int *disc = new int[nbNodes]; 269 | int *low = new int[nbNodes]; 270 | int *parent = new int[nbNodes]; 271 | 272 | // Initialize parent and visited, and ap(articulation point) arrays 273 | for (uint i = 0; i < (uint)nbNodes; i++) 274 | { 275 | parent[i] = -1; 276 | visited[i] = false; 277 | ap.push_back(false); 278 | //~ ap[i] = false; 279 | } 280 | 281 | // Call the recursive helper function to find articulation points 282 | // in DFS tree rooted with vertex 'i' 283 | for (uint i = 0; i < (uint)nbNodes; i++){ 284 | if (visited[i] == false){ 285 | found = APUtilBool(i, visited, disc, low, parent, ap, interC); 286 | if (found){ 287 | delete [] visited; 288 | delete [] disc; 289 | delete [] low; 290 | delete [] parent; 291 | return false; 292 | } 293 | } 294 | } 295 | delete [] visited; 296 | delete [] disc; 297 | delete [] low; 298 | delete [] parent; 299 | return found; 300 | } 301 | -------------------------------------------------------------------------------- /test/two_clusters.fasta: -------------------------------------------------------------------------------- 1 | >ch114_read985_template_fail_BYK_CB_ONT_1_FAF05104_A 2 | CTATGTACTTCGTTCAGTTGCATTGCTGTCGCTTGAAGCGCCATTCAAGTTTGACATGAGTTGGACGACTTACCTGAGGCTCAAGAACTCATTTTGAAGAGACTGCTAGATTCAGCCAGGATACAGATCTTAAATTGGTGGGACGGGGCTCAGGGGACTGGACGGTTCGGATGTCGGTGTCCCCAGTTTCTTTACCCTGGTCTGTCTTTGGCCGTCTCAGCTTACCCTCTTGACTCCTTTGAGCCTTTCGGAGGGCGGTTCTGGTAGTAGCAGCACATATGCTTTCACGAATTCCTTCAGTCAGAGTTCTGGCAGCAGGCCGTGCAGCAGTGTGCACTTTCAATGCATAACTGCTTACTGTTGTTTGTCTGAACTGCTTTCTGGTTGAAGATAAGTGAACTATCCTGTTCTGAATCCTTTCTCCCATCGTAGCTGAACCTCAGCCTCATCAGAGGAGGAATCTTTCCAGACTTGTTCAGTTACTGGCACCTCACTTCACAGTAGGGAGGCTAGGACATAAGGCACCTTAAGTCAGTGACAGCTCAAATTTGCACTTCATCTGTTATTAGTAACTGTCTACCTAGACAGTAGACTTAATGGTATCCTGGATGGTATTACAGGCTACAGGGGCAGGGCTTCTGTTGCAGGAGTCCTTTGAAGATATTTTCCTGTGTATCATATTAGTCCCTAAATTTAAGGTATGTACTATTTGCCCAGCTTTTAAAGGGTGATGCATTGTTTAAATGAAATAGGAAGGAAGCATTATAGCAGTATCTGTTGTTCTGCAGATTTTATGTAGTTACTTGTATCGTAATGGAGGTGACTCTTGCCAAAATGTTACATTTTATATCCTTAGCAGAGTGCAATGTAGCTATTGTGTCATTTTATGGTGGACACACACAAGCTGTGGAAGTTTCAGAACAGTAGAGCAAGCTGACCTAGATGTTCAGGAGCGGAAACTCCATATAACCTTGACCACAGAAGGCTGTTTGCGTAACCATTTACTACCTAGGGATTTAAGCTAAAAGAACACAAATCTTTAAATGAAAGTGTACAGTTCTTCTCTGCAGCATGTCTATCTCGACCCTTTTAGCAGTGTAATGACTTGTATGTAATAAAGCCTTGATGACTCTCCTCCATGAAGCTCTACTCTGTTTATGGTGAGAATGTTCTGGTACTGCTGGTAATGTCATAAAAGGCTAGTAACTGTTCGAGCTCTCTCTTTATTTCCTTCTCTCTATATTTTGTTCCTGCACTGTGTGCTGTGGAGTTGATAGTGTTATCAGTGCGGTACTCAGACCCTCCTGCTCCTCTGATGAAATATACCTTGTTCAACTTACTGTGTCTTGCATGACTGTTAGGTTTTGTGCAGAGACCAATGTCAAGTAATGCATCAACTGATTGGTGAAATCTGTTGTGACCTCTGAGTTGTATTCATGAAGATGCTGCAGAAGATAATGTGAAAAGATAATTATATTGACCTTTTCATTTCTCAAGCTGTCCTTTTGTTTCTTGGTTTTATTTTTATTTTGACATCAATATGGAAAATGGGTTCTATAAAGACACTACTGTATGAACAGCAATGCAATGCACTTGTATCATGAAATAAATATGCATCTTTATCTTTTCAAAAGAGTTAAACACCTGACTTGAACGCCGCAATATCAGCACCAACAGAGCAATACGTGGCAA 3 | >ch123_read6165_template_fail_BYK_CB_ONT_1_FAF05104_A 4 | CATGCCTTTGGTTCAGTTACGTATTGCTTTTCTGTTGGTGCTGATATTGCGCTTGAATGAGCTTTATAGAATGGAGTAAACGCTGCTCATGTCATTTATTTAGCTTGGCGTCTGGTCAGCAAGTAGGACTCGATCCTAGGGTCACACTGCGCCATCACCTGCAGCTTGTTGCCTTTCTGTATACGTCCGCCCGCCCTTCATTTTACTCAAGAAGATTACAAACGCTAGCTCTGTACATCTATGCTGGCGGATGCGCAGTATTTGCAAACTGTAGATTATTATTTGAATAATGATGATTTAGTATAATATTGAAGAAGATTTTACATTAAAAATTTAAATGTTTTGTACTCATTTGATAAACCTGACATTTATCTGTAACCTCTTAATTTGAATTTGTATAACATAAAATTGTAAGAGGTTTATATTTCATCTTAATTCTTTTTGATATTTGTAAACGTGCTTTTCAAATTCATTATTTGATGTTTGTAACTTTTAGCTTGCCAAAAGTACAAAAATCCTTAAGTCATTACATTGTCTTCTCCTATGTGTTGCCAGGTAACAGTGCTGTTCAGTACAGATTTAGTCATTCCACCTGCATGCCTTCTGACTGAATTTACTGGTCGCTGCTAATGGTAGATCACAAACTGCTGCTCTGGGCGAGCTGGAATCTCATCAGGTCTTCAGGTGATTCTTCCAGATTTTCTGGTTACAGGGAAGGCGGCCTCGCACCCCTCATGCCTTCAGTAGCTGATGCTAAAGATTGTCCCAGCAAATCCTGCTTCTGCCCATCGGAGAACACTTTGAAAACTCCTCCGATGAGTTCACTCAGCGTGTCACGTTCACGATCCTGAAGGCATGGAAATGATTCTCTTGTTCGTGCTGAAACACAGTTTACAATTGTCTCCAACAGTGATGGACTCCATTCATAACCAACAGTCAGGCCAGCATCTCAGGCACCGCCATAATCAGAACCAAATGTCATGGGCTGGGAAGAAGTGACATGGCTGCCAAACAAAATGTAAGTCCTCGCATTATATTCTCAGAAAATTGAAGAACAAGCGCATCGCCCTTTTCTATGTATGCTGAGAAGAGTCTAGAAGTGTTAGGTGGATACCATTTATTGGGCTGGGCAAAGCAGGCACAAGAGCAAGAGGACAAGGCAAAGGGCCGCAGTACATAAAAACTGTGGGCTGTGGGCACTCACTGCAGAAGGATAGAGCGGGAGCAGATCCTTACGTGTCTGTGAGTTCTTTAAATATAGGTATCCTGGCCATAGGAGCCTTTACTAGGTCTGTGGACAGCACGAAATCGGTAGAGTGTTGGAAAGTCTGCATGCACTGGAACTGTTACTTCAAAATGTATTTTCGCACAGTATTATTAAGTAGCGGGGAAGATAAAGGGCTTGAAGTTTTCGGATTTTAAGTCATCTAATGAGACCTTGAGCGTAGCAGCAAACGCATGATTCTAGCGCCTGGGCGAAATTAGACCCACACAGTCAACTTTATATGAATGAAAAGGCTGGCTTCGAAATACATAGGGGCCAACAAAACAGAAAAAGCGCGATGAAAGAGCATCTTGTGTCTTTTATCGTGATTCACTAATCTCAAATGAATTGGGCAAAACGCAATTCGCCAGAAGTGGTGCAAAGGCCAGCAACTGGCTGCCTCGGGACCATTCATGTCTTGGAAGTCACCAGAGTTGGTATCTCTCCAGAAGGGAGAACATTTGAGCATAAAACAAGACCTGGGCTGGTGAGATGGTTCGGTAGGTGAGCACCCGACTGCTGCCGAAGGTCAGTTCAATCAGCAGCTTACATGGTGGCTCACAACCATCGTAATGAGATCTGACTCCTATCTGCAGGTGTCTGAAGATAGCTGATGTACTTACATATATAATAAAATAAATAAATCTAAATGGTTAAGCACTGACGGAGCGCGCCGAGGCCGAGCGACAGGCAAGTACGTTCTGTTTATGTTTCTGCTTGGACACTGGTGGCACGTCAAACATAAACGACGCGCCTGTAAACTTTTCTTCGTCTGGCGGGTGTTTAACCGTTTGGATTTATTTATTTTATTATGTAAGTACACTGTAGCTATCTTCGTTATCTCAGAAGAGGTCAGATCTCATTACGGATGGTTGTATGTGGTTGCTATTCAAACTTCTGGAAGAAGCAGTCGATTACCCTGAACCATCTCACCAGCTGGTCTTTAATTTTTATATATAATCGCTATGGAGACACAACTCTGGTGACTCCTGAGGCGTAGATAATCGCCGAACGCAGCCAGTTGCTGGCTGCAGTCTATGGAGATTCAGTGTTGCCTAATTCATTGAATTAGTGAATCACGACAAAGACACAGATGAAGTTTCTTCTTTCACTGGGTGCTTTTTGCTTGTTTTGAGACAAGGTGGCTATTCTGCAAGACCAGGTGGGTCTTCTTCTTGAGTGCTAGAATTAAGGTGTTTGCCATACCTTGCCTATTAGATAACGTAACTGAAAAGCATGATCGCTGCACTTCCTCCTTCACAGCTGTAATATCATTGTAAGGAAATACATTTGAAATGTAGTCTCGAATAACTAAATACTACGACTTTGTGCTGTCTGAGACCTGACAAGGCTCCTATGGCAGGATACCTATATTAAAAATCCTGAACCTGAGAAGTTGCACTCCTGCCTCATCCTCTGCAAGTGAGTGCCCTGGCCACAGTTTATGTGTGTAACCCGCTGGCCTGGTCCTCTGCTCTTTGTGCCCTGTCGTGGCATCCAATTTCCTTCTCAGAACTCTTCGCGTGAAAATATCTTCGGCATGCGCTTGGTGTCTCTTCACGCCGAAATATATCAGGAGGACTTCTACATTTGCTTCCAGCTGCTGTCACCTCTCTTTCCTCAGCAGTGACATTTGGATTACCGATTATGTATGGCGCCCAGAATGCTGGCCTGACTGCGGCAGAATGGGAACCATCACTATTGAAGGTTTCAATTGTAACTACCAGGCTTCGCTTAATGTTCATCATTTCATGCCTTCCCAGGATCGTAGATATACATGATGAACTCATTTCTGAGGGAGTTTTCAAGTAGTTTACTCTCTCAGATGAGCGAAGCGAGAGGTTTGCTATGACACCTTGCTAGAACTACTGAGGCTAGTGATGGGAGATTGCGAGCCGCCTTCCTTCCTGTAACAAATTAAGAATCACCTGGAACCTGATGATTCAGTCTTAGGCAACGATTTGGCGATATATTAGCAGCGACAAATAAATTCAGTCAGAAGGCATGCTATATGGAATGACTAAACTGTATGAACAGCACTGTTACCCTGGCAGCACAATGTAATGATTCTAAGAGTTTTTGTAATTCTTTTACAAGTCAGGTGCCATAAACATTCAAATGGTGAAAAGCACGTTTACAACATCAAAGAATTGAGATGAAATATAAACCTCTTACATTTTTATACCTATTAACCGTCACATAACCTGGTCATGTTCGAGTTTAACAATATCAGCTGAAAAACATTTAAATTCTTAATGATGTAACGCGGTAATTTTAAACTTATCATTATCGTAATCATGGGTTGTCCGGTACGAAACGGTTTCTGGGTGCTCTGAAGGCGGGCGGACGGCACGAAGGCAACAAGCTGCAGGTGAGCTGGTGTGACCCTAGGACTCGGAATCCCTGCTGATCAGCACGCCAAGCTGGGCATGGCGCACAAGCGACAGCGTTTACTCCATTCATGTTTCTTGTCACGGTGAAGTTAAACGCAGACGCGCAGTCGGCCACCAACGAAGAAGCAATACGTAAC 5 | >ch133_read3235_template_fail_BYK_CB_ONT_1_FAF05104_A 6 | CATTGTACTGGTTCGGTTGCATTGCTCTTGCTATCTTTTGAATGGACGATGGTGTCATCGAGAGCAACTGGTGAAGTGTTGATAACTTTGATGATAATTTAAAGGTCCTTCTTTGAGGCATCTATGCATATATATGGTTTGAAAGCTTCAGCTATTCAGCAAAGCTATTATCCTTGTATTAAAGGGTATGATGTGATTGCTCAATCGTCAGGTATAGCAGGGCAATACATTGCTATTTCCATCCTACCTTGGTTGGAGATTGAGTTCAAGGAGACCCAAGCGCTGATATTGGCTTTACAGAACTGGCTCAGCAGGTCAAAAGGTAATTTTGTCTTGGAATTATATAGAACAACTTGTCATGCTTGCATTGGAGGAACAAATGTTCGAAATGAAATGCAAGTTGCAGGCTGAAGCCCTCACATTGTTGTTGGTACTCCAGGAAGTGTTTGATATACTAAACAGAATGCTTTCTGTCAAAATGGATCAAAATGTTCGTTTTGGACGAAAACGGAGATGAAATGTTGAGCCGAAGGTTTAAGGATCAGATCTGAGATTTTCAGAAGTAAATACAACATTCAGGTTATGTTGCTGCTACAATGCCAACTGATGTGTAAAGTGACAAAATTCATAGAGATCAATTTCAGTTACAGTGAAGAAGGAAGATTGACCCTTGAAGAGTGCAACAACGATTTTATATTAATGTTGAGCAGGAAGGTGGAAGCTGGATTTACTCTTTGTGACTTGTATGAACTTTGACTATCACTGGCAGTTATTTTCTCAATACAAGGCACGCAAGGTGGATAGCTCTGGAAAATGCATGCCAGGGACTTCAGTTTCTGCTCTGCATGACATGGACCAGAAGAAGATGTCATCAAATTCCGATCAGGAGTCAAGCCGTGTTCTGATCACTACTGACTTGTTGGCCCGTGATTGACGTGCAACAAGTGTCCTTGATTATAAACTACGATCTACCAATCGTGAAAACTATATTCACAAGTGGCAGAAGTCGATTTGGGAGAAAGGTGTGGCTATAAACTTTGTTACTGAAGAAGTAGAAGTTAACGTGACATTGAGACTTTCTACAATACTACCAAGTGGAGAAATACTTATGAATGTGGCTGACTAATTTAATCCCTAGGATGAGACTAGTTTGAATGCGGTGCTCGCTGTTGCTGAATAGGCGATCACAACGATATTGCTTCATGAGAATATTTGAATCTTTAATCTCAATGCTCATAGCAGATCACAAATACAGATTCTGATATACGGCGACTTTAGTCCTGAGCTCTTGTGAGGAAAGAGCGTGGCTTTATCCTCTTTAGAGTTAGACTTGTTGAAATTGAGTATAAGATAGGTCTGTAAATCTTTTCTTAATTCATTTACTAGTTATGTAGAAATGGTTGTATTAGATGTCTATCATTTAATAATAATATACTTGTGGACTAAAGATATAAGTGCTGTATAAAATCGTGAATTATGTTAAACTGGCATATCTGCCTTTATTGTGTTGTCATTAGCCTGACTAGAGGCCTTTAAAGTGATTTCTTTTTAGAAAATTTGGAATGCATTTTGTTGGTATTGTATTTATCAATAAAGTATTTAATTGGTGCTAAGTGTGAACTGGACCTGTTGCTAAGCCTCAGCAATAATCATCTAGGTAGGTTAAACCCAGTGGGCTGCCATATTGCACGCTGTCTTAATGATTTGGTGTTAAATAAAATTGTGTATATTCACTTTGAAAAAGGTTAAACACCCAAGCAGAGCCTTGCCAATATCAGCACCAACAGAAAAGCAATACGTGGCA 7 | >ch165_read1933_template_fail_BYK_CB_ONT_1_FAF05104_A 8 | TTTAGGTTTTTGAATTTTTATTTTGTAGGAAGTAGACAAGTGCTTGGCAGATGGTGCAGATGAACACCTGCAGTTGATGAACCTTTATTACCTTTGTGATGCAGCAGCTAACTCAGAACTGTTAGAGCTTCATCTCTCCTTTGTATGAAACCAGTTACTATAAATAAATGGCAGACCACCTATATAAAGTGAATATACAATTTTATTTAACATTCAAACTTCATTAAGACATGCAATATGGCAATTTACTGGGGTTTAACCCTACCTAGGATGATTGCTTGCTGAAACAGCAACAGGGTCAATTCACATAGCACTAATTAAATACTTTATTGAATAAATACAATACCAACAAATGCATTCAAATTTTCTCTAAAAAGTCACTTTAAAGGCCTCTCTAGTCAGGCTATTAATAAACACAATAAGGCAGATAACAGGTTTAACATAATTGGCTGATTTTTATACAGCACTTATATCTTTAGTCCCACAAGTATATTATTAAATGATAGAGAACATCTAATACAACCATTTCATAACTGGTAAATGAATTTCTAAGAAAAAGATTTTACAGACCCCATCTTTTATACACCCCAACAGTCTAACTCTAAGAGGATAAAGCCAATGCCTTTCCTCACAAGAGCTCAGGACTAAAGTCGCTTTGCTATCAGAATCTGTATTTGTGATCCGTTATGAACGTGAGACAAGATTCAATATTCTCAAGGAAGCACAATGCACGTTGTGATCGCCTATTCAGCAACAACGAGCATGCATTCAAAACTATCTCATCAGGGATTAAATTAGGTCAGCCACATTCTTAGGCATTTCCTCCACTGTATTGTAGAAAGTCTCAATGTCACGAAGAATCCTCTTGTCTTCTTTATTAACAAAGTTTATAGCCACCTTTCTCCCAAATCGACCCCTCTGCCAATTCTGTGAATATAGTTTCACGATTGGTAGGTAGATCGTAGTTATAACCAGGACACTTGTTGCGTCAATCCACGGGCCAACAAGTCAGTAGTGATCAGAACACGGCTTGACCTGATCAGATTTATTCCCTCATGATGACATCTCTTTCCTTCTGGTCCATGTCACCATGCAGAGCAGAAACTGTAAGAAGTCCCTGGCATGCATTTCTCAAATGAGCAGTCCACCTTGCGCCTTGTATTGGGAAAATAACTGCTGACGATGGTCAAAGTCATACAGTCACAAAGAGTGTCAGCTTCCACTCCTCTCGCTCAACATTAATATAAAATTGTTGAATTCCTTGGTCGATTCTTCCCTTCTTCACCAAATTCAATTGGATCTCTGAATTTCTTGATGCTTCTAGCACATCAGTTGGCATTGGCGAAGCAGCACAACCTGAATGCTTGTTGGTGGAAAATCTCATAGATCTGATCCTTGACCCTTGACAACATTTCATCTGCTTCGTCCAGAAACGAACATTTGATCATTTTGGAGAAGGTATCTTCTGTTTATATCGACACTCTCCTGAATTACCAACAACAATGTGAGGGGCTTCAGCCTACCTTCTGCATTCATTTCGAACATTTGTTCCTCAATGCAAGCATGACAAGTTGCTCATATAATCTCAAGAGCCAAAATTACCTTTTGGATCTGTTGAGCCAGTTCTCTGGTGGGGGCCAATACTAAGTGCTTGGGTCTCCTTGAACTCAATCTCCAACTGTTGCAGGATGCAAATAGCAAATGTGTGGCTGTCTTGCCAGTACCTGACTGAGCTTGAGCAATCACATCATACCGCTAATACAAGGGATAATAGCTCTTTGCTGAATAGCTGAAGGCTTCTCAAAACCATATGTAGATGCCTCGAAGAAGGACTCCTTTAAATTCATATCATCAAAGTTATCAACGATTTCATTCAGTTGCTCTCGATGACACCATCGGGGTCCATTCCCTCTGGGCCGCCATGTTCTCTGTTGTAAGGTTAAACACCCAAGCAGACACCGCAATATCAGCACCAACAGAAAAGCAATACGTAACTG 9 | >ch162_read1733_template_fail_BYK_CB_ONT_1_FAF05104_A 10 | TCACCTTGAATTAAATTTATATGTGTTGGGCGAGAGGTGGAAACTGGACACTCTTTGTGACTTGTAGGACTTTGGCTATCACACAAGCAGTTATTTTCTCAATACAAGGCGCAAGGTGGACTGGCTCACGGAAAATGCATGCCAGGGACTTCACAGTTTCTGCTCTGCATGGTGACATGGACCAGAAGGAAAGATGTCATCATGGGAATTCCGATCAGGGTCAAGCCGTGTTCTGATCACTGCGGCTTGTTGGCCCGTGGGATTGACGTGCAACAAGTGTCGCCAATTATAAACTACGATCTACCTACCAATCGTGAAAACTATATTCACAGAATTGGCAGGGGTCGATTTGGAGAAAAGGTGTGGCTATAAACTTTGTTACTGAAGACAAGAAGTTCTTCGTATTGAAGACTTTCTACAATACTACAGTGGAAAATATATGAATGTGGCTGACCTAATTTAATCCTGGGATGAATAGTTTTGAATGCAGTGCTCGCTGTTGCTGAATAGGCGATCACAACCATTATTATTCCTTGAGAATATTTGAATCTTGTCTCAATATAAACGGATCTAAATACAGATTCTGATAGCAAAGCGACTTTAGTCCTGAGCCCTTGTGAGAAAGGCATTGGCTATCTTTAGAAGTTAGACTGTTGAAAGTGGGTATAAAAGATGGGGTCTGTAAATCTTTTAGAAATTCATTTACTAGTTATGTAGAAATGGTTGTATTAGATGTTCTCTATCATTTAATAATACTTGTGGACTAAAAGATATAAGTGCTGTATAAAATCAGCCAATTATGTTAAGCGGCATATCTGCCTTTATTGTGTTTGTCATTAGCCTGACTGAGGCCTTTAAAGTGATTTCTTTTTAGAAAATTTGGTGCATTTGGTATTGTATTTATTCAATAAAGTATTTAGTAGTGCTAAGTGTGAACTGGACCTGTTGCTAAGCCTCATAAGCAATCATCCTAGGTAGTTAAACCCCAGTAAAATTGCCATATTGCATGTCTTAATGAAGTTTGAATGTTAAATAAAATTGTATATTCACAAAAAGGTTAAACACCCAAGCAGACGCCGAAGATATAGAGCGTGGGCAAGTACGTTCTGTTTGTGTTTCTTGGACACTGATTGACACGTTACCAAGCAAGCAGAACGTGCGCCTGTCGCTCTGTCGCCGGTCTGCCCAGGTGTTTAACCTTTTTGTGAATATGGTACAACATTCAACTTCATTAGGCATGTGCAATATGTGAGCTACGGGGATACAAGATGTTGCTGCTGAGCTTAACAACTGAGTCGGTCACATAACACCTGGTCATCTTTATTGAATAATGCAAATCGTCGAAGGCCGCAAAAATCGCTGTCGGCTATTAAGCTGTAAACGAATATGGTGACTGTTCTCTGCTGCAGTCCTGTGCTGGCATTCAAACAATATTTCAAGGCCTGGGCA 11 | >ch182_read5053_template_fail_BYK_CB_ONT_1_FAF05104_A 12 | TTTTTGAAGTAAGTGAAATATAAACCTACAATTTTATACATATTATAAATCATAAGCCACAGATAAATATTTCAAGTTTAACAATATAATGAAAACATTTAATTCATAGCAAATCTTTATAACGTAATTTTTAAACTTATCATTATCTAAATAATATGCAGTTTGGGATACAACACAAAAGGCATCACGGTACAGAGCAGATACAGAAGACCATGTCAGAACTTTGCAAGTGCTCATGAAGGCGACGGACGGCACAGAAAACAGCAACAAGCTGCAGGTGAGCTATGGTGTGACCCTAGGGACTCAGGTCCACTGCTGATCAACATAGCTGGGCATATAGCGACAAGCGACAGCGTTTACTCCTTGTATATGTATCTGGGTTATAGGTATCATATTATTTAAACACTAGCACAATTTTGAAAACACATTACTTTAAAACTTATGTATTTAAAAGATTGCTCAGATTTTCAAAGCCATCACATTTACAAAATATTTCCGCAAGCTTGCGATTGCTCCAAAACACGTTTACAAATATAAAGTATGAAGGACCCACAAAGCATATGTACTTTCTGGATATGTACATAGTATAGAGAAAGTCACCAAAGTTAAACCTAACTACTATCATGTAATGTATTATCCTAAGCAATGAAGTCTCTAGTGGTCTGAATACTATATGTATGCGTTAACAGAACTCTACCTTTGTGATTCCTCAGGCAACACAGACATTGTTCAAAGCTTCCCAAACTGTGTTCTACCCAGCCCAAGTGTTGCCTATTTTTCAATAGAGGATGGTGGGGCAGAATAAATGCCACAAGTAGGTCAGTGTGGCAGCTCTGAATGCCGGTATCATGTCTGGGATTGTTGGATAGCTATCTTCTTCATATAAATGTTTTTCATGTTTTGGGTTTCCCTCACATATTAAGAACGGACAGCCATCCACCAGACATCGTGGCGTATGGATGATGGCTGTGGCCTGTTATGGATGACGTTATCGGCAGCATTTTGAAACATGAGAACAAATAATGAGCGTTTCTATACTGGCCTGAGCTGTCATTTTAATGTCAGCTACATAGAAAGCCACACTTGCGTGATACAGTGACTGGATGAGTGGTGATCTCACGCAACCTCCACAGTGCGGGAAGGGCGAAAGCTCAGACCTCGAAGTAGGCTATGGTTTCATATTCACAAGATGAAGAACAATCTCATAGGTTAAAATATGTGAGAAAAGAAGCAAGTAGAACAGTCAGTGCTTCAAAGACAAAGAAAAGAAAACTGACCACTACTGGCATCGGCAAAGCTGAACTCTATTCAACAATGCCAGTGGGCTTCATGTGGAAGTGAAAAGCAGGGATTTAGGCCAGCCCGAGCGCTGAGATCTGGTTTCTGGGATGAGTCAGTTGTAGTGGTGCCTGCTACCTTGAACTTGTGCGTGGTACATGGTCTTGAACAGCCCTTCCGCAGCTCTGAGATACAGGCAAAGAAGGCCTTTTCAGAACCAGGTACAGGCATGTTTGGTGCATGTCAACATCTGACTGCGAGACCTGTTTAGAAGGATCATGTCGGGTCATTTGCTGCGCGAAGGCGAACGGGGACTCTATTTTGAGGCAGGACCTATAGAGTTCAGCTGTGCGGACCTCCTATACAAATCAGGCCTGCTTCCATTCAAGACAGCTTGCAAGCAGACGCGAATGGGAGGCGACAGGCAAGAAGGCTATGACATGTTTCTTGGACACTGATTGACGCATCGAAACGCAAACAGGCGTGCGCGCAATGGCCCTGCCGGCGTCTGCGCGTTGTTTGGCGCGGATGAGCAGGCGGCTATAAGGCTACCTCGCATCTGGCGCGATCGCCATTCGAAAGACACCTTCTACCTCGCCTGAAACTGGGAAAACTGTTCAAGATCTGCCTGCTACCATCAGGCCAACACGAAAACGGCCTGCATCCTCACCTCCACGGTCGTGAAATGATGATCTTCTTTGTCTTTAAGCATGACCTACTATTTCTTTATCCATGGTGTTGTGGTCATCTTCAGTCTGTATGTGTGAAGGTGACGAGATCGTATGACAGCTCGGTGTCGAGCTATGGCTGCTCGATGATTCTCTGTTAAAGCTGGTTGCTGGTGTCGTGGAGCCTAA 13 | >ch206_read1352_template_fail_BYK_CB_ONT_1_FAF05104_A 14 | TTCTTTGGTGATTTTTCTTGGTGCTGTGTGCATCGCGTGTATTATTTATGCTATATTTAATCTTATTCATATAAAACTATTAAAATTTATTTTAGAATAATACCAATGTCATCTTTGTTAGACATCCATTTCAGTTATTATGAAATACTGCAATAATGTGAAAATATTACCGGTAACTTTTCATTTCTGTGTCCTTTGTTTTCTTGGTTTTATTTTTATTTTGACATCAATGGAAAATGGGTTCTATAAGACTGCCTGCTAGTATGAACAGCAATGCAATGCACTTGTAACTCATGAAATAAATGTACATCTTTATCTTACACCCATGATAAGATTCAGTGTTGATTTTCTCTGGATTGGTGTGTCTAGTAGGCACTCATAATCATTTATAGCTTGGCTTCAGACAAAAATGTTCATGGGCGCCTCTACTTCTCCCACTCACCTACCCCATGCACTGCCCTCACAGCAGTTTACGTATATAGCTGGGAAGGTCCTTTCAGCTGCACATGGTGCCATGCATCGTTAATCAGCATTCCAGAAGTCAGAGGCAGGTGGATCTCTGAATGGAAGCAGGCCTGATTTGCATAGGGAGGCCCAAGACAGCTGGAACTATGAGTCCTGTCTCAAAAACAGAGTCCTCTCCCCGTCTGCCTCTCAGCAGCAAATGAATCTGACATGATCTCTAAAACAGGTCTCAAGCTGATCCAGATGTTGATGATGGCGCAAACATGCCCAAGTTAGGATCTGGTTCCCTCTGAAAGGGCCTTCTTGCCTCTGTATCCTAGAGCTGGGAAGGGCTGTTCAAGATCTCATGTACCTGCTACCAAGTTCAAGGTAGCACATACCTCACCTGGCTAAAGAATGGCTGATGTCGACAGATCTCAGTTCTAGGCCTAAATCCCTGCTTTCGCTACATGAAGCCCACTGGCATTGAAGAATAGAGTTCAGCTTTCATTGATACAGTAGTGGTCAGTTTTCTTTTCTTTTGTCTTTTTAAAGCACTGACTGTTCTCCTACTTGTTTCTTTCATATTTTAATCCCATGATTAATTTGCATTCTTGTGAATAGAAACCATAGCCTCATCTTCTCGAGGTCTGAGCTTTTTCTACCCTTCCTGCTTTTCTGTGGAGGGGTTGGTGTGAGATCACTCATAATCCTAGTCACTGTATCTAAGTGTGGCTTTCATGTAGCCATTGTAAATGACAGCTCAGAGCTGTCAGGTATAGAAACGCTCATTATTTTGGTTCATGTTTCTAAAAATGTTGGATAACGTCATCTGCATACTGGTGTCATTGGGTGCCTCTACTATTAAACACATAGATAAGCTGTCTGGTGGATGGGCTTTTGTCAGAATCTTAATATGTGAGGAAAAAGCAAAACATGAAACATTTGGCATGAAGAAGATAGCTGTATCCAACAATCCCAGAGCGCTTGATGATACCGGCATTTCAGAGCTGACACTGACCTACTCTGTGGTGCATTTATTCTGCCCCACCCTCATCCCTCATTTGAGGACAGGCAACACTTGGGCTGGGCATGACTGGTATTGGATTTTGGGAAGCTGTGCTGAATTAACAGCAGCTATCTCTGAGGAATCTAAAGGTAGACACCTACACTGCATACCCATATTATTCAGACCACTTAGGGAGACTTCCATTTGCTTGGATAATATTTACATTAATATTAGTAGTTAGGTTTGAACTTTGGTGACTTCTATACTACAGTAACACATTCATATATGCATATGCTTTGGGTCCTTCATACTACTTTTATATTTGTAAATCAGTGTTTGGAGCAATTTCAAGTTTAAGGAAATATTTTTGTAAATGTGATGGTTTGAAAATCTAGTGGTCTTTTGCTTACAAGTTTTAAAAGCATTTGTCTTTAAATTGTGCTGATGTTGGAATATGATACCCTATAGCGGATGAAACATAGAATGGAGTAAGCAGCTGTCGCTTGTCGTGCTATGCCCAGCTTGGCGTGCTGGATCAGCAGTGGGACTCCAGTCCCTATAGGGTCACACCAGCTCTGCAAGCTTGTTGCCTTTCTGTGCCGTCCGCCCGCCCTTAGAGCACTCAGAAAGTTCTGACATGGCTCTATCTGCTCTGTACTGTGGATGCCTTTTTGGTGTTGTATGTCCCAAACTATAGATTATTTAGGATAATGATAAGTTTAAAGTAATGTTGAAGAAGATTTTATTAAGAATTTAATGTTTTTCATTATATTGATAAAACTTGAACATTTATCTGTGGCTTATGTGGTTGGATTAATATGTAAAATTGTAAGTTTATATTTCATCTTAATTCTTTTGATGTTGTAAACGTGCACTGGTCATTATTTGAATGTTTATGGCACCTGACTTGTAAAAGAATTACAAAAATCCTTAGAATCCAGAGGTTAAACACCAAGCAGACGCGCCGAAGATGAGGCGACAGGCAAGGCAATACGTAGCGA 15 | >ch249_read4487_template_fail_BYK_CB_ONT_1_FAF05104_A 16 | GTTGTACTTCGTTCAGTTACGTATTGCTCTTGCCGCAATAAGCTCTATCTTCGGCGTCTGCTTTTGAAGAAAAAGATCTTTATTAGGTACCATAATTCTCAACACTGTTGTCTTCAGTGTGAATCAACGCCAAACCACACACACACACTGATCTGCCCATGTTGTCATGGGCTCCGGAAACCACCACAAGTGGCTGAGGAAGGGAGGCTCCATCAATACGGGCAGGAGTCTGCAGCTGCCCCAGACGGCACTCTAGTTGGAATGGGGGTGGTATAAAGATAAGTCTGTAAAGATCTTTTTCTTGATTCATTTACTAGTTATGTGAAATGGTTGTATTAGATGTTCTCTATCATTTAATAATATACTTGTGGACATTAAAAGATATAAGTGTTTGTATAAAATCGGCCAATTATGTTAAACTGGCATATCTGCCTTTTATTGTGTTGTCATTAGCCTGACTAGAGGCCTTTAAAGTGATTTCTTTTTAGAGAAAATTTGAATGCATTTGTTGTTGTATTTATTCAATAGTATTTAATTGGTGCTAGCAAGTGTGAACTGGACCTGTTGCTAAACCTCAGCAAGCAATCATCTAGGTAGGGTTAAACCCCAGTAAAATTATACCATATTGCACATGTCTTAATGAAGTTTGAATGTTAAATAAAATTATATTCACTTTAAAGAAAAGGTTAAACACCGTAGACGCCAGTAGGCGATGAGCAAAGGCAATACGTAACC 17 | >ch275_read3171_template_fail_BYK_CB_ONT_1_FAF05104_A 18 | CTGCTTCGTTCGGTGCATTATCTTTTTCAAGTGATATGCAATTTTATTTAACATTCAAACTTCATTAAGACATGTGCAGCCATGGCAATTTTACTGGGGTTTAGCTACCTAGGATGATTGCTTGCTGAGCAGCAACAGGGGTCCGAGTTCACATAGCACTAATTAAATACTTTATTGAATAAATACAATACCAACAAATACTTCAAATTTTCTCTAAAAAGAAATCACTTTGAGCCTCTCTAGATAAGGCTATTGACAAACACAATAAGGCAGATATGCTAGTTTAACATAATTGGCTGATTTTATACAGCATATATCTTTTAGTCACAAGTATATTATTAAATGATGAGAACATCTAATACAGCCATTCATAACTGGTAAATGAATTTTCTAAGAAAAAGATTTTACAGACCCCATCTTTTATACACAACAGTCTAACTCTAAAGGATAAAGCCAATGCCTTTTCATAAGAGCTCAGGACTAAAGTCGCTTTGCTATCAGAATCTGTATTTGTGATCCGTTATGAGCGGTAAAGGACAAGATTCAAATATTCTCAGGAAGCACAATGCGTTGTGATCGCCTATTCAGCAACAAGCGAGCACCATTTCAAACTATCTCATCAGGGATTAAATTAGGTCAGCCACATTCATGGGCATTTCCTCCACTGTAGTATTGTAGAAAGTCTCAATGTCACGAAAGAATCCTCTTGTCTTCTTCAGTAACAGGAAGTTTATAGCCACCTTTCTCCCAAATCGACCCCTCTGCCAATTCTGTGAATATATAGTTTTCACGATTGGTGGTGAATCATGGTTATAACCAAGGACGCTTGTTGCGTCAATCCCACGGGCCAACAAGTCAGTAGTGATCAGAACACGGCTTGACCTGATCGGAATTCCCTCATGATGACATCTCTTTCCTTCTGGTCATGTCGCCATACAGAGCGAAACTGTGAAGTCCCTGGCATGCATTTTCTCCGTGAGCCAGTCACCTTGCGCCTTGTATTGAAAACTAACTGCTTGTGTGATAGTCAAAGTCATACAAGTCACAAAGTGTCAGCTTCCCTCCTCGCTCAACATTAATATAAATTGTTTAATTCCTTCAAGGTCAATTCTTCCTTCTTCACCAGAATTCCGAATTGGATCTCATGAATTTCTTGGTCACTTCTAGCATCAGTTGGCATTGTGGCGAAGCAACACAACCTGAATGCTTGTATTTAATTTCTGGAAAATCTCGCCGATCAGTCCTTAAACCCTCGGCTCAACATTTCATCTGCTTCGTCCAAAACGAACATTTTGATCCATTTGGAGAAAGGTATCTTCTGTTTAGCATATCAAACCTCCTGGGTACCAACAACAATGTGAGGCTTCAGCCTGCAGCTTCTGCATTTCATTTCGAACATTTGTTCCTCCAATACAGAAGCATGACAAGTTGCTCCCATATGATCTCAAGAGCCAAAATTACCTTTTGGATCTGTTGGTAGTTCTCTGGTGGGGCCAATACTAGTGCTTGGAATATCCTTGAACTCAATCTCCAACTGTTGCAGGACAAATAGCAAATGTGGCTGTCTTGCCAGTACCTGACTGAGCTTGAGCAATCACATCATACCCTTTAATACAGGGATAATGGCTCTTTGCTGAATAGCTGAAGGCTTCTCAAAACCATATGCATAGATGCCTCGAAGAAGGACTTTAATTCATATCATCAAAGTTATCAGCAATTTCATTCCAGTTGCTCGATGACGCAATGGTCCATTCCCTCTGGGCCGCCATGTTCTCTGTTGTAATCCGCGGAGCGGATTAAACACCAAGCGAACGCCGCAATATCAGCACCAAGCAATACGTAGCGC 19 | >ch322_read2883_template_fail_BYK_CB_ONT_1_FAF05104_A 20 | CGCCTTATGCCTTGAACAGATCATGTCTGGTGGCTCGCAGTTACAACAGAGAACATGGCGGCCAGAGAATGGACCCCGATGGTGTCATCGAGCAACTGGAATGAAATTGTTGATAACTTTGATGATATGAATTTAAAGGTCCCTTCTTCGAGGCATCGCCATATGGTTTTGAGAAGCCTTCTTCTATTCAGCAAGAGCTGTATCCTTGTATTAAAGGGTATGATGTGATTGTAAGCTCAGTCAGGTACTGGCGAGCAGCCACATTTGCTATTTCCATCCTGCAACAGTTGGAATTGAGTTCAAGGAGACCCAAGCACTATTGGCCCCCTGAAGAACTGGCTCAACAGATCCAAGGTATTTTGGCTCTTGGAATTATATAGAACAACTTGTCATGCTTGCATTGGAGGAACAAATGTTCGAAATGAAATGCAAGTTGCAGGCTGAAGCCCTCCCATTGTTGTTACTCCAGGGAGAGTGTTTGATATGCTAAACAGAATACCTTTCTCCAAAATGGATCAAAATATTCGTTTTGGACGAAGCAGATGAAATGTTGAGCCGAGGTTTAGAGATCAGATCTGAATTTTCAGAAATTAAATACAAGCATTCAGGTTGTGTTGCTCTACAATGCCAACTGATGTGCTAAGTGACCAAGAAATTCATGAGATCAATTCAGGTCTGGTGAAGAAGGAAAATTGACCCTTGAAAAGGTAACAATTTATATAATGTTGAGCAGAGTAGAGCTGGACACTGCTGTGACTTGTATGAGACTTTGGCTATCCACAAGCAGTTATTTTTCTCAATACAAGGCGCAAGGTGGACTGGCTCACGGAAAATGCATGCCAGGGACTTCACAGTTTCTGCTCTGCATGGTGACATGGACCAGAAGATGTCATCATGAAATTCCGATCAGGGTCAAGCCGTGTTCTGATCACTACTGACTTGTTGGCCGTGAGTTGACGTGCAACAAGTGTCCTTGGTTAACAAACTACGATCACCTACCAATCGTGAAAACTATATTCACAGAATTGGCAGAAGTCGATTTGGGAGAAAGGTGTGGCTATAAACTTTGTTACTGAAAGACAGAAGGATTCTTCGTGACATTGAACTTTCTACAATATACTACAGTGAATGCCCATGAATGGCTGACCTAATTTAATCCCTGAGATAAGTGATTTTGAATGCAGTGCCCGCTGTTGCTGAATAGGCGATCTAACGTGCATTGTGCTTCATGAAATATTTGAATCTTGTCTCAATGCTCATAACGGATCTAAATACAGGTCTGATAGCAAAGCGACTTTAGTCCTGAGCTCTTGTGAGAAAGGCATTGGCTTTATCTTAGGATTAGATCAGTTGTTTATTGTTGTTGTTATTGTTGGTATAAAAGATGGGTCTGTAAAATCTTTTCTTAAATTCATTTACTAGTTATGTAGAAATGGTTGTATTAGATGTTATATCATTTAATAATATACTTGTGGACTACAAAAGATATAAGTGCTGTATAAAATCAGCCAATTATATTAAACTATATCTGCCTTTATTGTGTTGTCATTAGCCTGACTAGAGCCTTTAAAGTGATTTCTTTTAGAAAATTTGAATGCATTTTGGTATTGTATTTATTCAATAAAGTATTTAATTGGTACTAGTGAACTGGACCCTGTTGCTAAGCCTCAGCAAGCAATCATCTAGGTAGGTTAAACCCCAGTAAAATTGCCATATTGCACATGTCTTAATGAAGTGAATGTTAAATAAAATTGTATATTCACAAAAGGTTAAACACCCAAGCAGACGCCGCAATATCGGCACCAGCAGAAAACGTAGCGCT 21 | >ch384_read10549_template_fail_BYK_CB_ONT_1_FAF05104_A 22 | TAGTATACTTCGTTCAGTTACGTATTATCTTTTCAAGTGAATATACAATTTTATTTAACATTCAAACTTCATTAAGACATATGCAATATGGCAATTTTACTGAAGTTTAACCCTACCTAGGATAGATTTAACTTGCTACGGGAAGCAACAGCTTGGTCAGTTCACTTAGCACTAATTAAATGCTTTATTGAATAAATACAATACCAACAAAATGCATTTCAAATTTTCTCTAAAAATCTTTAAAGGCCTCTCTAGTCGGGCTAATAAACACAATAAAGGCAGATATGCCAGTTTAACATAATTGGCTGATTTTATACAGCACTTATATCTTTTAGTCACAAGTATATTATTAAATGATAGAGAACATCTAATACAACCATTCTACATAACTAGTAAATGAATTTCTAAGAAAAAGATTTTACAGACCCCATCTTTTATACCCCCAACAGTCTAACTCTAAAGAGATAGCCAATACCTTTCTCACAAGAGCTCAGGACTAAAATCGCTTTGCTATCAGAATCTGTATTTGTGATCCCGTTATGAGCATTGAGACAAATTCAAATATTCTCTCAAGGAAGCACAATGCACGTTGTGATCGCCTATTCAGCAACAGCGAGCACTGCATTCAAAACCTATCTCATCGAGTTAAATTAGGTCAGCCACATTCATGGGCATTCCTCCACTGTAGTATTGTAGAAGTCTCAATGTCACGAAGAATCCTCTTGTCTTCTTCCAGTAACAAAGTTTATAGCCACACCTTTCCTCCCCGTCGACCCCTCTGCCAATTCTGTGAATATAGTTTCACGATTGTGCAGGTAGATCATGGTTATAACCAAGGACACTTGTTGCACGGCATCAATCCCACGGGCCAACAAGTCAGTAGTGATCAAGAACACGGCTTGACCCTGATCAGAATTCCCTCATGATGACATCTCTTTCCTTCTGGTCCATGTCACCATGCAGGCAAAACTGTGAAGTCCTGGCATGCATTTTCTCAAGTAGGCCAATCCACCTTGCCTTGTATTGAAAATAACTGCCTGTGTGATAGTCAAAGTCTCATACAAAGTCACAAAGTGTCAGCTTTACACTCTAAATCAAACAAAATTCCACTGTTTTAAGCAAAACTAATGCAGTGGTTCTCAATTCTATTGCTGCAGCCCTCTAATAGTTTCCCCATGATTTGTTGCTATTCGTAACTGTTGACTGTAGTATCACGATATGCTACCCACAAAATCATGATGATCTCAAGCTGAAACTGTATTTATTGCTGGTCTAAATACTTGTGCACCGAATCTTATGACATGGGACACTGGACATTAGAAGGTCCACCCCAGTCTTGCAATCGGTGATTACTTTTCTTCTTGTATTGTGTACTCTGCTAGCATTGTACAAAGCAAGATGAATCTTAGTTCCTGCAATTTTAGCACAATTGCC 23 | >ch428_read2403_template_fail_BYK_CB_ONT_1_FAF05104_A 24 | TGTACTTCGTTCAGTGTGCTTTTCTGTTGGTGCTGATCTTTTTGAATAAAGATGTACATTTATTTCCATGAGTTACAAATTACATTGCATTGCTGTTGTTCATACTAGCAGGCAGTCTTTATGAACCCATTTTCCATTGATGTCAAATAAAAATAAAACCAAGAAAACAAAGGACAGCTGAAATGAAAAGTTAACAATATAAATTATCTTTTCTACATTATCTTCTGGGTAGCATTCTCTTCATGGAATACAACTCAGAGGTCGCGATTTCGTTCAGTAAAGGATGTGACACTTGGACATTGGTCTCTGCACAAAACCTTAACAGTCGTAAGAGCACAGTAAGTATTGAACAGGCATGTTCTCATCAGAGCAGTGAGGGGTCTGGAGGCGCCGCACTGGGACAACACCATCAACTCCACAGCACACGGTGGGAACAAAATATGGAGAAGGAAATAAAAGAGAGCTCCGAACAGTTTACTGCTAGCCTTTTGACATTAACTCCAGCGGTACCGAAACTATCTCTAACTTGGCAACAGAGCAGAGCTTCATGAGGAGAGCCCATCAAGGCTTTATTACATACAAGTCATTTTCTGCTGAAAGTGAGCTCGAGATGCTGACATGCTGCAGGAGAAAAGAACTGTACACTTTCTCATTTAAGATGCAGTGTTCCTTTTAGCTAAATCCTAGGTAGTGAATGTGGTTACGCAAACAGCCTTCTGTGTGGCTGTTCAAGGTTATATGGAGCTCTGCCCTGAACATCTAGGTCGCTTGCTCTCTACTGTTCTGAAACTTCCACAGCTTTGTGTGTCCACCTTTCAGTAAATGACAAGCACAGCTGTTACTTTCACTCTCTGGCTAGGATGGCATGTAACATTTTGTGAGCTCCACCTCCATTACGATACAAGTAACCATATAAAATCTGCAGAACAACAGATACTGCTGGTGCAATCTTCCTTCTATTTCATTTAAACAGTCAAATTTTAAAAGCTGGGCAAATAGTACATACCTTAAATTTAGGATAACATGATACACAGAAAATAGCCCCAAGGACTCTGCAACAGAAGCCCTGCCCTGGCCTGTAATACCATCAGGATACCCTAGCTCCTACTGTCTGAGTAGACAGTTACTAATAACAGATGAAGTGCAAATTTGGGCTGTCACTGACTTAGGTGCCTTATGTCTAGCCCTACTGCGAAGTGAGGTGCCAGTAACTGGAACAAGTCTGGAAGACTCTCCCTCTGATGAGGCTGATGGTTCAGCACATGATGGAGAAAGGATTCAGAACAGGAGGAACACTGCATCTTTCAAACCAGAAAGCAGTTATTCAAACAACGGTAAACAGTTAAGTGCATTGAAAGTGCACACTGCTGCACGGCCTGCTGCCAGAACTCTCTGGACTGAGGAAGGAATTCCGTGAAAGTATACAAAAGCTGCTACCGAAACGCCCTCTGAAGGCTCAAAGGAGTCAAGGTGGGTAAGCTGAGACAGGCTGAAGGCAGGACCAGGGTAAAGAGCACAGGACACCGACATCTGAACTCGTCAGTCCTCTGAGCCCTTGTCGGTAATTTAAGATCTGTATCCCGGCTGGAATCTAGCGGTCTTCAAAATAGGTTCTTTGAGCTACTCCTTAGGTAAGTCGATCGTCAACTCCATGTCAAACTTGAATGGCGCTTCAGCAATGGGCTCATCATGGGTCATAATACTGCTCCAGGTATGGGTGGTGAAACCTGTTCAACTAATCCTCTTGTGAGGGTTAAATGTCAACATTTATCCAGTAAATCAGAGCTTTAGGTCAGCATTTGAACAACCTGTTCCATGGCACCTTATTTTGTGCGGGAAAGCAAATAGTTTCTAGCTTTTAAATTTATTGCCATTCAAGCGGTCTTCCTGTGATGGAGATCAAGAATACCGGATGATTCAGCTGGTCAAAGGTAATGCTCCTGGGAAGATATAGGCCTGTTGAACAGCATCTCTGCCAGGATGCAGCCCTGAACCAAATATCAATGGACTTGTGCAACCTTGGAATTCAACGCATAATTTCTGGAGCTCTGTACCAACGTGTGGCTACGTACTCTGTAAAGAACCCTGTGGTCATGATCTGGATCTGCAACACGGGCAAGGCCAAAGTCACAGATCTTGAGATAAGTGGTGTTCAGCAGGAGTTGGAAGGCTTGAGGTCACGGTGCAGAACGTTAGCTGAATGGACTATACTTTAGCCCTCAGGATCTGATAAAGAAAATAGCAGATGTGGTCATTGCTGGTGCTGTGTCTTCAGGCTTGTAAAGGTCCGTCTCATGAGGTCCTGTACTATATATACATCTTTCATTTGCTCAATGGTTGGTGCGGATGATGTCATTGATGCCAATGATGTTCTCATCTGAAGCGCAGTAAGATTTTATCTCTCTTAGGGTTCTTTGGCTGGTCTGGTGCTCAAGGACTGATTTTCTTGACTAACGCTCGAACTTTGTTGAGATTATCATAAGCAGAGCAAACCATGCCGTAGGCGCCTTCTCAAATGTACGAGAGGTTGGTGTAGCGCGGCCCTACATCGAACACCTGCCCGCGGATTCATCCCGGGCCGCCGCCGCCGCCGCCGCCGGCCGGCTGCACAGCCGCCGCCGCAGCAATGCGTAACTTCG 25 | >ch435_read29295_template_fail_BYK_CB_ONT_1_FAF05104_A 26 | TCGCCGTTTCGGTTTACGTGTATCTGCCTGTCGCTCTATCTTTGACGTCAAGATCATGTCTGGTGGCTCCGCGGTACAACAGAGAACATGGCGGCCCAGAGAATGGGCCCGATGTGTCATCAGAGCAACTGGAATGAAGTGTGTTGTCTTTGATGATATGAATTTAAAGGTCCTTCTTCGAGGCATCTATGCATATGGTTTGAAGCCTTCAGCTATTCCGGCAAAAACTATTATCCTTGTATTAAAGGGTATGATGTGATTAACAAGCTCAGTCAGGTACTGGCAAGACAATACATTTGCTATTTCATCCTGCAACAGTTGGAGATTGGTTCAAGGGAACCCAAGCACTAATGTGGCCCCCTGAAGGCAATAACAGATCAAAAGGTATTTTGAAACATGGAATTATATGAACGCTTGTCATGCTTGCATTGGAGAATAATGTTCGAAATAGAAATGAAGTTACGGAGAAGCAAGCCTCATTGTTGTTGGTACTCAGGGAGAATATTTGATGCTAAACAGAAAGATACACTATAAAATGGATCAAAATGTTCGTTTTGGGCCAAACAGGAGATGGAAATATTGAGCGAGGGTTTAAGGATCAGATCTAAGTTTTCAGAAATTAATGGCCTTCAGGTTGTGTGCTCGCCACAATACCGCTTGATGCTAGAGTGACGAAATTCATGAGAGATCCAATTCAATTCTGGTGAAGAAGGAAAAATTGACATGAAGAATTAAACAATTTATATTAATGTTGAGCGAGAGGTGGAAGCTGGACACTCTTTGTGACTTAATATGGGAACACGGCTATCACACAAGCAGTTATTTCTCAATACAAGGCGGCGCAAGGTGGACTGGCTCGCGGAGAAAATGCATGCCAGGAACTACGGTTTCTGCTCTGCATGGTGACATGGACCAGAAGAAGAGATGTCATCATGAGAATTCGATCAGGGTCAAGCCGTGTTCTGATCACTGACTTGTTGGCCGTGAAGTTGACGTGCAACAAATGTCATGGTTATAAACTGATCTACCAATCGTGAAAACTATATTCTAAATTGGCAGGGGAGGTCGGTGGGAGAAAGGTGTGTATAAACTTTGTTACTGAAGAAGACAAGAGGATTCTTCATGACGAGACTTTCTACAATACTACAGTGGAAATGCCCATAGATGTGGCTGACCTAATTTAATCCCTGGGATGAGATGGTTTAGTCTACAGTGCTCATGTTGCTGAATAGGCGATCCTAGCGATGCAGCTATGCTTCGCGAATATTTGAATCTTGTCATAATGCTCATATTGGATCACAAATACAGATTCTGATAGCAAAACGACTTTAGTCCTGAGCTCTTGTGAGAAGGCATTGGCTTTATCCTCTTTAGAGTTAGACTGTTGGGTGGAGTATAAAAGATGGGTCTGTAAAATCTTTTCTTAGAATTATTTATATGATTATGTAAATGGTTGTATTAGATGTTCTATCATTTAATAATAACGTGGACTAAAAGATATAAGTGCTGTATAAAATCAGCCAATTATGTTAAACTGGCATATCTGCCTTTATTGTGTTGTCATTAGCTGACTGAGGCCTTTAAAGTGTTCTTTTTAGAAAATTTGAATGCATTTTGTTGGTATTGTATTTATTCAATAAAGTATTTAATTAGTGCTAAGTGTGAACTGGACTGTTGCTAAGCCTCAGCAAATAATCATCTAGGTAGGGTTAAACCCCAGTAAAATTGCCATATTGCACATGTCTTAATGAAGTTTGGATGTTAAATAAATTGTATATTCACTTTAAAAAGTTAAGCAAGCAGACGCCGCAATATCGCACCAACAGAAAACGTTCTGTTTATGTTTCTTAATTTTAATTGACACGGAC 27 | >ch46_read337_template_fail_BYK_CB_ONT_1_FAF05104_A 28 | TACATTACTTCGTTACGTATTGCTCTTGCCTGTCGCTCTTCAGGGAGGTGTTTGATATGCTAAACAGAAGATACCTTTCTCCAAAATGATCAAAATGTTCGTTTTGGACAAAGCAGATGAAATGTTGAGCCGAGGGTTTAAGGATCAGATCTATGAGATTTTCAGAGAGTGTCTAAGCGTTGTTCAGGTAAAGTTTATTTCTTCTATTTATTTGGGGTTAGGTTTCGTAGGTACAGTTACAATAAAACTGAAGTGTTCTGTGTCGTTTCCCCTGCTTAAAGCATTTGATACATTACTATCTCTGCCTAGCTACCATACTAAGGCAGAGACGCTTCAGTTCAAACATTTTCCCGTCTAGCATTTGCTAAAAAGATTATACAAACTAAACAGATAGAACAAGCATTTTATTTTTAAAGTTGTGTTGCTTTCTTACAATGCCAACTGATGTGCTAGAAGTGACCAAAGAAATTCATAGAGATCAATTCGAATTCTGGTGAAGAAGGAAGAATTGACCATGAAGGAATTAAACAATTTATATTAATGTTGAGCGAGAGGTAATTTTGTTACAAGCTTTTTCACCTTCTTGTAAACTGTGCTAAAATTGCAGGAACTAAGATTCTATCTTGGTTTTGTAATAATGCTAGCGGAGCACACAAGAAGAAAAGTACTACATGGATTGCAAAGAACTGGGGTGGACCTCTTTCTTAATGTCAGTGTCCATATGTCATAAGATTCAGTGCAATAGAGTATTTAGACCAGCAATAAATACTCAGCTTGGAATCATCATGATCTTTGGTGTATATCAGATACTACAGTCAGCAGTTACGAGCCGCTTAACAAATTTATGGTGGGGAAACTATTAGAGCTGCAGCAATAAATTGAACCACTGCTTTGGTTTGTTTAAAATCATGGGAATTTTGTTTGATTTAGGAGTGGAAGCTGGACACTCTTTGTGACTTGTATGAGATTTTGGCTATCACAAGCAGTTATTTTCTCAATACAAGGCGCAAGGTGGACTGGCTCACAGGAGAAAATGCATGCCAGGGACTTCACAGTTTCTGCTCTGCATGGTGACATGGACCAGAAGGAAGATGTCATCATGAGGGAATTCGATCAGGGTCAAGCCGTGTTCTGATCACTACTGACTTGTTGGCCCGTGGGATTGACGTGCAACAAGTGTCCTTGGTTATAAACTACGATCTACCTGCAATCATTGGTATTGCATATTCACAGGTGAGAAAGCATCTCCAATATTTGTGAGTAAGTCAAACACTTTTCTACGGTGTTTGTATACAAGGTACGGCAACTCAGGGCAGGTGGCAGTAAAGACGGTAATTGCAAGATCTATAAACATTGTACTTGGAAGATTGGTCAGAACAAAGTGGGAAGAGCTTGTAATGTAGTAGGTTGTGTACACACATGCAGATGAGGTTAGGTGCTTTAGACTACTTTTGGTCTTAGCCTATGTTTTGAGCCTTTTAGTGGTTTCTTGTGTCCATGTGCTTTTCTTGGTGATTGATTCTATACGGTTAGGATTTTCTTGGTGTTGTGGTAGCATTTTGACTGATTCCTGGTTTAGTTATAGTGGCTTTTGTCCCTAAATAAATTGGATTGTACTTTGTTATGATGTAAAGACTTTTAAAAATACAGGAGTCGGTAGCGGCAGTTGATGACGGTGGCACTCAGAAACGGCGTTGACGTAATTTAGGACGATGGAATCATAAGCGAAACAGCACACTGTTTGAATGAAACGAGTCAGTATTTATGTTGATTTTCTTTGTCTTGAGTTATTTGATATTTTAAGTTGTTTCCAACCCAAGGCATTTTGTATGTTAGCATTTCTGATAGGGAGTGATAGGTGGTTTGTATTGTCTGGTACTTTAGGAAGATGAGAGCTGTTTTGGGTAACACGTTTGAGTAATTTGGTACACATAACGTGTGAAAAGCTGAGCAAAGCAGTGATAGTTTGGGTTACCATACCAAATACTTGCTTTCACCTGGAAAAATAAGTTTTAGAAAATAGTTAACCTTACAGCATTTGTATTTACAGTTCAAGTGCGTCGAAATGGTTGTATAACTGTTTCTTTATTCCTGGTGTTTACATTTGTCCCAGGCTGACAACTGCTCCTGGCTGTACTGGTATGGGCTTTAATTTCACCCAAACACAATACTGTCATCTGCTTTATTGTAATGCTCAAGGCGCCTATCTCATTTGCAGCCAGACAAGCCTTGGTCTTTGTGGTGGCACTATCTGCAAGGAAGATGTTTTCCACCTGGATATTGGTTTTATCAGCTAGTGCTCCACTTAAAAGCACAGACTATAATCCTGATAAGTAAACAGCAGCTGATGGTTAACAAATTGGATCGTCATATTCGGTAGCTTTAAATAGTACTAAAGTAACGCTGACTGTTGGTTTCTTTCTCTTACACAGAATTGGCAGGGGTCGATTTGGGAGGAAAGGTGTGGCTATAAACTTTGTTACTGAAGACAAGAGATTCTTCGTGACATTGAGACTTTCTACAATACTACAGTGGAGGAATGCCCATGAATGTGGCTGACCTAATTTAATCCTGGGATGAGATAGTTTTGAATGCAGTGCTCGCTGTTGCTGATAGGCGATCACAACGTGCATTATTAGCTTCCTTGAGAATATTTGAATCTGTCTCAATGTAACGGATCACAAATACAGATTCTGATAGCAAGCGACTTTAGTCCTGAGCTCTTGTGAGGAAAGGCATTGGCTTCCTCTTTAGAGGATGAACTGTTGAGTGGTATAAAGATGGGGTCTGTAAAATCTTTTCTTAGAAATTCTTTACTAGTTATGTAGAAATGGTTGTATTAGATGTTCTCTATCGTTAATAATATATACTTGTGGACTAAAGATATAAGTGCTGTATAAATCAATAATTATGTAAACTAGCATATCTGCCTTTATTGTGTTTGTCATTATACAATAGAGGCCTTTAAAGTGGTTTTTAGAAAATTTGAACATTTTGTTGGTATTGTATTTATTCAATAAAGTATTTAATTAGTGCTAAGTGTGAACTGGACCCTGTTGTAAGCCTCAGCAAGCAATCATCTAGGTGAGGTTAAGCCCCAGTAGAGTATATATTGCATGTCTTAATGAAGTTTGAATGTTAAATAAGATTGTATATTCACTTTAAAAAGGTTAAACACCCAAGCAGACGCCGAAGATAGAGCGACGAGCAATGTAACTTG 29 | >ch482_read5787_template_fail_BYK_CB_ONT_1_FAF05104_A 30 | ACGTGTACCGTTCAGTTGTGTGCTGTTGAATATTTTTAAGTACATAGCTATCTTCAGACACTCCAGAAGGGAGTCAGATCTCGTACAGTTGGTTGTGAGCCACCATGTGGTTGCTGGGATTTGAACTTCGGACCTTCGGAAGAGCAGTCGAATTGCTCTTACCCCACTGAACCATCTCCACCAGCCCCAGGTCTTGTTTTTATAATATAAATGTTTCTCTCCCTTCTCTGGAGACACAGCAACTGGTGACTTCCTGAAGACATGAATGGTCATGAGGCAGCCAGTTGCTGGCTTTACAGGTCTACTGGAGGTCAGTGTTTTGCCTAATTCATTTGAGATTAGTGAATCGATAAAAGACACAGACAAGTTTCTTTCACTGGGTGCTTTTGTTTGTTTTTAGGAACAAGGTCCTAACCCAACCTCAGCTAGCCTGGTCTATAAGACCAGGTGGTCTTAATTTCACTCAGAGTGCTAGAATTAAGTGTTTGCTGCCATGCCTTGCCTCTATTAGATGACTTGTAACTGAAAACTGCCAGAATCTCTTTGCTTCCTCCTTCCTGGCTACTTAATATCATTGTAAAGGAAATGCATTTGAAGTAACAGTCTCCAGACTCTTCAAATACCTCCTGACTTTGTGCTGTCCACAGACTAGTAAAGGCTCCTATGGCCAGGATGCCTATATTTAAAATCCACAGACACAGGGACTCGCCTCACCTCATCTCTCTGCGGTAGTGCCCACAACCCACAGTTTTATGTGTACTATATTTGGCCCTGGTCCTCTGCTCTTTGTGCCTACTTTGCTAGCCCAATAAATGGCATCCAATTTCCATCTCAGGGTCTTCCTCAGCATTAGAAAAGATATCTTACAAGGCATGCGCTTGGTGTCTCTCCTTCACATGAGGAATATATCAAGGACTTCACCATTTGCTTCCAGCTGCTGTCACCTCTTTCCTCCCAGAGTTAAACACCCAAGCAGACGCCGGAGTAGAGCGACAGGCAAGACGTTCTGTTTATGTTTCTTGGACACTGATTGACACCT 31 | >ch488_read2761_template_fail_BYK_CB_ONT_1_FAF05104_A 32 | GTTCTTCGTTCAGTTACGTGGTACTACTTGCCTGTAAATCCTGCTTTTCACTTCCACACATGAAGCCCGCTGGCATTGAAGGAGATAGAGGTTAACTCATTGATACAGTAGTGAACAGTTTTCCTTTTCTTTTGTCTTTTTAAAGCGCATTGTTCTCCTACTTGCTTCTTTTCATATTTTTAATCCCATGTAAGTTAATTTGCATTCTTGTGAACCGAAACATAGCCTCATCTTCTCGAGTCTGAGCTTTCTGCCCTTCCTGGCACTGTGGAGGGGTTGGTGTGAGATCACTCACTTCATCTAGTCACTGTATCTAAGTGTGGCTTTCTTAGCCGACAAATGACAGCTCGAGCTGTCAGGTATAGAAACGCTCATTATTTTGGTTCTCATGTTCTAAAAATGTTTGGATAACGTCATCTGCATACTACTGGTGTCGTGGGTGCCTCTACTATTCATACATAGATAAGCTGTCGGTGGATGGGCTTTTTGTCAAGTCTTAATGTGGGGAAAAACCCAGAAACATGAAAACATTTAGCATGAAGATGGCTATCAACAATCCCAGAGCGCTTGATACCGGCATTCGACTGACGCTGACCTACTCTGTGGTGCATTTATTCTGCCCACCCTCATCCCTCTCATTTGAGGACAGGCAACACTTGGGCTGGGCGACTGTTAGTTTGGGAAGCTATTATTGAATTAACAGCAGCTATCTCCTGAGAATCACAAGGTGAACACCTACACTGCATGCCACATAGTATTCAGACCATAGGGAGACTTCCATTTGCTTAGGATAATATTTACATTAATATTAGTAGTTAGGTTTGAACTTTTGGTGACTTCTATACTACGGTAACACATTCATATATGCATATGCTTTGGGTCCTTCATACTACTTTTATATTTGTAAATCAGTGTTTTGGAGCAATTCAAGTTTAAAGAAATATTTTGTAAATGTGATGGTTTGAAAATCTGAGCAATTCTTTTGCTACAAGTTTTTAAAGCATTTGTGCTTTAGGTATTGAATGTTTGAATGATACCTATAACCCAGATAAGAAACATAAGAATGGAGTAAACGCGCTGTCGCTTGTCGTGCGCTATGCCCAGCTTGGCGTGGATCAGCAGTGGGACTCAGGTCCTAAGGTCACACCAGCTCACCTGCAGCGTGTTGCCTTTCTGTACCGTCCGCCCCGCCCTTCAGAGCGCACTCCAGAAAGTTCTGACATGGCTCTGTATCTGCTCTGTATGGATGCCTTTTGGTGTTATCCCAAACTGCATAGATTATTTAGAATAATGATAAGTTTAAAAATTAATGTTGAAGAAAGATTTATTAAGAATTTAAATCTTTTCATTATATTATTAAACGAACATTTATCTGTGTATGTGATTTGGTTAATATGTATAAAAATTGTAAGAGGTTTATATTTCATCTTAATTCTTTTGATGTTGTAAACGTGCTTTTCAATTCTTATTTGAATGTTTATGGCACCTGACTTGTAAGGTACAAAAATCACTTACGAAAAAGGTTAAACGCGAAGCAGACGCCGCAATATCAGCACCAACAGAAAGCAATACGTAACTAT 33 | >ch487_read8370_template_fail_BYK_CB_ONT_1_FAF05104_A 34 | GTGTACTTCGTTCGGTGCGTATTGCCTGTATTTACCTGGAAGGCTGGCAGTGGTTCGGCGGTTAATTCTCTCACTCTGTGTTGTCCTCCTTCCTCGTTCCCGATCGCCGGCGGGGCGGCTACACGGGCGGCAGCGCGGTTCCTGCAGGAAGCGCGGCATAGAATCGAGCGGCGGCCGCGAAGCGTCGAACCGAACGCGGCGGCGGCGACTTGACCGGCGGCGGCTGTGCAGCAACATGGCGGCGGCGGCGGCGGCGGGCCCGGAAGTGGTCCGCGGGCAGGTGTTCGACGTAGGGCCGCTACACCAGCCTCTCGTACGTCGGAGAAGGCGCCTACGGCATGGTTCGCTCTGTATGATAATCTCAACAAAGTTCGAGTTGCTATCAAGAAAATCAGTCGCTTTTGAGCGCAGACCTGTCAAAGAACCTAAGAGATAAAAACCTTGCGCTTCAGACATAGAACATCATTGGCATCAATGACATCATCCGAGCATAACCATTGAGCAAATGAAGATGTATATAGTACAGGACCTCTTGGGCGGACCTTTACAAGCTCTTGAAGACACAGCGCCTCAGCAATGACCACATCTGCTATTTTCTTTATCAGATCCTGGGAAGGGCTAAAGTATATCCATTCGTAACGTTCTGCACCGTGACCTCAAGCCTTCAACCTCCTGCTGAACACCGCGTGATCTCAAGATCTGTGACTTTGGCCTTGCCGTGTTGCCAGATCAGGTCATGATCACAGGGTTCTTGACAGAGTACGTAGCCACACGTTGGTACAGAGTCCAGAAATTATGTTGAATTCAAAGGGTTATACCAAGTCCATTGATATTCTTTGGTCTGGGCTGCATCTGGCAGAGATGCTCAGCAGGCCTATCTTCCCGGAAAGCATTACCTTGATGGCTGAATACATCACTGAAGGTATTCTTGGATCTCCATCACGGAAGATCTGAATTGTATAATAGAATTTAAAAGCTAGAAACTGTTGCTTTCTCCTCGCACAAAAATAAGGTGCCATGGAACAGGTTGTTTCCCAAATGCGACTCCAAAGCTCTGGATTTACTGGATAAAATGTTGACATTTAACCCTCACAAGAGGATTGAAGTTGAACAAGGCTCTGGCCCTTGTACCTGGAGCAGTGTATATTGGTGATGAGCCCATTGCTGAAGCGCCATTCCAAAGTTTGACATGGAGTTGGACGACTTACCTGGAGGCTCGAAAGAACTCATTTTTGAAGAGACTGCTAGATTCCAGCCGGGATGCAGATGCTTAAGTGGTCAGGACAGGGCTCGAAGGACTGGACGAGTTCAGATGTCGGTGTCCCCAGTTCTTTACCCTGGTCCTGTCTTCCAGCCCGTCTCAGCTTACCCACTCTTGACTCCTTTGAGCCTTTCCGGGGCGGTTTCTGGTAGTAGCAGCTTTATACTTTCACGGAATTCCTTCGGTCCAGAGTTCTGGCAGCAGGCCGATGCAGCAGTGTGCACTTTCAATGCACTTAACTGCTGTTGTTTAGTCTGAACTTTGCTTTCTGGTTTGAAAGATGCAGTGGTTCCTCCTGTTCTGAATCGCTTTCTCCATATCATAGCTGCTGAACCATCAGCCAGCCTCATCAGAGAAGTCTTTTCCAGACTTGTTCCAGTTACTGGCACCTCACTTCTGATAGGGAGGCTGGGACATAAGGCACCTTAAGTCAGTGACAGCTCAAATTTGCACTTCTATCTGTTATTTAACGCTGTCTGCCTAGACAGTAGGAGCTAATGGTATCCCTGGATGGTATTACAGGCTACAGGGCAGGGGCTTCTGTTGCAGAGGTCACTGGGGCTATTTTCCTGTGTATCATGTTAATTCCTAAATTAGGTATGTACTATTTGCCCAGCTTTTAAAATTTGATCATTGTTTTAAATGAAATGGAAGCATTTTAACAGTATCTGGTATTGCAATTTTATATGGTTACTTGTATCGTAATGGAGGTGGAGCTCTTGCCAAAATGTTACATGCTATCATAGCCAGAGAGTAAGTAACAGCTGATAGCATTCATTTACTGAAGGTGGACACACAAAGCGCTGTGGAAGTTTCAGAACAGTGAAACAAGCAGCCTAGATGTTCAGGAGCAGAGCTCCATATAACTATTGTGAACAGCCACGAAGGCTGTTGCGTAACCACATTCACTACCTAGGGATTTGGCTAAAAGGAACACTGCCTTTAAATGAGAAAGTGTACAGTTCTTCTCCTGCAACATGTCAGCATCTCGACTCACTTTTCAGCAGTGTAATGACATGTAATAAAGCCTTGATGGGCTCTCCTCATGACTCTGCTCTGTTGCCAAGTTAGAATGTTTCTGGTACTTTTCTTTGAGTTAATGTCATAAAAGGCTAGCAGTAACTGTTCGAGCTCTCTTTATTTCCTTCTCTCCTATATTTTGTTCCTGCACTGTGTGTGGAGTTGATGGTGTTATCCCGGTGCGGTGCCTCCAGACCCCCTCACTGCTCTGATGAAATATGCCTTGTTCAATACCTTGTATCTTGCATGACTGTTGAGTTTCTGTTGTGCGAGACCAATGTCAAGTGTCACATCCTTTGATTGAACGAAATCTGTTGTATTTACTCTGAGTTGTATTCATGAAAGTACTGCAGAAGATAATGTAAGAAAGATAATTATATTGTTAACTTTTCATTTCTCAGCTGTCCTTTTGTTTTTCTTAGTTTTATTTTATTTTGACATCAATGGAAAATGGGTTCTATAAAAGACTGCCTGCTAGTGAACAGCAATGCAATGCACTTGTAACTCATGAAATAATCGCCATCTTTATCTTTACACCCAAGGTTAAACGAAGCAGACGCCGCAATATCCCTTTCAACGAAAACGTTCTGTTTATGTTCTTGGACACTGATTGACATCATCAAACAAACCGGACGTTTTCTGTTGGTATTGCGGCGTCTGGTGTTAACCTTTTGGGTGTAAAGATAAAGATGTACATTTATTTCTGAGTTACAAGTGCATTGCATTGCTGTTCATACTAGCAGGCGGTCTTTATAGAACCATTCAATGATAAAATAAAATAAAACCAGCAAAGAGACAACTAATGAAAAGTTAACTAAT 35 | >ch497_read5679_template_fail_BYK_CB_ONT_1_FAF05104_A 36 | CCCGCTTCGTTCAGTTACGTATTACTACTTTGCCTGTCGCTCTTGAAGTTTTTATGTATTCTTTTACAAGTCAGGTGCCATAAACATAAATAATGAATTAACGTTTACAACATCAAAGATTAGATGAAATATAAACCTCACAATTTTATACATATTAGCAAATCCATAAACACAGATAAGTATTCAGTTTAACAATATATTGAAAACATTTAATTCTTAATAAAATCTTTCTTCAACATTAATTTTTAAGCCTCATTATCTAAATAATCTATACAGTTGGGATACAACACAAAAGGCATCCACATTTAGAGCAGATACAGAGCCATGTCAGAACTTTTGGAGTGCTCTGGTGGGGCAGACAGCTGAAAGGCAACAAGCGCAGGAAGGCCGAGCTGGTGTGACCCTAGGGACTCGGGTCCTGCTGATCAACACGCCAAGCTGGGCGCACGACAAGCGACAGCGTTTACTCCATTCTTATGTTTCTTATCGTTTATGATTATCTATTCCAAACAGCACAATTTTAAGCGCAAATGCTAAAAACTTGCTAACAAAATTGCTCGATTTTCGAAACATCCGCATTGCTAAAATATTTCCAGCGTAACTTGGAATTGCTCAAAACACTGATTTACAAATGCAAGTAAGTATGGGACCCAACATGTATGTATATGAATGTGTTACCGTAGTATAGAAGTCACCAAAGTTCAAACTATTACTAATATTAATGTAAATGTATCCTGGCAAATGAGTCCCTAGTGGTCTGAATACTATCTTGGCATGCGGTGTAGGTGTCGCGTTCCTCAGAATGGCTGCTGTTGACGTTCACAACTGGTAACAGTCATGCCGGCCCAAGTGTTGCCTGTCCTCGTGGAGGATGGTGGGAGCAGAATCATGCACCACAGGTAGGTCAGTGTCAGCTCTGAATGCCGGTATCATCGTATCTCTGGGATTGTTGGATAGCTTATCTTCTTCGCATGCTAAATGTTTTTCATGTTTGGGTTTTCCCTCACATAATGAACTTGGACAAAAGCGTTATCCTGACAGCTTTATCTATGTATGAATAGTGAGGCACCCAATATGACACCAGTATGCGGATGACGTTATCGGCTTTTGAAACGCAGAACCAAATAACGACGTTCTACCTGACTCTGTTGTCATTTACAATAGCTACACGGCCACACTTGCCATGGTACAGTGACTAGGATGAAGTGGTGATCTACACCAACCCCTCTCCACAGTGCCAGGAAGGGAGCTGAAAGCCTGAACTAGGAAGATGAGTATGGTTTCTATATTCACAAGAATACAAATTAATCATGGGATTAATGAAAACAAGTGAGGAACGATGGTGGCAAAAACAAAAGAAAGAGGCGGCCACTACATCAATGAAAGCTGATCCTGTTCAATGCCAGTGGGCTTCATGTGGAAGTGAAAGCAGGTTGGTAGGAACTGAGATCGGACTTTCTGGGATGAGTCAGCCGTCTTTAGCCAGGTGAGTGTACTACGCGAACTTGAGCAGCAGGTACATGAGATCTTGCCAGACGACCTTCACAGCTCTAGGATACAAACGAAGGCCCTTTTCAGGAACCAGATCCTAACCCAGGCATGTTTGGGTGCCATCATCAACATCTGACTCGCAGGACCTGTTTAGGATATTATCAGATTCATTTGCTCTACTGAAACGGTGGGGAGACTCTGTTTTTGAGACAGGACCTGCAAGTTCAGCTATCTTGGATGACCTCTTCACAAATCAGGCCTGCTTCATTCAGAATCACCTACCTCTGACTTCCTGAATCTGTCTTGGGATTAACGATGCATGGCACCATGTGCAGCTAGAAAGGACCTTTTAATATGTAAACTGCTGTAGGGGCAGTGCATGGGGGTAGGAGTGGAGTCGGGGAGAAGTAGATGAGCCCATGAACATTTTGTCTGAAGCACAAACCATAGGTGATTATGAAATTTCGCAGGACACCAATCCAGAAAATCAGCAACTGAATCTTATCAGGTGCTAAAGATAAAGATGTACATTTATTTCATATGAGTTACAAGTGCATTGCATTTTCATACTAGCAGGCAGTCTTTATAGAACCCATTTCATTGATGTCAAAATAAAAATAAAACAGAAAACAAGGACAGCTGATGAAGAATTAACAATATAATTATCTTTCTACATTATCTTATAGGGTAGCATTCTCTTCATGAATACAACTCAAATACAACAATTTCGTTCAATCAAAGTTGTAGCACTTGGACATTGGTCTCTGCAGAAACAACGCAGTCATACAAGAGCGCAGTAGTATTGGTCGAGCATATTCTCTCTCGAAACGATGAGGGGTCTGGAGGCACCGCATGGGATATTTCATCAACTCCATACGGTACAGGAACAAATATAGGAAGGAAATGAAAGAAGGCTCGAACAGTTACTGTAGCCTTTTATGACGTGCTCAGCGAGCAGAAACATCTCTAACGGCAGCAAACGAGCTTCATGAGCCATCAGGCTTTATTTTGATCGTACACTGCTGAAAAGTGACTCAGGAGATGCGACATGCGGGAGAAGAACCACTTTCTCGTTGATTCGGCCCAAACCGGAGCGCCGCAATATCAGCGCAACAAGCAATACATGTGGC 37 | >ch52_read6550_template_fail_BYK_CB_ONT_1_FAF05104_A 38 | TCGTGTACTTCGTTCAGTTACGTATTGCTATATTTGATATGAATTTAAGGAGGTCATCTTCGAGGCATCTATGCATATGGTTTGAAAGCCTTCAGCAGCTATTCAGCAGAGGCTATTATCCTTGTATTAAAGGTATGATGTGATTGCTCAAGCTCAGTCAGGTACTGGCAAGACAGCAGCCACATTTGCTATTTCCATCCTGCAACAGTTGGAGATTGAGTTCAAGGGCAAGCTTTAGTATTGGCCCCACCAGAAACTGGCTCAACAGATCAAAAAGTAATTTGGCTCTTGGAGATTATATAGAACAACTTGTCATGCCATTGGAGAACAAATGTTCGAAATGAAATGCAGAAGTTGCAGGCACAAACCCCTCTCACATTGTTGTTGGTACTCCAGGAGAGTGTTTGATATGCTAAACAGAAGATACCTTTCTCCAAATGGATCAAAATGTTCGTTTGGACAGAAGCAGATGAAATGTTGAGCCGAGGTTTAGGATCAGATCTATGAGATTTTCCAGAAATTAAATACAAGCATTCAGGTTGTGTTGCTTTTCTATACAATGCCAACTGATGTGCTAGAAGTGACCAAAATTCATGAAGTCAATTCGAATTCTGGTGAAGAAAGGAAGGTGACCCTTGAAGGAATTAAATTTATAATGTTGAGCGAGAGGAGTGGAAGCTGGACACTCTTTGTGACTTGTATGAGACTTTGACTATGCACAACAGTTATTTTCTCAATACAAGGCGCAAGGTGGACGGCTCTGGAGAAAATGCATGCCAGGGACTACAGTTTCTACTCTGCATGACATGGACCAGAAGGAAGGATGTCATCATGAAATTCCGATCGAGGTCAAGCCGTGTTCTGATCACTACTGACTTGTTGGCCCGTGGGATTGACGTTAACAAGTGTCGCGGTTATAAGCTACGATCTACCTGCAATCGTGAAAACTATATTCACAGAATTGGCAGAGGGGTCGATTTAGGAGAAAGGTGTGGCTATAAACTTTGTTACTGAAGACGAAGTTCTTCATTGAAGACTTTTCTACAATATACTACAGTGGAGAAATGCCATGAATGTAGCTGACCTAATTTAATCCTGGGATGAGATAGTTTGAATGCAGTGCTCGCTGTTGCTGAATAGGCGATCACAACGTGCATTGTGCTTCATGAGAATATTTGAATCTTGTCTGTCTCATAACGGATCACAAATACAGATTCTGATAGCAAAGCGACTTTAGTCCTGAGCTCTTGTGAGAAGGCATTGGCTTTATCCTCTTTAGAGTTAGACTGTTGGGGGTGGAGTATAAAAGATGGGGTCTGTAAAATCTTTTTATGAAATTCATTTACTAGTTATGTAGAAATGGTTGTATTAGATGTTCTCTATCATTTAATAATATACTTGTGGACTAAAAGATATAGTGCTGTATAAAATCAGCCAATTATATTAAACTGGCATATCTGCCTTTGTGTGTTGTCGTGTGACTAGAGGCCTTTAAGTGATTTCTTTTAGAAAATTTGAATGCATTTTGTTGGTATTGTATTTATTCAATAAAGTATTGGTAGTGCTAAATGTAGACTGGACCCTGTTGTAAGCCTCAACAATGAGAAATCATCTAGGTGAGTTATTGATAAATTGTCATATTGCACATGTCTTAATGAAGTTTGAATGTTAATAAATTGTATATTCACTTTAAAAGGTTAAACGCGGCGAACGCCGCAATATCTTTCCAGCGAAAACGTTCTGTTTATGTTTCTTGGACACTGGTGACACGGAT 39 | >ch58_read5008_template_fail_BYK_CB_ONT_1_FAF05104_A 40 | ATGCGCCTGAAGGCGATACAAACCATCATTTCTGGGTGCTCTGAAAGGGCGGAGACAGCACAGAAAACAACAAGCTGCAAGTGAGCGCGGTGCTTGACCTAGGGACTCGGAGTCCCTGCGCGATCTGCGTAGCACGCCAAGCTGGGCATAGCACGACCTGACGACAGCGTTTTACTCATGTATGTTTCTTATCTGGGTTATGGTGGCGTCTATTCCAACTAACTGTTTTCAAGAAGGGGCTTTGAGTGCTTTAAAAACTTATGTAAAGATTATCAGATTACAATATCACGTTTTGCTAAAATATTTCCATAAACGAGATTATCCAGCTGCTGATTACAAATATATAAAGAAGTAGTATGAAGGACCCCAGCTTTGCATATGGAGGAGTAATGTGTTACCATAGTATAAAGTCGCAAAGTTCAAACCTAACTAATATTAGTATAAAGCGATATTATCTAAACAAATGATCCCTAAGTGATGGTCTAGACTACTACGCTGGCCGGGCGTACAGTGTAGGTACAATCTGCTGTGATTCCTCAGAGATAGCCGTGATTGCACAGCTTCCCAAACCGTCATACCCGGCAAGTGTTACATCATCGAGCCCAGGAAGGGATGGTGGGAGCAAGTCATGCACCGAGTAGGTCGTGACAAACTCTGAATACCAGTATCATCAGCATCTGGGATTTGTTGGATAACTATCTTCTTCATGCTAATGTTTTCATGTTTTGGGTTTTCCTCATATTAGACGAATGAAAAGCCCATCCACCAGACAACAACTATGATGGCAGGAAGAATGGTAGAGCACCCTTTGATGGCAGCACCAGTGGTGTTATCCGACGACACGAGCATAGGAACCCCAAAATATTGACGTTCTGCCTGTGGCTCACTGAGCTGTCATTTACAATATCTTCATGAAGCCCGCGTGATGATGACGGTTGATGGTGATCGCAACCTCCACAGTGCCAGGAAGACGTGAAAGCTCAGTTATCGAAGATGAGCTATGGTTCCTTATTCAAGTCTGATTATCAAGCTGGGATTAAAATGAGAAAAAGCTGGAAGGCGTGGAGAGCAGTCGGTGCTTTAAAAGAACGAAAAGAAAGAAAGAAGCGCGACCAGCACTACTGTATCAATGAAAGAATGAACACTATGGGCACAGAAGCTTCAATGCCAGTGGGCTTCATGTGGAAGTAAGCAGGGTTGGTAGAACTGAGATCGGTTTCTGGATGAGTCATCATTCTTTAGAAGCCAGGTGGTATGTGCTACATTGAACGGTAGCGCAGGTACATGGTCTGAACAGCCCTTCCTACAGCTCTGGACCTACAGAGCAAAGGCCGCTTTCTAGAGAACCAGGTCGCGGGCATGTTGCGTTGTGCCATCATCAACATCTGACTACGCAGGACCTGTTTTGAGGATCATGTCAGATTTGCTGTTGCTGCTGAGGAAGGCAGACAGGAGACTCTGTTTTTGAGACTGGACCATATAGAGTTCAGCTGTCTTGGACTCCCTATACCGAATCGGAGCCTCTTCAATTCAGAATCCACCTGCCTCTGACTGGATATTTCTGATTAACATTACATAGCGCTTAAACAAGTAGGCGTGAGACCTTTCAGAAATATATAAATAACTATATGAGGGCAGTGCGGCGATGTGAGAGTAAGTGAGAAGGTGGGGGCGGTAAGGCTAAGATATGAACATTTTGTCTAAGCTGGGAAGCATAAATTGATTGCAGATGCCTGGCGGTTTTTGCACAATCCAGAAAATCCGCGCTGGATCTTAAAATGTGGGTGTAGGAGATAAAGATATTAACATTTATTCCATGAGTTACAGTGTTGCATTGCTGTTCGCGCAGCAGACGATCTTTATAGAACCGTTTTCCGTACGATCGATAAAATATAAAATAAGAAAACAAGGACAGCTGAAATGAAGGACGAAGCGCAATATAATTATCTTTCATTATCGCCTGGTGTAGCATTCTTCATGGAATACAACTCAGAGTCACAACAATTCGTTCAATCAAGGATGTGACATGGACATTGGTCTTCTGCACGAAACAGCAACAGTCATGCAAAACTGAGCGAAGTATTGAACAAGGCATATTTCTCATCGAAACGTGAGGGGTCTTTCTG 41 | -------------------------------------------------------------------------------- /clustering_cliqueness.cpp: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | * * * * CARNAC: Clustering coefficient-based Acquisition of RNA Communities 3 | * * * * 4 | * * * * Authors: Camille Marchet 5 | * * * * Contact: camille.marchet@irisaa.fr, INRIA/IRISA/GenScale, Campus de Beaulieu, 35042 Rennes Cedex, France 6 | * * * * Source: https://github.com/Kamimrcht/CARNAC 7 | * * * * 8 | * * * * 9 | * * * * This program is free software: you can redistribute it and/or modify 10 | * * * * it under the terms of the GNU Affero General Public License as 11 | * * * * published by the Free Software Foundation, either version 3 of the 12 | * * * * License, or (at your option) any later version. 13 | * * * * 14 | * * * * This program is distributed in the hope that it will be useful, 15 | * * * * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | * * * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | * * * * GNU Affero General Public License for more details. 18 | * * * * 19 | * * * * You should have received a copy of the GNU Affero General Public License 20 | * * * * along with this program. If not, see . 21 | * * * *****************************************************************************/ 22 | 23 | 24 | 25 | 26 | 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include "clustering_cliqueness.hpp" 50 | 51 | 52 | 53 | using namespace std; 54 | 55 | struct Node_hash { 56 | inline std::size_t operator()(const Node& n) const { 57 | return pow(n.index, 31) + n.degree; 58 | } 59 | }; 60 | 61 | 62 | inline bool operator == (Node const& n1, Node const& n2) 63 | { 64 | return (n1.index == n2.index); 65 | } 66 | 67 | 68 | vector removeDuplicates(vector& vec){ 69 | sort(vec.begin(), vec.end()); 70 | vec.erase(unique(vec.begin(), vec.end()), vec.end()); 71 | return vec; 72 | } 73 | 74 | vector removeDuplicatesCC(vector& vec){ 75 | sort(vec.begin(), vec.end()); 76 | vec.erase(unique(vec.begin(), vec.end()), vec.end()); 77 | return vec; 78 | } 79 | 80 | 81 | // compare nodes first by degree, then by CC 82 | //~ bool CompareNodes(const Node& a, const Node& b) 83 | //~ { 84 | //~ if (a.degree < b.degree) return true; 85 | //~ if (b.degree < a.degree) return false; 86 | 87 | //~ if (a.CC < b.CC) return true; 88 | //~ if (b.CC < a.CC) return false; 89 | //~ return false; 90 | //~ } 91 | 92 | 93 | void DFS(uint n, vector& vecNodes, unordered_set& visited, set& nodesInConnexComp, bool& above, double cutoff){ 94 | if (not visited.count(n)){ 95 | if (vecNodes[n].CC >= cutoff){ 96 | above = true; 97 | } 98 | visited.insert(n); 99 | nodesInConnexComp.insert(n); 100 | for (auto&& neigh : vecNodes[n].neighbors){ 101 | DFS(neigh, vecNodes, visited, nodesInConnexComp, above, cutoff); 102 | } 103 | } 104 | } 105 | 106 | vector split(const string &s, char delim){ 107 | stringstream ss(s); 108 | string item; 109 | vector elems; 110 | while (getline(ss, item, delim)) { 111 | elems.push_back(move(item)); 112 | } 113 | return elems; 114 | } 115 | 116 | 117 | // parser for infile at SRC format. 118 | // fills a vector of Nodes 119 | // Nodes are reads, they remember their neighbors in the graphs 120 | // Edges exist when a similarity is reported in the infile 121 | // Weights info also come from infile 122 | // in the case infile reports: 123 | // read1: read2 read3 124 | // read2: read3 125 | // edges will be (1,2) (1,3) (2,3) (non oriented) which means a similarity becomes an edge even if it is reported in only one direction (read 1 to 2 for instance) 126 | // weights: 127 | 128 | void parsingSRC(ifstream & refFile, vector& vecNodes, bool weighted){ 129 | string listNodes; 130 | // header 131 | getline(refFile, listNodes); 132 | getline(refFile, listNodes); 133 | getline(refFile, listNodes); 134 | vector neighbs; 135 | vector> clust; 136 | vector splitted1, splitted2, splitted3; 137 | uint read, target; 138 | unordered_map seenNodes; 139 | unordered_map neighbToWeight; 140 | double weight(1); 141 | while (not refFile.eof()){ 142 | getline(refFile, listNodes); 143 | splitted1 = split(listNodes, ':'); 144 | if (splitted1.size() > 1){ 145 | splitted2 = split(splitted1[1], ' '); 146 | target = stoi(splitted1[0]); // target read's index 147 | if (not splitted2.empty()){ 148 | for (uint i(0); i < splitted2.size(); ++i){ 149 | splitted3 = split(splitted2[i], '-'); 150 | read = stoi(splitted3[0]); // recruited read 151 | if (weighted){ 152 | weight = 1/ (stof(splitted3[1])); 153 | } 154 | if (read != target){ 155 | if (not seenNodes.count(target)){ // new node not already in the vector of nodes 156 | clust = {}; neighbs = {}; neighbToWeight = {}; 157 | Node t({target, 0, 1, clust, neighbs, neighbToWeight}); 158 | vecNodes.push_back({t}); // store in vecNodes 159 | vecNodes.back().neighbToWeight.insert({read, weight}); 160 | seenNodes.insert({target, vecNodes.size() - 1}); // remember this node has been pushed index -> place in the vector 161 | } 162 | if (seenNodes.count(read)){ // this neighbour is already in the vector of nodes 163 | vecNodes[seenNodes[target]].neighbors.push_back(seenNodes[read]); // add read as neighbor of target 164 | if (not vecNodes[seenNodes[target]].neighbToWeight.count(read)){ 165 | vecNodes[seenNodes[target]].neighbToWeight.insert({read, weight}); 166 | } 167 | vecNodes[seenNodes[target]].neighbors.push_back(seenNodes[read]); // add read as neighbor of target 168 | vecNodes[seenNodes[read]].neighbors.push_back(seenNodes[target]); // add target as neighbor of read 169 | if (not vecNodes[seenNodes[read]].neighbToWeight.count(target)){ 170 | vecNodes[seenNodes[read]].neighbToWeight.insert({target, weight}); 171 | } 172 | } else { // new neighbor not already in the vector of nodes 173 | clust = {}; neighbs = {}; neighbToWeight = {}; 174 | Node r({read, 0, 1, clust, neighbs, neighbToWeight}); 175 | vecNodes.push_back({r}); 176 | uint position(vecNodes.size() - 1); 177 | seenNodes.insert({read, position}); 178 | vecNodes[seenNodes[target]].neighbors.push_back(vecNodes.size() - 1); // store read as neighbor of target 179 | vecNodes[seenNodes[read]].neighbors.push_back(seenNodes[target]); // add target as neighbor of read 180 | vecNodes[seenNodes[read]].neighbToWeight.insert({target, weight}); 181 | vecNodes[seenNodes[target]].neighbToWeight.insert({read, weight}); 182 | } 183 | } 184 | } 185 | } 186 | } 187 | } 188 | } 189 | 190 | 191 | //compute the CC of a node with its set of neighbors 192 | double getCC(unordered_set& neighbors, vector& vecNodes){ 193 | double pairs(0), clusteringCoef(1); 194 | uint totalPairs; 195 | for (auto&& neigh : neighbors){ // for each neighbor of the node 196 | for (auto&& neigh2 : vecNodes[neigh].neighbors){ // for each neighbor of a neighbor 197 | if (neighbors.count(neigh2)){ // if a neighbor of a neighbor is also a neighbor of the current node = pair of connected neighbors 198 | ++pairs; 199 | } 200 | } 201 | } 202 | totalPairs = neighbors.size() * (neighbors.size() - 1); 203 | if (totalPairs > 0){ 204 | clusteringCoef = pairs/totalPairs; 205 | } 206 | return clusteringCoef; 207 | } 208 | 209 | 210 | // the CC values are compared before and after the removal of a sub set of nodes 211 | // if the value is > 0: the removal impacted negatively the cliqueness 212 | // if the value == 0: no change 213 | // if the value < 0: the cliqueness is better without the nodes 214 | 215 | // todo: change = we should take the cluster's CC instead of the cutoff 216 | int getDeltaCC(set& toRemove, set& clust1, vector& vecNodes, double cutoff){ 217 | int deltaCC(0); 218 | unordered_set clust1Without; 219 | for (auto&& i : clust1){ 220 | if (not toRemove.count(i)){ 221 | clust1Without.insert(i); 222 | } 223 | } 224 | double CC1(getCC(clust1Without, vecNodes)); 225 | deltaCC = cutoff - CC1; 226 | return deltaCC; 227 | } 228 | 229 | 230 | // compute clustering coefficient and degree for each node 231 | // CC will be stored in ClCo and degrees in vector degrees, and sorted from higher to lower (with no duplicates) 232 | // the CC are stored by decreasing values in the vector as we will start with nodes of highest CC later in the code 233 | // the quantile values are computed before removing the duplicates of CC 234 | // the graph is not oriented so the degree of a node is the nb of edges going through this node 235 | void computeCCandDeg(vector& vecNodes, vector& ClCo, vector& degrees, float& lowerCC){ 236 | double clusteringCoef; 237 | unordered_set neighbors; 238 | // start by removing double occurrences in neighbors (a neighbor of a node that would be stored twice) 239 | for (uint n(0); n < vecNodes.size(); ++n){ 240 | vecNodes[n].neighbors = removeDuplicates(vecNodes[n].neighbors); 241 | vecNodes[n].degree = vecNodes[n].neighbors.size(); 242 | degrees.push_back(vecNodes[n].degree); 243 | } 244 | 245 | // fill vector of CC 246 | for (uint n(0); n < vecNodes.size(); ++n){ 247 | if (vecNodes[n].neighbors.size() > 1){ 248 | neighbors = {}; 249 | copy(vecNodes[n].neighbors.begin(), vecNodes[n].neighbors.end(), inserter(neighbors, neighbors.end())); 250 | clusteringCoef = getCC(neighbors, vecNodes); 251 | if (clusteringCoef != 0){ 252 | vecNodes[n].CC = clusteringCoef; 253 | ClCo.push_back(clusteringCoef); 254 | } 255 | } else { 256 | ClCo.push_back(1); 257 | } 258 | } 259 | 260 | // compute the quantiles of the CC and get the CC value that represents the first quantile 261 | lowerCC = quantileCC(ClCo, 1, 1000); 262 | 263 | // remove duplicated values of CC (so that we don't compute twice a cutoff value later) 264 | // and sort CC in vector by decreasing values 265 | ClCo = removeDuplicatesCC(ClCo); 266 | sort(ClCo.begin(), ClCo.end()); 267 | reverse(ClCo.begin(), ClCo.end()); 268 | } 269 | 270 | // sort nodes by decreasing degrees 271 | // update vecNodes (neighbours...) as index of nodes may have changed 272 | void sortVecNodes(vector& vecNodes, vector& nodesInOrderOfCC){ 273 | vector vecNodesCpy(vecNodes); 274 | sort(vecNodesCpy.begin(), vecNodesCpy.end()); // based on degrees then CC 275 | reverse(vecNodesCpy.begin(), vecNodesCpy.end()); 276 | unordered_map indexReadsAf; 277 | for (uint i(0); i < vecNodesCpy.size(); ++i){ 278 | nodesInOrderOfCC.push_back(i); 279 | } 280 | } 281 | 282 | 283 | 284 | // compute sets around seed nodes 285 | // seed nodes are those whose CC is above the current cutoff 286 | // they gather their direct neighbors in sets that are expected quasi cliques 287 | // nodes which degree is above the last quantile and CC is below the last quantile (even if it is above the current cutoff) are not seeds 288 | // the function is multi threaded with one thread by cutoff 289 | // each Nodes stores a vector of vector (.cluster) )of the size of the length of the list of cutoffs 290 | // in each vector of this vector, if the node is a seed, a number of set is stored (the same number is stored for nodes included in the set) 291 | // this way, at a given cutoff, a node can know if it is in different sets 292 | 293 | 294 | //todo : change this 295 | // finally we sort the nodes in decreasing order of CC 296 | void computePseudoCliques(vector& cutoffs, vector& vecNodes, uint nbThreads, vector& nodesInOrderOfCC, uint higherDegree, float lowerCC){ 297 | vector v; 298 | vector> vec(cutoffs.size()); 299 | // for each node, at each cutoff value we will store a vector that sums up the number of sets the node belongs to 300 | for (uint i(0); i < vecNodes.size(); ++i){ 301 | vecNodes[i].cluster = vec; 302 | } 303 | uint c(0); 304 | vector> temp(cutoffs.size()); // sets identifiers for each cutoff value 305 | #pragma omp parallel num_threads(nbThreads) 306 | { 307 | #pragma omp for 308 | for (c = 0; c < cutoffs.size(); ++c){ // descending cutoffs 309 | unordered_set s; 310 | double cutoff = cutoffs[c]; 311 | for (uint i(0); i < vecNodes.size(); ++i){ 312 | if (vecNodes[i].CC >= cutoff and not (vecNodes[i].degree >= higherDegree and vecNodes[i].CC <= lowerCC)){ // if the node is a seed 313 | vecNodes[i].cluster[c].push_back(i); // store a set identifier for this node 314 | s.insert(i); 315 | for (auto&& neigh : vecNodes[i].neighbors){ 316 | vecNodes[neigh].cluster[c].push_back(i); // also include direct neighbors in set 317 | s.insert(neigh); 318 | } 319 | } 320 | } 321 | temp[c] = s; 322 | } 323 | } 324 | //~ unordered_set s; 325 | //~ for (uint i(0); i < temp.size(); ++i){ // for each cutoff (first higher values) 326 | //~ for (auto&& n : temp[i]){ // for each set identifier 327 | //~ if (not s.count(n)){ 328 | //~ s.insert(n); 329 | //~ nodesInOrderOfCC.push_back(n); // nodes are ordered from belonging to a cluster of higher CC to lower 330 | //~ } 331 | //~ } 332 | //~ } 333 | } 334 | 335 | 336 | 337 | // generalized local CC computation for a set of nodes (instead of the direct neighboring of a given node) 338 | double computeUnionCC(set& unionC, vector& vecNodes){ 339 | double cardUnion(0); 340 | for (auto&& n : unionC){ 341 | for (auto&& neigh : vecNodes[n].neighbors){ 342 | if (unionC.count(neigh)){ 343 | ++cardUnion; 344 | } 345 | } 346 | } 347 | return cardUnion / ( unionC.size() * (unionC.size() - 1)); 348 | } 349 | 350 | 351 | 352 | // transfer nodes from a set to another cancelled set 353 | void transfer(uint tf, uint te, set& toFill, set& toEmpty, vector& vecNodes, vector>& clusters, uint ind){ 354 | vector vec; 355 | // remove the cancelled set identifier from the nodes of the set to cancel: we have to compute again the new list of sets without the cancelled one 356 | for (auto&& index : toEmpty){ 357 | vec = {}; 358 | for (auto && clust : vecNodes[index].cluster[ind]){ 359 | if (not (clust == te)){ 360 | vec.push_back(clust); 361 | } 362 | } 363 | vec.push_back(tf); 364 | vecNodes[index].cluster[ind] = removeDuplicates(vec); 365 | } 366 | // same operation for the nodes from the nodes in the set to keep 367 | for (auto&& index : toFill){ 368 | vec = {}; 369 | for (auto && clust : vecNodes[index].cluster[ind]){ 370 | if (not (clust == te)){ 371 | vec.push_back(clust); 372 | } 373 | } 374 | vecNodes[index].cluster[ind] = removeDuplicates(vec); 375 | } 376 | } 377 | 378 | 379 | // merge two sets: the smaller is added to the bigger 380 | void merge(uint i1, uint i2, set& clust1, set& clust2, vector>& clusters, vector& vecNodes, uint ind){ 381 | if (clust1.size() > clust2.size()){ // merge in clust1 382 | clusters[i1].insert(clusters[i2].begin(), clusters[i2].end()); 383 | transfer(i1, i2, clust1, clust2, vecNodes, clusters, ind); 384 | clusters[i2] = {}; 385 | } else { // merge in clust2 386 | clusters[i2].insert(clusters[i1].begin(), clusters[i1].end()); 387 | transfer(i2, i1, clust2, clust1, vecNodes, clusters, ind); 388 | clusters[i1] = {}; 389 | } 390 | } 391 | 392 | 393 | 394 | vector> assignNewClusters(set& clust, vector& vecNodes, double cutoff){ 395 | bool above(false); 396 | unordered_set visited; 397 | set nodesInConnexComp; 398 | vector> newClust; 399 | for (auto&& node : clust){ 400 | if (not visited.count(node)){ 401 | above = false; 402 | nodesInConnexComp = {}; 403 | DFS(node, vecNodes, visited, nodesInConnexComp, above, cutoff); 404 | if (above){ 405 | newClust.push_back(nodesInConnexComp); 406 | } 407 | } 408 | } 409 | return newClust; 410 | } 411 | 412 | 413 | // when a set is split, update the nodes' clusters index values (remove the split set index) 414 | void removeSplittedElements(uint index, vector>& clusters, set& interC){ 415 | set clust; 416 | for (auto && elt : clusters[index]){ 417 | if (not interC.count(elt)){ 418 | clust.insert(elt); 419 | } 420 | } 421 | clusters[index] = clust; 422 | } 423 | 424 | 425 | 426 | //compute each cut for each set: the number of edges that connects nodes of the set that are not in the intersection to nodes that are in the intersection 427 | // weights are used to compute the cuts 428 | void getCutsPairSets(vector& vecNodes, set& interC, double& cut1, double& cut2, set& clust1, set& clust2){ 429 | for (auto&& node : interC){ 430 | for (auto&& neigh : vecNodes[node].neighbors){ 431 | if (clust1.count(neigh) and (not interC.count(neigh))){ 432 | cut1 += vecNodes[node].neighbToWeight[vecNodes[neigh].index]; 433 | } 434 | if (clust2.count(neigh) and (not interC.count(neigh))){ 435 | cut2 += vecNodes[node].neighbToWeight[vecNodes[neigh].index]; 436 | } 437 | } 438 | } 439 | } 440 | 441 | 442 | // in case of split and ex aequo of the two cuts 443 | // we compute the delta CC (with and without the nodes of the intersection) for each sets, and nodes are let in the set with the smaller deltaCC 444 | void splitExAequo(uint i1, uint i2, set& clust1, set& clust2, vector>& clusters, vector& vecNodes, set& interC, uint cutoff, uint ind, double& cut1, double& cut2, double& cut){ 445 | int deltaCC1(getDeltaCC(interC, clust1, vecNodes, cutoff)); 446 | int deltaCC2(getDeltaCC(interC, clust2, vecNodes, cutoff)); 447 | if (deltaCC1 <= deltaCC2){ // keep the intersection in clust1 448 | transfer(i1, i2, clust1, interC, vecNodes, clusters, ind); 449 | removeSplittedElements(i2, clusters, interC); 450 | cut = cut2; 451 | } else { // keep the intersection in clust2 452 | transfer(i2, i1, clust2, interC, vecNodes, clusters, ind); 453 | removeSplittedElements(i1, clusters, interC); 454 | cut = cut1; 455 | } 456 | } 457 | 458 | 459 | 460 | // split a set and keep the node of the intersection in the other set 461 | // when a set is split, it can happen that it becomes disconnected 462 | // we check if that happens, if so we build new clusters with the new connected subgraphs 463 | void splitProcedure(uint i1, uint i2, set& clust1, set& clust2, vector>& clusters, vector& vecNodes, set& interC, uint cutoff, uint ind, double& cut2, double& cut, vector>& newClust){ 464 | bool more(findArticulPoint(clust2, vecNodes, interC)); // to avoid doing too much computation, if at least one articulation point is found we do a DFS afterwards 465 | transfer(i1, i2, clust1, interC, vecNodes, clusters, ind); 466 | removeSplittedElements(i2, clusters, interC); 467 | cut = cut2; // todo * 2 ? 468 | if (more){ // if there was an articulation point in the intersection, it is likely that the split set will be disconnected (if not we are sure it stay connected) 469 | newClust = assignNewClusters(clust2, vecNodes, cutoff); // do a DFS to find 1 or more connected components 470 | if (newClust.size() > 1){ // if there are several connected components do new clusters 471 | for (uint i(0); i < newClust.size(); ++i){ 472 | clusters.push_back(newClust[i]); 473 | for (auto&& nodes: newClust[i]){ 474 | vecNodes[nodes].cluster[ind].push_back(clusters.size() - 1); 475 | } 476 | transfer(clusters.size() - 1, i2, newClust[i], clust2, vecNodes, clusters, ind); 477 | } 478 | clusters[i2].clear(); 479 | } 480 | } 481 | } 482 | 483 | 484 | 485 | // in case sets are not merged, one of the two sets of the pair is split gives its nodes belonging to the intersection to the other set 486 | // the set that has the most edges connected to nodes of the intersection keeps them, in order to minimize the cut 487 | double splitClust(uint i1, uint i2, set& clust1, set& clust2, vector>& clusters, vector& vecNodes, set& interC, uint cutoff, uint ind){ 488 | double cut1(0), cut2(0), cut(0); 489 | // compute each cut for each set in the pair 490 | getCutsPairSets(vecNodes, interC, cut1, cut2, clust1, clust2); 491 | 492 | if (clust1.size() == interC.size()){ // if set 1 is exactly the intersection: separate the intersection from set 2 493 | transfer(i1, i2, clust1, interC, vecNodes, clusters, ind); 494 | removeSplittedElements(i2, clusters, interC); 495 | cut = cut2; 496 | } else if (clust2.size() == interC.size()){ // if set 2 is exactly the intersection: separate the intersection from set 1 497 | transfer(i2, i1, clust2, interC, vecNodes, clusters, ind); 498 | removeSplittedElements(i1, clusters, interC); 499 | cut = cut1; 500 | } else { 501 | unordered_set neighbors; 502 | vector> newClust; 503 | if (cut1 == cut2){ 504 | // in case of ex aequo choose using the delta CC 505 | splitExAequo(i1, i2, clust1, clust2, clusters, vecNodes, interC, cutoff, ind, cut1, cut2, cut); 506 | 507 | } else if (cut1 > cut2){ // split clust 2 508 | splitProcedure(i1, i2, clust1, clust2, clusters, vecNodes, interC, cutoff, ind, cut2, cut, newClust); 509 | } else { 510 | // split clust1 511 | splitProcedure(i2, i1, clust2, clust1, clusters, vecNodes, interC, cutoff, ind, cut1, cut, newClust); 512 | } 513 | } 514 | return cut; 515 | } 516 | 517 | 518 | 519 | // computation of the cut 520 | // any edge that links a node in a cluster to another node which is not in the same cluster increases the cut 521 | // any node (singleton) which is is no cluster increases the cut by its number of edge 522 | double getCut(vector& vecNodes, vector>& clusters, uint ind){ 523 | double cut = 0; 524 | vector seen(vecNodes.size(), 0); 525 | for (uint i(0); i < vecNodes.size(); ++i){ 526 | if (vecNodes[i].cluster[ind].empty() and (not vecNodes[i].neighbors.empty())){ 527 | for (auto&& ne : vecNodes[i].neighbors){ 528 | cut += vecNodes[i].neighbToWeight[vecNodes[ne].index]; 529 | } 530 | } else { 531 | if (not vecNodes[i].cluster[ind].empty()){ 532 | for (auto&& ne : vecNodes[i].neighbors){ 533 | if (not (clusters[vecNodes[i].cluster[ind][0]].count(ne))){ 534 | cut += vecNodes[i].neighbToWeight[vecNodes[ne].index]; 535 | } 536 | } 537 | } 538 | } 539 | } 540 | return cut; 541 | } 542 | 543 | 544 | 545 | // performs merges or splits according to the compared values of the cutoff/generalized CC 546 | // also get a temporary cut value 547 | void mergeOrSplitProcedures(double cutoff, vector& vecNodes, vector>& clusters, uint ind, double prevCut, vector& nodesInOrderOfCC, set& clust1, set& clust2, set& unionC, set& interC, uint& i1, uint& i2, double& cut, uint i){ 548 | // comparison of clusters by pairs: decisions are taken first for the sets associated with the higher CCs 549 | double unionCC; 550 | i1 = vecNodes[i].cluster[ind][0]; // sets are sorted by decreasing seed's CC value, so we compare the two first 551 | i2 = vecNodes[i].cluster[ind][1]; // that are associated with the two higher CC 552 | clust1 = clusters[i1]; 553 | clust2 = clusters[i2]; 554 | interC = {}; 555 | set_intersection(clust1.begin(), clust1.end(), clust2.begin(), clust2.end(), inserter(interC, interC.begin())); // intersection of the two sets 556 | if (interC.size() == clust1.size() and clust1.size() == clust2.size()){ // clust1 and clust2 are the same 557 | transfer(i1, i2, clust1, interC, vecNodes, clusters, ind); // keep only one and cancel the other 558 | clusters[i2] = {}; 559 | } else { 560 | unionC = {}; 561 | set_union(clust1.begin(), clust1.end(), clust2.begin(), clust2.end(), inserter(unionC, unionC.begin())); // union of the two sets 562 | unionCC = computeUnionCC(unionC, vecNodes); // get CC generalized to the union of nodes 563 | if (unionCC >= cutoff){ // merge operation 564 | merge(i1, i2, clust1, clust2, clusters, vecNodes, ind); 565 | } else { // split operation 566 | cut += splitClust(i1, i2, clust1, clust2, clusters, vecNodes, interC, cutoff, ind); 567 | } 568 | } 569 | } 570 | 571 | 572 | 573 | // for a given cutoff, from the original set of sets of nodes creates from seeds, refine those sets (by merging/splitting strategies) to obtain clusters 574 | // compute the cut associated to those operations 575 | double computeClustersAndCut(double cutoff, vector& vecNodes, vector>& clusters, uint ind, double prevCut, vector& nodesInOrderOfCC){ 576 | double cut(0), sCut(0); 577 | set clust1, clust2, unionC, interC; 578 | uint i1, i2; 579 | // compute list of current sets to be refined in vector clusters 580 | for (uint n(0); n < vecNodes.size(); ++n){ 581 | if (vecNodes[n].cluster[ind].size() > 0){ 582 | for (auto&& c : vecNodes[n].cluster[ind]){ 583 | clusters[c].insert(n); // node at index n is in cluster c 584 | } 585 | } 586 | } 587 | 588 | // for each node (by decreasing CC value) 589 | for (auto&& i : nodesInOrderOfCC){ 590 | cut = 0; 591 | if (vecNodes[i].cluster[ind].size() > 1){ // if node is in several clusters: choices to make to have only one cluster in the end 592 | while (vecNodes[i].cluster[ind].size() > 1){ 593 | mergeOrSplitProcedures(cutoff, vecNodes, clusters, ind, prevCut, nodesInOrderOfCC, clust1, clust2, unionC, interC, i1, i2, cut, i); 594 | } 595 | } else { 596 | sCut += cut; 597 | if (ind > 0 and sCut > prevCut){ 598 | // if the cut is already higher than a precedant cut = we don't reach the minimal cut for this cutoff so we can stop already 599 | return sCut; 600 | } 601 | } 602 | } // when this procedure stops, each node remains in only one set which is a cluster 603 | // computation of the cut 604 | cut = getCut(vecNodes, clusters, ind); 605 | return cut; 606 | } 607 | 608 | 609 | 610 | 611 | // compute a vector of Nodes specific for a given connected component: re-write indexes 612 | void getVecNodes(vector& vecNodes, vector& vecNodesGlobal, set& nodesInConnexComp){ 613 | uint ii(0); 614 | unordered_map indexReads; 615 | for (auto&& val: nodesInConnexComp){ 616 | vecNodes.push_back(vecNodesGlobal[val]); 617 | indexReads.insert({val, ii}); 618 | ++ii; 619 | } 620 | vector vec; 621 | for (uint i(0); i < vecNodes.size(); ++i){ 622 | vec = {}; 623 | for (auto&& n : vecNodes[i].neighbors){ 624 | vec.push_back(indexReads[n]); 625 | } 626 | vecNodes[i].neighbors = vec; 627 | } 628 | } 629 | 630 | 631 | 632 | 633 | 634 | 635 | bool findArticulPoint(set& cluster, vector& vecNodes, set& interC){ 636 | Graph graph(vecNodes.size()); 637 | unordered_set visited; 638 | for (auto&& i : cluster){ 639 | visited.insert(i); 640 | for (auto&& neigh : vecNodes[i].neighbors){ 641 | if ((not visited.count(neigh)) and cluster.count(neigh)){ 642 | graph.addEdge((int)i, neigh); 643 | } 644 | } 645 | } 646 | vector ap; // To store articulation points 647 | bool b(graph.APBool(ap, interC)); 648 | return b; 649 | } 650 | 651 | 652 | // disconnects articulations points from the total graph 653 | // each time an articulation point is removed (disconnected), the nb of connected comp. in the graph increases 654 | // it allows to fragment the graph in more connected components and isolate problematic nodes that are articulation points 655 | // articulation points are searched using a DFS then disconnected from the graph 656 | void preProcessGraph(vector& vecNodes, double cutoff=1.1){ 657 | Graph graph(vecNodes.size()); 658 | unordered_set visited; 659 | for (uint i(0); i < vecNodes.size(); ++i){ 660 | visited.insert(i); 661 | for (auto&& neigh : vecNodes[i].neighbors){ 662 | if (not visited.count(neigh)){ 663 | graph.addEdge((int)i, neigh); 664 | } 665 | } 666 | } 667 | vector vec; 668 | vector ap; // To store articulation points 669 | graph.AP(ap); // get articulation points via DFS 670 | // disconnect nodes: 671 | for (uint i = 0; i < vecNodes.size(); i++){ 672 | if (ap[i] == true and vecNodes[i].CC < cutoff){ 673 | for (auto&& j : vecNodes[i].neighbors){ 674 | vec = {}; 675 | for (auto&& jj : vecNodes[j].neighbors){ 676 | if (i != jj){ 677 | vec.push_back(jj); 678 | } 679 | } 680 | vecNodes[j].neighbors = vec; 681 | vecNodes[j].degree = vecNodes[j].neighbors.size(); 682 | } 683 | vecNodes[i].neighbors = {}; 684 | vecNodes[i].degree = 0; 685 | } 686 | } 687 | } 688 | 689 | 690 | 691 | void preProcessGraphQuantiles(vector& vecNodes, double cutoffCC, uint cutoffEdges){ 692 | vector vec; 693 | for (uint i = 0; i < vecNodes.size(); i++){ 694 | if (vecNodes[i].degree >= cutoffEdges and vecNodes[i].CC <= cutoffCC){ 695 | for (auto&& j : vecNodes[i].neighbors){ 696 | vec = {}; 697 | for (auto&& jj : vecNodes[j].neighbors){ 698 | if (i != jj){ 699 | vec.push_back(jj); 700 | } 701 | } 702 | vecNodes[j].neighbors = vec; 703 | vecNodes[j].degree = vecNodes[j].neighbors.size(); 704 | } 705 | vecNodes[i].neighbors = {}; 706 | vecNodes[i].degree = 0; 707 | } 708 | } 709 | } 710 | 711 | 712 | 713 | uint quantileEdges(vector°rees, uint no, uint q){ 714 | double e; 715 | e = degrees.size()*((double)no/q); 716 | return (uint)e; 717 | } 718 | 719 | 720 | double quantileCC(vector&CC, uint no, uint q){ 721 | double cc; 722 | cc = CC.size()*((float)no/q); 723 | return cc; 724 | } 725 | 726 | 727 | 728 | 729 | 730 | 731 | void parseArgs(int argc, char** argv, bool& approx, bool& preprocessing, bool& weighted, string& fileName, string& outFileName, uint& nbThreads, uint& granularity){ 732 | approx = false; preprocessing = false; weighted = false; 733 | outFileName = "final_g_clusters.txt"; fileName = ""; 734 | nbThreads = 2; 735 | int c; 736 | granularity = 10; 737 | while ((c = getopt (argc, argv, "f:o:t:i:pw")) != -1){ 738 | switch(c){ 739 | case 'o': 740 | outFileName=optarg; 741 | break; 742 | case 'f': 743 | fileName=optarg; 744 | break; 745 | case 't': 746 | nbThreads=stoi(optarg); 747 | break; 748 | case 'i': 749 | approx = true; 750 | granularity=stoi(optarg); 751 | break; 752 | case 'p': 753 | preprocessing = true; 754 | break; 755 | case 'w': 756 | weighted = true; 757 | break; 758 | } 759 | } 760 | } 761 | 762 | 763 | void printHelpCmd(bool help){ 764 | if (help){ 765 | cout << "Usage : ./CARNAC -f input_file (-o output_file -t nb_cores)" << endl; 766 | cout << "-f is mandatory" << endl << "-t gets the number of threads (default 2)" << endl; 767 | cout << "Output written in final_g_clusters.txt by default (-o to change output name)" << endl; 768 | } 769 | } 770 | 771 | 772 | // find connected components with a DFS, each is stored in vector nodesInConnexComp 773 | void findConnectedComponents(vector& vecNodesGlobal, vector>& nodesInConnexComp){ 774 | unordered_set visited; 775 | bool b(false); 776 | for (uint n(0); n < vecNodesGlobal.size(); ++n){ 777 | if (not (visited.count(n))){ 778 | set s; 779 | DFS(n, vecNodesGlobal, visited, s, b, 0); 780 | nodesInConnexComp.push_back(s); 781 | } 782 | } 783 | } 784 | 785 | 786 | 787 | // computes a list of cutoffs (stored in vecCC) 788 | // if -i option is not set, the list of cutoff is exactly the list of CC 789 | // else, for connected components with a lot of different CC (more than 100 distinct values), CC values are rounded according to a certain granularity and these rounded values are cutoffs 790 | // then the list of cutoffs is more restrained than the original list of CC and the space to explore is smaller 791 | void computeCutoffs(bool approx, vector& vecCC, vector& ClCo, uint granularity){ 792 | double prev(1.1), cutoffTrunc; 793 | uint value; 794 | if (approx){ 795 | prev = 1.1; 796 | if (ClCo.size() > 100){ 797 | value = granularity; 798 | } else { 799 | value = 0; 800 | } 801 | for (auto&& cutoff: ClCo){ 802 | if (value != 0){ 803 | cutoffTrunc = ceil(cutoff * value)/value; 804 | } else { 805 | cutoffTrunc = cutoff; 806 | } 807 | if (cutoffTrunc < prev){ 808 | prev = cutoffTrunc; 809 | vecCC.push_back(cutoffTrunc); 810 | } 811 | } 812 | } else { 813 | for (auto&& cutoff: ClCo){ 814 | vecCC.push_back(cutoff); 815 | } 816 | } 817 | 818 | } 819 | 820 | 821 | 822 | bool execute(int argc, char** argv){ 823 | bool printHelp(true); 824 | bool approx, preprocessing, weighted; 825 | string outFileName, fileName; 826 | uint nbThreads, granularity; 827 | // parsing command line 828 | parseArgs(argc, argv, approx, preprocessing, weighted, fileName, outFileName, nbThreads, granularity); 829 | preprocessing = true; approx = true; weighted = false; granularity = 10; 830 | if (not (fileName.empty())){ 831 | printHelp = false; 832 | cout << "Command line was: " ; 833 | for (int a(0); a < argc; ++a){ 834 | cout << argv[a] << " "; 835 | } 836 | cout << endl; 837 | ifstream refFile(fileName); 838 | vector vecNodesGlobal; 839 | cout << "Parsing infile..." << endl; 840 | // parse similarity information from infile 841 | parsingSRC(refFile, vecNodesGlobal, weighted); 842 | //////// for tests only //////// 843 | //~ ofstream outmpre("nodes_metrics_before_preproc.txt"); 844 | //~ vector ClCoTmp; 845 | //~ vector degreesTmp; 846 | //~ float f(0); 847 | //~ computeCCandDeg(vecNodesGlobal, ClCoTmp, degreesTmp, f); 848 | //~ for (auto&& node : vecNodesGlobal){ 849 | //~ outmpre << node.index << " " << node.CC << " " << node.neighbors.size() << endl; 850 | //~ } 851 | ofstream outmpost("clusters_metrics.txt"); 852 | outmpost << "clco size threshold mincut" << endl; 853 | //////// end //////// 854 | if (preprocessing){ 855 | // pre -processing by removing articulation points 856 | cout << "preprocessing of the graph" << endl; 857 | preProcessGraph(vecNodesGlobal); 858 | } 859 | vector> nodesInConnexComp; 860 | // decompose graph in connected components 861 | findConnectedComponents(vecNodesGlobal, nodesInConnexComp); 862 | cout << "Connected components: " << nodesInConnexComp.size() << endl; 863 | ofstream out(outFileName); 864 | ofstream outm("nodes_metrics.txt"); 865 | mutex mm; 866 | vector> finalClusters; 867 | vector vecNodes; 868 | vectorClCo, vecCC; 869 | vector degrees, nodesInOrderOfCC; 870 | double minCut(0), cccToKeep(1); 871 | vector> clustersToKeep; 872 | uint ccc(0), round(0), higherDegree; 873 | float lowerCC(0); 874 | // loop over each connected component 875 | for (uint c(0); c < nodesInConnexComp.size(); ++c){ 876 | cout << "Connected Component " << c << " size " << nodesInConnexComp[c].size() << endl; 877 | vecNodes = {}; 878 | 879 | // compute a vector of nodes for the given connected component 880 | getVecNodes(vecNodes, vecNodesGlobal, nodesInConnexComp[c]); 881 | //~ if (preprocessing){ //REMOVED 0311 882 | //~ // pre -processing by removing articulation points 883 | //~ preProcessGraph(vecNodes); 884 | //~ } 885 | ClCo = {}; 886 | // compute CC and degree for each node 887 | computeCCandDeg(vecNodes, ClCo, degrees, lowerCC); // sorted clustering coefficients 888 | // compute quantiles for degree distribution 889 | higherDegree = quantileEdges(degrees, 999, 1000); 890 | // write nodes metrics 891 | for (auto&& node : vecNodes){ 892 | outm << node.index << " " << node.CC << " " << node.neighbors.size() << endl; 893 | } 894 | vecCC = {}; nodesInOrderOfCC = {}; 895 | // compute a list of cutoffs to loop over 896 | computeCutoffs(approx, vecCC, ClCo,granularity); 897 | minCut = 0; ccc = 0; round = 0; 898 | clustersToKeep = {}; 899 | cout << "Computing pseudo cliques" << endl; 900 | // compute sets originated from seed nodes for each cutoff value 901 | computePseudoCliques(vecCC, vecNodes, nbThreads, nodesInOrderOfCC, higherDegree, lowerCC); // todo : check if we realise a partition 902 | sortVecNodes(vecNodes, nodesInOrderOfCC); 903 | cout << vecCC.size() << " clustering coefficients to check" << endl; 904 | bool compute(true); 905 | // one thread by cutoff 906 | if (nodesInConnexComp[c].size() > 2 ){ 907 | #pragma omp parallel num_threads(nbThreads) 908 | { 909 | #pragma omp for 910 | for (ccc = 0; ccc < vecCC.size(); ++ccc){ 911 | double cut, prevCut, cutoff(vecCC[ccc]); 912 | if (ccc != 0){ 913 | if (approx and cutoff == 0 and ccc == vecCC.size() - 1){ // in this case, using the higher cutoff we got cliques, so there is nothing to cut 914 | mm.lock(); 915 | cccToKeep = 1; 916 | compute = false; 917 | mm.unlock(); 918 | } 919 | if (cut > prevCut){ // we store non minimal cuts that permit to stop the computation in func computeClustersAndCut anytime an even higher cut is found 920 | prevCut = cut; 921 | } 922 | } 923 | if (compute){ 924 | vector vecNodesCpy = vecNodes; 925 | vector> clusters(vecNodesCpy.size()); 926 | vector nodesInOrderOfCCcpy = nodesInOrderOfCC; 927 | cout << "Computing clusters" << endl; 928 | // refine sets using Clustering coeffs to obtain clusters 929 | cut = computeClustersAndCut(cutoff, vecNodesCpy, clusters, ccc, prevCut, nodesInOrderOfCCcpy); 930 | mm.lock(); 931 | cout << round + 1 << "/" << vecCC.size() << " cutoff " << cutoff << " cut " << cut << endl; 932 | ++round; 933 | mm.unlock(); 934 | // keep the minimal cut and associated clusters: 935 | if (ccc == 0){ 936 | mm.lock(); 937 | minCut = cut; 938 | clustersToKeep = clusters; 939 | cccToKeep = cutoff; 940 | if (not weighted){ 941 | //~ if (minCut == 0 and cutoff == 1){ 942 | if (minCut == 0){ 943 | compute = false; 944 | } 945 | } 946 | mm.unlock(); 947 | } else { 948 | if (not weighted){ 949 | //~ if (cut < minCut and cut > 0){ 950 | if (cut < minCut){ 951 | mm.lock(); 952 | minCut = cut; 953 | clustersToKeep = clusters; 954 | cccToKeep = cutoff; 955 | mm.unlock(); 956 | if (cut == 0){ // we will not reach a lower cut 957 | mm.lock(); 958 | compute = false; 959 | mm.unlock(); 960 | } 961 | } 962 | } else { 963 | if (cut < minCut){ 964 | mm.lock(); 965 | minCut = cut; 966 | clustersToKeep = clusters; 967 | cccToKeep = cutoff; 968 | mm.unlock(); 969 | } 970 | } 971 | } 972 | } 973 | } 974 | } 975 | } 976 | else { // connected components of size <= 2 977 | vector> clusters(vecNodes.size()); 978 | for (uint n(0); n < vecNodes.size(); ++n){ 979 | for (auto&& c : vecNodes[n].cluster[ccc]){ 980 | clusters[c].insert(n); // node at index n is in cluster c 981 | } 982 | } 983 | clustersToKeep = clusters; 984 | } 985 | //////// for tests only //////// 986 | double clcoCluster(0); 987 | //////// end //////// 988 | // print clusters associated to the minimal cut over all cutoff values 989 | bool write(false); 990 | for (uint i(0); i < clustersToKeep.size(); ++i){ 991 | if (not clustersToKeep[i].empty()){ 992 | //////// for tests only //////// 993 | clcoCluster = computeUnionCC(clustersToKeep[i], vecNodes); 994 | if (clustersToKeep[i].size() > 2){ 995 | if (i > 0 ){ 996 | if (clustersToKeep[i] != clustersToKeep[i-1]){ 997 | outmpost << clcoCluster << " " << clustersToKeep[i].size() ; 998 | write = true; 999 | } 1000 | } else { 1001 | outmpost << clcoCluster << " " << clustersToKeep[i].size() ; 1002 | write = true; 1003 | } 1004 | } else { 1005 | if (i > 0 ){ 1006 | if (clustersToKeep[i] != clustersToKeep[i-1]){ 1007 | outmpost <<1 << " " << 1 ; 1008 | write = true; 1009 | } 1010 | } else { 1011 | outmpost <<1 << " " << 1 ; 1012 | write = true; 1013 | } 1014 | } 1015 | //////// end //////// 1016 | if (i > 0 ){ 1017 | if (clustersToKeep[i] != clustersToKeep[i-1]){ 1018 | for (auto&& n : clustersToKeep[i]){ 1019 | out << vecNodes[n].index << " " ; 1020 | } 1021 | out << endl; 1022 | } 1023 | } else { 1024 | for (auto&& n : clustersToKeep[i]){ 1025 | out << vecNodes[n].index << " " ; 1026 | } 1027 | out << endl; 1028 | } 1029 | if (write){ 1030 | outmpost << " " << cccToKeep << " " << minCut << endl; 1031 | write = false; 1032 | } 1033 | } 1034 | } 1035 | //~ cout << "Final cut: " << minCut << " CC threshold: " << cccToKeep<< " size: " << clustersToKeep.size() << endl; 1036 | } 1037 | cout << "Done." << endl; 1038 | } 1039 | return printHelp; 1040 | } 1041 | --------------------------------------------------------------------------------