├── 00.reads_filter ├── corr.err ├── readme.txt ├── 02.Correct.sh ├── 01.generate_read_list.pl ├── kmerfreq.log ├── corr.log └── kmerfreq.err ├── 04.genome_assessment ├── 01.CEGMA │ └── 01.cegma.sh ├── 02.BUSCO │ ├── log │ │ └── umd3.1.mode_genome.log │ ├── results │ │ ├── yak.mode_OGS.summary │ │ ├── btau461.mode_OGS.summary │ │ ├── yak.mode_genome.summary │ │ ├── umd31.mode_OGS.summary │ │ ├── umd31.mode_genome.summary │ │ ├── wisent.mode_genome.summary │ │ ├── btau461.mode_genome.summary │ │ └── wisent.mode_OGS.summary │ └── 01.busco.sh └── 03.FRCurve │ ├── 02.frc.sh │ ├── 01.align.sh │ └── results │ ├── oar31.298_FRC.txt │ ├── btau461.298_FRC.txt │ ├── yak.298_FRC.txt │ ├── sspace.genome2.98_FRC.txt │ ├── umd31.298_FRC.txt │ └── soap.genome2.98_FRC.txt ├── 05.synteny_analysis ├── circos_plot │ ├── 02.plot.sh │ ├── circos.conf │ └── 01.transfer.pl ├── 02.single2single.sh ├── 00.mkdb.sh ├── 01.alignment.sh └── 03.analysis.pl ├── 02.scaffold ├── 03.sspace.sh ├── filter.pl ├── 01.cut500.pl ├── readme.txt ├── sam2tab.pl ├── 02.prepare.pl ├── libraries.txt ├── standard_output.summaryfile.txt └── runSAM2TAB.sh ├── README.md ├── .gitignore ├── 03.gap_close ├── 03.gapcloser.pl ├── 02.runbowtie.pl ├── 04.collect.pl ├── 01.split_scaffold.pl ├── readme.txt ├── 0.soap.prepare.pl ├── get_reads.pl └── 02.runbowtie.pl.sh ├── 01.contig ├── 01.prepare.pl ├── readme.txt └── 01.prepare.pl.sh ├── CommandLine.md └── LICENSE /00.reads_filter/corr.err: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /04.genome_assessment/01.CEGMA/01.cegma.sh: -------------------------------------------------------------------------------- 1 | ~/software/cegma/CEGMA_v2.5/bin/cegma -T 30 -g wisent.fa -o 02.wisent 2 | -------------------------------------------------------------------------------- /05.synteny_analysis/circos_plot/02.plot.sh: 
-------------------------------------------------------------------------------- 1 | /home/share/user/user101/software/circos/circos-0.69/bin/circos -conf circos.conf 2 | -------------------------------------------------------------------------------- /02.scaffold/03.sspace.sh: -------------------------------------------------------------------------------- 1 | /home/share/user/user101/software/sspace/SSPACE-STANDARD-3.0_linux-x86_64/SSPACE_Standard_v3.0.pl -l libraries.txt -s pre.fa -T 32 2 | -------------------------------------------------------------------------------- /04.genome_assessment/02.BUSCO/log/umd3.1.mode_genome.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wk8910/assemble_pipeline/HEAD/04.genome_assessment/02.BUSCO/log/umd3.1.mode_genome.log -------------------------------------------------------------------------------- /00.reads_filter/readme.txt: -------------------------------------------------------------------------------- 1 | The following files were generated: 2 | 3 | corresponding {}.cor.pair_1.fq.gz {}.cor.single.fq.gz {}.cor.pair_2.fq.gz 4 | read.lst.QC.xls 5 | Wisent.freq.gz 6 | Wisent.freq.stat 7 | -------------------------------------------------------------------------------- /05.synteny_analysis/02.single2single.sh: -------------------------------------------------------------------------------- 1 | /home/share/user/user101/software/last/last-802-build/bin/maf-swap contig2nc.maf| /home/share/user/user101/software/last/last-802-build/bin/last-split > contig2nc.maf.swap.maf 2 | -------------------------------------------------------------------------------- /05.synteny_analysis/00.mkdb.sh: -------------------------------------------------------------------------------- 1 | # this reference fasta file use chromosomes 1-29 and X from UMD3.1 and chromosome Y from btau461 2 | /home/share/user/user101/software/last/last-802-build/bin/lastdb -uNEAR -cR11 nc_db nc_ref.fa 
3 | -------------------------------------------------------------------------------- /02.scaffold/filter.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | while(<>){ 6 | next if(/^\@/); 7 | /^\S+\s+(\S+)/; 8 | my $flag=$1; 9 | next if($flag>=256); 10 | print "$_"; 11 | } 12 | -------------------------------------------------------------------------------- /04.genome_assessment/03.FRCurve/02.frc.sh: -------------------------------------------------------------------------------- 1 | # FRC version 1.3.0 2 | /home/share/user/user101/software/FRCalign/FRC_align/bin/FRC --genome-size 2980000000 --pe-sam il230.bam --mp-sam il20000.bam --pe-max-insert 230 --mp-max-insert 25000 --out frc_curve 3 | -------------------------------------------------------------------------------- /00.reads_filter/02.Correct.sh: -------------------------------------------------------------------------------- 1 | export PATH=$PATH:/home/share/user/user101/software/soapdenovo/SOAPec_src_v2.02/bin/ 2 | KmerFreq_HA -k 23 -t 64 -p Wisent -l read.lst -L 100 >kmerfreq.log 2>kmerfreq.err 3 | Corrector_HA -k 23 -l 3 -r 50 -t 10 Wisent.freq.gz read.lst >corr.log 2>corr.err 4 | -------------------------------------------------------------------------------- /05.synteny_analysis/01.alignment.sh: -------------------------------------------------------------------------------- 1 | /home/share/user/user101/software/last/last-802-build/bin/lastal -P48 -m100 -E0.05 /home/share/user/user101/projects/wisent_me/03.last.wisent2cattle/nc_db contig10k.fa | /home/share/user/user101/software/last/last-761-build/bin/last-split -m1 > contig2nc.maf 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # assemble_pipeline 2 | Assembly and downstream analysis pipeline used in the wisent genome paper.
3 | 4 | This pipeline includes the following sections: 5 | 6 | - read filtering and genome assembly 7 | - genome assessment 8 | - synteny analysis with related species 9 | 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /blib/ 2 | /.build/ 3 | _build/ 4 | cover_db/ 5 | inc/ 6 | Build 7 | !Build/ 8 | Build.bat 9 | .last_cover_stats 10 | /Makefile 11 | /Makefile.old 12 | /MANIFEST.bak 13 | /META.yml 14 | /META.json 15 | /MYMETA.* 16 | nytprof.out 17 | /pm_to_blib 18 | *.o 19 | *.bs 20 | /_eumm/ 21 | -------------------------------------------------------------------------------- /00.reads_filter/01.generate_read_list.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | open O,"> read.lst"; 6 | my @fq1=; 7 | foreach my $fq1(@fq1){ 8 | my $fq2=$fq1; 9 | $fq2=~s/1.fq.gz/2.fq.gz/; 10 | $fq1=~/(\d+)bp/; 11 | next if($1>=2000); 12 | print O "$fq1\n$fq2\n"; 13 | } 14 | close O; 15 | -------------------------------------------------------------------------------- /03.gap_close/03.gapcloser.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | my @fa=; 6 | 7 | foreach my $fa(@fa){ 8 | $fa=~/^(.*)\/(\w+\.fa)/; 9 | my $dir=$1; 10 | $fa=$2; 11 | print "cd $dir ; cp /path/to/0.soap.prepare.pl .
; perl 0.soap.prepare.pl ; GapCloser -a $fa -b soap.config -o $fa.GC -p 31 -t 32 ; cd - \n"; 12 | } 13 | -------------------------------------------------------------------------------- /04.genome_assessment/02.BUSCO/results/yak.mode_OGS.summary: -------------------------------------------------------------------------------- 1 | #Summarized BUSCO benchmarking for file: yak.pep 2 | #BUSCO was run in mode: OGS 3 | 4 | Summarized benchmarks in BUSCO notation: 5 | C:97%[D:1.9%],F:1.7%,M:0.9%,n:3023 6 | 7 | 2939 Complete BUSCOs 8 | 2881 Complete and single-copy BUSCOs 9 | 58 Complete and duplicated BUSCOs 10 | 54 Fragmented BUSCOs 11 | 30 Missing BUSCOs 12 | 3023 Total BUSCO groups searched 13 | -------------------------------------------------------------------------------- /04.genome_assessment/02.BUSCO/results/btau461.mode_OGS.summary: -------------------------------------------------------------------------------- 1 | #Summarized BUSCO benchmarking for file: cattle.pep 2 | #BUSCO was run in mode: OGS 3 | 4 | Summarized benchmarks in BUSCO notation: 5 | C:85%[D:1.5%],F:2.9%,M:11%,n:3023 6 | 7 | 2577 Complete BUSCOs 8 | 2530 Complete and single-copy BUSCOs 9 | 47 Complete and duplicated BUSCOs 10 | 89 Fragmented BUSCOs 11 | 357 Missing BUSCOs 12 | 3023 Total BUSCO groups searched 13 | -------------------------------------------------------------------------------- /04.genome_assessment/02.BUSCO/results/yak.mode_genome.summary: -------------------------------------------------------------------------------- 1 | #Summarized BUSCO benchmarking for file: yak.fa 2 | #BUSCO was run in mode: genome 3 | 4 | Summarized benchmarks in BUSCO notation: 5 | C:82%[D:1.3%],F:12%,M:5.5%,n:3023 6 | 7 | 2479 Complete BUSCOs 8 | 2438 Complete and single-copy BUSCOs 9 | 41 Complete and duplicated BUSCOs 10 | 375 Fragmented BUSCOs 11 | 169 Missing BUSCOs 12 | 3023 Total BUSCO groups searched 13 | -------------------------------------------------------------------------------- 
/04.genome_assessment/02.BUSCO/results/umd31.mode_OGS.summary: -------------------------------------------------------------------------------- 1 | #Summarized BUSCO benchmarking for file: cattle.all.pep 2 | #BUSCO was run in mode: OGS 3 | 4 | Summarized benchmarks in BUSCO notation: 5 | C:97%[D:7.5%],F:1.6%,M:0.5%,n:3023 6 | 7 | 2957 Complete BUSCOs 8 | 2728 Complete and single-copy BUSCOs 9 | 229 Complete and duplicated BUSCOs 10 | 49 Fragmented BUSCOs 11 | 17 Missing BUSCOs 12 | 3023 Total BUSCO groups searched 13 | -------------------------------------------------------------------------------- /04.genome_assessment/02.BUSCO/results/umd31.mode_genome.summary: -------------------------------------------------------------------------------- 1 | #Summarized BUSCO benchmarking for file: umd31.fa 2 | #BUSCO was run in mode: genome 3 | 4 | Summarized benchmarks in BUSCO notation: 5 | C:81%[D:1.8%],F:12%,M:5.4%,n:3023 6 | 7 | 2471 Complete BUSCOs 8 | 2416 Complete and single-copy BUSCOs 9 | 55 Complete and duplicated BUSCOs 10 | 388 Fragmented BUSCOs 11 | 164 Missing BUSCOs 12 | 3023 Total BUSCO groups searched 13 | -------------------------------------------------------------------------------- /04.genome_assessment/02.BUSCO/results/wisent.mode_genome.summary: -------------------------------------------------------------------------------- 1 | #Summarized BUSCO benchmarking for file: wisent.fa 2 | #BUSCO was run in mode: genome 3 | 4 | Summarized benchmarks in BUSCO notation: 5 | C:85%[D:1.8%],F:9.6%,M:5.0%,n:3023 6 | 7 | 2577 Complete BUSCOs 8 | 2522 Complete and single-copy BUSCOs 9 | 55 Complete and duplicated BUSCOs 10 | 292 Fragmented BUSCOs 11 | 154 Missing BUSCOs 12 | 3023 Total BUSCO groups searched 13 | -------------------------------------------------------------------------------- /04.genome_assessment/02.BUSCO/results/btau461.mode_genome.summary: -------------------------------------------------------------------------------- 1 | #Summarized BUSCO 
benchmarking for file: btau461.fa 2 | #BUSCO was run in mode: genome 3 | 4 | Summarized benchmarks in BUSCO notation: 5 | C:78%[D:1.9%],F:12%,M:8.5%,n:3023 6 | 7 | 2380 Complete BUSCOs 8 | 2321 Complete and single-copy BUSCOs 9 | 59 Complete and duplicated BUSCOs 10 | 385 Fragmented BUSCOs 11 | 258 Missing BUSCOs 12 | 3023 Total BUSCO groups searched 13 | -------------------------------------------------------------------------------- /02.scaffold/01.cut500.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl 2 | use strict; 3 | use warnings; 4 | use Bio::SeqIO; 5 | 6 | my $file=shift || "wisent-contigs.fa"; # input FASTA from the command line; defaults to the file name that was previously hard-coded 7 | 8 | my $fa=Bio::SeqIO->new(-file=>$file,-format=>'fasta'); 9 | 10 | open O,"> pre.fa" or die "Cannot create pre.fa!\n"; 11 | while(my $seq=$fa->next_seq){ 12 | my $id=$seq->id; 13 | my $seq=$seq->seq; 14 | my $len=length($seq); 15 | next if($len<500); 16 | print O ">$id\n$seq\n"; 17 | } 18 | close O; 19 | -------------------------------------------------------------------------------- /04.genome_assessment/03.FRCurve/01.align.sh: -------------------------------------------------------------------------------- 1 | # version of samtools: 1.3.1-34-g26e1ea5-dirty 2 | # Using htslib 1.3.1-42-gb6aa0e6 3 | /home/share/user/user101/software/bwa/bwa-0.7.8/bwa mem -t 32 umd31.fa il20000.1.fq.gz il20000.2.fq.gz | /home/share/user/user101/bin/samtools sort -O bam -T ./ -o il20000.bam 4 | /home/share/user/user101/software/bwa/bwa-0.7.8/bwa mem -t 32 umd31.fa il230.1.fq.gz il230.2.fq.gz | /home/share/user/user101/bin/samtools sort -O bam -T ./ -o il230.bam - 5 | -------------------------------------------------------------------------------- /03.gap_close/02.runbowtie.pl: -------------------------------------------------------------------------------- 1 | #!
/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | my @fq1=; 6 | 7 | open O,"> $0.sh"; 8 | foreach my $fq1(@fq1){ 9 | my $fq2=$fq1; 10 | $fq2=~s/1.fq.gz/2.fq.gz/; 11 | $fq1=~/\/(.*)\.1\.fq\.gz/; 12 | my $lib=$1; 13 | $lib=~/(\d+)bp/; 14 | my $insert=$1; 15 | print O "bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 $fq1 -2 $fq2 | perl get_reads.pl $insert $lib\n"; 16 | } 17 | close O; 18 | -------------------------------------------------------------------------------- /03.gap_close/04.collect.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use Bio::SeqIO; 5 | 6 | my @GC=; 7 | 8 | open(O,"> wisent.gc.fa"); 9 | foreach my $file(@GC){ 10 | my $fa=Bio::SeqIO->new(-file=>$file,-format=>'fasta'); 11 | 12 | while(my $seq=$fa->next_seq){ 13 | my $id=$seq->id; 14 | my $seq=$seq->seq; 15 | my $len=length($seq); 16 | print O ">$id\tsize:$len\n$seq\n"; 17 | } 18 | } 19 | close O; 20 | -------------------------------------------------------------------------------- /04.genome_assessment/02.BUSCO/results/wisent.mode_OGS.summary: -------------------------------------------------------------------------------- 1 | #Summarized BUSCO benchmarking for file: ../08.finalAnnotation.combine_orthomcl_busco/wisent.cds.pep 2 | #BUSCO was run in mode: OGS 3 | 4 | Summarized benchmarks in BUSCO notation: 5 | C:96%[D:2.5%],F:2.2%,M:1.0%,n:3023 6 | 7 | 2924 Complete BUSCOs 8 | 2846 Complete and single-copy BUSCOs 9 | 78 Complete and duplicated BUSCOs 10 | 67 Fragmented BUSCOs 11 | 32 Missing BUSCOs 12 | 3023 Total BUSCO groups searched 13 | -------------------------------------------------------------------------------- /00.reads_filter/kmerfreq.log: -------------------------------------------------------------------------------- 1 | Program start.. 
2 | Program: KmerFreq_HA 3 | Version: v2.02 4 | Author: BGI-ShenZhen 5 | CompileDate: Apr 15 2014 time: 15:19:53 6 | Current time: Sun Apr 20 22:30:05 2014 7 | Command line: KmerFreq_HA -k 23 -t 64 -p Wisent -l read.lst -L 100 8 | Start to construct kmer-freq table... 9 | 64 threads created! 10 | Waiting for threads to exit... 11 | All threads finished! 12 | Used time: 217.433 minutes. 13 | Finish kmer-freq table construction! 14 | Used time: 444.367mins. 15 | 16 | Start to analysis the result... 17 | 18 | All done! Used time: 444.367mins. 19 | Thank you! 20 | 21 | -------------------------------------------------------------------------------- /04.genome_assessment/03.FRCurve/results/oar31.298_FRC.txt: -------------------------------------------------------------------------------- 1 | 0 0 2 | 15809.6 0 3 | 31619.2 0 4 | 47428.9 0 5 | 63238.5 9.24876 6 | 79048.1 9.24876 7 | 94857.7 9.24876 8 | 110667 9.24876 9 | 126477 17.6043 10 | 142287 17.6043 11 | 158096 25.1305 12 | 173906 25.1305 13 | 189715 25.1305 14 | 205525 29.6754 15 | 221335 33.6773 16 | 237144 33.6773 17 | 252954 37.6045 18 | 268764 41.2254 19 | 284573 44.5837 20 | 300383 44.5837 21 | 316192 47.7625 22 | 332002 50.806 23 | 347812 53.7069 24 | 363621 56.4948 25 | 379431 59.2103 26 | 395241 61.8647 27 | 411050 64.2904 28 | 426860 66.6971 29 | 442669 71.1041 30 | 458479 73.1957 31 | 474289 77.3136 32 | 490098 79.0309 33 | 505908 82.417 34 | 521718 83.9394 35 | 537527 86.8291 36 | -------------------------------------------------------------------------------- /04.genome_assessment/03.FRCurve/results/btau461.298_FRC.txt: -------------------------------------------------------------------------------- 1 | 0 0 2 | 6440.51 0 3 | 12881 0 4 | 19321.5 5.41706 5 | 25762 10.181 6 | 32202.5 10.181 7 | 38643.1 14.4375 8 | 45083.6 14.4375 9 | 51524.1 18.6405 10 | 57964.6 18.6405 11 | 64405.1 22.7952 12 | 70845.6 26.9066 13 | 77286.1 26.9066 14 | 83726.6 30.8276 15 | 90167.1 34.6205 16 | 96607.6 38.3114 17 | 103048 
38.3114 18 | 109489 41.9525 19 | 115929 45.5089 20 | 122370 48.4839 21 | 128810 51.3403 22 | 135251 54.1749 23 | 141691 57.0009 24 | 148132 59.7263 25 | 154572 62.334 26 | 161013 64.8937 27 | 167453 67.4335 28 | 173894 71.96 29 | 180334 74.136 30 | 186775 78.3678 31 | 193215 80.124 32 | 199656 83.6018 33 | 206096 85.228 34 | 212537 88.2497 35 | 218977 89.7033 36 | -------------------------------------------------------------------------------- /04.genome_assessment/03.FRCurve/results/yak.298_FRC.txt: -------------------------------------------------------------------------------- 1 | 0 0 2 | 8815.63 2.99107 3 | 17631.3 6.277 4 | 26446.9 9.40584 5 | 35262.5 12.6665 6 | 44078.1 15.7176 7 | 52893.8 18.9257 8 | 61709.4 21.8611 9 | 70525 24.9162 10 | 79340.7 28.024 11 | 88156.3 30.8808 12 | 96971.9 34.1155 13 | 105788 37.2756 14 | 114603 40.5379 15 | 123419 43.1385 16 | 132234 46.1304 17 | 141050 49.3179 18 | 149866 52.2563 19 | 158681 55.3722 20 | 167497 58.7233 21 | 176313 61.8529 22 | 185128 65.0015 23 | 193944 67.8842 24 | 202759 70.9305 25 | 211575 73.8024 26 | 220391 76.7831 27 | 229206 79.6054 28 | 238022 82.3394 29 | 246838 84.7599 30 | 255653 86.5144 31 | 264469 87.6834 32 | 273284 88.5544 33 | 282100 88.8696 34 | 290916 89.0139 35 | 299731 89.1532 36 | -------------------------------------------------------------------------------- /04.genome_assessment/03.FRCurve/results/sspace.genome2.98_FRC.txt: -------------------------------------------------------------------------------- 1 | 0 0 2 | 7261.16 3.43317 3 | 14522.3 6.64865 4 | 21783.5 10.0811 5 | 29044.6 12.8541 6 | 36305.8 15.7667 7 | 43567 19.3295 8 | 50828.1 22.3778 9 | 58089.3 25.6905 10 | 65350.4 28.685 11 | 72611.6 31.3459 12 | 79872.8 34.4557 13 | 87133.9 37.5697 14 | 94395.1 40.8418 15 | 101656 43.7569 16 | 108917 46.6131 17 | 116179 48.9811 18 | 123440 51.8923 19 | 130701 55.0867 20 | 137962 58.1472 21 | 145223 61.0262 22 | 152484 63.8845 23 | 159745 66.7941 24 | 167007 69.6318 25 | 174268 71.9989 
26 | 181529 74.7836 27 | 188790 77.7445 28 | 196051 79.7727 29 | 203312 81.2229 30 | 210574 82.4345 31 | 217835 83.5841 32 | 225096 84.5866 33 | 232357 85.2218 34 | 239618 85.7811 35 | 246879 86.4415 36 | -------------------------------------------------------------------------------- /04.genome_assessment/03.FRCurve/results/umd31.298_FRC.txt: -------------------------------------------------------------------------------- 1 | 0 0 2 | 4224.22 0 3 | 8448.44 5.31332 4 | 12672.7 10.3074 5 | 16896.9 14.9068 6 | 21121.1 14.9068 7 | 25345.3 18.9816 8 | 29569.5 23.0484 9 | 33793.8 27.1031 10 | 38018 27.1031 11 | 42242.2 31.1118 12 | 46466.4 31.1118 13 | 50690.6 34.9167 14 | 54914.9 38.6965 15 | 59139.1 42.2975 16 | 63363.3 45.8448 17 | 67587.5 45.8448 18 | 71811.7 49.3449 19 | 76036 52.4041 20 | 80260.2 55.2664 21 | 84484.4 55.2664 22 | 88708.6 58.107 23 | 92932.8 60.9338 24 | 97157 63.6763 25 | 101381 66.1984 26 | 105605 68.6159 27 | 109830 71.0185 28 | 114054 75.383 29 | 118278 79.5492 30 | 122502 81.3119 31 | 126727 83.0462 32 | 130951 86.3287 33 | 135175 89.4081 34 | 139399 89.5215 35 | 143623 89.5963 36 | 147848 89.6115 37 | -------------------------------------------------------------------------------- /04.genome_assessment/03.FRCurve/results/soap.genome2.98_FRC.txt: -------------------------------------------------------------------------------- 1 | 0 0 2 | 25061.2 11.1075 3 | 50122.4 22.2063 4 | 75183.6 33.4734 5 | 100245 44.3263 6 | 125306 55.1672 7 | 150367 66.5453 8 | 175428 77.4488 9 | 200490 88.6675 10 | 225551 97.0267 11 | 250612 99.4427 12 | 275673 100.572 13 | 300734 101.026 14 | 325796 101.313 15 | 350857 101.517 16 | 375918 101.686 17 | 400979 101.848 18 | 426040 101.992 19 | 451102 102.123 20 | 476163 102.252 21 | 501224 102.365 22 | 526285 102.545 23 | 551346 102.658 24 | 576408 102.775 25 | 601469 102.931 26 | 626530 103.139 27 | 651591 103.337 28 | 676652 103.516 29 | 701713 103.685 30 | 726775 103.84 31 | 751836 103.999 32 | 776897 104.159 33 | 
801958 104.325 34 | 827019 104.488 35 | 852081 104.656 36 | 877142 104.829 37 | 902203 104.865 38 | -------------------------------------------------------------------------------- /03.gap_close/01.split_scaffold.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/perl 2 | use strict; 3 | use warnings; 4 | use Bio::SeqIO; 5 | 6 | my $file = "wisent.fa"; 7 | my $out_dir = "scaffolds"; 8 | 9 | my $fa=Bio::SeqIO->new(-file=>$file,-format=>'fasta'); 10 | 11 | my %fh; 12 | 13 | while(my $seq=$fa->next_seq){ 14 | my $id=$seq->id; 15 | my $seq=$seq->seq; 16 | $id=~/(scaffold\d+)\|size(\d+)/ or die "Unexpected sequence id: $id\n"; # without this check a non-matching id would silently reuse $1/$2 from the previous record 17 | my $name=$1; 18 | my $len=$2; 19 | my $dir=$name; 20 | 21 | $dir="rest" if($len<500000); 22 | 23 | `mkdir -p $out_dir/$dir` if(!-e "$out_dir/$dir"); # -p also creates $out_dir itself on the first run 24 | if(!exists $fh{$dir}){ 25 | open($fh{$dir},"> $out_dir/$dir/$dir.fa") or die "Cannot create $out_dir/$dir/$dir.fa: $!\n"; 26 | } 27 | 28 | $fh{$dir}->print(">$name\n$seq\n"); 29 | } 30 | 31 | foreach my $name(keys %fh){ 32 | close $fh{$name}; 33 | } 34 | -------------------------------------------------------------------------------- /02.scaffold/readme.txt: -------------------------------------------------------------------------------- 1 | ### step1 ### 2 | Run script "01.cut500.pl" to filter contig with a length less than 500bp. 3 | 4 | ### step2 ### 5 | Config and run script "02.prepare.pl" to generate "libraries.txt" and "runSAM2TAB.sh". 6 | 7 | ### step3 ### 8 | Run script "runSAM2TAB.sh". 9 | ps: It should be noted that this require bowtie2 in PATH. We had adopted version 2.2.2 here. 10 | ps: The script "filter.pl" and "sam2tab.pl" should be found in the current directory and should be given executable permissions. 11 | 12 | ### step4 ### 13 | Run script "03.sspace.sh". A directory named "standard_output" will be generated and the final result named "standard_output.final.scaffolds.fasta" will be found in this directory. 14 | We have copied the log file "standard_output.summaryfile.txt" to this directory.
15 | -------------------------------------------------------------------------------- /02.scaffold/sam2tab.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | while(my $line1=<>){ 6 | chomp $line1; 7 | my $line2 = <>; 8 | chomp $line2; 9 | my @arr1 = split(/\s+/, $line1); 10 | my @arr2 = split(/\s+/, $line2); 11 | next if( $arr1[2] eq "*" || $arr2[2] eq "*"); 12 | next if( $arr1[2] eq $arr2[2] ); 13 | my ($tig1,$start1,$end1, $tig2,$start2,$end2) = ($arr1[2], $arr1[3], ($arr1[3]+length($arr1[9])), $arr2[2],$arr2[3],($arr2[3]+length($arr2[9]))); 14 | if ($arr1[1] & 16) { 15 | $end1 = $start1; 16 | $start1 = $start1 + length($arr1[9]); 17 | } 18 | if ($arr2[1] & 16) { 19 | $end2 = $start2; 20 | $start2 = $start2 + length($arr2[9]); 21 | } 22 | print "$tig1\t$start1\t$end1\t$tig2\t$start2\t$end2\n"; 23 | } 24 | -------------------------------------------------------------------------------- /04.genome_assessment/02.BUSCO/01.busco.sh: -------------------------------------------------------------------------------- 1 | export PATH=$PATH:/home/share/software/blast/ncbi-blast-2.2.28+/bin 2 | export PATH=$PATH:/home/share/user/user101/software/HMMER/hmmer-3.1b2-linux-intel-x86_64-build/bin 3 | export PATH=$PATH:/home/share/user/user101/software/augustus/augustus-3.0.3/bin 4 | export AUGUSTUS_CONFIG_PATH=/home/share/user/user101/software/augustus/augustus-3.0.3/config 5 | export PATH=/home/share/user/user101/software/python3/ActivePython-3.4.3.2-linux-x86_64-build/bin:$PATH 6 | python3 /home/share/user/user101/software/busco/BUSCO_v1.22/BUSCO_v1.22.py -c 32 -o wisent_genome_pep -in wisent.pep -l /home/share/user/user101/software/busco/vertebrata -m OGS 7 | python3 /home/share/user/user101/software/busco/BUSCO_v1.22/BUSCO_v1.22.py -c 32 -o wisent_genome_long_SPhuman -sp human -in wisent.fa -l /home/share/user/user101/software/busco/vertebrata -m genome --long 2>&1 | tee 
run.sh.sp_human.log 8 | -------------------------------------------------------------------------------- /03.gap_close/readme.txt: -------------------------------------------------------------------------------- 1 | We applied GapCloser (version 1.12) from the SOAPdenovo package to fill the gaps in the scaffolds. To make this more efficient, we split the reads by scaffold and ran GapCloser on each scaffold separately. 2 | ### step1 ### 3 | Run script "01.split_scaffold.pl" to split the scaffold file. 4 | ps: scaffolds shorter than 500 kb were combined into the "rest" file. 5 | 6 | ### step2 ### 7 | Run script "02.runbowtie.pl" to generate a new script to split reads. 8 | ps: note that the script "get_reads.pl" is very important for this step. Users have to configure it appropriately. 9 | ps: the file "head.txt" is the sequence dictionary of the genome, which can be generated with "samtools dict wisent.fa > head.txt". 10 | 11 | ### step3 ### 12 | Run script "03.gapcloser.pl" to generate a new script that executes GapCloser. 13 | ps: the path of each program must be configured appropriately.
14 | -------------------------------------------------------------------------------- /01.contig/01.prepare.pl: -------------------------------------------------------------------------------- 1 | use strict; 2 | use warnings; 3 | 4 | my @lib=; 5 | 6 | my %pe; 7 | my %mp; 8 | my $num=0; 9 | my $line0="export PATH=\$PATH:/home/share/user/user101/software/abyss/abyss-1.5.1-build/bin\nnohup abyss-pe -C contig j=64 np=64 k=75 name=wisent "; 10 | my @line1; 11 | my @line2; 12 | my $line3; 13 | foreach my $fq1(@lib){ 14 | $num++; 15 | my $name=""; 16 | my $fq2=$fq1; 17 | $fq2=~s/1.fq.gz/2.fq.gz/g; 18 | $fq1=~/reads\/lib\.(\d+)bp/; 19 | my $insert=$1; 20 | if(!$insert){ 21 | print "$fq1\t$fq2\n"; 22 | } 23 | next if($insert>=2000); 24 | if($insert>=2000){ 25 | $name="mp"."$num"; 26 | $mp{$name}="$fq1 $fq2"; 27 | push @line2,$name; 28 | $line3.=$name."='$fq1 $fq2 ' "; 29 | } 30 | else{ 31 | $name="pe"."$num"; 32 | $pe{$name}="$fq1 $fq2"; 33 | push @line1,$name; 34 | $line3.=$name."='$fq1 $fq2 ' "; 35 | } 36 | } 37 | 38 | my $line1="lib='".join " ",@line1,"'"; 39 | open O,"> $0.sh"; 40 | print O "$line0 $line1 $line3 2>&1 | tee abyss.err &\n"; 41 | close O; 42 | -------------------------------------------------------------------------------- /03.gap_close/0.soap.prepare.pl: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | my @fq1=<*.1.fq.gz>; 6 | 7 | open(O,"> soap.config"); 8 | print O "#maximal read length\nmax_rd_len=100\n"; 9 | foreach my $fq1(@fq1){ 10 | my $fq2=$fq1; 11 | $fq2=~s/1.fq.gz/2.fq.gz/; 12 | $fq1=~/(\d+)bp/; 13 | my $insert=$1; 14 | #next if($insert<2000); 15 | my $r=0; 16 | my $asm_flags=3; 17 | my $rank=1; 18 | my $cut_off=3; 19 | my $map_len=65; 20 | if($insert>=2000){ 21 | $r=1; 22 | $asm_flags=3; 23 | $cut_off=5; 24 | $map_len=32; 25 | if($insert==2000){ 26 | $rank=2; 27 | } 28 | elsif($insert==5000){ 29 | $rank=3; 30 | } 31 | elsif($insert==10000){ 32 | $rank=4; 33 | } 34 | elsif($insert==20000){ # numeric comparison; the former "=" assigned 20000 to $insert and was always true 35 | $rank=5; 36 | } 37 | } 38 | print O "[LIB] 39 | avg_ins=$insert 40 | reverse_seq=$r 41 | asm_flags=$asm_flags 42 | rank=$rank 43 | pair_num_cutoff=$cut_off 44 | map_len=$map_len 45 | "; 46 | if($insert>=2000){ 47 | print O "q1=$fq1 48 | q2=$fq2 49 | "; 50 | } 51 | else{ 52 | print O "q1=$fq1 53 | q2=$fq2 54 | "; 55 | } 56 | } 57 | close O; 58 | -------------------------------------------------------------------------------- /05.synteny_analysis/circos_plot/circos.conf: -------------------------------------------------------------------------------- 1 | karyotype = chromosome.txt 2 | 3 | 4 | 5 | 6 | default = 0r 7 | 8 | 9 | radius = 0.9r#circle size 10 | thickness = 60p#width 11 | fill = yes#color fill 12 | 13 | show_bands = yes#bands 14 | fill_bands = yes 15 | 16 | stroke_color = dgrey#border 17 | stroke_thickness = 2p#border width 18 | 19 | show_label = yes#add chromosome names 20 | label_font = default 21 | label_radius = 1r + 90p 22 | label_size = 30 23 | label_parallel = yes 24 | 25 | 26 | show_ticks = yes 27 | show_tick_labels = yes 28 | 29 | 30 | 31 | 32 | 33 | 34 | file = link.txt 35 | radius = 0.95r 36 | bezier_radius = 0r 37 | color = black_a4 38 | thickness = 0.5 39 | 40 | 41 | 42 | 43 | show_ticks = yes 44 | show_tick_labels = yes 45 | 46 | # 47 | #radius = 1r 48 | #color = black 49 | #thickness = 2p 50 | # 51 | #multiplier = 1e-6 52 | 53
| #format = %d 54 | 55 | # 56 | #spacing = 5u 57 | #size = 10p 58 | # 59 | 60 | # 61 | 62 | 63 | 64 | 65 | 66 | <> 67 | 68 | 69 | <> 70 | <> 71 | -------------------------------------------------------------------------------- /02.scaffold/02.prepare.pl: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | `mkdir tab` if(!-e "tab"); 6 | 7 | my @fq1=; 8 | 9 | my %hash; 10 | foreach my $fq1(@fq1){ 11 | my $fq2=$fq1; 12 | $fq2=~s/1.fq.gz/2.fq.gz/g; 13 | $fq1=~/(\d+)bp/; 14 | my $insert=$1; 15 | #next if($insert<2000 && $fq1!~/pair_1.fq.gz/); 16 | $hash{$insert}{$fq1}=$fq2; 17 | } 18 | 19 | my $num=1; 20 | open(L,"> libraries.txt"); 21 | open(R,"> runSAM2TAB.sh"); 22 | my $file_no=0; 23 | foreach my $insert(sort {$a<=>$b} keys %hash){ 24 | $num++ if($insert>=2000); 25 | #next if($insert<10000); 26 | my $lib="lib".$num; 27 | my $var=0.25; 28 | my $ori="FR"; 29 | if($insert>=2000){ 30 | $var=0.5; 31 | $ori="RF"; 32 | } 33 | foreach my $fq1(sort keys %{$hash{$insert}}){ 34 | $file_no++; 35 | my $fq2=$hash{$insert}{$fq1}; 36 | #print L "$lib\tbowtie\t$fq1\t$fq2\t$insert\t$var\t$ori\n"; 37 | print L "$lib\tTAB\ttab/file.$file_no.tab\t$insert\t$var\t$ori\n"; 38 | #print R "bwa mem -t 32 pre.fa $fq1 $fq2 | ./filter.pl | ./sam2tab.pl > tab/file.$file_no.tab\n"; 39 | print R "bowtie2 -x pre.fa -1 $fq1 -2 $fq2 --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.$file_no.tab\n"; 40 | } 41 | } 42 | close L; 43 | close R; 44 | -------------------------------------------------------------------------------- /01.contig/readme.txt: -------------------------------------------------------------------------------- 1 | The following files were generated: 2 | 3 | coverage.hist pe42-3.dist pe44-3.hist pe46-6.dist.dot pe48-6.hist wisent-1.adj wisent-4.fa wisent-6.path wisent-contigs.fa 4 | pe30-3.dist pe42-3.hist pe44-6.dist.dot pe46-6.hist pe49-3.dist wisent-1.fa wisent-4.path1 
wisent-6.path.dot wisent-indel.fa 5 | pe30-3.hist pe42-6.dist.dot pe44-6.hist pe47-3.dist pe49-3.hist wisent-1.path wisent-4.path2 wisent-7.adj wisent-scaffolds.dot 6 | pe30-6.dist.dot pe42-6.hist pe45-3.dist pe47-3.hist pe49-6.dist.dot wisent-2.adj wisent-4.path3 wisent-7.fa wisent-scaffolds.fa 7 | pe30-6.hist pe43-3.dist pe45-3.hist pe47-6.dist.dot pe49-6.hist wisent-2.path wisent-5.adj wisent-7.path wisent-stats 8 | pe31-3.dist pe43-3.hist pe45-6.dist.dot pe47-6.hist pe9-3.dist wisent-3.adj wisent-5.fa wisent-8.dot wisent-stats.csv 9 | pe31-3.hist pe43-6.dist.dot pe45-6.hist pe48-3.dist pe9-3.hist wisent-3.dist wisent-5.path wisent-8.fa wisent-stats.md 10 | pe31-6.dist.dot pe43-6.hist pe46-3.dist pe48-3.hist pe9-6.dist.dot wisent-3.fa wisent-6.dot wisent-bubbles.fa wisent-stats.tab 11 | pe31-6.hist pe44-3.dist pe46-3.hist pe48-6.dist.dot pe9-6.hist wisent-4.adj wisent-6.fa wisent-contigs.dot wisent-unitigs.fa 12 | 13 | NOTE: the "wisent-contigs.fa" will be used for the next step. 14 | -------------------------------------------------------------------------------- /02.scaffold/libraries.txt: -------------------------------------------------------------------------------- 1 | lib1 TAB tab/file.1.tab 170 0.25 FR 2 | lib1 TAB tab/file.2.tab 200 0.25 FR 3 | lib1 TAB tab/file.3.tab 200 0.25 FR 4 | lib1 TAB tab/file.4.tab 500 0.25 FR 5 | lib1 TAB tab/file.5.tab 500 0.25 FR 6 | lib1 TAB tab/file.6.tab 500 0.25 FR 7 | lib1 TAB tab/file.7.tab 800 0.25 FR 8 | lib1 TAB tab/file.8.tab 800 0.25 FR 9 | lib1 TAB tab/file.9.tab 800 0.25 FR 10 | lib1 TAB tab/file.10.tab 800 0.25 FR 11 | lib1 TAB tab/file.11.tab 800 0.25 FR 12 | lib2 TAB tab/file.12.tab 2000 0.5 RF 13 | lib2 TAB tab/file.13.tab 2000 0.5 RF 14 | lib2 TAB tab/file.14.tab 2000 0.5 RF 15 | lib2 TAB tab/file.15.tab 2000 0.5 RF 16 | lib2 TAB tab/file.16.tab 2000 0.5 RF 17 | lib2 TAB tab/file.17.tab 2000 0.5 RF 18 | lib2 TAB tab/file.18.tab 2000 0.5 RF 19 | lib2 TAB tab/file.19.tab 2000 0.5 RF 20 | lib2 TAB 
tab/file.20.tab 2000 0.5 RF 21 | lib2 TAB tab/file.21.tab 2000 0.5 RF 22 | lib2 TAB tab/file.22.tab 2000 0.5 RF 23 | lib3 TAB tab/file.23.tab 5000 0.5 RF 24 | lib3 TAB tab/file.24.tab 5000 0.5 RF 25 | lib3 TAB tab/file.25.tab 5000 0.5 RF 26 | lib3 TAB tab/file.26.tab 5000 0.5 RF 27 | lib3 TAB tab/file.27.tab 5000 0.5 RF 28 | lib3 TAB tab/file.28.tab 5000 0.5 RF 29 | lib3 TAB tab/file.29.tab 5000 0.5 RF 30 | lib3 TAB tab/file.30.tab 5000 0.5 RF 31 | lib3 TAB tab/file.31.tab 5000 0.5 RF 32 | lib3 TAB tab/file.32.tab 5000 0.5 RF 33 | lib4 TAB tab/file.33.tab 10000 0.5 RF 34 | lib4 TAB tab/file.34.tab 10000 0.5 RF 35 | lib4 TAB tab/file.35.tab 10000 0.5 RF 36 | lib4 TAB tab/file.36.tab 10000 0.5 RF 37 | lib4 TAB tab/file.37.tab 10000 0.5 RF 38 | lib4 TAB tab/file.38.tab 10000 0.5 RF 39 | lib4 TAB tab/file.39.tab 10000 0.5 RF 40 | lib4 TAB tab/file.40.tab 10000 0.5 RF 41 | lib5 TAB tab/file.41.tab 20000 0.5 RF 42 | lib5 TAB tab/file.42.tab 20000 0.5 RF 43 | lib5 TAB tab/file.43.tab 20000 0.5 RF 44 | lib5 TAB tab/file.44.tab 20000 0.5 RF 45 | lib5 TAB tab/file.45.tab 20000 0.5 RF 46 | lib5 TAB tab/file.46.tab 20000 0.5 RF 47 | lib5 TAB tab/file.47.tab 20000 0.5 RF 48 | lib5 TAB tab/file.48.tab 20000 0.5 RF 49 | lib5 TAB tab/file.49.tab 20000 0.5 RF 50 | -------------------------------------------------------------------------------- /01.contig/01.prepare.pl.sh: -------------------------------------------------------------------------------- 1 | export PATH=$PATH:/home/share/user/user101/software/abyss/abyss-1.5.1-build/bin 2 | nohup abyss-pe -C attemp2 j=64 np=64 k=75 name=wisent lib='pe9 pe30 pe31 pe42 pe43 pe44 pe45 pe46 pe47 pe48 pe49 ' pe9='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.170bp_49.1.fq.gz /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.170bp_49.2.fq.gz ' pe30='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.200bp_29.1.fq.gz 
/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.200bp_29.2.fq.gz ' pe31='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.200bp_30.1.fq.gz /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.200bp_30.2.fq.gz ' pe42='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.500bp_41.1.fq.gz /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.500bp_41.2.fq.gz ' pe43='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.500bp_42.1.fq.gz /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.500bp_42.2.fq.gz ' pe44='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.500bp_43.1.fq.gz /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.500bp_43.2.fq.gz ' pe45='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_44.1.fq.gz /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_44.2.fq.gz ' pe46='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_45.1.fq.gz /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_45.2.fq.gz ' pe47='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_46.1.fq.gz /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_46.2.fq.gz ' pe48='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_47.1.fq.gz /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_47.2.fq.gz ' pe49='/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_48.1.fq.gz 
/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads/lib.800bp_48.2.fq.gz ' 2>&1 | tee abyss.err & 3 | -------------------------------------------------------------------------------- /03.gap_close/get_reads.pl: -------------------------------------------------------------------------------- 
#! /usr/bin/env perl
# Split a SAM stream of paired reads into per-scaffold FASTQ files.
#
# Usage: <aligner output> | get_reads.pl <insert_size> <library_id>
#
# Reference names and lengths come from the @SQ lines in head.txt. Every
# scaffold of at least $min_len bases gets its own directory under $outdir;
# all shorter ones share the directory "rest". SAM records are consumed two at
# a time (mate 1, then mate 2) and every retained pair is appended to
# <library_id>.1.fq.gz / <library_id>.2.fq.gz of the scaffold bin(s) it maps to.
use strict;
use warnings;
use FileHandle;

my $outdir="scaffolds";
# Scaffolds shorter than this many bases are pooled into the "rest" bin.
my $min_len=500000;

&make_dir($outdir);

my $insert=shift;
my $id=shift;

die "Usage: $0 <insert_size> <library_id>\n" if(!$insert || !$id);

# my $cigar="100M";
# if($insert>=2000){
#     $cigar="49M";
# }

print "$insert\t$id\n";

# bin name => { fq1 => handle, fq2 => handle }, each a pipe into gzip
my %fh;
open(I,"< head.txt") or die "Cannot open head.txt: $!\n";
while (<I>) {
    next unless(/SN:(\S+)\s+LN:(\d+)/);
    my ($chr,$len)=($1,$2);
    my $name;
    if($len<$min_len){
        $name="rest";
    }
    elsif($chr=~/(scaffold\d+)/){
        $name=$1;
    }
    else{
        warn "Skipping header entry $chr: no scaffold id in the name\n";
        next;
    }
    # "rest" is reached many times; open each bin only once.
    next if(exists $fh{$name});
    &make_dir("$outdir/$name");
    open($fh{$name}{fq1},"| gzip -c > $outdir/$name/$id.1.fq.gz") or die "Cannot start gzip for $outdir/$name/$id.1.fq.gz: $!\n";
    open($fh{$name}{fq2},"| gzip -c > $outdir/$name/$id.2.fq.gz") or die "Cannot start gzip for $outdir/$name/$id.2.fq.gz: $!\n";
}
close I;

my %unknown;   # bins already reported as having no open output
my $num=0;
print "start\n";
while (<>) {
    next if(/^\@/);
    my $line1=$_;
    my $line2=<>;
    if(!defined $line2){
        warn "Input ended after mate 1 of a pair; the unpaired record is ignored\n";
        last;
    }
    chomp $line1;
    chomp $line2;
    my @a=split(/\s+/,$line1);
    my @b=split(/\s+/,$line2);
    # Field 6 of a SAM record is the CIGAR string. A pair is dropped when both
    # CIGARs are identical and are either one plain match block or "*".
    my $cigar_left=$a[5];
    my $cigar_right=$b[5];
    if($cigar_left eq $cigar_right){
        next if($cigar_left =~/^\d+M$/ || $cigar_left eq "*");
    }
    my $name1=&scaffold_bin($a[1],$a[2]);
    my $name2=&scaffold_bin($b[1],$b[2]);
    $num++;
    print STDERR "Proceeding...$num...\r";

    # Reverse-complement one mate: mate 1 for inserts >= 2000, mate 2 otherwise.
    # NOTE(review): sequences are taken as printed in the SAM record and the
    # reverse-strand flag (0x10) is never consulted -- confirm this is intended.
    if($insert>=2000){
        $a[9]=~tr/ATCGatcg/TAGCtagc/;
        $a[9]=reverse($a[9]);
        $a[10]=reverse($a[10]);
    }
    else{
        $b[9]=~tr/ATCGatcg/TAGCtagc/;
        $b[9]=reverse($b[9]);
        $b[10]=reverse($b[10]);
    }

    # Write the pair once per distinct bin that either mate maps to.
    my %done;
    foreach my $name($name1,$name2){
        next if($name eq "NA" || $done{$name}++);
        if(!exists $fh{$name}){
            warn "No output opened for $name (not listed in head.txt); its reads are skipped\n" if(!$unknown{$name}++);
            next;
        }
        $fh{$name}{fq1}->print("@"."$a[0]/1\n$a[9]\n+\n$a[10]\n");
        $fh{$name}{fq2}->print("@"."$b[0]/2\n$b[9]\n+\n$b[10]\n");
    }
}
foreach my $name(keys %fh){
    close $fh{$name}{fq1} or warn "gzip failed for $outdir/$name/$id.1.fq.gz\n";
    close $fh{$name}{fq2} or warn "gzip failed for $outdir/$name/$id.2.fq.gz\n";
}

# Create a directory unless it already exists; die when that fails.
sub make_dir{
    my $dir=shift;
    return if(-e $dir);
    mkdir($dir) or die "Cannot create directory $dir: $!\n";
}

# Return the output bin for one SAM record, given its FLAG and reference name:
# "NA" when the read is unmapped (flag bit 4) or the reference name is not of
# the form "scaffoldN|sizeL", "rest" when L is below $min_len, else "scaffoldN".
sub scaffold_bin{
    my ($flag,$ref)=@_;
    return "NA" if($flag & 4);
    if($ref=~/(scaffold\d+)\|size(\d+)/){
        return($2<$min_len ? "rest" : $1);
    }
    return "NA";
}
-------------------------------------------------------------------------------- /05.synteny_analysis/circos_plot/01.transfer.pl: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env perl
# (The line above completes the "#! " that ends the preceding line of this dump.)
#
# Convert a pairwise wisent-vs-cattle MAF file into Circos input files:
#   link.txt        randomly sampled alignment links, coloured by cattle chromosome
#   chromosome.txt  karyotype lines for cattle chromosomes and wisent scaffolds
#   debug.txt       per scaffold: length, aligned length and aligned fraction
# In every alignment block the first "s" line is read as wisent and the second
# as cattle. Cattle chromosomes X and Y are renamed 30 and 31.
use strict;
use warnings;

my $maf="wisent2nc.swap.maf";
my $percent=0.1;
my $length_of_scaffold_plotted=100000;

my %link;
my %cattle_len;
my %wisent_len;

my $total_len=0;
my $link_num=0;
open(I,"< $maf") or die "Cannot open $maf: $!\n";
while(<I>){
    chomp;
    # Only an "a" line opens an alignment block; comments and any other line are skipped.
    next unless(/^a/);
    my @alignment=($_);
    while(<I>){
        chomp;
        last if(/^\s*$/);
        push @alignment,$_;
    }
    # read_maf needs the "a" line plus two "s" lines.
    next if(@alignment<3);
    my ($wisent_chr,$wisent_start,$wisent_end,$wisent_strand,$wisent_chr_len,$cattle_chr,$cattle_start,$cattle_end,$cattle_strand,$cattle_chr_len)=&read_maf(@alignment);
    next if($wisent_chr_len < $length_of_scaffold_plotted);
    $total_len += $wisent_end-$wisent_start+1;
    $link_num++;

    $cattle_chr=~s/chr//;
    if($cattle_chr eq "X"){
        $cattle_chr = 30;
    }
    elsif($cattle_chr eq "Y"){
        $cattle_chr=31;
    }
    $cattle_len{$cattle_chr} = $cattle_chr_len;
    $wisent_len{$wisent_chr} = $wisent_chr_len;
    $link{$cattle_chr}{$cattle_start} = {cattle_end => $cattle_end, cattle_strand => $cattle_strand, wisent_chr => $wisent_chr, wisent_start => $wisent_start, wisent_end => $wisent_end, wisent_strand => $wisent_strand};
}
close I;

die "No alignment on a scaffold of at least $length_of_scaffold_plotted bp found in $maf\n" if(!$total_len);

# A link of length L is kept when rand(1)/L <= $filter_percent, i.e. with
# probability $percent * L / (mean link length), capped at 1.
my $filter_percent = $link_num*$percent/$total_len;

open(L,"> link.txt") or die "Cannot write link.txt: $!\n";
my %wisent_position;
my %wisent2cattle_len;
my %align_len;
my $link_id=0;
foreach my $cattle_chr(sort {$a<=>$b} keys %link){
    foreach my $cattle_start(sort {$a<=>$b} keys %{$link{$cattle_chr}}){
        my $hit = $link{$cattle_chr}{$cattle_start};
        my $cattle_end = $hit->{cattle_end};
        my ($wisent_chr,$wisent_start,$wisent_end) = @{$hit}{qw(wisent_chr wisent_start wisent_end)};
        my $aligned = $wisent_end-$wisent_start+1;

        # Statistics use every link, whether or not it is sampled for plotting.
        push @{$wisent_position{$wisent_chr}{$cattle_chr}{pos}},$cattle_start;
        $wisent2cattle_len{$wisent_chr}{$cattle_chr}+=$aligned;
        $align_len{$wisent_chr}+=$aligned;

        next if(rand(1)/$aligned > $filter_percent);

        $link_id++;
        my $num=10000000+$link_id;
        my $color=&get_color($cattle_chr);

        print L "link${num}bundle $cattle_chr $cattle_start $cattle_end color=$color\n";
        print L "link${num}bundle $wisent_chr $wisent_start $wisent_end color=$color\n";
    }
}
close L;

open(C,"> chromosome.txt") or die "Cannot write chromosome.txt: $!\n";
foreach my $cattle_chr(sort {$a<=>$b} keys %link){
    my $chr_len=$cattle_len{$cattle_chr};
    print C "chr - $cattle_chr $cattle_chr 1 $chr_len white\n";
}

open(O,"> debug.txt") or die "Cannot write debug.txt: $!\n";
foreach my $wisent_chr(sort keys %align_len){
    my $len = $align_len{$wisent_chr};
    my $wisent_chr_len=$wisent_len{$wisent_chr};
    my $fraction = $len/$wisent_chr_len;
    print O "$wisent_chr\t$wisent_chr_len\t$len\t$fraction\n";
}
close O;

# For every wisent scaffold: the cattle chromosome with the largest aligned
# length, and the middle element of the sorted cattle start positions on it.
my %result;
foreach my $wisent_chr(sort keys %wisent2cattle_len){
    my $by_chr=$wisent2cattle_len{$wisent_chr};
    my ($chr)=sort {$by_chr->{$b} <=> $by_chr->{$a}} keys %{$by_chr};
    my @starts=sort {$a<=>$b} @{$wisent_position{$wisent_chr}{$chr}{pos}};
    my $len=@starts;
    my $mid=int(($len/2)+0.5);
    $mid=$len-1 if($mid > $len-1);
    $result{$chr}{$wisent_chr}=$starts[$mid];
}

# Scaffolds are listed by cattle chromosome and then by position, both descending.
foreach my $chr(sort {$b<=>$a} keys %result){
    foreach my $wisent_chr(sort {$result{$chr}{$b}<=>$result{$chr}{$a}} keys %{$result{$chr}}){
        my $chr_len=$wisent_len{$wisent_chr};
        print C "chr - $wisent_chr $wisent_chr 1 $chr_len white\n";
    }
}

close C;

# Parse one MAF block (the "a" line followed by at least two "s" lines).
# Returns, for the first and then the second "s" line: name, start, end,
# strand and source length, with start/end as 1-based inclusive coordinates
# on the forward strand.
sub read_maf{
    my @alignment=@_;
    my @speciesA=split /\s+/,$alignment[1];
    my @speciesB=split /\s+/,$alignment[2];
    my ($startA,$endA)=&forward_range(@speciesA[2..5]);
    my ($startB,$endB)=&forward_range(@speciesB[2..5]);
    return($speciesA[1],$startA,$endA,$speciesA[4],$speciesA[5],$speciesB[1],$startB,$endB,$speciesB[4],$speciesB[5]);
}

# Turn the start, size, strand and source length of a MAF "s" line into a
# 1-based inclusive (start, end) pair on the forward strand. For "-" lines the
# start counts from the end of the source, so the range is mirrored.
sub forward_range{
    my ($start,$len,$strand,$chr_len)=@_;
    if($strand eq "+"){
        return($start+1,$start+$len);
    }
    my $end=$chr_len-$start;
    return($end-$len+1,$end);
}

# Colour name for a cattle chromosome number: "chrN", with numbers above 24
# wrapped around by subtracting 24.
sub get_color{
    my $chr=shift;
    $chr-=24 if($chr > 24);
    return("chr$chr");
}
-------------------------------------------------------------------------------- /00.reads_filter/corr.log: -------------------------------------------------------------------------------- 1 | Program start.. 
2 | Program: Corrector_HA 3 | Version: v2.02 4 | Author: BGI-ShenZhen 5 | CompileDate: Apr 15 2014 time: 15:20:05 6 | Current time: Mon Apr 21 05:54:28 2014 7 | Command line: Corrector_HA -k 23 -l 3 -r 50 -t 10 Wisent.freq.gz read.lst 8 | 9 | Start to caculate high-freq kmer species number... 10 | Finish caculation. The number of kmer species which frequency higher than 3 is: 2131784643 11 | Run time: 2919s. 12 | 13 | Start to load the kmer frequency table... 14 | Finish loading the kmer frequency table! 15 | KmerSize 23 16 | Kmer_species_num 5698293539 17 | Low_freq_kmer_ratio 0.625891 18 | Run time: 6267s. 19 | 10 threads creation done! 20 | 21 | Finished to parse file ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_1.fq.gz 22 | 23 | 10 threads creation done! 24 | 25 | Finished to parse file ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_2.fq.gz 26 | 27 | 10 threads creation done! 28 | 29 | Finished to parse file ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_1.fq.gz 30 | 31 | 10 threads creation done! 32 | 33 | Finished to parse file ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_2.fq.gz 34 | 35 | 10 threads creation done! 36 | 37 | Finished to parse file ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_1.fq.gz 38 | 39 | 10 threads creation done! 40 | 41 | Finished to parse file ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_2.fq.gz 42 | 43 | 10 threads creation done! 44 | 45 | Finished to parse file ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_1.fq.gz 46 | 47 | 10 threads creation done! 48 | 49 | Finished to parse file ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_2.fq.gz 50 | 51 | 10 threads creation done! 52 | 53 | Finished to parse file ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_1.fq.gz 54 | 55 | 10 threads creation done! 56 | 57 | Finished to parse file ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_2.fq.gz 58 | 59 | 10 threads creation done! 
60 | 61 | Finished to parse file ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_1.fq.gz 62 | 63 | 10 threads creation done! 64 | 65 | Finished to parse file ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_2.fq.gz 66 | 67 | 10 threads creation done! 68 | 69 | Finished to parse file ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_1.fq.gz 70 | 71 | 10 threads creation done! 72 | 73 | Finished to parse file ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_2.fq.gz 74 | 75 | 10 threads creation done! 76 | 77 | Finished to parse file ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_1.fq.gz 78 | 79 | 10 threads creation done! 80 | 81 | Finished to parse file ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_2.fq.gz 82 | 83 | 10 threads creation done! 84 | 85 | Finished to parse file ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_1.fq.gz 86 | 87 | 10 threads creation done! 88 | 89 | Finished to parse file ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_2.fq.gz 90 | 91 | 10 threads creation done! 92 | 93 | Finished to parse file ../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_1.fq.gz 94 | 95 | 10 threads creation done! 96 | 97 | Finished to parse file ../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_2.fq.gz 98 | 99 | Parsed all the reads files completed 100 | Run time: 72053s. 
101 | Start to convert ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_2.fq.gz.cor 102 | Start to convert ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_2.fq.gz.cor 103 | Start to convert ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_2.fq.gz.cor 104 | Start to convert ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_1.fq.gz.cor & ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_2.fq.gz.cor 105 | Start to convert ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_1.fq.gz.cor & ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_2.fq.gz.cor 106 | Start to convert ../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_2.fq.gz.cor 107 | Start to convert ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_1.fq.gz.cor & ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_2.fq.gz.cor 108 | Start to convert ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_1.fq.gz.cor & ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_2.fq.gz.cor 109 | Start to convert ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_2.fq.gz.cor 110 | Start to convert ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_1.fq.gz.cor & ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_2.fq.gz.cor 111 | Finished to convert ../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_2.fq.gz.cor 112 | Finished to convert ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_2.fq.gz.cor 113 | 
Finished to convert ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_2.fq.gz.cor 114 | Finished to convert ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_1.fq.gz.cor & ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_2.fq.gz.cor 115 | Finished to convert ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_2.fq.gz.cor 116 | Finished to convert ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_1.fq.gz.cor & ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_2.fq.gz.cor 117 | Finished to convert ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_1.fq.gz.cor & ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_2.fq.gz.cor 118 | Finished to convert ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_1.fq.gz.cor & ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_2.fq.gz.cor 119 | Finished to convert ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_1.fq.gz.cor & ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_2.fq.gz.cor 120 | Finished to convert ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_1.fq.gz.cor & ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_2.fq.gz.cor 121 | Convert all the paired reads files completed 122 | Run time: 88250s. 123 | Begin output the QC file... 124 | Finished output QC file! 125 | All done! 126 | Run time: 88250s. 
127 | -------------------------------------------------------------------------------- /CommandLine.md: -------------------------------------------------------------------------------- 1 | # Part1 Assembly 2 | ### SOAPec 3 | #### Version: 2.02 4 | #### Command Line 5 | ``` 6 | KmerFreq_HA -k 23 -t 64 -p Wisent -l read.lst -L 100 7 | Corrector_HA -k 23 -l 3 -r 50 -t 10 Wisent.freq.gz read.lst 8 | ``` 9 | ### ABySS 10 | #### Version: 1.5.1 11 | #### Command Line 12 | ``` 13 | abyss-pe -C <outdir> j=64 np=64 k=75 name=wisent lib='pe1 pe2 ...' \ 14 | pe1='lib1.1.fq.gz lib1.2.fq.gz' pe2='lib2.1.fq.gz lib2.2.fq.gz' ... # more libs were omitted in this line 15 | ``` 16 | ### SSPACE 17 | #### Version: 3.0_linux-x86_64 18 | #### Command Line 19 | ``` 20 | SSPACE_Standard_v3.0.pl -l libraries.txt -s contig.fa -T 32 21 | ``` 22 | # Part2 Assessment 23 | ### BUSCO 24 | #### Version: 1.22 25 | #### Command Line 26 | ``` 27 | BUSCO_v1.22.py -c 32 -o <name> -in wisent.pep -l vertebrata -m OGS 28 | BUSCO_v1.22.py -c 32 -o <name> -sp human -in wisent.fa -l vertebrata -m genome --long 29 | ``` 30 | ### CEGMA 31 | #### Version: 2.5 32 | #### Command Line 33 | ``` 34 | cegma -T 30 -g wisent.fa -o <prefix> 35 | ``` 36 | ### FRCurve 37 | #### Version: 1.3.0 38 | #### Command Line 39 | ``` 40 | FRC --genome-size 2980000000 --pe-sam il230.bam --mp-sam il20000.bam --pe-max-insert 230 --mp-max-insert 25000 --out frc_curve 41 | ``` 42 | # Part3 SNPs and InDels 43 | ### BWA 44 | #### Version: 0.7.12 45 | #### Command Line 46 | ``` 47 | bwa mem -t 12 -R '@RG ID:<id> SM:<sample> LB:<library>' wisent.fa lib1.1.fq.gz lib1.2.fq.gz | samtools sort -O bam -T ./ -l 3 -o <sample>.bam - 48 | samtools rmdup <sample>.bam <sample>.rmdup.bam 49 | ``` 50 | ### GATK 51 | #### Version: 3.3 52 | #### Command Line 53 | ``` 54 | java -Xmx10g -jar GenomeAnalysisTK.jar -R wisent.fa -T RealignerTargetCreator -o <sample>.intervals -I <sample>.rmdup.bam 55 | java -Xmx10g -jar GenomeAnalysisTK.jar -R wisent.fa -T IndelRealigner -targetIntervals <sample>.intervals -o <sample>.realn.bam -I <sample>.rmdup.bam 56 | java -Xmx10g -jar 
GenomeAnalysisTK.jar -nct 12 -R wisent.fa -T HaplotypeCaller -I <sample>.realn.bam -out_mode EMIT_VARIANTS_ONLY -o <sample>.vcf 57 | ``` 58 | # Part4 Annotation 59 | ### RepeatMasker 60 | #### Version: 1.323 61 | #### Command Line 62 | ``` 63 | RepeatMasker -nolow -no_is -norna -parallel 1 -species mammal -gff wisent.fa 64 | ``` 65 | ### RepeatProteinMask 66 | #### Version: 1.36 67 | #### Command Line 68 | ``` 69 | RepeatProteinMask -engine ncbi -noLowSimple -pvalue 0.0001 wisent.fa 70 | ``` 71 | ### TRF 72 | #### Version: 407b 73 | #### Command Line 74 | ``` 75 | trf wisent.fa 2 7 7 80 10 50 2000 -d -h 76 | ``` 77 | ### LTR_FINDER 78 | #### Version: 1.0.6 79 | #### Command Line 80 | ``` 81 | ltr_finder wisent.fa 82 | ``` 83 | ### PILER 84 | #### Version: 1.0 85 | #### Command Line 86 | ``` 87 | pals -self wisent.fa -out wisent.fa.pals.gff 88 | piler2 -trs wisent.fa.pals.gff -out wisent.fa.trs.gff 89 | piler2 -trs2fasta wisent.fa.trs.gff -seq wisent.fa -path family -prefix wisent 90 | for i in family/* ; do muscle -in $i -out $i.aligned.fasta -maxiters 1 -diags1 ; done 91 | for i in family/*aligned.fasta ; do piler2 -cons $i -out $i.cons -label $i ; done 92 | cat family/*cons > wisent_library.fasta 93 | RepeatMasker -lib wisent_library.fasta -pa 30 wisent.fa 94 | ``` 95 | ### RepeatScout 96 | #### Version: 1.05 97 | #### Command Line 98 | ``` 99 | BuildDatabase -name wisent wisent.fa 100 | RepeatModeler -pa 30 -database wisent 101 | RepeatMasker -lib RM*/consensi.fa.classified -pa 30 wisent.fa 102 | ``` 103 | ### BLAST 104 | #### Version: 2.26 105 | #### Command Line 106 | ``` 107 | blastall -p tblastn -d <database> -i <query> -e 1E-5 -o protein2query.out -a 12 108 | ``` 109 | ### BLAST2GENE 110 | #### Version: 17 111 | #### Command Line 112 | ``` 113 | blast.parse.pl protein2query.out > protein2query.bp 114 | blast2gene.pl protein2query.bp > protein2query.bl2g 115 | ``` 116 | ### GeneWise 117 | #### Version: 2.41 118 | #### Command Line 119 | ``` 120 | genewise -u -v - -gff query.fa .fa > 
genewise.gff 121 | ``` 122 | ### Augustus 123 | #### Version: 2.5.5 124 | #### Command Line 125 | ``` 126 | augustus --species=human <seq>.fa > <seq>.gff 127 | ``` 128 | ### GenScan 129 | #### Version: Not available 130 | #### Command Line 131 | ``` 132 | genscan HumanIso.smat <seq>.fa > <seq>.out 133 | ``` 134 | ### EVM 135 | #### Version: 1.1.1 136 | #### Command Line 137 | ``` 138 | evidence_modeler.pl --genome <seq>.fa --weights weights.txt --gene_predictions ab_initio.gff --protein_alignments homolog.gff > evm.out; EVM_to_GFF3.pl evm.out > evm.out.gff 139 | ``` 140 | # Part5 Synteny 141 | ### last 142 | #### Version: 761 143 | #### Command Line 144 | ``` 145 | lastdb -uNEAR -cR11 ref_db ref.fa 146 | lastal -P48 -m100 -E0.05 ref_db wisent.fa | last-split > query2db.maf 147 | maf-swap query2db.maf | last-split > cattle.wisent.sing.maf # the same parameters were applied to the other species 148 | ``` 149 | ### multiz 150 | #### Version: 012109 151 | #### Command Line 152 | ``` 153 | roast -T=. E=cattle "((((bison wisent) yak) (cattle indicus)) buffalo)" cattle.bison.sing.maf cattle.wisent.sing.maf cattle.yak.sing.maf cattle.indicus.sing.maf cattle.buffalo.sing.maf cattle.maf | sh 154 | ``` 155 | # Part6 Evolution analysis 156 | ### ExaML 157 | #### Version: 8.1.17 158 | #### Command Line 159 | ``` 160 | clustalw2 -INFILE=align.part.fa -CONVERT -OUTFILE=align.part.phy -OUTPUT=PHYLIP 161 | raxmlHPC -s align.part.phy -n start.tree -m GTRGAMMA -p 31415 162 | clustalw2 -INFILE=align.fa -CONVERT -OUTFILE=align.phy -OUTPUT=PHYLIP 163 | parse-examl -s align.phy -n align.bin -m DNA 164 | examl-AVX -s align.bin.binary -n output.phb -m GAMMA -t start.tree 165 | raxmlHPC -# 100 -b 12345 -f j -m GTRCAT -s align.phy -n REPS # the generated alignments were used for bootstrapping 166 | ``` 167 | ### orthoMCL 168 | #### Version: 2.0.9 169 | #### Command Line 170 | ``` 171 | orthomclInstallSchema orthomcl.config.template 172 | orthomclAdjustFasta compliantFasta/cat cattle.pep 1 173 | orthomclAdjustFasta 
compliantFasta/dog dog.pep 1 174 | orthomclAdjustFasta compliantFasta/hor horse.pep 1 175 | orthomclAdjustFasta compliantFasta/hum human.pep 1 176 | orthomclAdjustFasta compliantFasta/she sheep.pep 1 177 | orthomclAdjustFasta compliantFasta/wis wisent.pep 1 178 | orthomclAdjustFasta compliantFasta/yak yak.pep 1 179 | orthomclFilterFasta compliantFasta/ 10 20 180 | makeblastdb -in goodProteins.fasta -dbtype prot 181 | blastp -db goodProteins.fasta -query goodProteins.fasta -out all-all.blastp.out -evalue 1e-5 -outfmt 6 -num_threads 24 182 | orthomclBlastParser all-all.blastp.out compliantFasta > similarSequences.txt 183 | perl -p -i -e 's/0\t0/1\t-181/' similarSequences.txt 184 | orthomclLoadBlast orthomcl.config.template similarSequences.txt 185 | orthomclPairs orthomcl.config.template orthomcl_pairs.log cleanup=no 186 | orthomclDumpPairsFiles orthomcl.config.template 187 | mcl mclInput --abc -I 1.5 -o mclOutput 188 | orthomclMclToGroups cluster 1 < mclOutput > groups.txt 189 | ``` 190 | ### PAML 191 | #### Version: 4.8 192 | #### Command Line 193 | ``` 194 | codeml codeml.ctl 195 | ``` 196 | We used the Codeml program from the PAML package with a branch-site model (runmode = -2, model = 2, NSsites = 2) to detect positively selected genes in focal lineages. A likelihood ratio test was constructed to compare a model that allows sites to be under positive selection on the foreground branch with the null model in which sites may evolve neutrally and under purifying selection. The p-values were computed based on the Chi-square statistic adjusted by the FDR method and genes with adjusted p-value < 0.05 were treated as candidates for positive selection. 
197 | -------------------------------------------------------------------------------- /03.gap_close/02.runbowtie.pl.sh: -------------------------------------------------------------------------------- 1 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.10000bp_1.1.fq.gz -2 clean_reads/lib.10000bp_1.2.fq.gz | perl get_reads.pl 10000 lib.10000bp_1 2 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.10000bp_2.1.fq.gz -2 clean_reads/lib.10000bp_2.2.fq.gz | perl get_reads.pl 10000 lib.10000bp_2 3 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.10000bp_3.1.fq.gz -2 clean_reads/lib.10000bp_3.2.fq.gz | perl get_reads.pl 10000 lib.10000bp_3 4 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.10000bp_4.1.fq.gz -2 clean_reads/lib.10000bp_4.2.fq.gz | perl get_reads.pl 10000 lib.10000bp_4 5 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.10000bp_5.1.fq.gz -2 clean_reads/lib.10000bp_5.2.fq.gz | perl get_reads.pl 10000 lib.10000bp_5 6 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.10000bp_6.1.fq.gz -2 clean_reads/lib.10000bp_6.2.fq.gz | perl get_reads.pl 10000 lib.10000bp_6 7 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.10000bp_7.1.fq.gz -2 clean_reads/lib.10000bp_7.2.fq.gz | perl get_reads.pl 10000 lib.10000bp_7 8 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.10000bp_8.1.fq.gz -2 clean_reads/lib.10000bp_8.2.fq.gz | perl get_reads.pl 10000 lib.10000bp_8 9 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.170bp_49.1.fq.gz -2 clean_reads/lib.170bp_49.2.fq.gz | perl get_reads.pl 170 lib.170bp_49 10 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.20000bp_10.1.fq.gz -2 clean_reads/lib.20000bp_10.2.fq.gz | perl get_reads.pl 20000 lib.20000bp_10 11 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.20000bp_11.1.fq.gz -2 
clean_reads/lib.20000bp_11.2.fq.gz | perl get_reads.pl 20000 lib.20000bp_11 12 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.20000bp_12.1.fq.gz -2 clean_reads/lib.20000bp_12.2.fq.gz | perl get_reads.pl 20000 lib.20000bp_12 13 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.20000bp_13.1.fq.gz -2 clean_reads/lib.20000bp_13.2.fq.gz | perl get_reads.pl 20000 lib.20000bp_13 14 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.20000bp_14.1.fq.gz -2 clean_reads/lib.20000bp_14.2.fq.gz | perl get_reads.pl 20000 lib.20000bp_14 15 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.20000bp_15.1.fq.gz -2 clean_reads/lib.20000bp_15.2.fq.gz | perl get_reads.pl 20000 lib.20000bp_15 16 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.20000bp_16.1.fq.gz -2 clean_reads/lib.20000bp_16.2.fq.gz | perl get_reads.pl 20000 lib.20000bp_16 17 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.20000bp_17.1.fq.gz -2 clean_reads/lib.20000bp_17.2.fq.gz | perl get_reads.pl 20000 lib.20000bp_17 18 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.20000bp_9.1.fq.gz -2 clean_reads/lib.20000bp_9.2.fq.gz | perl get_reads.pl 20000 lib.20000bp_9 19 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_18.1.fq.gz -2 clean_reads/lib.2000bp_18.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_18 20 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_19.1.fq.gz -2 clean_reads/lib.2000bp_19.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_19 21 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_20.1.fq.gz -2 clean_reads/lib.2000bp_20.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_20 22 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_21.1.fq.gz -2 clean_reads/lib.2000bp_21.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_21 23 | bowtie2 -p 32 
--very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_22.1.fq.gz -2 clean_reads/lib.2000bp_22.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_22 24 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_23.1.fq.gz -2 clean_reads/lib.2000bp_23.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_23 25 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_24.1.fq.gz -2 clean_reads/lib.2000bp_24.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_24 26 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_25.1.fq.gz -2 clean_reads/lib.2000bp_25.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_25 27 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_26.1.fq.gz -2 clean_reads/lib.2000bp_26.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_26 28 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_27.1.fq.gz -2 clean_reads/lib.2000bp_27.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_27 29 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.2000bp_28.1.fq.gz -2 clean_reads/lib.2000bp_28.2.fq.gz | perl get_reads.pl 2000 lib.2000bp_28 30 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.200bp_29.1.fq.gz -2 clean_reads/lib.200bp_29.2.fq.gz | perl get_reads.pl 200 lib.200bp_29 31 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.200bp_30.1.fq.gz -2 clean_reads/lib.200bp_30.2.fq.gz | perl get_reads.pl 200 lib.200bp_30 32 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_31.1.fq.gz -2 clean_reads/lib.5000bp_31.2.fq.gz | perl get_reads.pl 5000 lib.5000bp_31 33 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_32.1.fq.gz -2 clean_reads/lib.5000bp_32.2.fq.gz | perl get_reads.pl 5000 lib.5000bp_32 34 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_33.1.fq.gz -2 clean_reads/lib.5000bp_33.2.fq.gz | perl get_reads.pl 5000 
lib.5000bp_33 35 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_34.1.fq.gz -2 clean_reads/lib.5000bp_34.2.fq.gz | perl get_reads.pl 5000 lib.5000bp_34 36 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_35.1.fq.gz -2 clean_reads/lib.5000bp_35.2.fq.gz | perl get_reads.pl 5000 lib.5000bp_35 37 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_36.1.fq.gz -2 clean_reads/lib.5000bp_36.2.fq.gz | perl get_reads.pl 5000 lib.5000bp_36 38 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_37.1.fq.gz -2 clean_reads/lib.5000bp_37.2.fq.gz | perl get_reads.pl 5000 lib.5000bp_37 39 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_38.1.fq.gz -2 clean_reads/lib.5000bp_38.2.fq.gz | perl get_reads.pl 5000 lib.5000bp_38 40 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_39.1.fq.gz -2 clean_reads/lib.5000bp_39.2.fq.gz | perl get_reads.pl 5000 lib.5000bp_39 41 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.5000bp_40.1.fq.gz -2 clean_reads/lib.5000bp_40.2.fq.gz | perl get_reads.pl 5000 lib.5000bp_40 42 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.500bp_41.1.fq.gz -2 clean_reads/lib.500bp_41.2.fq.gz | perl get_reads.pl 500 lib.500bp_41 43 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.500bp_42.1.fq.gz -2 clean_reads/lib.500bp_42.2.fq.gz | perl get_reads.pl 500 lib.500bp_42 44 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.500bp_43.1.fq.gz -2 clean_reads/lib.500bp_43.2.fq.gz | perl get_reads.pl 500 lib.500bp_43 45 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.800bp_44.1.fq.gz -2 clean_reads/lib.800bp_44.2.fq.gz | perl get_reads.pl 800 lib.800bp_44 46 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.800bp_45.1.fq.gz -2 clean_reads/lib.800bp_45.2.fq.gz | 
perl get_reads.pl 800 lib.800bp_45 47 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.800bp_46.1.fq.gz -2 clean_reads/lib.800bp_46.2.fq.gz | perl get_reads.pl 800 lib.800bp_46 48 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.800bp_47.1.fq.gz -2 clean_reads/lib.800bp_47.2.fq.gz | perl get_reads.pl 800 lib.800bp_47 49 | bowtie2 -p 32 --very-sensitive --no-sq -x wisent.fa -1 clean_reads/lib.800bp_48.1.fq.gz -2 clean_reads/lib.800bp_48.2.fq.gz | perl get_reads.pl 800 lib.800bp_48 50 | -------------------------------------------------------------------------------- /02.scaffold/standard_output.summaryfile.txt: -------------------------------------------------------------------------------- 1 | READING READS lib1: 2 | ------------------------------------------------------------ 3 | Total inserted pairs = 115092587 4 | ------------------------------------------------------------ 5 | 6 | READING READS lib2: 7 | ------------------------------------------------------------ 8 | Total inserted pairs = 159944696 9 | ------------------------------------------------------------ 10 | 11 | READING READS lib3: 12 | ------------------------------------------------------------ 13 | Total inserted pairs = 162338990 14 | ------------------------------------------------------------ 15 | 16 | READING READS lib4: 17 | ------------------------------------------------------------ 18 | Total inserted pairs = 138865184 19 | ------------------------------------------------------------ 20 | 21 | READING READS lib5: 22 | ------------------------------------------------------------ 23 | Total inserted pairs = 167407360 24 | ------------------------------------------------------------ 25 | 26 | 27 | 28 | LIBRARY lib1 STATS: 29 | ################################################################################ 30 | READ PAIRS STATS: 31 | Assembled pairs: 0 (0 sequences) 32 | Satisfied in distance/logic within contigs (i.e. 
-> <-, distance on target: 170 +/-42.5): 0 33 | Unsatisfied in distance within contigs (i.e. distance out-of-bounds): 0 34 | Unsatisfied pairing logic within contigs (i.e. illogical pairing ->->, <-<- or <-->): 0 35 | --- 36 | Satisfied in distance/logic within a given contig pair (pre-scaffold): 10239 37 | Unsatisfied in distance within a given contig pair (i.e. calculated distances out-of-bounds): 508268 38 | --- 39 | Total satisfied: 10239 unsatisfied: 508268 40 | 41 | 42 | Estimated insert size statistics (based on 0 pairs): 43 | Mean insert size = 0 44 | Median insert size = 0 45 | 46 | REPEATS: 47 | Number of repeated edges = 9 48 | ------------------------------------------------------------ 49 | 50 | ################################################################################ 51 | 52 | 53 | LIBRARY lib2 STATS: 54 | ################################################################################ 55 | READ PAIRS STATS: 56 | Assembled pairs: 0 (0 sequences) 57 | Satisfied in distance/logic within contigs (i.e. -> <-, distance on target: 2000 +/-1000): 989 58 | Unsatisfied in distance within contigs (i.e. distance out-of-bounds): 527 59 | Unsatisfied pairing logic within contigs (i.e. illogical pairing ->->, <-<- or <-->): 1832 60 | --- 61 | Satisfied in distance/logic within a given contig pair (pre-scaffold): 6140529 62 | Unsatisfied in distance within a given contig pair (i.e. 
calculated distances out-of-bounds): 3510889 63 | --- 64 | Total satisfied: 6141518 unsatisfied: 3513248 65 | 66 | 67 | Estimated insert size statistics (based on 989 pairs): 68 | Mean insert size = 1543 69 | Median insert size = 1404 70 | 71 | REPEATS: 72 | Number of repeated edges = 6891 73 | ------------------------------------------------------------ 74 | 75 | ################################################################################ 76 | 77 | 78 | LIBRARY lib3 STATS: 79 | ################################################################################ 80 | READ PAIRS STATS: 81 | Assembled pairs: 0 (0 sequences) 82 | Satisfied in distance/logic within contigs (i.e. -> <-, distance on target: 5000 +/-2500): 465714 83 | Unsatisfied in distance within contigs (i.e. distance out-of-bounds): 7531 84 | Unsatisfied pairing logic within contigs (i.e. illogical pairing ->->, <-<- or <-->): 14310 85 | --- 86 | Satisfied in distance/logic within a given contig pair (pre-scaffold): 2082582 87 | Unsatisfied in distance within a given contig pair (i.e. calculated distances out-of-bounds): 1502514 88 | --- 89 | Total satisfied: 2548296 unsatisfied: 1524355 90 | 91 | 92 | Estimated insert size statistics (based on 465714 pairs): 93 | Mean insert size = 5021 94 | Median insert size = 5059 95 | 96 | REPEATS: 97 | Number of repeated edges = 5665 98 | ------------------------------------------------------------ 99 | 100 | ################################################################################ 101 | 102 | 103 | LIBRARY lib4 STATS: 104 | ################################################################################ 105 | READ PAIRS STATS: 106 | Assembled pairs: 0 (0 sequences) 107 | Satisfied in distance/logic within contigs (i.e. -> <-, distance on target: 10000 +/-5000): 133188 108 | Unsatisfied in distance within contigs (i.e. distance out-of-bounds): 49298 109 | Unsatisfied pairing logic within contigs (i.e. 
illogical pairing ->->, <-<- or <-->): 11037 110 | --- 111 | Satisfied in distance/logic within a given contig pair (pre-scaffold): 491767 112 | Unsatisfied in distance within a given contig pair (i.e. calculated distances out-of-bounds): 258544 113 | --- 114 | Total satisfied: 624955 unsatisfied: 318879 115 | 116 | 117 | Estimated insert size statistics (based on 133188 pairs): 118 | Mean insert size = 10182 119 | Median insert size = 10499 120 | 121 | REPEATS: 122 | Number of repeated edges = 2303 123 | ------------------------------------------------------------ 124 | 125 | ################################################################################ 126 | 127 | 128 | LIBRARY lib5 STATS: 129 | ################################################################################ 130 | READ PAIRS STATS: 131 | Assembled pairs: 0 (0 sequences) 132 | Satisfied in distance/logic within contigs (i.e. -> <-, distance on target: 20000 +/-10000): 50341 133 | Unsatisfied in distance within contigs (i.e. distance out-of-bounds): 5784 134 | Unsatisfied pairing logic within contigs (i.e. illogical pairing ->->, <-<- or <-->): 7041 135 | --- 136 | Satisfied in distance/logic within a given contig pair (pre-scaffold): 146641 137 | Unsatisfied in distance within a given contig pair (i.e. 
calculated distances out-of-bounds): 125994 138 | --- 139 | Total satisfied: 196982 unsatisfied: 138819 140 | 141 | 142 | Estimated insert size statistics (based on 50341 pairs): 143 | Mean insert size = 17872 144 | Median insert size = 17911 145 | 146 | REPEATS: 147 | Number of repeated edges = 242 148 | ------------------------------------------------------------ 149 | 150 | ################################################################################ 151 | 152 | SUMMARY: 153 | ------------------------------------------------------------ 154 | Inserted contig file; 155 | Total number of contigs = 276036 156 | Sum (bp) = 2440848532 157 | Total number of N's = 44395337 158 | Sum (bp) no N's = 2396453195 159 | GC Content = 41.68% 160 | Max contig size = 214095 161 | Min contig size = 500 162 | Average contig size = 8842 163 | N25 = 29379 164 | N50 = 16472 165 | N75 = 8459 166 | 167 | After scaffolding lib1: 168 | Total number of scaffolds = 275979 169 | Sum (bp) = 2440848034 170 | Total number of N's = 44395383 171 | Sum (bp) no N's = 2396452651 172 | GC Content = 41.68% 173 | Max scaffold size = 214095 174 | Min scaffold size = 500 175 | Average scaffold size = 8844 176 | N25 = 29381 177 | N50 = 16473 178 | N75 = 8460 179 | 180 | After scaffolding lib2: 181 | Total number of scaffolds = 84316 182 | Sum (bp) = 2482214565 183 | Total number of N's = 86739900 184 | Sum (bp) no N's = 2395474665 185 | GC Content = 41.66% 186 | Max scaffold size = 1529186 187 | Min scaffold size = 500 188 | Average scaffold size = 29439 189 | N25 = 177229 190 | N50 = 93417 191 | N75 = 43997 192 | 193 | After scaffolding lib3: 194 | Total number of scaffolds = 40565 195 | Sum (bp) = 2539946460 196 | Total number of N's = 145644818 197 | Sum (bp) no N's = 2394301642 198 | GC Content = 41.64% 199 | Max scaffold size = 5921021 200 | Min scaffold size = 500 201 | Average scaffold size = 62614 202 | N25 = 956264 203 | N50 = 503076 204 | N75 = 229027 205 | 206 | After scaffolding lib4: 207 | 
Total number of scaffolds = 31880 208 | Sum (bp) = 2562129876 209 | Total number of N's = 168189675 210 | Sum (bp) no N's = 2393940201 211 | GC Content = 41.64% 212 | Max scaffold size = 9736302 213 | Min scaffold size = 500 214 | Average scaffold size = 80367 215 | N25 = 2498617 216 | N50 = 1400606 217 | N75 = 661390 218 | 219 | After scaffolding lib5: 220 | Total number of scaffolds = 29074 221 | Sum (bp) = 2576549422 222 | Total number of N's = 182643872 223 | Sum (bp) no N's = 2393905550 224 | GC Content = 41.63% 225 | Max scaffold size = 31653177 226 | Min scaffold size = 500 227 | Average scaffold size = 88620 228 | N25 = 8057465 229 | N50 = 4694011 230 | N75 = 2122805 231 | 232 | ------------------------------------------------------------ 233 | -------------------------------------------------------------------------------- /00.reads_filter/kmerfreq.err: -------------------------------------------------------------------------------- 1 | File: ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_1.fq.gz 2 | read file: ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_1.fq.gz 3 | processed reads 10000000 4 | processed reads 20000000 5 | processed reads 30000000 6 | processed reads 40000000 7 | processed reads 50000000 8 | processed reads 60000000 9 | processed reads 70000000 10 | processed reads 80000000 11 | processed reads 90000000 12 | processed reads 100000000 13 | processed reads 110000000 14 | processed reads 120000000 15 | processed reads 130000000 16 | processed reads 140000000 17 | processed reads 150000000 18 | processed reads 160000000 19 | processed reads 170000000 20 | processed reads 180000000 21 | File: ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_2.fq.gz 22 | read file: ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L3_WHAXPI000090-15_2.fq.gz 23 | processed reads 190000000 24 | processed reads 200000000 25 | processed reads 210000000 26 | processed reads 220000000 27 | processed reads 230000000 28 | processed reads 240000000 
29 | processed reads 250000000 30 | processed reads 260000000 31 | processed reads 270000000 32 | processed reads 280000000 33 | processed reads 290000000 34 | processed reads 300000000 35 | processed reads 310000000 36 | processed reads 320000000 37 | processed reads 330000000 38 | processed reads 340000000 39 | processed reads 350000000 40 | processed reads 360000000 41 | processed reads 370000000 42 | File: ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_1.fq.gz 43 | read file: ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_1.fq.gz 44 | processed reads 380000000 45 | processed reads 390000000 46 | processed reads 400000000 47 | processed reads 410000000 48 | processed reads 420000000 49 | processed reads 430000000 50 | processed reads 440000000 51 | processed reads 450000000 52 | processed reads 460000000 53 | processed reads 470000000 54 | processed reads 480000000 55 | processed reads 490000000 56 | processed reads 500000000 57 | processed reads 510000000 58 | processed reads 520000000 59 | processed reads 530000000 60 | processed reads 540000000 61 | processed reads 550000000 62 | processed reads 560000000 63 | File: ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_2.fq.gz 64 | read file: ../SeqV2/200bp/140104_I686_FCC3R25ACXX_L4_WHAXPI000090-15_2.fq.gz 65 | processed reads 570000000 66 | processed reads 580000000 67 | processed reads 590000000 68 | processed reads 600000000 69 | processed reads 610000000 70 | processed reads 620000000 71 | processed reads 630000000 72 | processed reads 640000000 73 | processed reads 650000000 74 | processed reads 660000000 75 | processed reads 670000000 76 | processed reads 680000000 77 | processed reads 690000000 78 | processed reads 700000000 79 | processed reads 710000000 80 | processed reads 720000000 81 | processed reads 730000000 82 | processed reads 740000000 83 | processed reads 750000000 84 | File: ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_1.fq.gz 85 | read file: 
../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_1.fq.gz 86 | processed reads 760000000 87 | processed reads 770000000 88 | processed reads 780000000 89 | processed reads 790000000 90 | processed reads 800000000 91 | processed reads 810000000 92 | processed reads 820000000 93 | processed reads 830000000 94 | processed reads 840000000 95 | processed reads 850000000 96 | processed reads 860000000 97 | processed reads 870000000 98 | processed reads 880000000 99 | File: ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_2.fq.gz 100 | read file: ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L1_WHAIPI000089-13_2.fq.gz 101 | processed reads 890000000 102 | processed reads 900000000 103 | processed reads 910000000 104 | processed reads 920000000 105 | processed reads 930000000 106 | processed reads 940000000 107 | processed reads 950000000 108 | processed reads 960000000 109 | processed reads 970000000 110 | processed reads 980000000 111 | processed reads 990000000 112 | processed reads 1000000000 113 | processed reads 1010000000 114 | File: ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_1.fq.gz 115 | read file: ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_1.fq.gz 116 | processed reads 1020000000 117 | processed reads 1030000000 118 | processed reads 1040000000 119 | processed reads 1050000000 120 | processed reads 1060000000 121 | processed reads 1070000000 122 | processed reads 1080000000 123 | processed reads 1090000000 124 | processed reads 1100000000 125 | processed reads 1110000000 126 | processed reads 1120000000 127 | processed reads 1130000000 128 | processed reads 1140000000 129 | processed reads 1150000000 130 | File: ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_2.fq.gz 131 | read file: ../SeqV2/500bp/140104_I686_FCC3R25ACXX_L2_WHAIPI000089-13_2.fq.gz 132 | processed reads 1160000000 133 | processed reads 1170000000 134 | processed reads 1180000000 135 | processed reads 1190000000 136 | processed reads 1200000000 137 
| processed reads 1210000000 138 | processed reads 1220000000 139 | processed reads 1230000000 140 | processed reads 1240000000 141 | processed reads 1250000000 142 | processed reads 1260000000 143 | processed reads 1270000000 144 | processed reads 1280000000 145 | File: ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_1.fq.gz 146 | read file: ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_1.fq.gz 147 | processed reads 1290000000 148 | processed reads 1300000000 149 | processed reads 1310000000 150 | File: ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_2.fq.gz 151 | read file: ../SeqV2/500bp/140220_I191_FCC3R2PACXX_L2_WHAIPI000089-13_2.fq.gz 152 | processed reads 1320000000 153 | processed reads 1330000000 154 | processed reads 1340000000 155 | File: ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_1.fq.gz 156 | read file: ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_1.fq.gz 157 | processed reads 1350000000 158 | processed reads 1360000000 159 | processed reads 1370000000 160 | processed reads 1380000000 161 | processed reads 1390000000 162 | processed reads 1400000000 163 | processed reads 1410000000 164 | processed reads 1420000000 165 | processed reads 1430000000 166 | processed reads 1440000000 167 | processed reads 1450000000 168 | File: ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_2.fq.gz 169 | read file: ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L6_WHAMPI000088-14_2.fq.gz 170 | processed reads 1460000000 171 | processed reads 1470000000 172 | processed reads 1480000000 173 | processed reads 1490000000 174 | processed reads 1500000000 175 | processed reads 1510000000 176 | processed reads 1520000000 177 | processed reads 1530000000 178 | processed reads 1540000000 179 | processed reads 1550000000 180 | processed reads 1560000000 181 | processed reads 1570000000 182 | File: ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_1.fq.gz 183 | read file: 
../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_1.fq.gz 184 | processed reads 1580000000 185 | processed reads 1590000000 186 | processed reads 1600000000 187 | processed reads 1610000000 188 | processed reads 1620000000 189 | processed reads 1630000000 190 | processed reads 1640000000 191 | processed reads 1650000000 192 | processed reads 1660000000 193 | processed reads 1670000000 194 | processed reads 1680000000 195 | File: ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_2.fq.gz 196 | read file: ../SeqV2/800bp/140104_I686_FCC3HWYACXX_L7_WHAMPI000088-14_2.fq.gz 197 | processed reads 1690000000 198 | processed reads 1700000000 199 | processed reads 1710000000 200 | processed reads 1720000000 201 | processed reads 1730000000 202 | processed reads 1740000000 203 | processed reads 1750000000 204 | processed reads 1760000000 205 | processed reads 1770000000 206 | processed reads 1780000000 207 | processed reads 1790000000 208 | File: ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_1.fq.gz 209 | read file: ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_1.fq.gz 210 | processed reads 1800000000 211 | processed reads 1810000000 212 | File: ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_2.fq.gz 213 | read file: ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L7_WHAMPI000088-14_2.fq.gz 214 | processed reads 1820000000 215 | processed reads 1830000000 216 | processed reads 1840000000 217 | File: ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_1.fq.gz 218 | read file: ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_1.fq.gz 219 | processed reads 1850000000 220 | processed reads 1860000000 221 | File: ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_2.fq.gz 222 | read file: ../SeqV2/800bp/140104_I686_FCC3R25ACXX_L8_WHAMPI000088-14_2.fq.gz 223 | processed reads 1870000000 224 | processed reads 1880000000 225 | processed reads 1890000000 226 | File: 
../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_1.fq.gz 227 | read file: ../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_1.fq.gz 228 | processed reads 1900000000 229 | File: ../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_2.fq.gz 230 | read file: ../SeqV2/800bp/140220_I191_FCC3R2PACXX_L2_WHAMPI000088-14_2.fq.gz 231 | processed reads 1910000000 232 | Load All files done! 233 | Please check the peek position carefully 234 | -------------------------------------------------------------------------------- /05.synteny_analysis/03.analysis.pl: --------------------------------------------------------------------------------
#! /usr/bin/env perl
# Scan a pairwise MAF file (first "s" line = wisent scaffold, second "s" line =
# cattle chromosome) and report, per wisent scaffold, the breakpoints between
# consecutive alignments: inter-chromosome joins, inversions, translocations,
# insertions and deletions.
#
# Outputs (all prefixed with the script path, $0):
#   $0.breakpoint_info  one line per breakpoint: chr, start, end, type
#   $0.record           per-scaffold listing of raw and merged alignments
#   $0.sta              per-scaffold counts of every breakpoint type
#   $0.simple_report    genome-wide totals and per-base / per-Mb rates
use strict;
use warnings;

my $maf = "contig2nc.maf.swap.maf";
# Scaffolds shorter than this are ignored (0 keeps everything).
my $length_of_scaffold_plotted = 0;

my %link;          # wisent scaffold -> wisent start -> alignment record
my %wisent_len;    # wisent scaffold -> scaffold length

# Use readline() on a lexical handle and fail loudly if the MAF is missing.
open(my $maf_fh, '<', $maf) or die "Cannot open $maf for reading: $!\n";
while (defined(my $line = readline($maf_fh))) {
    chomp $line;
    next if $line =~ /^#/;
    # Only an "a" line opens an alignment block; anything else is skipped
    # instead of being handed to read_maf as an empty alignment.
    next unless $line =~ /^a/;

    # Collect the block up to the blank line that terminates it.
    my @alignment = ($line);
    while (defined(my $member = readline($maf_fh))) {
        chomp $member;
        last if $member =~ /^\s*$/;
        push @alignment, $member;
    }

    # A usable pairwise block needs the "a" line followed by two "s" lines.
    next unless @alignment >= 3
        && $alignment[1] =~ /^s\s/
        && $alignment[2] =~ /^s\s/;

    my ($wisent_chr, $wisent_start, $wisent_end, $wisent_strand, $wisent_chr_len,
        $cattle_chr, $cattle_start, $cattle_end, $cattle_strand, $cattle_chr_len)
        = read_maf(@alignment);
    next if $wisent_chr_len < $length_of_scaffold_plotted;

    # Cattle chromosomes are keyed without the "chr" prefix; X and Y become 30 and 31.
    $cattle_chr =~ s/chr//;
    if ($cattle_chr eq "X") {
        $cattle_chr = 30;
    }
    elsif ($cattle_chr eq "Y") {
        $cattle_chr = 31;
    }

    $wisent_len{$wisent_chr} = $wisent_chr_len;
    $link{$wisent_chr}{$wisent_start} = {
        cattle_chr    => $cattle_chr,
        cattle_start  => $cattle_start,
        cattle_end    => $cattle_end,
        cattle_strand => $cattle_strand,
        wisent_end    => $wisent_end,
        wisent_strand => $wisent_strand,
    };
}
close $maf_fh;

# Genome-wide summary of breakpoints: [0] total scaffold length, [1..5] counts
# per type, in the same order as the columns of the .sta file.
my @simple_report = (0) x 6;

open(my $bp_fh,  '>', "$0.breakpoint_info") or die "Cannot write $0.breakpoint_info: $!\n";
open(my $rec_fh, '>', "$0.record")          or die "Cannot write $0.record: $!\n";
open(my $sta_fh, '>', "$0.sta")             or die "Cannot write $0.sta: $!\n";
print $bp_fh  "chr\tstart\tend\ttype\n";
print $sta_fh "chr\tlen\tinter_chromosome\tinversion\ttranslocation\tinsertion\tdeletion\n";

foreach my $wisent_chr (sort keys %link) {
    my $aln_of         = $link{$wisent_chr};
    my @starts         = sort { $a <=> $b } keys %{$aln_of};
    my $wisent_chr_len = $wisent_len{$wisent_chr};

    print $rec_fh "\n\n**************************START**************************\n";
    print $rec_fh "$wisent_chr\t$wisent_chr_len\n";

    # First pass: dump every raw alignment of this scaffold, ordered by wisent start.
    foreach my $wisent_start (@starts) {
        my $aln = $aln_of->{$wisent_start};
        print $rec_fh "$wisent_start\t$aln->{wisent_end}\t$aln->{wisent_strand}\t<->\t$aln->{cattle_chr}\t$aln->{cattle_start}\t$aln->{cattle_end}\t$aln->{cattle_strand}\n";
    }
    print $rec_fh "\n";

    # Second pass: merge collinear neighbours and report a breakpoint wherever
    # collinearity with the running merged block is broken.
    my %sv_type;
    my ($cattle_chr_pre, $cattle_start_pre, $cattle_end_pre, $cattle_strand_pre,
        $wisent_start_pre, $wisent_end_pre, $wisent_strand_pre);

    foreach my $wisent_start (@starts) {
        my $aln = $aln_of->{$wisent_start};
        my ($cattle_chr, $cattle_start, $cattle_end, $cattle_strand)
            = @{$aln}{qw(cattle_chr cattle_start cattle_end cattle_strand)};
        my ($wisent_end, $wisent_strand) = @{$aln}{qw(wisent_end wisent_strand)};

        # Normalise so the wisent side is always "+"; flip the cattle strand to match.
        if ($wisent_strand eq "-") {
            $cattle_strand = ($cattle_strand eq "+") ? "-" : "+";
            $wisent_strand = "+";
        }

        # The first alignment of a scaffold only seeds the running block.
        if (!defined $cattle_chr_pre) {
            ($cattle_chr_pre, $cattle_start_pre, $cattle_end_pre, $cattle_strand_pre)
                = ($cattle_chr, $cattle_start, $cattle_end, $cattle_strand);
            ($wisent_start_pre, $wisent_end_pre, $wisent_strand_pre)
                = ($wisent_start, $wisent_end, $wisent_strand);
            next;
        }

        # $flag records whether this alignment continues in the same direction
        # as the previous one (1) or not (0).
        my $flag       = 0;
        my $wisent_dis = $wisent_start - $wisent_end_pre;
        my $cattle_dis = 0;
        if ($cattle_strand_pre eq $cattle_strand && $cattle_strand eq "+") {
            if ($cattle_start > $cattle_end_pre) {
                $flag       = 1;
                $cattle_dis = $cattle_start - $cattle_end_pre;
            }
        }
        elsif ($cattle_strand_pre eq $cattle_strand && $cattle_strand eq "-") {
            if ($cattle_end < $cattle_start_pre) {
                $flag       = 1;
                $cattle_dis = $cattle_start_pre - $cattle_end;
            }
        }
        elsif ($cattle_chr eq $cattle_chr_pre) {
            $cattle_dis = $cattle_start - $cattle_end_pre;
            if ($cattle_dis < 0) {
                $cattle_dis = $cattle_start_pre - $cattle_end;
            }
        }

        # $flag_dis tells whether the gap between the two alignments differs
        # markedly between the two genomes, i.e. whether an insertion (1) or a
        # deletion (-1) is implied: at least 1000 bp and at least two-fold.
        my $flag_dis = 0;
        if ($wisent_dis > 0 && $cattle_dis > 0) {
            my $diff = $cattle_dis - $wisent_dis;
            my $fold = $cattle_dis / $wisent_dis;
            $fold = $wisent_dis / $cattle_dis if $fold < 1;
            if (abs($diff) >= 1000 && $fold >= 2) {
                $flag_dis = ($diff > 0) ? -1 : 1;
            }
        }

        if (   $cattle_chr ne $cattle_chr_pre
            || $cattle_strand_pre ne $cattle_strand
            || $flag == 0
            || $flag_dis != 0)
        {
            # Breakpoint: classify it, emit the finished block, start a new one.
            my $sv_type = "NA";
            if ($cattle_chr ne $cattle_chr_pre) {
                $sv_type = "inter_chromosome";
            }
            elsif ($cattle_strand_pre ne $cattle_strand) {
                $sv_type = ($flag_dis == 0) ? "inversion" : "translocation";
            }
            elsif ($flag == 0) {
                $sv_type = "translocation";
            }
            elsif ($flag_dis == 1) {
                $sv_type = "insertion";
            }
            elsif ($flag_dis == -1) {
                $sv_type = "deletion";
            }
            $sv_type{$sv_type}++;

            print $rec_fh "$wisent_start_pre\t$wisent_end_pre\t$wisent_strand_pre\t<=>\t$cattle_chr_pre\t$cattle_start_pre\t$cattle_end_pre\t$cattle_strand_pre\tsv:$sv_type\n";
            print $bp_fh "$wisent_chr\t$wisent_end_pre\t$wisent_start\t$sv_type\n";

            ($cattle_chr_pre, $cattle_start_pre, $cattle_end_pre, $cattle_strand_pre)
                = ($cattle_chr, $cattle_start, $cattle_end, $cattle_strand);
            ($wisent_start_pre, $wisent_end_pre, $wisent_strand_pre)
                = ($wisent_start, $wisent_end, $wisent_strand);
        }
        else {
            # Collinear: extend the running block to cover both alignments.
            my @cattle_pos = sort { $a <=> $b } ($cattle_start, $cattle_start_pre, $cattle_end, $cattle_end_pre);
            $cattle_start_pre = $cattle_pos[0];
            $cattle_end_pre   = $cattle_pos[-1];

            my @wisent_pos = sort { $a <=> $b } ($wisent_start, $wisent_start_pre, $wisent_end, $wisent_end_pre);
            $wisent_start_pre = $wisent_pos[0];
            $wisent_end_pre   = $wisent_pos[-1];
        }
    }

    # Flush the last merged block of the scaffold.
    print $rec_fh "$wisent_start_pre\t$wisent_end_pre\t$wisent_strand_pre\t<=>\t$cattle_chr_pre\t$cattle_start_pre\t$cattle_end_pre\t$cattle_strand_pre\n";
    print $rec_fh "\n***************************END***************************\n\n";

    # Counts in .sta column order; a type never seen counts as 0.
    my @counts = map { exists $sv_type{$_} ? $sv_type{$_} : 0 }
        qw(inter_chromosome inversion translocation insertion deletion);
    print $sta_fh join("\t", $wisent_chr, $wisent_chr_len, @counts), "\n";

    $simple_report[0] += $wisent_chr_len;
    $simple_report[$_ + 1] += $counts[$_] for 0 .. $#counts;
}
close $rec_fh;
close $sta_fh;
close $bp_fh;

open(my $report_fh, '>', "$0.simple_report") or die "Cannot write $0.simple_report: $!\n";
my @head = ("len", "inter_chromosome", "inversion", "translocation", "insertion", "deletion");
print $report_fh "$head[0]\t$simple_report[0]\n";
for my $i (1 .. $#head) {
    # Guard the rate against an empty input (total length 0).
    my $percent = $simple_report[0] ? $simple_report[$i] / $simple_report[0] : 0;
    my $value   = $percent * 1e6;    # events per Mb of scaffold
    print $report_fh "$head[$i]\t$percent\t$value\n";
}
close $report_fh;

# read_maf(@alignment)
#   @alignment: lines of one MAF block; [0] is the "a" line, [1] and [2] are
#   the "s" lines of species A and species B.
#   Returns (chrA, startA, endA, strandA, chr_lenA,
#            chrB, startB, endB, strandB, chr_lenB)
#   with start <= end given as 1-based forward-strand positions.
sub read_maf {
    my @alignment = @_;
    my @speciesA  = split /\s+/, $alignment[1];
    my @speciesB  = split /\s+/, $alignment[2];
    return (_forward_interval(@speciesA), _forward_interval(@speciesB));
}

# Convert the fields of one "s" line (s, name, start, size, strand, srcSize, ...)
# into (name, start, end, strand, srcSize) on the forward strand, 1-based.
sub _forward_interval {
    my (undef, $chr, $start, $len, $strand, $chr_len) = @_;
    my $end;
    if ($strand eq "+") {
        $start = $start + 1;
        $end   = $start + $len - 1;
    }
    else {
        # Minus-strand starts count from the other end of the sequence:
        # mirror the interval, then order it so that start <= end.
        my $high = $chr_len - $start;
        my $low  = $high - $len + 1;
        ($start, $end) = ($low, $high);
    }
    return ($chr, $start, $end, $strand, $chr_len);
}

-------------------------------------------------------------------------------- /02.scaffold/runSAM2TAB.sh: -------------------------------------------------------------------------------- 1 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.170bp_49.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.170bp_49.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.1.tab 2 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.200bp_29.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.200bp_29.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.2.tab 3 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.200bp_30.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.200bp_30.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.3.tab 4 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.500bp_41.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.500bp_41.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.4.tab 5 | bowtie2
-x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.500bp_42.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.500bp_42.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.5.tab 6 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.500bp_43.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.500bp_43.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.6.tab 7 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_44.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_44.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.7.tab 8 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_45.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_45.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.8.tab 9 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_46.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_46.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.9.tab 10 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_47.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_47.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.10.tab 11 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_48.1.fq.gz -2 
/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.800bp_48.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.11.tab 12 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_18.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_18.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.12.tab 13 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_19.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_19.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.13.tab 14 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_20.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_20.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.14.tab 15 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_21.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_21.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.15.tab 16 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_22.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_22.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.16.tab 17 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_23.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_23.2.fq.gz 
--very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.17.tab 18 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_24.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_24.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.18.tab 19 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_25.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_25.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.19.tab 20 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_26.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_26.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.20.tab 21 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_27.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_27.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.21.tab 22 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_28.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.2000bp_28.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.22.tab 23 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_31.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_31.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.23.tab 24 | bowtie2 -x pre.fa -1 
/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_32.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_32.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.24.tab 25 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_33.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_33.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.25.tab 26 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_34.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_34.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.26.tab 27 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_35.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_35.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.27.tab 28 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_36.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_36.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.28.tab 29 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_37.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_37.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.29.tab 30 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_38.1.fq.gz 
-2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_38.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.30.tab 31 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_39.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_39.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.31.tab 32 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_40.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.5000bp_40.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.32.tab 33 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_1.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_1.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.33.tab 34 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_2.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_2.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.34.tab 35 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_3.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_3.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.35.tab 36 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_4.1.fq.gz -2 
/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_4.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.36.tab 37 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_5.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_5.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.37.tab 38 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_6.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_6.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.38.tab 39 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_7.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_7.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.39.tab 40 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_8.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.10000bp_8.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.40.tab 41 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_10.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_10.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.41.tab 42 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_11.1.fq.gz -2 
/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_11.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.42.tab 43 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_12.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_12.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.43.tab 44 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_13.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_13.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.44.tab 45 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_14.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_14.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.45.tab 46 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_15.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_15.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.46.tab 47 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_16.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_16.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.47.tab 48 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_17.1.fq.gz -2 
/home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_17.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.48.tab 49 | bowtie2 -x pre.fa -1 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_9.1.fq.gz -2 /home/share/user/user101/projects/Wisent2014/1.assembly/06.finalAssemble/clean_reads//lib.20000bp_9.2.fq.gz --very-sensitive -p 32 | ./filter.pl | ./sam2tab.pl > tab/file.49.tab 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. 
"Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. 
For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 
115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. 
You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. 
Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. 
Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. 
Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. 
This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. 
Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 
374 | --------------------------------------------------------------------------------