├── pandaseq.remove2bases.frombarcode.pl
├── README.md
├── template.pl
├── download.ncbi.gb.using.gi.list.pl
├── download.ncbi.using.acession.list.pl
├── rename.fasta.pl
├── extract.species.names.from.header.pl
├── splitpe.fasta.pl
├── interleave.reads.pl
├── extract.rast.annotation.using.list.pl
├── calc.gc.pl
├── splitpe.fastq.pl
├── extract.subset.from.sam.using.list.pl
├── calc.contigs.in.scaffolds.pl
├── gff.to.table.pl
├── trim.fastq.length.pl
├── mp.fix.FR.single.to.RF.pl
├── esom.classified.to.contigs.pl
├── pb.to.bp.pl
├── sfr.mgrast.extract.using.list.pl
├── gff.to.table.v2.pl
├── sfr.ec.name.to.ec.number.pl
├── sfr.ec.format.db.pl
├── rnaseq.to.stranded.pl
├── clc.variant.to.consensus.pl
├── cut.fasta.in.smaller.pieces.pl
├── split.scaffolds.to.contigs.pl
├── trim.length.singleline.pl
├── mannotator.totab.add.pl
├── find.lost.indexes.pl
├── extract.using.header.list.pl
├── extract.kmer.bad.bins.pl
├── calc.gc.distribution.pl
├── split.assembly.bins.pl
├── multi.sam.to.coverage.profile.pl
├── summarize.bin.stats.pl
├── calc.dnds.pl
├── cytoscape.extract.sub.graph.using.list.pl
├── extract.fastq.pe.reads.using.single.pl
├── extract.fasta.pe.reads.using.single.pl
├── extract.read2.using.read1.pl
└── cytoscape.otu.cor.matrix.pl

/pandaseq.remove2bases.frombarcode.pl:
--------------------------------------------------------------------------------
#!/usr/bin/perl

use warnings;
use strict;
#use diagnostics;

# Copies a fastq file and removes the last two characters of every header
# line (the first line of each four-line record). The sequence, separator
# and quality lines are written back unchanged.

####################Read files#########################
unless (@ARGV == 2) {die "Usage:perl scriptname readfile outfile\n";}

my $inputfile1 = shift;
my $outfile1 = shift;

my $readcount = 0;

open(my $in_fh, '<', $inputfile1) or die("Cannot open file\n");
open(my $out_fh, '>', $outfile1) or die("Cannot create file\n");

print "Stripping 2 nucleotides of the barcode..\n";
while ( my $line = <$in_fh> ) {
    chomp $line;

    # $. is the 1-based number of the line just read, so every line with
    # remainder 1 (1, 5, 9, ...) is the header of a four-line record.
    if ($. % 4 == 1) {
        $line = substr($line, 0, length($line) - 2);
        $readcount++;

        # Progress report once per million corrected headers.
        if ($readcount % 1000000 == 0) {
            print "$readcount headers corrected\n";
        }
    }
    print {$out_fh} "$line\n";
}

print "done..\n";

close $in_fh;
# Buffered write errors only surface when the handle is closed.
close $out_fh or die("Cannot close file $outfile1: $!\n");

exit;
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
calc.dnds.pl : Calculates pairwise dn/ds ratios of all input sequences. Note: the sequences have to be aligned and in fasta format. In addition it is assumed they start in frame.

calc.gc.pl : Calculates the gc content of sequences in fasta format.

calc.kmerfreq.pl : Calculates kmer frequency in a set of input fasta sequences. The kmer length and minimum length of sequences can be changed.

extract.read2.using.read1.pl : Extract read 2 using read 1 from two fasta files. Useful when having a subset of read 1 and wanting the pairs extracted from the read 2 file.

mannotator.totab.add.pl : Allows you to add more data points to a tab formatted mannotator file. The extra data must be formatted as: contigid "tab" DATA1 "tab" DATA2.. Useful to add e.g. coverage or binning information to all contigs.

mannotator.totab.pl : Converts the mannotator gff file into tab format (see the mannotator project). Also splits columns with multiple entries and arranges all unique entries in a column each. Identifies "=" as definition field and ; in the ontology field.

pandaseq.remove2bases.frombarcode.pl : Removes 2 bases from the barcode in fastq files.
Used to circumvent the limitation in pandaseq that assumes the header to be 6 bp long. 14 | 15 | split.amphora.alignments.bin.pl : Processes the .aln files output by Amphora2. The output needs to be fasta formatted and without reference sequences. The script creates stats on the number of genes and creates combined aligned files that can quickly be scanned through. In addition the program can split the results into different files based on an input bin file. The bin file needs to be in the format contigname "tab" bin. e.g. ">10 bin0". The header in the input alignments needs to be in the format of e.g >10_whatever. The first "_" character is used to split the header of the input sequences. By using the -DNA feature of Amphora or FragGeneScan as ORF predictor the header should be fine. 16 | 17 | split.assembly.bins.pl : Splits a fastafile into multiple fastafiles based on an input bin file (format: fasta.header.name "tab" bin). 18 | 19 | splitpe.fasta.pl : Splits a combined paired end fasta file into two separate fasta files. 20 | 21 | splitpe.fastq.pl : Splits a combined paired end fastq file into two separate fastq files. 22 | 23 | template.pl : Nice perl template supplied by Mike Immelfort. 24 | -------------------------------------------------------------------------------- /template.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # scriptname 5 | # 6 | # Short description 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Unbuffer both standard streams so progress output appears immediately.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt", 'inputfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');


######################################################################
# CODE HERE
######################################################################


# Three-argument open with lexical handles: the file name can never be
# misread as an open mode, and the handles are scoped to this file.
open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

while ( my $line = <$in_fh> ) {
    chomp $line;

}

close $in_fh;
# Buffered write errors only surface when the handle is closed.
close $out_fh or die("Cannot close file: $outputfile\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces the process with pod2usage (usage text from this file's POD)
    # when parsing fails, when no option was given, or when -help is set.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    my $parsed_ok = GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing
    # spaces or shell metacharacters is passed through intact.
    #
    if (!$parsed_ok or !%options or $options{'help'}) {
        exec('pod2usage', $0) or die "Cannot run pod2usage: $!\n";
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    template.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION



=head1 SYNOPSIS

script.pl -i <inputfile> [-o <outputfile>] [-h]

 [-help -h] Displays this basic usage information
 [-inputfile -i] Inputfile.
 [-outputfile -o] Outputfile.
141 | 142 | =cut -------------------------------------------------------------------------------- /download.ncbi.gb.using.gi.list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # download.ncbi.gb.using.gi.list.pl 5 | # 6 | # Downloads a GenBank file for every GI number listed in the input file. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;
use Bio::DB::EUtilities;

#locally-written modules
BEGIN {
    # Unbuffer both standard streams so progress output appears immediately.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile = overrideDefault("inputfile.txt", 'inputfile');

######################################################################
# CODE HERE
######################################################################


open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");

# One GI number per input line; each record is saved as "<GI>.gb" in the
# current directory.
while ( my $line = <$in_fh> ) {
    chomp $line;

    # Trim surrounding whitespace (including a stray carriage return) and
    # skip blank lines, which would otherwise be sent as an empty id.
    my $id = $line;
    $id =~ s/^\s+|\s+$//g;
    next if $id eq '';

    print "Downloading GI: $id\n";
    my $factory = Bio::DB::EUtilities->new(
        -eutil   => 'efetch',
        -db      => 'nucleotide',
        -id      => $id,
        -email   => 'mymail@foo.bar',
        -rettype => 'gb'
    );
    my $outname = "$id.gb";
    $factory->get_Response(-file => $outname);
}

close $in_fh;

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces the process with pod2usage (usage text from this file's POD)
    # when parsing fails, when no option was given, or when -help is set.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    my $parsed_ok = GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing
    # spaces or shell metacharacters is passed through intact.
    #
    if (!$parsed_ok or !%options or $options{'help'}) {
        exec('pod2usage', $0) or die "Cannot run pod2usage: $!\n";
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    download.ncbi.gb.using.gi.list.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Reads one GI number per line from the input file and saves the
   corresponding GenBank record as <GI>.gb in the current directory.

=head1 SYNOPSIS

script.pl -i <inputfile> [-h]

 [-help -h] Displays this basic usage information
 [-inputfile -i] Inputfile.
146 | 147 | =cut -------------------------------------------------------------------------------- /download.ncbi.using.acession.list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # download.ncbi.using.acession.list.pl 5 | # 6 | # Downloads a fasta file for every accession number listed in the input file. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;
use Bio::Perl;
use Bio::DB::GenBank;


#locally-written modules
BEGIN {
    # Unbuffer both standard streams so progress output appears immediately.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile = overrideDefault("inputfile.txt", 'inputfile');

######################################################################
# CODE HERE
######################################################################


open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");

# One database handle serves every request; it does not depend on the id.
my $gb = Bio::DB::GenBank->new;

# One accession per input line; each sequence is saved as "<accession>.fa"
# in the current directory.
while ( my $line = <$in_fh> ) {
    chomp $line;

    # Trim surrounding whitespace (including a stray carriage return) and
    # skip blank lines, which would otherwise be requested as an empty id.
    my $id = $line;
    $id =~ s/^\s+|\s+$//g;
    next if $id eq '';

    print "Downloading: $id\n";

    my $seq = $gb->get_Stream_by_acc($id);
    while( my $seq_elt = $seq->next_seq ) {
#       write_sequence(">$id.gb", 'genbank', $seq_elt);
        write_sequence(">$id.fa", 'fasta', $seq_elt);
    }
}

close $in_fh;

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces the process with pod2usage (usage text from this file's POD)
    # when parsing fails, when no option was given, or when -help is set.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    my $parsed_ok = GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing
    # spaces or shell metacharacters is passed through intact.
    #
    if (!$parsed_ok or !%options or $options{'help'}) {
        exec('pod2usage', $0) or die "Cannot run pod2usage: $!\n";
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    download.ncbi.using.acession.list.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Reads one accession number per line from the input file and saves the
   corresponding sequence as <accession>.fa in the current directory.

=head1 SYNOPSIS

script.pl -i <inputfile> [-h]

 [-help -h] Displays this basic usage information
 [-inputfile -i] Inputfile.
146 | 147 | =cut 148 | -------------------------------------------------------------------------------- /rename.fasta.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # rename.fasta.pl 5 | # 6 | # Renames every fasta header to the new name followed by a running number. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Unbuffer both standard streams so progress output appears immediately.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt", 'inputfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');
my $name       = overrideDefault("new.name", 'name');

my $count = 0;

######################################################################
# CODE HERE
######################################################################


open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

while ( my $line = <$in_fh> ) {
    chomp $line;

    # A fasta header starts with '>'. Anchoring the match keeps a '>' that
    # appears elsewhere in a line from being mistaken for a header.
    if ($line =~ m/^>/) {
        $count++;
        print {$out_fh} ">$name.$count\n";
    }
    else {
        print {$out_fh} "$line\n";
    }
}

close $in_fh;
# Buffered write errors only surface when the handle is closed.
close $out_fh or die("Cannot close file: $outputfile\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces the process with pod2usage (usage text from this file's POD)
    # when parsing fails, when no option was given, or when -help is set.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s", "name|n:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    my $parsed_ok = GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing
    # spaces or shell metacharacters is passed through intact.
    #
    if (!$parsed_ok or !%options or $options{'help'}) {
        exec('pod2usage', $0) or die "Cannot run pod2usage: $!\n";
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    rename.fasta.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
139 | 140 | =head1 DESCRIPTION 141 | 142 | 143 | 144 | =head1 SYNOPSIS 145 | 146 | script.pl -i <inputfile> -o <outputfile> -n <name> [-h] 147 | 148 | [-help -h] Displays this basic usage information 149 | [-inputfile -i] Inputfile 150 | [-outputfile -o] Outputfile 151 | [-name -n] New name 152 | 153 | =cut -------------------------------------------------------------------------------- /extract.species.names.from.header.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # extract.species.names.from.header.pl 5 | # 6 | # Counts the names found in square brackets in fasta headers. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Unbuffer both standard streams so progress output appears immediately.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt", 'inputfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

# Number of headers seen for each bracketed name.
my %header;


######################################################################
# CODE HERE
######################################################################


open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

while ( my $line = <$in_fh> ) {
    chomp $line;

    # Only fasta header lines carry a name.
    next unless $line =~ m/^>/;

    # The name is the text after the LAST '[' up to the following ']'.
    # Using rindex instead of split keeps headers that end in '[' or
    # contain '[]' from yielding a wrong or undefined field.
    my $open_pos = rindex($line, '[');
    next if $open_pos < 0;

    my $name = substr($line, $open_pos + 1);
    $name =~ s/\].*\z//s;

    $header{$name}++;
}

# Sorted keys give the same output order on every run.
foreach my $key (sort keys %header) {
    print {$out_fh} "$key\t$header{$key}\n";
}

close $in_fh;
# Buffered write errors only surface when the handle is closed.
close $out_fh or die("Cannot close file: $outputfile\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces the process with pod2usage (usage text from this file's POD)
    # when parsing fails, when no option was given, or when -help is set.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    my $parsed_ok = GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing
    # spaces or shell metacharacters is passed through intact.
    #
    if (!$parsed_ok or !%options or $options{'help'}) {
        exec('pod2usage', $0) or die "Cannot run pod2usage: $!\n";
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    extract.species.names.from.header.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Counts, for every fasta header containing square brackets, the text
   inside the last bracket pair and writes "name <tab> count" lines.

=head1 SYNOPSIS

script.pl -i <inputfile> [-o <outputfile>] [-h]

 [-help -h] Displays this basic usage information
 [-inputfile -i] Inputfile.
 [-outputfile -o] Outputfile.
159 | 160 | =cut -------------------------------------------------------------------------------- /splitpe.fasta.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # splitpe.fasta.pl 5 | # 6 | # Splits a combined paired end fasta file. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Unbuffer both standard streams so progress output appears immediately.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile = overrideDefault("inputfile.fa", 'inputfile');

my $prevheader;          # header of the record currently being collected
my $seq = "";            # its sequence, joined onto a single line
my $count = 0;           # number of headers seen so far

######################################################################
# CODE HERE
######################################################################

open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $p1_fh, '>', "p1.fa") or die("Cannot create file: p1.fa\n");
open(my $p2_fh, '>', "p2.fa") or die("Cannot create file: p2.fa\n");

# Writes the collected record: odd records (1st, 3rd, ...) are read 1 and
# go to p1.fa, even records are read 2 and go to p2.fa.
my $write_record = sub {
    my $out_fh = ($count % 2) ? $p1_fh : $p2_fh;
    print {$out_fh} "$prevheader\n";
    print {$out_fh} "$seq\n";
};

while (my $line = <$in_fh>) {
    chomp $line;
    if ($line =~ m/^>/) {
        # A new header ends the previous record, if there is one.
        $write_record->() if defined $prevheader;
        $count++;
        $seq = "";
        $prevheader = $line;
    }
    elsif (defined $prevheader) {
        # Lines before the first header belong to no record and are skipped.
        $seq = $seq.$line;
    }
}

# Flush the last record to the file its position selects. Nothing is
# written for an empty input.
$write_record->() if defined $prevheader;

if ($count % 2) {
    warn "Odd number of sequences in $inputfile: the last read has no mate\n";
}

close $in_fh;
# Buffered write errors only surface when the handle is closed.
close $p1_fh or die("Cannot close file: p1.fa\n");
close $p2_fh or die("Cannot close file: p2.fa\n");

exit;

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces the process with pod2usage (usage text from this file's POD)
    # when parsing fails, when no option was given, or when -help is set.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    my $parsed_ok = GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing
    # spaces or shell metacharacters is passed through intact.
    #
    if (!$parsed_ok or !%options or $options{'help'}) {
        exec('pod2usage', $0) or die "Cannot run pod2usage: $!\n";
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    splitpe.fasta.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Splits a combined paired end fastafile.
159 | 160 | =head1 SYNOPSIS 161 | 162 | script.pl -i <inputfile> [-h] 163 | 164 | [-help -h] Displays this basic usage information 165 | [-inputfile -i] Input combined paired end fasta file. 166 | 167 | =cut -------------------------------------------------------------------------------- /interleave.reads.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # interleave.reads.pl 5 | # 6 | # Interleaves a forward and a reverse read file, four lines at a time. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # make STDERR and STDOUT unbuffered
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile1 = overrideDefault("inputfile1.txt", 'inputfile1');
my $inputfile2 = overrideDefault("inputfile2.txt", 'inputfile2');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

######################################################################
# CODE HERE
######################################################################

# Reads both files in lockstep and writes, for every group of four
# lines, the four lines of file 1 followed by the four lines of file 2.

open(my $in1, '<', $inputfile1) or die("Cannot read file: $inputfile1\n");
open(my $in2, '<', $inputfile2) or die("Cannot read file: $inputfile2\n");
open(my $out, '>', $outputfile) or die("Cannot create file: $outputfile\n");

my @forward;
my @reverse;

while (1) {
    my $line1 = <$in1>;
    my $line2 = <$in2>;
    last if (!defined $line1 and !defined $line2);
    if (!defined $line1 or !defined $line2) {
        # as before, reading stops at the end of the shorter file,
        # but the mismatch is now reported
        warn("Input files differ in length: remaining lines are ignored\n");
        last;
    }
    chomp($line1);
    chomp($line2);
    push(@forward, $line1);
    push(@reverse, $line2);
    if (@forward == 4) {
        print {$out} join("\n", @forward), "\n";
        print {$out} join("\n", @reverse), "\n";
        @forward = ();
        @reverse = ();
    }
}

if (@forward) {
    warn("Incomplete trailing record (" . scalar(@forward) . " of 4 lines) was not written\n");
}

close($in1);
close($in2);
close($out) or die("Cannot write file: $outputfile ($!)\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces this process with pod2usage when no option was given or
    # when -help was requested.
    #
    my @standard_options = ( "help|h+", "inputfile1|f:s","inputfile2|r:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied, or -help is set, print the usage and exit
    #
    if (keys(%options) == 0 or $options{'help'}) {
        # list form: $0 is handed over as one argument, never parsed by a shell
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name if it was supplied,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    interleave.reads.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Interleaves two read files: every four lines of the forward file are
   followed by the corresponding four lines of the reverse file.

=head1 SYNOPSIS

    script.pl -f <inputfile1> -r <inputfile2> -o <outputfile> [-h]

      [-help -h]                     Displays this basic usage information
      [-inputfile1 -f]               Forward read.
158 | [-inputfile2 -r] Reverse read. 159 | [-outputfile -o] Outputfile. 160 | 161 | =cut -------------------------------------------------------------------------------- /extract.rast.annotation.using.list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # extract.rast.annotation.using.list.pl 5 | # 6 | # Given a list of names extracts the matching lines of a tab separated 7 | # RAST annotation file (name = second column up to its first underscore). 8 | # 9 | # Copyright (C) 2012 Mads Albertsen 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # make STDERR and STDOUT unbuffered
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inrast = overrideDefault("inrast.txt", 'inrast');
my $inlist = overrideDefault("inlist.txt", 'inlist');

######################################################################
# CODE HERE
######################################################################

# Writes to <inrast>.sub.txt every line of the rast file whose second
# column, cut at its first underscore, is one of the names in the list
# file. Each written line is prefixed with that name.

open(my $list_fh, '<', $inlist) or die("Cannot read file: $inlist\n");
open(my $rast_fh, '<', $inrast) or die("Cannot read file: $inrast\n");

open(my $out_fh, '>', "$inrast.sub.txt") or die("Cannot create file: $inrast.sub.txt\n");
print {$out_fh} "scaffold\tdb.md5.ref\torf.id\tsimilarity\taln.length\te.value\tfunc.annotation\tfunc.category.id\tfunc.hirac.name\n";

# names to extract, one per line
my %contigs;
while (my $line = <$list_fh>) {
    chomp $line;
    $contigs{$line} = 1;
}
close($list_fh);

while (my $line = <$rast_fh>) {
    chomp $line;
    my @fields = split("\t", $line);
    # lines without a second column cannot name a contig
    next if (!defined $fields[1]);
    my ($contig) = split("_", $fields[1]);
    next if (!defined $contig);
    if (exists($contigs{$contig})) {
        print {$out_fh} "$contig\t$line\n";
    }
}

close($rast_fh);
close($out_fh) or die("Cannot write file: $inrast.sub.txt ($!)\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces this process with pod2usage when no option was given or
    # when -help was requested.
    #
    my @standard_options = ( "help|h+", "inlist|l:s", "inrast|r:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied, or -help is set, print the usage and exit
    #
    if (keys(%options) == 0 or $options{'help'}) {
        # list form: $0 is handed over as one argument, never parsed by a shell
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name if it was supplied,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    extract.rast.annotation.using.list.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Extracts the lines of a tab separated rast annotation file that belong
   to the names given in a list. The result is written to <inrast>.sub.txt.

=head1 SYNOPSIS

    script.pl -l <inlist> -r <inrast> [-h]

      [-help -h]                     Displays this basic usage information
      [-inlist -l]                   List of names to extract.
157 | [-inrast -r] Rast ontology annotation file. 158 | 159 | =cut -------------------------------------------------------------------------------- /calc.gc.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # calc.gc.pl 5 | # 6 | # Calculates gc content in fasta files. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # make STDERR and STDOUT unbuffered
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.fasta", 'inputfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

######################################################################
# CODE HERE
######################################################################

# Writes one "name <tab> gc" line per fasta record, gc being the
# percentage of G and C among the A, C, G and T of the sequence.
# Records are processed one at a time instead of buffering the file.

open(my $in,  '<', $inputfile)  or die("Cannot read file: $inputfile ($!)\n");
open(my $out, '>', $outputfile) or die("Cannot create file: $outputfile ($!)\n");
print {$out} "contig\tgc\n";

my $header;         # header of the record being read; undef before the first one
my $seq = "";

while (my $line = <$in>) {
    chomp $line;
    if ($line =~ m/^>/) {
        printGc($out, $header, $seq) if (defined $header);
        $header = $line;
        $seq    = "";
    }
    elsif (defined $header) {
        # lines in front of the first header belong to no record
        $seq .= $line;
    }
}
printGc($out, $header, $seq) if (defined $header);  # to catch the last sequence

close($in);
close($out) or die("Cannot write file: $outputfile ($!)\n");
exit;

######################################################################
# SUBS
######################################################################
sub printGc {
    #-----
    # Prints the name (header without its '>') and the gc percentage of
    # one record to $fh. Upper and lower case bases are both counted;
    # "NA" is printed when the sequence holds no A, C, G or T, which
    # would otherwise be a division by zero.
    #
    my ($fh, $record_header, $sequence) = @_;
    (my $name = $record_header) =~ s/>//;
    my $gc    = ($sequence =~ tr/GCgc//);
    my $at    = ($sequence =~ tr/ATat//);
    my $total = $gc + $at;
    my $value = $total > 0 ? sprintf("%.2f", $gc / $total * 100) : "NA";
    print {$fh} "$name\t$value\n";
    return;
}

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces this process with pod2usage when no option was given or
    # when -help was requested.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied, or -help is set, print the usage and exit
    #
    if (keys(%options) == 0 or $options{'help'}) {
        # list form: $0 is handed over as one argument, never parsed by a shell
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name if it was supplied,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    calc.gc.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Calculates gc content in fastafiles.
166 | 167 | =head1 SYNOPSIS 168 | 169 | script.pl -i <inputfile> -o <outputfile> [-h] 170 | 171 | [-help -h] Displays this basic usage information 172 | [-inputfile -i] Input fasta file. 173 | [-outputfile -o] Outputfile. 174 | 175 | =cut -------------------------------------------------------------------------------- /splitpe.fastq.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # splitpe.fastq.pl 5 | # 6 | # Splits a combined fastq file. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # make STDERR and STDOUT unbuffered
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile = overrideDefault("inputfile.txt", 'inputfile');

######################################################################
# CODE HERE
######################################################################

# The input is read in groups of eight lines: lines 1-4 of each group
# are written to p1.fastq and lines 5-8 to p2.fastq.

open(my $in,   '<', $inputfile) or die("Cannot open $inputfile\n");
open(my $out1, '>', 'p1.fastq') or die("Cannot create p1.fastq\n");
open(my $out2, '>', 'p2.fastq') or die("Cannot create p2.fastq\n");

my $readcount = 0;      # number of pairs started so far
my $linecount = 0;      # number of input lines consumed so far

print "Splitting reads into 2 files..\n";
while (my $line = <$in>) {
    chomp $line;
    my $offset = $linecount % 8;    # position of this line inside its pair
    if ($offset == 0) {
        $readcount++;
        # progress report every million pairs
        print "$readcount PE reads split\n" if ($readcount % 1000000 == 0);
    }
    print { $offset < 4 ? $out1 : $out2 } "$line\n";
    $linecount++;
}

if ($linecount % 8) {
    warn("Number of lines ($linecount) is not a multiple of 8: the last pair is incomplete\n");
}

print "done..\n";

close($in);
close($out1) or die("Cannot write p1.fastq ($!)\n");
close($out2) or die("Cannot write p2.fastq ($!)\n");

exit;

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces this process with pod2usage when no option was given or
    # when -help was requested.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied, or -help is set, print the usage and exit
    #
    if (keys(%options) == 0 or $options{'help'}) {
        # list form: $0 is handed over as one argument, never parsed by a shell
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name if it was supplied,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    splitpe.fastq.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Splits a merged fastq file.
172 | 173 | =head1 SYNOPSIS 174 | 175 | script.pl -i <inputfile> [-h] 176 | 177 | [-help -h] Displays this basic usage information 178 | [-inputfile -i] Input combined pe fastq file. 179 | 180 | =cut -------------------------------------------------------------------------------- /extract.subset.from.sam.using.list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # extract.subset.from.sam.using.list.pl 5 | # 6 | # Extracts a subset of sequences from a SAM file using a list of references 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # make STDERR and STDOUT unbuffered
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inlist     = overrideDefault("inlist.txt", 'inlist');
my $insam      = overrideDefault("insam.txt", 'insam');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

######################################################################
# CODE HERE
######################################################################

# Copies to the output file: the first line of the sam file, every @SQ
# line whose SN name is in the list, and every other line whose third
# column is in the list. All remaining lines are dropped.

open(my $list_fh, '<', $inlist) or die("Cannot read file: $inlist\n");

# reference names to extract, one per line
my %extract;
while (my $line = <$list_fh>) {
    chomp $line;
    $extract{$line} = 1;
}
close($list_fh);

open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");
open(my $sam_fh, '<', $insam)      or die("Cannot read file: $insam\n");

my $count = 0;

while (my $line = <$sam_fh>) {
    chomp $line;
    $count++;
    if ($count == 1) {
        # the first line is always kept
        print {$out_fh} "$line\n";
    }
    elsif ($line =~ m/^\@SQ\t/) {
        # Anchored: "@SQ" may also occur inside a quality string of an
        # alignment line. The name is everything after "SN:" up to the
        # next tab, so names containing ':' are kept whole.
        my ($name) = $line =~ m/\tSN:([^\t]+)/;
        if (defined $name and exists($extract{$name})) {
            print {$out_fh} "$line\n";
        }
    }
    else {
        my @fields = split("\t", $line);
        # lines with fewer than three columns name no reference
        if (defined $fields[2] and exists($extract{$fields[2]})) {
            print {$out_fh} "$line\n";
        }
    }
}

close($sam_fh);
close($out_fh) or die("Cannot write file: $outputfile ($!)\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces this process with pod2usage when no option was given or
    # when -help was requested.
    #
    my @standard_options = ( "help|h+", "insam|s:s", "inlist|l:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied, or -help is set, print the usage and exit
    #
    if (keys(%options) == 0 or $options{'help'}) {
        # list form: $0 is handed over as one argument, never parsed by a shell
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name if it was supplied,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    extract.subset.from.sam.using.list.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Extracts a subset of sequences from a SAM file using a list of references.

=head1 SYNOPSIS

    script.pl -s <insam> -l <inlist> -o <outputfile> [-h]

      [-help -h]                     Displays this basic usage information
      [-insam -s]                    Input sam file.
171 | [-inlist -l] Input list of reference names to extract. 172 | [-outputfile -o] Outputfile. 173 | 174 | =cut -------------------------------------------------------------------------------- /calc.contigs.in.scaffolds.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # calc.contigs.in.scaffolds.pl 5 | # 6 | # Calculates the number of contigs in a given scaffold and the number of N's 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # make STDERR and STDOUT unbuffered
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.fasta", 'inputfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

######################################################################
# CODE HERE
######################################################################

# Writes one "header <tab> contigs <tab> Ns" line per fasta record.

open(my $in,  '<', $inputfile)  or die("Cannot read file: $inputfile ($!)\n");
open(my $out, '>', $outputfile) or die("Cannot create file: $outputfile ($!)\n");

my $header;         # header of the record being read; undef before the first one
my $seq = "";

while (my $line = <$in>) {
    chomp $line;
    if ($line =~ m/^>/) {
        printScaffoldStats($out, $header, $seq) if (defined $header);
        $header = $line;
        $seq    = "";
    }
    elsif (defined $header) {
        # lines in front of the first header belong to no record
        $seq .= $line;
    }
}
# the last record has no following header to trigger its output
printScaffoldStats($out, $header, $seq) if (defined $header);

close($in);
close($out) or die("Cannot write file: $outputfile ($!)\n");
exit;

######################################################################
# SUBS
######################################################################
sub printScaffoldStats {
    #-----
    # Prints the header, the contig count and the number of N's of one
    # record to $fh. The contig count is the number of runs of
    # consecutive upper case N's plus one.
    # NOTE(review): a scaffold starting or ending with N is therefore
    # counted one contig too high - confirm whether that is intended.
    #
    my ($fh, $record_header, $sequence) = @_;
    my $n_count  = ($sequence =~ tr/N//);
    my $gap_runs = () = $sequence =~ m/N+/g;
    my $contigs  = $gap_runs + 1;
    print {$fh} "$record_header\t$contigs\t$n_count\n";
    return;
}

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to the options hash.
    # Replaces this process with pod2usage when no option was given or
    # when -help was requested.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied, or -help is set, print the usage and exit
    #
    if (keys(%options) == 0 or $options{'help'}) {
        # list form: $0 is handed over as one argument, never parsed by a shell
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the command line value of $option_name if it was supplied,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    calc.contigs.in.scaffolds.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
173 | 174 | =head1 DESCRIPTION 175 | 176 | 177 | =head1 SYNOPSIS 178 | 179 | script.pl -i <inputfile> [-h -o <outputfile>] 180 | 181 | [-help -h] Displays this basic usage information 182 | [-inputfile -i] Input fasta file. 183 | [-outputfile -o] Outputfile. 184 | 185 | =cut -------------------------------------------------------------------------------- /gff.to.table.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # gff.to.table.pl 5 | # 6 | # Converts a gff annotation file into a tab separated table. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt",'inputfile');
my $outputfile = overrideDefault("outputfile.txt",'outputfile');

######################################################################
# CODE HERE
######################################################################

# Reads the tab-separated annotation lines of $inputfile and writes one
# table row per feature to $outputfile: feature type, start, end, strand,
# tag, gene, EC number and product. Features whose type matches "Source"
# or "Gene" are left out.

open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

print {$out_fh} "Type\tStart\tEnd\tStrand\tTag\tGene\tECnumber\tProduct\n";

while ( my $line = <$in_fh> ) {
    chomp $line;
    my @info = split(/\t/, $line);

    # Blank lines, comment lines and truncated records do not carry the
    # type/start/end/strand columns printed below; skip them instead of
    # writing a row of empty cells.
    next if (@info < 7);
    next if ($info[2] =~ m/Source/);
    next if ($info[2] =~ m/Gene/);

    # split() drops trailing empty fields, so the attribute column can be
    # missing even on an otherwise complete record.
    my $attributes = defined $info[8] ? $info[8] : "";

    # The tag is the second whitespace-separated word of the first
    # ";"-separated attribute.
    my ($first_attribute) = split(";", $attributes);
    my @locus = defined $first_attribute ? split(" ", $first_attribute) : ();
    my $tag = defined $locus[1] ? $locus[1] : "NA";

    my $gene    = attributeValue($attributes, "gene");
    my $ec      = attributeValue($attributes, "EC_number");
    my $product = attributeValue($attributes, "product");
    $product =~ s/"//g;

    print {$out_fh} "$info[2]\t$info[3]\t$info[4]\t$info[6]\t$tag\t$gene\t$ec\t$product\n";

}

close($in_fh);
# Buffered write errors only surface when the handle is closed.
close($out_fh) or die("Cannot write file: $outputfile\n");

sub attributeValue {
    #-----
    # Return the text that follows " ; $name " in $attributes, up to the
    # next " ; " separator or the end of the string. Returns "NA" when the
    # attribute is absent. The trailing space after the name keeps "gene"
    # from matching a longer key that merely starts with "gene".
    #
    my ($attributes, $name) = @_;
    if ($attributes =~ m/ ; \Q$name\E (.*?)(?: ; |$)/) {
        return $1;
    }
    return "NA";
}


######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line with Getopt::Long and return a reference to
    # the hash of options that were supplied.
    #
    # Recognised options: -help/-h, -inputfile/-i, -outputfile/-o.
    # When no option is given at all, or -help is set, the process is
    # replaced by `pod2usage` run on this script, so the call does not
    # return in those cases.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # If no arguments were supplied, or -help is set, print the usage and
    # exit. The list form of exec bypasses the shell, so a script path with
    # spaces or shell metacharacters is passed through intact. Dying on
    # failure stops the script from carrying on with its default file
    # names when pod2usage cannot be started.
    #
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value stored under $option_name in the
    # file-level $global_options hash reference when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    gff.to.table.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
157 | 158 | You should have received a copy of the GNU General Public License 159 | along with this program. If not, see . 160 | 161 | =head1 DESCRIPTION 162 | 163 | 164 | 165 | =head1 SYNOPSIS 166 | 167 | script.pl -i [-h] 168 | 169 | [-help -h] Displays this basic usage information 170 | [-inputfile -i] Inputfile. 171 | [-outputfile -o] Outputfile. 172 | 173 | =cut -------------------------------------------------------------------------------- /trim.fastq.length.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # trim.fastq.length.pl 5 | # 6 | # Removes short or long sequences 7 | # 8 | # Copyright (C) 2013 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see . 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt",'inputfile');
my $outputfile = overrideDefault("outputfile.txt",'outputfile');
my $minlength  = overrideDefault(1,'minlength');
my $maxlength  = overrideDefault(600,'maxlength');

my $header;
my $sequence;
my $linenr = 0;
my $inseq = 0;
my $outseq = 0;

######################################################################
# CODE HERE
######################################################################

# Reads $inputfile four lines at a time (header, sequence, separator,
# quality) and copies a record to $outputfile when the length of its
# fourth line lies within [$minlength, $maxlength]. The separator line is
# always written as a bare "+".

open(my $in_fh, '<', $inputfile) or die("Cannot open $inputfile\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create $outputfile\n");

while ( my $line = <$in_fh> ) {
    chomp $line;
    $linenr++;
    if ($linenr == 1) {
        $header = $line;
    }
    elsif ($linenr == 2) {
        $sequence = $line;
    }
    elsif ($linenr == 4) {
        # Fourth line closes the record: decide whether to keep it.
        $linenr = 0;
        $inseq++;
        my $length = length($line);
        if (($length >= $minlength) and ($length <= $maxlength)) {
            print {$out_fh} "$header\n";
            print {$out_fh} "$sequence\n";
            print {$out_fh} "+\n";
            print {$out_fh} "$line\n";
            $outseq++;
        }
    }
}

print "$inseq sequences evaluated\n";
print "$outseq sequences within $minlength bp to $maxlength bp saved\n";

close($in_fh);
# Buffered write errors only surface when the handle is closed.
close($out_fh) or die("Cannot write $outputfile\n");

exit;

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line with Getopt::Long and return a reference to
    # the hash of options that were supplied.
    #
    # Recognised options: -help/-h, -inputfile/-i, -outputfile/-o,
    # -minlength/-m, -maxlength/-x.
    # When no option is given at all, or -help is set, the process is
    # replaced by `pod2usage` run on this script, so the call does not
    # return in those cases.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s", "minlength|m:s", "maxlength|x:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # If no arguments were supplied, or -help is set, print the usage and
    # exit. The list form of exec bypasses the shell, so a script path with
    # spaces or shell metacharacters is passed through intact. Dying on
    # failure stops the script from carrying on with its default file
    # names when pod2usage cannot be started.
    #
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value stored under $option_name in the
    # file-level $global_options hash reference when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    trim.fastq.length.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Removes short or long sequences from a fastq file.
165 | 166 | =head1 SYNOPSIS 167 | 168 | script.pl -i [-h] 169 | 170 | [-help -h] Displays this basic usage information 171 | [-inputfile -i] Input fastq file 172 | [-outputfile -o] Output fastq file 173 | [-minlength -m] Minimum sequence length 174 | [-maxlength -x] Maximum sequence length 175 | 176 | =cut 177 | -------------------------------------------------------------------------------- /mp.fix.FR.single.to.RF.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # mp.fix.FR.single.to.RF.pl 5 | # 6 | # 7 | # Copyright (C) 2013 Mads Albertsen 8 | # 9 | # This program is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile = overrideDefault("single.fa",'inputfile');

my $header;
my %read1;
my %read2;
my $seq = "";


######################################################################
# CODE HERE
######################################################################

# Reads the fasta records of $inputfile and sorts them into %read1
# (header matches "_1") or %read2 (all others), keyed by the header text
# before the first "_". For every key present in both hashes, the two
# reads are written to paired.fa as reverse complements, read 1 first.

open(my $in_fh, '<', $inputfile) or die("Cannot open $inputfile\n");
open(my $out_fh, '>', 'paired.fa') or die("Cannot create paired.fa\n");

while ( my $line = <$in_fh> ) {
    chomp $line;
    if ($line =~ m/>/) {
        # A new header closes the previous record, if there was one.
        storeRead($header, $seq) if (defined $header);
        $header = $line;
        $seq = "";
    }
    else {
        $seq = $seq.$line;
    }
}

# The last record has no following header to close it. $header is still
# undefined when the input held no header line at all.
storeRead($header, $seq) if (defined $header);

foreach my $key (keys %read1) {
    next if (!exists($read2{$key}));
    foreach my $record ($read1{$key}, $read2{$key}) {
        my ($name, $bases) = @{$record};
        my $revcomp = reverse($bases);
        $revcomp =~ tr/ACGTacgt/TGCAtgca/;
        print {$out_fh} "$name\n$revcomp\n";
    }
}


close($in_fh);
# Buffered write errors only surface when the handle is closed.
close($out_fh) or die("Cannot write paired.fa\n");

exit;

sub storeRead {
    #-----
    # File one fasta record under the part of its header before the first
    # "_". Header and sequence are kept as a pair rather than joined with
    # a tab, so headers containing tabs and empty sequences survive intact.
    #
    my ($name, $bases) = @_;
    my @splitline = split(/_/, $name);
    if ($name =~ m/_1/) {
        $read1{$splitline[0]} = [$name, $bases];
    }
    else {
        $read2{$splitline[0]} = [$name, $bases];
    }
    return;
}

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line with Getopt::Long and return a reference to
    # the hash of options that were supplied.
    #
    # Recognised options: -help/-h, -inputfile/-i.
    # When no option is given at all, or -help is set, the process is
    # replaced by `pod2usage` run on this script, so the call does not
    # return in those cases.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # If no arguments were supplied, or -help is set, print the usage and
    # exit. The list form of exec bypasses the shell, so a script path with
    # spaces or shell metacharacters is passed through intact. Dying on
    # failure stops the script from carrying on with its default file
    # names when pod2usage cannot be started.
    #
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value stored under $option_name in the
    # file-level $global_options hash reference when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    mp.fix.FR.single.to.RF.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
169 | 170 | You should have received a copy of the GNU General Public License 171 | along with this program. If not, see . 172 | 173 | =head1 DESCRIPTION 174 | 175 | Splits a merged fastq file. 176 | 177 | =head1 SYNOPSIS 178 | 179 | script.pl -i [-h] 180 | 181 | [-help -h] Displays this basic usage information 182 | [-inputfile -i] Input combined pe fastq file. 183 | 184 | =cut 185 | -------------------------------------------------------------------------------- /esom.classified.to.contigs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # esom.classified.to.contigs.pl 5 | # 6 | # Renames sub.contigs from esom using a name file that links the original 7 | # contig name to the esom name. 8 | # 9 | # ToDo: split the original contig file based on majority assignments or 10 | # just add bins as 3.5 when the sub.contigs are in bin 3 and 5 11 | # 12 | # Copyright (C) 2012 Mads Albertsen 13 | # 14 | # This program is free software: you can redistribute it and/or modify 15 | # it under the terms of the GNU General Public License as published by 16 | # the Free Software Foundation, either version 3 of the License, or 17 | # (at your option) any later version. 18 | # 19 | # This program is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22 | # GNU General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU General Public License 25 | # along with this program. If not, see . 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inclass    = overrideDefault("inclass.cls",'inclass');
my $innames    = overrideDefault("innames.txt",'innames');
my $outputfile = overrideDefault("outputfile.txt",'outputfile');

my %class;

######################################################################
# CODE HERE
######################################################################

# Pass 1: load the class file into %class (first column => second
# column). Lines containing "%" are skipped.

open(my $class_fh, '<', $inclass) or die("Cannot read file: $inclass\n");

while ( my $line = <$class_fh> ) {
    chomp $line;
    next if ($line =~ m/\%/);
    my @splitline = split(/\t/,$line);
    # Blank or single-column lines carry no class assignment.
    next if (@splitline < 2);
    $class{$splitline[0]} = $splitline[1];
}
close($class_fh);

# Pass 2: for every line of the names file, look up the part of the
# second column before the first "_" in %class and write
# "second column, class, first column" to the output table.

open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");
print {$out_fh} "contig\tbin\tesom.name\n";
open(my $names_fh, '<', $innames) or die("Cannot read file: $innames\n");

while ( my $line = <$names_fh> ) {
    chomp $line;
    my @splitline = split(/\t/,$line);
    # Blank or single-column lines have nothing to look up.
    next if (@splitline < 2);
    my @splitline1 = split(/_/,$splitline[1]);
    if (defined $splitline1[0] and exists($class{$splitline1[0]})) {
        print {$out_fh} "$splitline[1]\t$class{$splitline1[0]}\t$splitline[0]\n";
    }
    else {
        print "Couldn not find any class for $splitline[0] from the entry $line.\n";
    }
}
close($names_fh);

# Buffered write errors only surface when the handle is closed.
close($out_fh) or die("Cannot write file: $outputfile\n");


######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line with Getopt::Long and return a reference to
    # the hash of options that were supplied.
    #
    # Recognised options: -help/-h, -inclass/-c, -innames/-n,
    # -outputfile/-o.
    # When no option is given at all, or -help is set, the process is
    # replaced by `pod2usage` run on this script, so the call does not
    # return in those cases.
    #
    my @standard_options = ( "help|h+", "inclass|c:s", "innames|n:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # If no arguments were supplied, or -help is set, print the usage and
    # exit. The list form of exec bypasses the shell, so a script path with
    # spaces or shell metacharacters is passed through intact. Dying on
    # failure stops the script from carrying on with its default file
    # names when pod2usage cannot be started.
    #
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value stored under $option_name in the
    # file-level $global_options hash reference when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    esom.classified.to.contigs.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Renames sub.contigs from esom using a name file that links the
   original contig name to the esom name.

=head1 SYNOPSIS

    [-help -h]           Displays this basic usage information
    [-inclass -c]        Class file.
    [-innames -n]        Names file.
166 | [-outputfile -o] Outputfile. 167 | 168 | =cut -------------------------------------------------------------------------------- /pb.to.bp.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # pb.to.mp.pl 5 | # 6 | # Converts PB data to conventional MP data 7 | # 8 | # Copyright (C) 2014 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt",'inputfile');
my $outputfile = overrideDefault("outputfile.txt",'outputfile');
my $minlength  = overrideDefault(2000,'minlength');

my $seq = "";
my $count = 0;

######################################################################
# CODE HERE
######################################################################

# Reads the fasta records of $inputfile. For every sequence of at least
# $minlength bases, writes a numbered pair to $outputfile: ">N_1" with the
# first 100 bases and ">N_2" with the reverse complement of the last 100.

open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

while ( my $line = <$in_fh> ) {
    chomp $line;
    #$line =~ s/\r//g;
    if ($line =~ m/>/) {
        # A new header closes the sequence collected so far.
        writePair($seq);
        $seq = "";
    }
    else {
        $seq = $seq.$line;
    }
}

# The last sequence has no following header to close it.
writePair($seq);

close($in_fh);
# Buffered write errors only surface when the handle is closed.
close($out_fh) or die("Cannot write file: $outputfile\n");

sub writePair {
    #-----
    # Emit the read pair for one sequence, or nothing when the sequence is
    # empty or shorter than $minlength.
    #
    my ($sequence) = @_;
    return if ($sequence eq "" or length($sequence) < $minlength);

    $count++;
    my $forward = substr($sequence, 0, 100);
    # A negative offset counts from the end. Sequences of 100 bases or
    # fewer are used whole, so the offset never reaches before the start.
    my $tail = length($sequence) > 100 ? substr($sequence, -100) : $sequence;
    my $revcomp = reverse($tail);
    $revcomp =~ tr/ACGTacgt/TGCAtgca/;

    print {$out_fh} ">".$count."_1\n";
    print {$out_fh} "$forward\n";
    print {$out_fh} ">".$count."_2\n";
    print {$out_fh} "$revcomp\n";
    return;
}


######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line with Getopt::Long and return a reference to
    # the hash of options that were supplied.
    #
    # Recognised options: -help/-h, -inputfile/-i, -outputfile/-o,
    # -minlength/-m.
    # When no option is given at all, or -help is set, the process is
    # replaced by `pod2usage` run on this script, so the call does not
    # return in those cases.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s", "minlength|m:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # If no arguments were supplied, or -help is set, print the usage and
    # exit. The list form of exec bypasses the shell, so a script path with
    # spaces or shell metacharacters is passed through intact. Dying on
    # failure stops the script from carrying on with its default file
    # names when pod2usage cannot be started.
    #
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value stored under $option_name in the
    # file-level $global_options hash reference when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    pb.to.mp.pl

=head1 COPYRIGHT

   copyright (C) 2014 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
162 | 163 | You should have received a copy of the GNU General Public License 164 | along with this program. If not, see <http://www.gnu.org/licenses/>. 165 | 166 | =head1 DESCRIPTION 167 | 168 | 169 | 170 | =head1 SYNOPSIS 171 | 172 | script.pl -i [-h] 173 | 174 | [-help -h] Displays this basic usage information 175 | [-inputfile -i] Inputfile 176 | [-outputfile -o] Outputfile 177 | [-minlength -m] Minimum length 178 | 179 | =cut -------------------------------------------------------------------------------- /sfr.mgrast.extract.using.list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # sfr.mgrast.extract.using.list.pl 5 | # 6 | # Extracts a subset of sequences given a list of the headers to extract. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inlist      = overrideDefault("linlist.txt",'inlist');
my $insequences = overrideDefault("sequences.fa",'insequences');
# -outputfile used to be parsed but never used. It is honoured now, and
# the previous hard-coded path stays as the default, so runs without -o
# write to the same file as before.
my $outputfile  = overrideDefault("$insequences.subset.fa",'outputfile');

my $countp = 0;
my $countout = 0;
my $count = 0;
my %extract;


######################################################################
# CODE HERE
######################################################################

# Pass 1: collect the distinct lines of the list file as the identifiers
# to extract.

open(my $list_fh, '<', $inlist) or die("Cannot read file: $inlist\n");

while ( my $line = <$list_fh> ) {
    chomp $line;
    if (!exists($extract{$line})) {
        $extract{$line} = 1;
        $count++;
    }
}
close($list_fh);

print "$count sequences to extract.\n";

# Pass 2: copy every line of the sequence table whose second tab-separated
# column, cut at the first "_", is one of the listed identifiers.

open(my $seq_fh, '<', $insequences) or die("Cannot read file: $insequences\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

while ( my $line = <$seq_fh> ) {
    chomp $line;
    $countp++;
    my @splitline = split(/\t/,$line);
    # Lines without a second column cannot match any identifier.
    next if (!defined $splitline[1]);
    my @splitline1 = split(/_/,$splitline[1]);
    if (defined $splitline1[0] and exists($extract{$splitline1[0]})) {
        print {$out_fh} "$line\n";
        $countout++;
    }
}

print "$countp sequences in total.\n";
print "$countout sequences extracted.\n";

# Buffered write errors only surface when the handle is closed.
close($out_fh) or die("Cannot write file: $outputfile\n");
close($seq_fh);

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line with Getopt::Long and return a reference to
    # the hash of options that were supplied.
    #
    # Recognised options: -help/-h, -insequences/-s, -inlist/-l,
    # -outputfile/-o.
    # When no option is given at all, or -help is set, the process is
    # replaced by `pod2usage` run on this script, so the call does not
    # return in those cases.
    #
    my @standard_options = ( "help|h+", "insequences|s:s", "inlist|l:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # If no arguments were supplied, or -help is set, print the usage and
    # exit. The list form of exec bypasses the shell, so a script path with
    # spaces or shell metacharacters is passed through intact. Dying on
    # failure stops the script from carrying on with its default file
    # names when pod2usage cannot be started.
    #
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value stored under $option_name in the
    # file-level $global_options hash reference when that option was
    # supplied, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    sfr.mgrast.extract.using.list.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
163 | 164 | =head1 DESCRIPTION 165 | 166 | 167 | 168 | =head1 SYNOPSIS 169 | 170 | script.pl -i [-h] 171 | 172 | [-help -h] Displays this basic usage information 173 | [-inlist -l] List of headers to use for extraction. 174 | [-insequences -s] Sequence file where a subset of sequences are to be extracted from. 175 | [-outputfile -o] Outputfile. 176 | 177 | =cut -------------------------------------------------------------------------------- /gff.to.table.v2.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # scriptname 5 | # 6 | # Short description 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see . 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Switch on autoflush for STDERR and STDOUT, leaving STDOUT selected.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt",  'inputfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

######################################################################
# CODE HERE
######################################################################

# Reads a tab-separated annotation file (feature type in column 3, start and
# end in columns 4 and 5, strand in column 7, attributes in column 9) and
# writes one table row per feature that is not a "Source" or "Gene" entry.
# NOTE(review): the attribute separators (" ; key ") are reproduced as they
# appear in the reviewed copy; confirm the exact spacing against real input.

open(my $in,  '<', $inputfile)  or die("Cannot read file: $inputfile\n");
open(my $out, '>', $outputfile) or die("Cannot create file: $outputfile\n");

print {$out} "Type\tStart\tEnd\tStrand\tTag\tGene\tECnumber\tProduct\n";

while ( my $line = <$in> ) {
    chomp $line;
    my @info = split(/\t/, $line);

    # Blank, comment and truncated lines have no attribute column to parse.
    next if (@info < 9);
    next if ($info[2] =~ m/Source/);
    next if ($info[2] =~ m/Gene/);

    # The first attribute holds either the locus tag or the gene name; its
    # value is the second whitespace-separated word.
    my $first_attribute = (split(/;/, $info[8]))[0];
    $first_attribute = "" unless (defined $first_attribute);
    my $first_value = (split(" ", $first_attribute))[1];
    $first_value = "NA" unless (defined $first_value);

    my $locus = "NA";
    my $gene  = "NA";
    if ($first_attribute =~ m/locus/) {
        $locus = $first_value;
    }
    else {
        # Gene name comes first, so the locus tag has to be looked up by key.
        $locus = attributeValue($info[8], "locus_tag");
        $gene  = $first_value;
    }

    my $ec      = attributeValue($info[8], "EC_number");
    my $product = attributeValue($info[8], "product");
    $product =~ s/"//g;

    print {$out} "$info[2]\t$info[3]\t$info[4]\t$info[6]\t$locus\t$gene\t$ec\t$product\n";
}

close $in;
close $out or die("Cannot write file: $outputfile\n");

######################################################################
# SUBS
######################################################################
sub attributeValue {
    #-----
    # Returns the text that follows " ; <key> " in the attribute string, up
    # to the next " ; " separator, or "NA" when the key is not present.
    #
    my ($attributes, $key) = @_;
    my @after_key = split(/ ; \Q$key\E /, $attributes);
    return "NA" if (@after_key < 2);
    my @fields = split(/ ; /, $after_key[1]);
    return defined($fields[0]) ? $fields[0] : "NA";
}

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to a hash holding
    # every option that was supplied. When no option at all or -help is
    # given, the process is replaced by `pod2usage` run on this script.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s" );
    my %options;

    GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing spaces
    # or shell metacharacters is handed over intact. exec only returns when
    # it failed; stop then instead of running on with the default file names.
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the value given on the command line for $option_name, or
    # $default_value when that option was not supplied.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    gff.to.table.v2.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the 162 | GNU General Public License for more details. 163 | 164 | You should have received a copy of the GNU General Public License 165 | along with this program. If not, see <http://www.gnu.org/licenses/>. 166 | 167 | =head1 DESCRIPTION 168 | 169 | 170 | 171 | =head1 SYNOPSIS 172 | 173 | script.pl -i [-h] 174 | 175 | [-help -h] Displays this basic usage information 176 | [-inputfile -i] Inputfile. 177 | [-outputfile -o] Outputfile. 178 | 179 | =cut 180 | -------------------------------------------------------------------------------- /sfr.ec.name.to.ec.number.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # ec.name.to.ec.number.pl 5 | # 6 | # Adds the EC number description to EC numbers. Uses a formatted list made 7 | # using ec.format.db.pl and an all.ec.txt file made using the script 8 | # mgrast.to.ShotgunFunctionalizeR.pl or in Excel. 9 | # 10 | # Copyright (C) 2012 Mads Albertsen 11 | # 12 | # This program is free software: you can redistribute it and/or modify 13 | # it under the terms of the GNU General Public License as published by 14 | # the Free Software Foundation, either version 3 of the License, or 15 | # (at your option) any later version. 16 | # 17 | # This program is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # GNU General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU General Public License 23 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Switch on autoflush for STDERR and STDOUT, leaving STDOUT selected.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt",  'inputfile');
my $inec       = overrideDefault("inec.txt",       'inec');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

# EC identifier with "EC" prefix and no spaces => description (from -inec)
my %ec;

######################################################################
# CODE HERE
######################################################################

open(my $in,    '<', $inputfile)  or die("Cannot read file: $inputfile\n");
# Report the file that actually failed to open ($inec, not $inputfile).
open(my $in_ec, '<', $inec)       or die("Cannot read file: $inec\n");
open(my $out,   '>', $outputfile) or die("Cannot create file: $outputfile\n");

# Load the tab-separated "number<TAB>description" list.
while ( my $line = <$in_ec> ) {
    chomp $line;
    my ($number, $description) = split(/\t/, $line);
    next unless (defined $number and defined $description);
    my $ecn = "EC".$number;  #To match it with the other file for ShotgunFunctionalizeR
    $ecn =~ s/ //g;
    $ec{$ecn} = $description;
}

# Rewrite every input line whose first column starts with a known EC
# identifier; all other lines are copied unchanged.
while ( my $line = <$in> ) {
    chomp $line;
    my @columns = split(/\t/, $line);
    if (!@columns) {
        # Blank line: nothing to look up.
        print {$out} "$line\n";
        next;
    }

    # The identifier is the first space-separated word of column one.
    my $ecn = (split(/ /, $columns[0]))[0];
    $ecn = "" unless (defined $ecn);

    # Pad partial identifiers to the four-level form used as hash key.
    my $dots = ($ecn =~ tr/.//);
    $ecn .= ".-.-" if ($dots == 1);
    $ecn .= ".-"   if ($dots == 2);

    if (exists $ec{$ecn}) {
        # The description replaces the last column, the padded identifier
        # replaces the first one.
        $columns[-1] = $ec{$ecn};
        $columns[0]  = $ecn;
        print {$out} join("\t", @columns), "\n";
    }
    else {
        print {$out} "$line\n";
    }
}

close $in;
close $in_ec;
close $out or die("Cannot write file: $outputfile\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to a hash holding
    # every option that was supplied. When no option at all or -help is
    # given, the process is replaced by `pod2usage` run on this script.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s", "inec|e:s" );
    my %options;

    GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing spaces
    # or shell metacharacters is handed over intact. exec only returns when
    # it failed; stop then instead of running on with the default file names.
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the value given on the command line for $option_name, or
    # $default_value when that option was not supplied.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    sfr.ec.name.to.ec.number.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.
158 | 159 | You should have received a copy of the GNU General Public License 160 | along with this program. If not, see <http://www.gnu.org/licenses/>. 161 | 162 | =head1 DESCRIPTION 163 | 164 | 165 | 166 | =head1 SYNOPSIS 167 | 168 | script.pl -i [-h] 169 | 170 | [-help -h] Displays this basic usage information 171 | [-inputfile -i] all.ec.txt file. 172 | [-outputfile -o] Outputfile. 173 | [-inec -e] Formatted ec name file. 174 | 175 | =cut -------------------------------------------------------------------------------- /sfr.ec.format.db.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # ec.format.db.pl 5 | # 6 | # Fast formatting of enzyme nomenclature to tab format. 7 | # The enzyme.dat and enzclass.txt databases were found here: 8 | # ftp://ftp.ebi.ac.uk/pub/databases/intenz/enzyme 9 | # 10 | # Copyright (C) 2012 Mads Albertsen 11 | # 12 | # This program is free software: you can redistribute it and/or modify 13 | # it under the terms of the GNU General Public License as published by 14 | # the Free Software Foundation, either version 3 of the License, or 15 | # (at your option) any later version. 16 | # 17 | # This program is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # GNU General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU General Public License 23 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Switch on autoflush for STDERR and STDOUT, leaving STDOUT selected.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt",  'inputfile');
my $inclass    = overrideDefault("inclass.txt",    'inclass');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

# Initialised to "" so that a DE line before any ID line, or a lower class
# level before its parent, does not touch an undefined value.
my $id      = "";   # identifier of the entry currently being read
my $lastone = "";   # name of the most recent top-level class
my $lasttwo = "";   # name of the most recent second-level class

######################################################################
# CODE HERE
######################################################################

# NOTE(review): both loops split on a single space, exactly as the reviewed
# copy shows. Runs of spaces may have been collapsed in that copy; confirm
# the separators against the real enzyme.dat / enzclass.txt layout.

open(my $in,       '<', $inputfile)  or die("Cannot read file: $inputfile\n");
open(my $in_class, '<', $inclass)    or die("Cannot read file: $inclass\n");
open(my $out,      '>', $outputfile) or die("Cannot create file: $outputfile\n");

# Entry file: pair every ID line with the first DE line that follows it.
while ( my $line = <$in> ) {
    chomp $line;
    my @splitline = split(/ /, $line);
    my $tag   = defined($splitline[0]) ? $splitline[0] : "";
    my $value = defined($splitline[1]) ? $splitline[1] : "";
    if ($tag eq "ID") {
        $id = $value;
    }
    if (($tag eq "DE") and $id ne "") {
        print {$out} "$id\t$value\n";
        $id = "";   # only the first DE line of an entry is used
    }
}

# Class file: the number of "-" placeholders in the class number tells the
# level (3 = top level, 2 = second level, 1 = third level). Lower levels are
# written with the names of their parents prepended, separated by ";".
while ( my $line = <$in_class> ) {
    chomp $line;
    next unless ($line =~ m/\.-/);

    my @splitline = split(/ /, $line);
    my ($class, $first, $second) = map { defined($_) ? $_ : "" } @splitline[0 .. 2];
    my $dashes = ($class =~ tr/-//);

    if ($dashes == 3) {
        print {$out} "$class\t$first\n";
        $lastone = $first;
    }
    if ($dashes == 2) {
        # Drop the first character of the name field.
        my $outstr = length($first) ? substr($first, 1) : "";
        print {$out} "$class\t$lastone;$outstr\n";
        $lasttwo = $outstr;
    }
    if ($dashes == 1) {
        print {$out} "$class\t$lastone;$lasttwo;$second\n";
    }
}

close $in;
close $in_class;
close $out or die("Cannot write file: $outputfile\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to a hash holding
    # every option that was supplied. When no option at all or -help is
    # given, the process is replaced by `pod2usage` run on this script.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s", "inclass|c:s" );
    my %options;

    GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing spaces
    # or shell metacharacters is handed over intact. exec only returns when
    # it failed; stop then instead of running on with the default file names.
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the value given on the command line for $option_name, or
    # $default_value when that option was not supplied.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    sfr.ec.format.db.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.
163 | 164 | You should have received a copy of the GNU General Public License 165 | along with this program. If not, see <http://www.gnu.org/licenses/>. 166 | 167 | =head1 DESCRIPTION 168 | 169 | 170 | 171 | =head1 SYNOPSIS 172 | 173 | script.pl -i [-h] 174 | 175 | [-help -h] Displays this basic usage information 176 | [-inputfile -i] enzyme.dat 177 | [-outputfile -o] Outputfile. 178 | [-inclass -c] enzclass.txt 179 | 180 | =cut -------------------------------------------------------------------------------- /rnaseq.to.stranded.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # rna.seq.to.stranded.pl 5 | # 6 | # Counts sense and antisense reads per reference in a SAM file of stranded RNAseq CLC data. 7 | # Outputs a simple tab-separated file. 8 | # 9 | # Copyright (C) 2012 Mads Albertsen 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Switch on autoflush for STDERR and STDOUT, leaving STDOUT selected.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $insam      = overrideDefault("insam.sam",      'insam');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

my %length;      # reference name => length taken from its @SQ header line
my %sense;       # reference name => number of reads with SAM flag 16
my %antisense;   # reference name => number of reads with SAM flag 0

######################################################################
# CODE HERE
######################################################################

open(my $out, '>', $outputfile) or die("Cannot create file: $outputfile\n");
open(my $sam, '<', $insam)      or die("Cannot read file: $insam\n");

while ( my $line = <$sam> ) {
    chomp $line;

    # Header lines are recognised by a leading "@" only. An unanchored test
    # would also hit alignment lines whose quality string happens to contain
    # "@SQ", "@PG", ... and silently misclassify or drop those reads.
    if ($line =~ m/^\@/) {
        next unless ($line =~ m/^\@SQ\t/);

        # Read the TAG:VALUE fields by tag name; splitting with a limit of
        # two keeps reference names that contain ":" intact.
        my %tag;
        foreach my $field (split(/\t/, $line)) {
            my ($key, $value) = split(/:/, $field, 2);
            $tag{$key} = $value if (defined $value);
        }
        next unless (defined $tag{'SN'} and defined $tag{'LN'});

        $length{$tag{'SN'}}    = $tag{'LN'};
        $sense{$tag{'SN'}}     = 0;
        $antisense{$tag{'SN'}} = 0;
        next;
    }

    next if ($line eq "");

    my @readinfo = split(/\t/, $line);
    my $flag = (@readinfo >= 3 and $readinfo[1] =~ m/^\d+$/) ? $readinfo[1] : -1;
    if ($flag == 16) {
        $sense{$readinfo[2]}++;
    }
    elsif ($flag == 0) {
        $antisense{$readinfo[2]}++;
    }
    else {
        print "Problematic SAM flag: $line\n";
    }
}

# Only references announced in the header are reported; sorting makes the
# row order reproducible between runs.
print {$out} "Gene\tLength\tSense\tAntisense\n";
foreach my $gene (sort keys %length) {
    print {$out} "$gene\t$length{$gene}\t$sense{$gene}\t$antisense{$gene}\n";
}

close $sam;
close $out or die("Cannot write file: $outputfile\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to a hash holding
    # every option that was supplied. When no option at all or -help is
    # given, the process is replaced by `pod2usage` run on this script.
    #
    my @standard_options = ( "help|h+", "insam|s:s", "outputfile|o:s" );
    my %options;

    GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing spaces
    # or shell metacharacters is handed over intact. exec only returns when
    # it failed; stop then instead of running on with the default file names.
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the value given on the command line for $option_name, or
    # $default_value when that option was not supplied.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    rnaseq.to.stranded.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
162 | 163 | =head1 DESCRIPTION 164 | 165 | 166 | 167 | =head1 SYNOPSIS 168 | 169 | script.pl -i [-h] 170 | 171 | [-help -h] Displays this basic usage information 172 | [-insam -s] Input sam file. 173 | [-outputfile -o] Outputfile. 174 | 175 | =cut -------------------------------------------------------------------------------- /clc.variant.to.consensus.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # clc.variant.to.consensus.pl 5 | # 6 | # Applies the variants of a CLC csv variant table to a reference fasta sequence and writes the consensus. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Switch on autoflush for STDERR and STDOUT, leaving STDOUT selected.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $variant    = overrideDefault("variant.csv",    'variant');
my $reference  = overrideDefault("reference.fa",   'reference');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

my $header   = "";   # last ">" line seen in the reference file
my $sequence = "";   # all non-header reference lines, concatenated

######################################################################
# CODE HERE
######################################################################

open(my $in_ref, '<', $reference)  or die("Cannot read file: $reference\n");
open(my $in_var, '<', $variant)    or die("Cannot read file: $variant\n");
open(my $out,    '>', $outputfile) or die("Cannot create file: $outputfile\n");

# Every non-header line is appended to one sequence, so the reference file
# is treated as holding a single record.
while ( my $line = <$in_ref> ) {
    chomp $line;
    $line =~ s/\r$//;   # a stray CR would otherwise become a sequence position
    if ($line =~ m/>/) {
        $header = $line;
    }
    else {
        $sequence .= $line;
    }
}
close $in_ref;

# One array element per reference base. Edits replace an element with the
# variant text or with "", so reference positions stay valid as indexes.
my @seq = split(//, $sequence);

# Columns used below: 0 = reference position (1-based), 2 = variation type,
# 3 = length, 10 = variant text; column 1 is read only by the insertion case.
while ( my $line = <$in_var> ) {
    next if ($line =~ m/Reference Position/); #To skip first line
    chomp $line;
    $line =~ s/\r$//;
    $line =~ s/"//g;
    my @splitline = split(/,/, $line);
    next if (@splitline < 4);   # blank or truncated row

    my ($position, $type, $span) = @splitline[0, 2, 3];
    my $replacement = defined($splitline[10]) ? $splitline[10] : "";

    if ($type eq "SNV") {
        $seq[$position-1] = $replacement;
    }
    if ($type eq "MNV") {
        blankBases(\@seq, $position, $span);
        $seq[$position-1] = $replacement;
    }
    if ($type eq "InDel") {
        if ($span == 0) {
            # Insertion: the variant text is appended to an existing base.
            # NOTE(review): the base is read at column 1 but written at
            # column 0 - looks like a typo for column 0 on both sides;
            # confirm against the CLC table layout before changing.
            $seq[$position-2] = "$seq[$splitline[1]-2]$replacement";
        }
        else {
            blankBases(\@seq, $position, $span);
        }
    }
}

print {$out} "$header.consensus\n";
print {$out} join("", @seq)."\n";

close $in_var;
close $out or die("Cannot write file: $outputfile\n");

######################################################################
# SUBS
######################################################################
sub blankBases {
    #-----
    # Empties $count consecutive elements of the array referenced by $bases,
    # starting at the 1-based $position.
    #
    my ($bases, $position, $count) = @_;
    for (my $index = $position - 1; $index <= $position + $count - 2; $index++) {
        $bases->[$index] = "";
    }
    return;
}

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to a hash holding
    # every option that was supplied. When no option at all or -help is
    # given, the process is replaced by `pod2usage` run on this script.
    #
    my @standard_options = ( "help|h+", "variant|v:s", "reference|r:s", "outputfile|o:s" );
    my %options;

    GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing spaces
    # or shell metacharacters is handed over intact. exec only returns when
    # it failed; stop then instead of running on with the default file names.
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the value given on the command line for $option_name, or
    # $default_value when that option was not supplied.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    clc.variant.to.consensus.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
164 | 165 | This program is distributed in the hope that it will be useful, 166 | but WITHOUT ANY WARRANTY; without even the implied warranty of 167 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 168 | GNU General Public License for more details. 169 | 170 | You should have received a copy of the GNU General Public License 171 | along with this program. If not, see <http://www.gnu.org/licenses/>. 172 | 173 | =head1 DESCRIPTION 174 | 175 | 176 | 177 | =head1 SYNOPSIS 178 | 179 | script.pl -i [-h] 180 | 181 | [-help -h] Displays this basic usage information 182 | [-variant -v] CLC csv variant file 183 | [-reference -r] Reference fasta sequence 184 | [-outputfile -o] Outputfile 185 | 186 | =cut 187 | -------------------------------------------------------------------------------- /cut.fasta.in.smaller.pieces.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # cut.fasta.in.smaller.pieces.pl 5 | # 6 | # 7 | # Copyright (C) 2012 Mads Albertsen 8 | # 9 | # This program is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Switch on autoflush for STDERR and STDOUT, leaving STDOUT selected.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.fa", 'inputfile');
my $outputfile = overrideDefault("out.fa",       'outputfile');
my $length     = overrideDefault("1000",         'length');

# The piece length is the loop step; zero or a non-number would never advance.
unless ($length =~ m/^\d+$/ and $length > 0) {
    die("Length must be a positive integer: $length\n");
}

my $header = "error";   # header of the record being read
my $seq    = "";        # sequence collected for that record
my $count  = 0;         # number of header lines seen so far

######################################################################
# CODE HERE
######################################################################

open(my $in,  '<', $inputfile)  or die("Cannot open $inputfile\n");
open(my $out, '>', $outputfile) or die("Cannot create $outputfile");

while ( my $line = <$in> ) {
    chomp $line;
    if ($line =~ m/>/) {
        # A new header completes the previous record. Anything collected
        # before the first header is dropped.
        printPieces($out, $header, $seq, $length) if ($count > 0);
        $header = $line;
        $seq = "";
        $count++;
    }
    else {
        $seq .= $line;
    }
}

# The last record has no following header to trigger its output.
printPieces($out, $header, $seq, $length);

close $in;
close $out or die("Cannot write $outputfile\n");

exit;

######################################################################
# SUBS
######################################################################
sub printPieces {
    #-----
    # Writes $sequence to $out as consecutive records of at most
    # $piece_length bases. Each record is named "<name>.<start>.<end>" with
    # a 0-based start and an exclusive end. The final piece holds whatever
    # remains; an empty sequence produces no record.
    #
    my ($out, $name, $sequence, $piece_length) = @_;
    my $total = length($sequence);
    for (my $start = 0; $start < $total; $start += $piece_length) {
        my $end = $start + $piece_length;
        $end = $total if ($end > $total);
        print {$out} "$name.$start.$end\n";
        print {$out} substr($sequence, $start, $end - $start)."\n";
    }
    return;
}

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parses the command line and returns a reference to a hash holding
    # every option that was supplied. When no option at all or -help is
    # given, the process is replaced by `pod2usage` run on this script.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s", "length|l:s" );
    my %options;

    GetOptions( \%options, @standard_options );

    # List-form exec bypasses the shell, so a script path containing spaces
    # or shell metacharacters is handed over intact. exec only returns when
    # it failed; stop then instead of running on with the default file names.
    if ( !%options or $options{'help'} ) {
        exec( 'pod2usage', $0 ) or die("Cannot run pod2usage: $!\n");
    }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Returns the value given on the command line for $option_name, or
    # $default_value when that option was not supplied.
    #
    my ($default_value, $option_name) = @_;
    if (exists $global_options->{$option_name}) {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    cut.fasta.in.smaller.pieces.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
163 | 164 | This program is distributed in the hope that it will be useful, 165 | but WITHOUT ANY WARRANTY; without even the implied warranty of 166 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 167 | GNU General Public License for more details. 168 | 169 | You should have received a copy of the GNU General Public License 170 | along with this program. If not, see <http://www.gnu.org/licenses/>. 171 | 172 | =head1 DESCRIPTION 173 | 174 | Cuts every sequence of a fasta file into consecutive pieces of at most -length bases. 175 | 176 | =head1 SYNOPSIS 177 | 178 | script.pl -i -l [-h] 179 | 180 | [-help -h] Displays this basic usage information 181 | [-inputfile -i] Input fastafile. 182 | [-outputfile -o] Optional outputfile (default: out.fa). 183 | [-length -l] Maximum length of each output piece (default: 1000). 184 | 185 | 186 | =cut -------------------------------------------------------------------------------- /split.scaffolds.to.contigs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # split.scaffolds.to.contigs.pl 5 | # 6 | # 7 | # Copyright (C) 2012 Mads Albertsen 8 | # 9 | # This program is free software: you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation, either version 3 of the License, or 12 | # (at your option) any later version. 13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.fa", 'inputfile');
my $outputfile = overrideDefault("out.fa", 'outputfile');
my $minlength  = overrideDefault("200", 'minlength');

my $header;              # header line of the scaffold currently being read
my $seq         = "";    # sequence collected for that scaffold
my $count       = 0;     # scaffolds (header lines) seen
my $contigs     = 0;     # contigs produced by splitting, any length
my $goodcontigs = 0;     # contigs written to the output file

######################################################################
# CODE HERE
######################################################################

open(my $in, '<', $inputfile) or die("Cannot open $inputfile\n");
open(my $out, '>', $outputfile) or die("Cannot create $outputfile");

# Writes the scaffold collected so far (if any) and updates the totals.
my $flush = sub {
    return unless defined $header;
    my ($total, $kept) = writeContigs($out, $header, $seq, $minlength);
    $contigs     += $total;
    $goodcontigs += $kept;
    return;
};

while (my $line = <$in>) {
    chomp $line;
    if ($line =~ m/^>/) {
        # A new header closes the previous scaffold.
        $flush->();
        $header = $line;
        $seq    = "";
        $count++;
    }
    else {
        $seq .= $line;
    }
}

# Remember to catch the last sequence! It is already included in $count,
# which is incremented once per header line.
$flush->();

print "$count scaffolds in total\n";
print "$contigs contigs in total\n";
print "$goodcontigs contigs over $minlength\n";

close($in);
close($out) or die("Cannot close $outputfile: $!\n");

exit;

######################################################################
# TEMPLATE SUBS
######################################################################
sub writeContigs {
    #-----
    # Split one scaffold at every run of N and print the pieces that are
    # at least $minlength long as "<header>.<piece number>".
    # Returns (number of pieces, number of pieces printed).
    #
    my ($fh, $scaffold_header, $sequence, $min) = @_;
    my $pieces  = 0;
    my $printed = 0;

    foreach my $contig (split(/N+/, $sequence)) {
        $pieces++;
        if (length($contig) > $min - 1) {
            print {$fh} "$scaffold_header.$pieces\n";
            print {$fh} "$contig\n";
            $printed++;
        }
    }
    return ($pieces, $printed);
}

sub checkParams {
    #-----
    # Do any and all options checking here...
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s", "minlength|m:s", "stopcount|s:s", "rename|r:+");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    #
    exec("pod2usage $0") if (0 == (keys (%options) ));

    # If the -help option is set, print the usage and exit
    #
    exec("pod2usage $0") if $options{'help'};

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Set and override default values for parameters:
    # returns the command line value for $option_name when one was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    split.scaffolds.to.contigs.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software:
you can redistribute it and/or modify 169 | it under the terms of the GNU General Public License as published by 170 | the Free Software Foundation, either version 3 of the License, or 171 | (at your option) any later version. 172 | 173 | This program is distributed in the hope that it will be useful, 174 | but WITHOUT ANY WARRANTY; without even the implied warranty of 175 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 176 | GNU General Public License for more details. 177 | 178 | You should have received a copy of the GNU General Public License 179 | along with this program. If not, see <http://www.gnu.org/licenses/>. 180 | 181 | =head1 DESCRIPTION 182 | 183 | Splits the scaffolds of a fasta file into contigs at every run of N and writes the contigs that reach the minimum length. 184 | 185 | =head1 SYNOPSIS 186 | 187 | split.scaffolds.to.contigs.pl -i <inputfile> [-h -o -m] 188 | 189 | [-help -h] Displays this basic usage information 190 | [-inputfile -i] Input fasta file 191 | [-outputfile -o] Outputfile (default: out.fa) 192 | [-minlength -m] Minimum length of contigs (default: 200) 193 | 194 | =cut -------------------------------------------------------------------------------- /trim.length.singleline.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # trim.length.singleline.pl 5 | # 6 | # Make a fasta file single line / sequence. 7 | # Extract X sequences of minlength X and rename all using numbers. 8 | # 9 | # Copyright (C) 2012 Mads Albertsen 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.fa", 'inputfile');
my $outputfile = overrideDefault("out.fa", 'outputfile');
my $minlength  = overrideDefault("100", 'minlength');
my $stopcount  = overrideDefault("1000000000000000", 'stopcount');
my $rename     = overrideDefault("0", 'rename');

my $header;          # header line of the sequence currently being read
my $seq     = "";    # sequence collected for that header, joined to one line
my $count   = 0;     # header lines seen, i.e. the number of the current sequence
my $seqsout = 0;     # sequences written so far

######################################################################
# CODE HERE
######################################################################

open(my $in, '<', $inputfile) or die("Cannot open $inputfile\n");
open(my $out, '>', $outputfile) or die("Cannot create $outputfile");

while (my $line = <$in>) {
    chomp $line;
    last if ($seqsout == $stopcount);
    if ($line =~ m/^>/) {
        # A new header closes the previous sequence; $count still holds
        # that sequence's number because it is incremented afterwards.
        if (defined $header) {
            $seqsout += writeSequence($out, $header, $count, $seq, $minlength, $rename);
        }
        $header = $line;
        $seq    = "";
        $count++;
    }
    else {
        $seq .= $line;
    }
}

# The last sequence has no following header to trigger its output. It
# belongs to $header (the most recent header line), not to the one before.
if (defined $header and $seqsout != $stopcount) {
    writeSequence($out, $header, $count, $seq, $minlength, $rename);
}

close($in);
close($out) or die("Cannot close $outputfile: $!\n");

exit;

######################################################################
# TEMPLATE SUBS
######################################################################
sub writeSequence {
    #-----
    # Print one sequence on a single line if it is at least $min long.
    # The header is the original one, or ">$number" when renaming is on.
    # Returns 1 if the sequence was printed, 0 otherwise.
    #
    my ($fh, $seq_header, $number, $sequence, $min, $use_number) = @_;

    return 0 unless length($sequence) > $min - 1;

    if ($use_number != 1) {
        print {$fh} "$seq_header\n";
    }
    else {
        print {$fh} ">$number\n";
    }
    print {$fh} "$sequence\n";
    return 1;
}

sub checkParams {
    #-----
    # Do any and all options checking here...
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s", "minlength|m:s", "stopcount|s:s", "rename|r:+");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    #
    exec("pod2usage $0") if (0 == (keys (%options) ));

    # If the -help option is set, print the usage and exit
    #
    exec("pod2usage $0") if $options{'help'};

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Set and override default values for parameters:
    # returns the command line value for $option_name when one was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    trim.length.singleline.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
166 | 167 | This program is distributed in the hope that it will be useful, 168 | but WITHOUT ANY WARRANTY; without even the implied warranty of 169 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 170 | GNU General Public License for more details. 171 | 172 | You should have received a copy of the GNU General Public License 173 | along with this program. If not, see <http://www.gnu.org/licenses/>. 174 | 175 | =head1 DESCRIPTION 176 | 177 | Writes each sequence of a fasta file on a single line, keeping only sequences of at least the minimum length; optionally stops after a maximum number of sequences and renames them with numbers. 178 | 179 | =head1 SYNOPSIS 180 | 181 | trim.length.singleline.pl -i <inputfile> [-h -o -m -s -r] 182 | 183 | [-help -h] Displays this basic usage information 184 | [-inputfile -i] Input fasta file. 185 | [-outputfile -o] Optional outputfile (default: out.fa). 186 | [-minlength -m] Minimum length of reads (default: 100). 187 | [-stopcount -s] Max number of sequences to output. 188 | [-rename -r] Renames sequences with a number. 189 | 190 | =cut -------------------------------------------------------------------------------- /mannotator.totab.add.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # mannotator.totab.add.pl 5 | # 6 | # Allows you to add more data points to a tab formatted mannotator file. 7 | # The extra data must be formatted as: contigid "tab" DATA "tab" DATA 8 | # Useful to add e.g. coverage or binning information to all contigs 9 | # 10 | # Copyright (C) 2012 Mads Albertsen 11 | # 12 | # This program is free software: you can redistribute it and/or modify 13 | # it under the terms of the GNU General Public License as published by 14 | # the Free Software Foundation, either version 3 of the License, or 15 | # (at your option) any later version. 16 | # 17 | # This program is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $manfile   = overrideDefault("manfile.gff", 'manfile');
my $edatafile = overrideDefault("data.tab", 'edatafile');
my $outfile   = overrideDefault("outfile.txt", 'outfile');

my %contig;           # contig id => its extra data columns, tab joined
my $elements = 0;     # widest number of extra data columns seen

######################################################################
# CODE HERE
######################################################################

open(my $man_fh, '<', $manfile) or die("Cannot open $manfile\n");
open(my $edata_fh, '<', $edatafile) or die("Cannot open $edatafile\n");
open(my $out_fh, '>', $outfile) or die("Cannot create $outfile\n");

# Read the file with the new data and hash it on contig id.
# Lines must be in the format: contigid <tab> DATA <tab> DATA
while (my $line = <$edata_fh>) {
    chomp $line;
    my ($id, @data) = split(/\t/, $line);
    next unless defined $id;    # blank line
    $contig{$id} = join("\t", @data);
    $elements = scalar @data if @data > $elements;
}

# Padding for contigs without extra data: one "0" per data column, so
# every output line ends up with the same number of columns.
my $empty = "\t0" x $elements;

# Append the extra data (or the padding) to every mannotator line; the
# contig id is the first column.
while (my $line = <$man_fh>) {
    chomp $line;
    my ($id) = split(/\t/, $line);
    if (defined $id and exists $contig{$id}) {
        print {$out_fh} "$line\t$contig{$id}\n";
    }
    else {
        print {$out_fh} "$line$empty\n";
    }
}

print "done.\n";

close($man_fh);
close($edata_fh);
close($out_fh) or die("Cannot close $outfile: $!\n");

exit;

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Do any and all options checking here...
    #
    my @standard_options = ( "help|h+", "manfile|m:s", "edatafile|e:s", "outfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    #
    exec("pod2usage $0") if (0 == (keys (%options) ));

    # If the -help option is set, print the usage and exit
    #
    exec("pod2usage $0") if $options{'help'};

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Set and override default values for parameters:
    # returns the command line value for $option_name when one was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

mannotator.totab.add.pl 150 | 151 | =head1 COPYRIGHT 152 | 153 | copyright (C) 2012 Mads Albertsen 154 | 155 | This program is free software: you can redistribute it and/or modify 156 | it under the terms of the GNU General Public License as published by 157 | the Free Software Foundation, either version 3 of the License, or 158 | (at your option) any later version. 159 | 160 | This program is distributed in the hope that it will be useful, 161 | but WITHOUT ANY WARRANTY; without even the implied warranty of 162 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 163 | GNU General Public License for more details. 164 | 165 | You should have received a copy of the GNU General Public License 166 | along with this program. If not, see <http://www.gnu.org/licenses/>. 167 | 168 | =head1 DESCRIPTION 169 | 170 | Allows you to add more data points to a tab formatted mannotator file. 171 | The extra data must be formatted as: contigid "tab" DATA "tab" DATA 172 | Useful to add e.g. coverage or binning information to all contigs 173 | 174 | =head1 SYNOPSIS 175 | 176 | script.pl -m <manfile> -e <edatafile> [-h -o] 177 | 178 | [-help -h] Displays this basic usage information 179 | [-manfile -m] tab formatted mannotator file. 180 | [-edatafile -e] tab formatted extra data file. First column must be contig id. 181 | [-outfile -o] Outfile. 182 | 183 | =cut -------------------------------------------------------------------------------- /find.lost.indexes.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # find.lost.indexes.pl 5 | # 6 | # Counts the barcodes among undetermined reads and optionally extracts reads with listed barcodes to separate fastq files. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt", 'inputfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');
my $inlist     = overrideDefault("inlist.txt", 'inlist');

my $count  = 0;    # line number within the current four line fastq record
my $newkey = 0;    # key of the record currently being read
my %index;         # key => number of reads carrying it
my %outfh;         # key from the list => filehandle of its "<key>.fastq"

######################################################################
# CODE HERE
######################################################################

open(my $in, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $out, '>', $outputfile) or die("Cannot create file: $outputfile\n");

# The list is optional: it is only read when -inlist was given. Every key
# in it gets its own output file, held in a hash of lexical filehandles
# instead of symbolic filehandles named after the key.
if ($inlist ne "inlist.txt") {
    open(my $list, '<', $inlist) or die("Cannot read file: $inlist\n");
    while (my $barcode = <$list>) {
        chomp $barcode;
        next if $barcode eq "" or exists $outfh{$barcode};
        open(my $fh, '>', "$barcode.fastq") or die("Cannot create file: $barcode.fastq\n");
        $outfh{$barcode} = $fh;
    }
    close $list;
}

while (my $line = <$in>) {
    chomp $line;
    $count++;
    if ($count == 1) {
        # Header line: build the key from its colon separated fields.
        # NOTE(review): for an Illumina style header the fields used here
        # look like run, read, lane and barcode, in that order, while the
        # summary heading below says Run.Lane.Read.Barcode - confirm
        # against real input before changing either.
        my @fields     = split(/:/, $line);
        my @readfields = split(/ /, $fields[-4]);
        $newkey = "$fields[1].$readfields[1].$fields[3].$fields[-1]";
        $index{$newkey}++;
    }
    elsif ($count == 4) {
        $count = 0;
    }
    # All four lines of a record go to the file of the record's key.
    if (exists $outfh{$newkey}) {
        print { $outfh{$newkey} } "$line\n";
    }
}

print {$out} "Run.Lane.Read.Barcode\tCount\n";
foreach my $key (sort { $index{$b} <=> $index{$a} } keys %index) {
    print {$out} "$key\t$index{$key}\n";
}

close($in);
close($out) or die("Cannot close file: $outputfile\n");

foreach my $barcode (keys %outfh) {
    close($outfh{$barcode}) or die("Cannot close file: $barcode.fastq\n");
}

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Do any and all options checking here...
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s","inlist|l:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    #
    exec("pod2usage $0") if (0 == (keys (%options) ));

    # If the -help option is set, print the usage and exit
    #
    exec("pod2usage $0") if $options{'help'};

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Set and override default values for parameters:
    # returns the command line value for $option_name when one was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

find.lost.indexes.pl 164 | 165 | =head1 COPYRIGHT 166 | 167 | copyright (C) 2012 Mads Albertsen 168 | 169 | This program is free software: you can redistribute it and/or modify 170 | it under the terms of the GNU General Public License as published by 171 | the Free Software Foundation, either version 3 of the License, or 172 | (at your option) any later version. 173 | 174 | This program is distributed in the hope that it will be useful, 175 | but WITHOUT ANY WARRANTY; without even the implied warranty of 176 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 177 | GNU General Public License for more details. 178 | 179 | You should have received a copy of the GNU General Public License 180 | along with this program. If not, see <http://www.gnu.org/licenses/>. 181 | 182 | =head1 DESCRIPTION 183 | 184 | Counts the barcodes found in a fastq file of undetermined reads and optionally extracts the reads with listed barcodes to separate fastq files. 185 | 186 | =head1 SYNOPSIS 187 | 188 | find.lost.indexes.pl -i <inputfile> [-o -l -h] 189 | 190 | [-help -h] Displays this basic usage information 191 | [-inputfile -i] Inputfile of undetermined reads. 192 | [-outputfile -o] Overview of barcodes in the list. 193 | [-inlist -l] List of barcodes to extract to new files. 194 | 195 | =cut -------------------------------------------------------------------------------- /extract.using.header.list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # extract.using.header.list.pl - version 1.0 5 | # 6 | # Extracts a subset of sequences given a list of the headers to extract. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inlist      = overrideDefault("linlist.txt", 'inlist');
my $insequences = overrideDefault("sequences.fa", 'insequences');
my $outputfile  = overrideDefault("outputfile.txt", 'outputfile');

my $countp   = 0;     # sequences (header lines) in the sequence file
my $countout = 0;     # sequences written to the output file
my $count    = 0;     # distinct headers in the list
my $header;           # header line of the sequence currently being read
my $seq      = "";    # sequence collected for that header, joined to one line
my %extract;          # headers to extract, with or without the leading ">"

######################################################################
# CODE HERE
######################################################################

open(my $list, '<', $inlist) or die("Cannot read file: $inlist\n");

while (my $line = <$list>) {
    chomp $line;
    next if $line eq "";    # a blank line is not a header
    if (!exists($extract{$line})) {
        $extract{$line} = 1;
        $count++;
    }
}
close $list;

print "$count sequences to extract.\n";

open(my $in, '<', $insequences) or die("Cannot read file: $insequences\n");
open(my $out, '>', $outputfile) or die("Cannot create file: $outputfile\n");

# Writes the sequence collected so far if its header is in the list.
my $flush = sub {
    return unless defined $header and headerIsListed($header, \%extract);
    print {$out} "$header\n";
    print {$out} "$seq\n";
    $countout++;
    return;
};

while (my $line = <$in>) {
    chomp $line;
    if ($line =~ m/^>/) {
        # A new header closes the previous sequence.
        $countp++;
        $flush->();
        $header = $line;
        $seq    = "";
    }
    else {
        $seq .= $line;
    }
}

# The last sequence has no following header to trigger its output.
$flush->();

print "$countp sequences in total.\n";
print "$countout sequences extracted.\n";

close($out) or die("Cannot close file: $outputfile\n");
close($in);

######################################################################
# TEMPLATE SUBS
######################################################################
sub headerIsListed {
    #-----
    # True when the header is in the list, either exactly as it appears
    # in the sequence file or without its leading ">".
    #
    my ($seq_header, $wanted) = @_;
    return 1 if exists $wanted->{$seq_header};
    my $name = $seq_header;
    $name =~ s/^>//;
    return exists $wanted->{$name} ? 1 : 0;
}

sub checkParams {
    #-----
    # Do any and all options checking here...
    #
    my @standard_options = ( "help|h+", "insequences|s:s", "inlist|l:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    #
    exec("pod2usage $0") if (0 == (keys (%options) ));

    # If the -help option is set, print the usage and exit
    #
    exec("pod2usage $0") if $options{'help'};

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Set and override default values for parameters:
    # returns the command line value for $option_name when one was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    extract.using.header.list.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads
Albertsen 163 | 164 | This program is free software: you can redistribute it and/or modify 165 | it under the terms of the GNU General Public License as published by 166 | the Free Software Foundation, either version 3 of the License, or 167 | (at your option) any later version. 168 | 169 | This program is distributed in the hope that it will be useful, 170 | but WITHOUT ANY WARRANTY; without even the implied warranty of 171 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 172 | GNU General Public License for more details. 173 | 174 | You should have received a copy of the GNU General Public License 175 | along with this program. If not, see <http://www.gnu.org/licenses/>. 176 | 177 | =head1 DESCRIPTION 178 | 179 | Extracts a subset of sequences given a list of the headers to extract. 180 | 181 | =head1 SYNOPSIS 182 | 183 | extract.using.header.list.pl -l <inlist> -s <insequences> [-o <outputfile> -h] - version 1.0 184 | 185 | [-help -h] Displays this basic usage information 186 | [-inlist -l] List of headers to use for extraction. 187 | [-insequences -s] Sequence file where a subset of sequences are to be extracted from. 188 | [-outputfile -o] Outputfile. 189 | 190 | =cut -------------------------------------------------------------------------------- /extract.kmer.bad.bins.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # extract.kmer.bad.bins.pl 5 | # 6 | # Looks in classified kmer large contig split files for contigs that are 7 | # classified in different groups and output these to a new file. 8 | # 9 | # Copyright (C) 2012 Mads Albertsen 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inkmer     = overrideDefault("inkmer.txt", 'inkmer');
my $inbin      = overrideDefault("inbin.txt", 'inbin');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

my $count  = 0;    # subcontig lines in the bin file (header line excluded)
my $countp = 0;    # different contigs
my $countb = 0;    # contigs whose subcontigs fall in more than one bin
my %contigs;       # contig => bin of its first subcontig
my %conflict;      # contig => 1 when its subcontigs disagree on the bin
my %bin;           # subcontig => bin

######################################################################
# CODE HERE
######################################################################

# Bin file: a header line, then "subcontig <tab> bin" lines. The contig a
# subcontig belongs to is the part of its name before the first ".".
open(my $bin_fh, '<', $inbin) or die("Cannot read file: $inbin\n");
my $bin_header_seen = 0;
while (my $line = <$bin_fh>) {
    chomp $line;
    if (!$bin_header_seen) {
        $bin_header_seen = 1;
        next;
    }
    my ($subcontig, $label) = split(/\t/, $line);
    next unless defined $subcontig and defined $label;    # blank or short line
    my ($contig) = split(/\./, $subcontig);
    next unless defined $contig;
    $count++;
    $bin{$subcontig} = $label;
    if (!exists($contigs{$contig})) {
        $contigs{$contig} = $label;
        $countp++;
    }
    elsif ($contigs{$contig} ne $label and !$conflict{$contig}) {
        # Flag and count a contig once, however many of its subcontigs
        # disagree. A separate hash is used so that no bin label can be
        # mistaken for the conflict marker.
        $conflict{$contig} = 1;
        $countb++;
    }
}

close $bin_fh;
print "$countp different contigs\n";
print "$count different subcontigs\n";
print "$countb contigs with conficting binning\n";

# Kmer file: a header line, then one line per subcontig with the subcontig
# name in the first column. Every line gets its bin appended in the
# ".all.tab" file; lines of conflicting contigs also go to the output file.
open(my $bad_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");
open(my $all_fh, '>', "$outputfile.all.tab") or die("Cannot create file: $outputfile.all.tab\n");
open(my $kmer_fh, '<', $inkmer) or die("Cannot read file: $inkmer\n");
my $kmer_header_seen = 0;
while (my $line = <$kmer_fh>) {
    chomp $line;
    if (!$kmer_header_seen) {
        $kmer_header_seen = 1;
        print {$all_fh} "$line\n";
        next;
    }
    my ($subcontig) = split(/\t/, $line);
    $subcontig = "" unless defined $subcontig;
    my ($contig) = split(/\./, $subcontig);
    # Subcontigs missing from the bin file get an empty bin column.
    my $label = defined $bin{$subcontig} ? $bin{$subcontig} : "";
    if (defined $contig and $conflict{$contig}) {
        print {$bad_fh} "$line\t$label\n";
    }
    print {$all_fh} "$line\t$label\n";
}

close($bad_fh) or die("Cannot close file: $outputfile\n");
close($all_fh) or die("Cannot close file: $outputfile.all.tab\n");
close $kmer_fh;

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Do any and all options checking here...
    #
    my @standard_options = ( "help|h+", "inbin|b:s", "inkmer|k:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    #
    exec("pod2usage $0") if (0 == (keys (%options) ));

    # If the -help option is set, print the usage and exit
    #
    exec("pod2usage $0") if $options{'help'};

    # Compulsory items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Set and override default values for parameters:
    # returns the command line value for $option_name when one was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__
156 | =head1 NAME 157 | 158 | vprobes.generateprobes.pl 159 | 160 | =head1 COPYRIGHT 161 | 162 | copyright (C) 2012 Mads Albertsen 163 | 164 | This program is free software: you can redistribute it and/or modify 165 | it under the terms of the GNU General Public License as published by 166 | the Free Software Foundation, either version 3 of the License, or 167 | (at your option) any later version. 168 | 169 | This program is distributed in the hope that it will be useful, 170 | but WITHOUT ANY WARRANTY; without even the implied warranty of 171 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 172 | GNU General Public License for more details. 173 | 174 | You should have received a copy of the GNU General Public License 175 | along with this program. If not, see . 176 | 177 | =head1 DESCRIPTION 178 | 179 | 180 | 181 | =head1 SYNOPSIS 182 | 183 | script.pl -i [-h] 184 | 185 | [-help -h] Displays this basic usage information 186 | [-inbin -b] Unscrambler Bin file. 187 | [-inkmer -k] Kmer freqeuncy file. 188 | 189 | =cut -------------------------------------------------------------------------------- /calc.gc.distribution.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # scriptname 5 | # 6 | # Short description 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt",  'inputfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');
my $filetype   = overrideDefault(0, 'filetype');    # 0 = fasta, 1 = fastq

######################################################################
# CODE HERE
######################################################################

# Records are read as fixed-size groups of lines: 2 lines per record for
# (single-line) fasta, 4 for fastq. In both, the sequence is the second
# line of the record.
my $lines_per_record = ($filetype == 1) ? 4 : 2;

my @bases = qw(A T C G);                       # output column order
my %count_at = map { $_ => [] } @bases;        # base -> [count per 1-based position]
my $max_length = 0;                            # longest sequence seen
my $line_no    = 0;

open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

while (my $line = <$in_fh>) {
    chomp $line;
    $line_no++;
    next if ($line_no - 1) % $lines_per_record != 1;    # not a sequence line

    my $seq_length = length $line;
    $max_length = $seq_length if $seq_length > $max_length;

    for my $position (1 .. $seq_length) {
        my $base = substr($line, $position - 1, 1);
        # Only upper-case A/T/C/G are tallied; anything else is ignored.
        $count_at{$base}[$position]++ if exists $count_at{$base};
    }
}
close $in_fh;

# One row per position: "position A T C G". The loop runs to the longest
# sequence so positions beyond the last read's length are not lost.
for my $position (1 .. $max_length) {
    my @counts = map { $count_at{$_}[$position] || 0 } @bases;
    print {$out_fh} join(' ', $position, @counts), "\n";
}

close($out_fh) or die("Cannot write file: $outputfile\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line into a hash ref of options.
    # Shows the usage (via pod2usage) when no option or -help is given.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s","filetype|q:+");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    # (list-form exec: the script path never passes through a shell)
    #
    exec('pod2usage', $0) if !%options;

    # If the -help option is set, print the usage and exit
    #
    exec('pod2usage', $0) if $options{'help'};

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when it was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    calc.gc.distribution.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software:
you can redistribute it and/or modify 191 | it under the terms of the GNU General Public License as published by 192 | the Free Software Foundation, either version 3 of the License, or 193 | (at your option) any later version. 194 | 195 | This program is distributed in the hope that it will be useful, 196 | but WITHOUT ANY WARRANTY; without even the implied warranty of 197 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 198 | GNU General Public License for more details. 199 | 200 | You should have received a copy of the GNU General Public License 201 | along with this program. If not, see . 202 | 203 | =head1 DESCRIPTION 204 | 205 | 206 | 207 | =head1 SYNOPSIS 208 | 209 | script.pl -i [-h] 210 | 211 | [-help -h] Displays this basic usage information 212 | [-inputfile -i] Inputfile. 213 | [-outputfile -o] Outputfile. 214 | [-filetype -q] Fasta or fastq input (flag, default fasta) 215 | 216 | =cut -------------------------------------------------------------------------------- /split.assembly.bins.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # split.assembly.bins.pl 5 | # 6 | # Splits a fasta file into different fasta files based on a binning file 7 | # The binning need to be scaffoldname "tab" bin. 8 | # 9 | # Copyright (C) 2012 Mads Albertsen 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile = overrideDefault("inputfile.fasta", 'inputfile');
my $inbins    = overrideDefault("inbins.tab",      'inbins');

######################################################################
# CODE HERE
######################################################################

open(my $bins_fh, '<', $inbins) or die("Cannot read file: $inbins\n");
open(my $fasta_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");

# Read the bins: "name<TAB>bin" per line. A name listed more than once
# keeps its last bin. A leading ">" on the name is optional, so bin files
# written with or without the fasta marker both match the headers below.
my %bin_of;    # sequence name -> bin
while (my $line = <$bins_fh>) {
    chomp $line;
    my ($name, $bin) = split(/\t/, $line);
    next if !defined $name or !defined $bin;
    $name =~ s/^>//;
    $bin_of{$name} = $bin;
}
close $bins_fh;

# Read the sequences, keyed by the full header line without the ">".
# Multi-line sequences are concatenated; a repeated header keeps the
# last sequence.
my %sequence_of;
my $current_name;
while (my $line = <$fasta_fh>) {
    chomp $line;
    if ($line =~ m/^>(.*)/) {
        $current_name = $1;
        $sequence_of{$current_name} = "";
    }
    elsif (defined $current_name) {
        $sequence_of{$current_name} .= $line;
    }
}
close $fasta_fh;

# Group the names per bin once, instead of rescanning every name for
# every bin.
my %names_in;    # bin -> [sequence names]
for my $name (sort keys %bin_of) {
    push @{ $names_in{ $bin_of{$name} } }, $name;
}

# Write one fasta file per bin and report its size.
for my $bin (sort keys %names_in) {
    my $outputfile = "bin.".$bin.".fasta";
    open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

    my $sequence_count = 0;
    my $total_length   = 0;
    for my $name (@{ $names_in{$bin} }) {
        if (!exists $sequence_of{$name}) {
            # Binned name with no sequence: nothing valid to write.
            warn "No sequence found for '$name' (bin $bin) in $inputfile\n";
            next;
        }
        print {$out_fh} ">$name\n";
        print {$out_fh} "$sequence_of{$name}\n";
        $sequence_count++;
        $total_length += length($sequence_of{$name});
    }
    print "bin:$bin\tsequences:$sequence_count\tLength:$total_length\n";
    close($out_fh) or die("Cannot write file: $outputfile\n");
}

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line into a hash ref of options.
    # Shows the usage (via pod2usage) when no option or -help is given.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "inbins|b:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    # (list-form exec: the script path never passes through a shell)
    #
    exec('pod2usage', $0) if !%options;

    # If the -help option is set, print the usage and exit
    #
    exec('pod2usage', $0) if $options{'help'};

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when it was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    split.assembly.bins.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can
redistribute it and/or modify 171 | it under the terms of the GNU General Public License as published by 172 | the Free Software Foundation, either version 3 of the License, or 173 | (at your option) any later version. 174 | 175 | This program is distributed in the hope that it will be useful, 176 | but WITHOUT ANY WARRANTY; without even the implied warranty of 177 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 178 | GNU General Public License for more details. 179 | 180 | You should have received a copy of the GNU General Public License 181 | along with this program. If not, see . 182 | 183 | =head1 DESCRIPTION 184 | 185 | 186 | 187 | =head1 SYNOPSIS 188 | 189 | script.pl -i -b [-h] 190 | 191 | [-help -h] Displays this basic usage information 192 | [-inputfile -i] Input fasta file. 193 | [-inbins -b] Tab seperated binfile (format: name tab bin) 194 | 195 | =cut -------------------------------------------------------------------------------- /multi.sam.to.coverage.profile.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # multi.sam.to.count.profile.pl 5 | # 6 | # Short description 7 | # 8 | # Copyright (C) 2014 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see . 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt",  'inputfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');

my @contig_order;    # contig names in @SQ order, for stable output rows
my %length;          # contig name -> length from the @SQ line
my %coverage;        # contig name -> sample -> number of alignments
my %samples;         # every sample seen

######################################################################
# CODE HERE
######################################################################

open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

while (my $line = <$in_fh>) {
    chomp $line;

    # Header lines start with "@" plus a two-letter record type. The test
    # is anchored so an alignment whose quality string happens to contain
    # e.g. "@SQ" or "@HD" is not mistaken for a header.
    if ($line =~ m/^\@[A-Za-z][A-Za-z](?:\t|$)/) {
        next if $line !~ m/^\@SQ\t/;

        # Read SN/LN by tag rather than by column position, keeping
        # everything after the tag so names containing ":" stay intact.
        my ($name, $contig_length);
        for my $field (split(/\t/, $line)) {
            if ($field =~ m/^SN:(.+)/) {
                $name = $1;
            }
            elsif ($field =~ m/^LN:(\d+)/) {
                $contig_length = $1;
            }
        }
        next if !defined $name;
        push @contig_order, $name if !exists $length{$name};
        $length{$name} = $contig_length;
        next;
    }

    my @fields = split(/\t/, $line);
    next if @fields < 3;    # not an alignment line

    # The sample id is built from ":"-separated parts 0-3 and 9 of the
    # read name; parts the read name does not have are left empty.
    my @name_parts = split(/:/, $fields[0]);
    my $sample = join(':', map { defined $_ ? $_ : '' } @name_parts[0, 1, 2, 3, 9]);
    $samples{$sample} = 1;

    $coverage{ $fields[2] }{$sample}++;
}
close $in_fh;

# Header row, then one row per @SQ contig with the alignment count per
# sample (0 when a sample has no alignment on that contig). Samples are
# sorted so the column order is reproducible between runs.
my @sample_names = sort keys %samples;
print {$out_fh} join("\t", 'Contig', @sample_names), "\n";

for my $contig (@contig_order) {
    my $count_for = $coverage{$contig} || {};
    my @counts = map { $count_for->{$_} || 0 } @sample_names;
    print {$out_fh} join("\t", $contig, @counts), "\n";
}

close($out_fh) or die("Cannot write file: $outputfile\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line into a hash ref of options.
    # Shows the usage (via pod2usage) when no option or -help is given.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    # (list-form exec: the script path never passes through a shell)
    #
    exec('pod2usage', $0) if !%options;

    # If the -help option is set, print the usage and exit
    #
    exec('pod2usage', $0) if $options{'help'};

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when it was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    multi.sam.to.coverage.profile.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
| the Free Software Foundation, either version 3 of the License, or 163 | (at your option) any later version. 164 | 165 | This program is distributed in the hope that it will be useful, 166 | but WITHOUT ANY WARRANTY; without even the implied warranty of 167 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 168 | GNU General Public License for more details. 169 | 170 | You should have received a copy of the GNU General Public License 171 | along with this program. If not, see . 172 | 173 | =head1 DESCRIPTION 174 | 175 | 176 | 177 | =head1 SYNOPSIS 178 | 179 | script.pl -i [-h] 180 | 181 | [-help -h] Displays this basic usage information 182 | [-inputfile -i] SAM file with mappings from multiple datasets. 183 | [-outputfile -o] Count file split on genes and dataset. 184 | 185 | =cut -------------------------------------------------------------------------------- /summarize.bin.stats.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # scriptname 5 | # 6 | # Short description 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 19 | # 20 | # You should have received a copy of the GNU General Public License 21 | # along with this program. If not, see . 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile  = overrideDefault("inputfile.txt",  'inputfile');
my $essfile    = overrideDefault("essfile.txt",    'essfile');
my $outputfile = overrideDefault("outputfile.txt", 'outputfile');
my $length     = overrideDefault(3, 'length');    # column index of the length value
my $hmm        = overrideDefault(8, 'hmm');       # column index of the hmm identifier
# The -ess31file option is still accepted on the command line but is not
# used anywhere in this script.

######################################################################
# CODE HERE
######################################################################

open(my $in_fh, '<', $inputfile) or die("Cannot read file: $inputfile\n");
open(my $ess_fh, '<', $essfile) or die("Cannot read file: $essfile\n");
open(my $out_fh, '>', $outputfile) or die("Cannot create file: $outputfile\n");

# Contig table: a header line, then rows of the form
#   "contig name" value value ...
# The name is the text between the first pair of double quotes; the
# values after it are separated by single spaces.
my %in_bin;             # contig names listed in the input file
my $contig_rows = 0;    # data rows only, the header is not a contig
my $sumlength   = 0;

my $contig_header = <$in_fh>;
while (my $line = <$in_fh>) {
    chomp $line;
    $contig_rows++;

    my @quoted = split(/"/, $line);
    next if !defined $quoted[1] or !defined $quoted[2];
    $in_bin{ $quoted[1] } = 1;

    # $quoted[2] starts with the separating space, so field 0 is empty
    # and $length indexes the values 1-based.
    my @values = split(/ /, $quoted[2]);
    $sumlength += $values[$length] if defined $values[$length];
}
close $in_fh;

print {$out_fh} "#Contings\t$contig_rows\n";
print {$out_fh} "Total Length\t$sumlength\n";

# Essential gene table: tab separated with a header line; column 0 is the
# contig name, column $hmm the hmm identifier. Only rows of contigs from
# the input file are counted.
my %sumhmm;    # hmm identifier -> number of hits
my $ess_header = <$ess_fh>;
while (my $line = <$ess_fh>) {
    chomp $line;
    my @fields = split(/\t/, $line);
    next if !defined $fields[0] or !exists $in_bin{ $fields[0] };
    next if !defined $fields[$hmm];
    $sumhmm{ $fields[$hmm] }++;
}
close $ess_fh;

my $summedhmms = 0;
$summedhmms += $_ for values %sumhmm;

print {$out_fh} "Total Unique Hmms\t", scalar(keys %sumhmm), "\n";
print {$out_fh} "Total Hmms\t$summedhmms\n";

# Most frequent hmm first; ties are ordered by name so the output is
# reproducible between runs.
for my $key (sort { $sumhmm{$b} <=> $sumhmm{$a} or $a cmp $b } keys %sumhmm) {
    print {$out_fh} "$key\t$sumhmm{$key}\n";
}

close($out_fh) or die("Cannot write file: $outputfile\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line into a hash ref of options.
    # Shows the usage (via pod2usage) when no option or -help is given.
    #
    # "-h" belongs to -hmm only. Declaring it for both -help and -hmm made
    # Getopt::Long keep the later declaration (hmm), so "-h" never reached
    # the help branch; help is requested with -help.
    #
    my @standard_options = ( "help+", "inputfile|i:s", "outputfile|o:s","essfile|e:s","ess31file|b:s","length|l:s","hmm|h:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    # (list-form exec: the script path never passes through a shell)
    #
    exec('pod2usage', $0) if !%options;

    # If the -help option is set, print the usage and exit
    #
    exec('pod2usage', $0) if $options{'help'};

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when it was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    summarize.bin.stats.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program
is free software: you can redistribute it and/or modify 176 | it under the terms of the GNU General Public License as published by 177 | the Free Software Foundation, either version 3 of the License, or 178 | (at your option) any later version. 179 | 180 | This program is distributed in the hope that it will be useful, 181 | but WITHOUT ANY WARRANTY; without even the implied warranty of 182 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 183 | GNU General Public License for more details. 184 | 185 | You should have received a copy of the GNU General Public License 186 | along with this program. If not, see . 187 | 188 | =head1 DESCRIPTION 189 | 190 | 191 | 192 | =head1 SYNOPSIS 193 | 194 | script.pl -i [-h] 195 | 196 | [-help -h] Displays this basic usage information 197 | [-inputfile -i] Inputfile 198 | [-outputfile -o] Outputfile 199 | [-essfile -e] Essential gene file 200 | [-ess31file -b] File with the 31 ess genes 201 | [-length -l ] Column with lenght stats 202 | [-hmm -h] Column with hmm identifier 203 | 204 | =cut -------------------------------------------------------------------------------- /calc.dnds.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # calc.dnds.pl 5 | # 6 | # Calculates pairwise dn/ds ratios of all input sequences. Note: the sequences 7 | # have to be alligned and in fasta format. In addition its assumed they start in 8 | # frame. 9 | # 10 | # Copyright (C) 2012 Mads Albertsen 11 | # 12 | # This program is free software: you can redistribute it and/or modify 13 | # it under the terms of the GNU General Public License as published by 14 | # the Free Software Foundation, either version 3 of the License, or 15 | # (at your option) any later version. 
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#Bioperl modules
use Bio::Seq;
use Bio::AlignIO;
use Bio::Align::DNAStatistics;    # "Bio", not "BIO": module paths are case sensitive

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile = overrideDefault("inputfile.txt", 'inputfile');

######################################################################
# CODE HERE
######################################################################

# Read the alignment once; it supplies both the length and the sequences.
my $in     = Bio::AlignIO->new(-format => 'fasta', -file => $inputfile);
my $alnobj = $in->next_aln;
die("No alignment found in file: $inputfile\n") if !defined $alnobj;

# Alignment length in codons (all sequences of an alignment share one length).
my $seqlength = $alnobj->length / 3;

my $stats   = Bio::Align::DNAStatistics->new();
my $results = $stats->calc_all_KaKs_pairs($alnobj);    # one hash ref per sequence pair

my %Nd;    # "seq1 vs seq2" -> N_d of that pair
my %Sd;    # "seq1 vs seq2" -> S_d of that pair
for my $pair (@$results) {
    my $comparison = $pair->{'Seq1'}." vs ".$pair->{'Seq2'};
    $Nd{$comparison} = $pair->{'N_d'} if exists $pair->{'N_d'};
    $Sd{$comparison} = $pair->{'S_d'} if exists $pair->{'S_d'};
}

print "comparison\tAA.length\tdN/dS\t#dN\t#dS\t%AA.sim\n";

# NOTE(review): the dN/dS column is N_d / S_d, i.e. a ratio of the two
# substitution counts -- confirm this is the intended statistic.
for my $key (sort keys %Nd) {
    my $nd = $Nd{$key};
    my $sd = $Sd{$key};

    # A pair without synonymous changes (S_d of 0 or missing) has no
    # defined ratio: report NA instead of dying on a division by zero.
    my $ratio      = (defined $sd and $sd != 0) ? sprintf("%.2f", $nd / $sd) : "NA";
    my $sd_rounded = defined $sd ? sprintf("%.0f", $sd) : "NA";
    my $similarity = $seqlength != 0
                   ? sprintf("%.1f", (1 - $nd / $seqlength) * 100)
                   : "NA";

    print "$key\t$seqlength\t",$ratio,"\t",sprintf("%.0f",$nd),"\t",$sd_rounded,"\t",$similarity,"\n";
}

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line into a hash ref of options.
    # Shows the usage (via pod2usage) when no option or -help is given.
    #
    my @standard_options = ( "help|h+", "inputfile|i:s", "outputfile|o:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    # (list-form exec: the script path never passes through a shell)
    #
    exec('pod2usage', $0) if !%options;

    # If the -help option is set, print the usage and exit
    #
    exec('pod2usage', $0) if $options{'help'};

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when it was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    calc.dnds.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Calculates pairwise dn/ds ratios of all input sequences. The sequences
   have to be aligned, in fasta format, and are assumed to start in frame.

=head1 SYNOPSIS

    calc.dnds.pl -i <inputfile> [-h]

      [-help -h]                   Displays this basic usage information
      [-inputfile -i]              Aligned 'in frame' fasta file.
169 | 170 | =cut -------------------------------------------------------------------------------- /cytoscape.extract.sub.graph.using.list.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # cytoscape.extract.sub.graph.using.list.pl 5 | # 6 | # Given a list of nodes extracts all parts of the relating graph in a 7 | # cytoscape connection file (nodes in column 0 and 2). 8 | # 9 | # Copyright (C) 2012 Mads Albertsen 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU General Public License as published by 13 | # the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU General Public License 22 | # along with this program. If not, see . 
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inconnections = overrideDefault("incon.txt",  'inconnections');
my $inlist        = overrideDefault("inlist.txt", 'inlist');

my %contigs;    # every node known to belong to the subgraph
my %printed;    # nodes that appear on at least one written connection

######################################################################
# CODE HERE
######################################################################

open(my $list_fh, '<', $inlist) or die("Cannot read file: $inlist\n");
open(my $con_fh, '<', $inconnections) or die("Cannot read file: $inconnections\n");
open(my $out_fh, '>', "$inlist.sub.txt") or die("Cannot create file: $inlist.sub.txt\n");
open(my $sub_fh, '>', "$inconnections.sub.txt") or die("Cannot create file: $inconnections.sub.txt\n");
open(my $org_fh, '>', "$inlist.orginal.paint.cyto.txt") or die("Cannot create file: $inlist.orginal.paint.cyto.txt\n");

print {$sub_fh} "node1\tinteraction\tnode2\tconnections\n";
print {$org_fh} "OrgScaffolds\n";

# The listed nodes seed the subgraph; they are also written as a
# "node = 1" attribute file.
my @seed_nodes;
while (my $line = <$list_fh>) {
    chomp $line;
    next if $line eq '';    # a blank line is not a node
    push @seed_nodes, $line;
    $contigs{$line} = 1;
    print {$org_fh} "$line = 1\n";
}
close $list_fh;
close($org_fh) or die("Cannot write file: $inlist.orginal.paint.cyto.txt\n");

# Grow the node set until a complete pass over the connection file
# (node1 in column 0, node2 in column 2) adds nothing new. The file is
# re-read on every pass, so it has to be seekable.
my $newfound = 1;
my $pass     = 0;
while ($newfound == 1) {
    $newfound = 0;
    $pass++;
    print "Pass $pass\n";

    seek($con_fh, 0, 0) or die("Cannot rewind file: $inconnections\n");
    while (my $line = <$con_fh>) {
        chomp $line;
        my ($node1, undef, $node2) = split("\t", $line);
        next if !defined $node1 or !defined $node2;    # not a connection row

        if (exists $contigs{$node1} and !exists $contigs{$node2}) {
            $contigs{$node2} = 1;
            $newfound = 1;
        }
        elsif (exists $contigs{$node2} and !exists $contigs{$node1}) {
            $contigs{$node1} = 1;
            $newfound = 1;
        }
    }
}

# All nodes of the subgraph.
for my $node (sort keys %contigs) {
    print {$out_fh} "$node\n";
}

# All connections of the subgraph. The node set is closed at this point,
# so testing column 0 is sufficient.
seek($con_fh, 0, 0) or die("Cannot rewind file: $inconnections\n");
while (my $line = <$con_fh>) {
    chomp $line;
    my ($node1, undef, $node2) = split("\t", $line);
    next if !defined $node1;
    if (exists $contigs{$node1}) {
        print {$sub_fh} "$line\n";
        $printed{$node1} = 1;
        $printed{$node2} = 1 if defined $node2;
    }
}

# Listed nodes without any connection are appended as bare node rows so
# they still show up in the network.
for my $node (@seed_nodes) {
    print {$sub_fh} "$node\n" if !exists $printed{$node};
}

close $con_fh;
close($out_fh) or die("Cannot write file: $inlist.sub.txt\n");
close($sub_fh) or die("Cannot write file: $inconnections.sub.txt\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line into a hash ref of options.
    # Shows the usage (via pod2usage) when no option or -help is given.
    #
    my @standard_options = ( "help|h+", "inlist|l:s", "inconnections|c:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # if no arguments supplied print the usage and exit
    # (list-form exec: the script path never passes through a shell)
    #
    exec('pod2usage', $0) if !%options;

    # If the -help option is set, print the usage and exit
    #
    exec('pod2usage', $0) if $options{'help'};

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when it was given,
    # otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    cytoscape.extract.sub.graph.using.list.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads
Albertsen 176 | 177 | This program is free software: you can redistribute it and/or modify 178 | it under the terms of the GNU General Public License as published by 179 | the Free Software Foundation, either version 3 of the License, or 180 | (at your option) any later version. 181 | 182 | This program is distributed in the hope that it will be useful, 183 | but WITHOUT ANY WARRANTY; without even the implied warranty of 184 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 185 | GNU General Public License for more details. 186 | 187 | You should have received a copy of the GNU General Public License 188 | along with this program. If not, see . 189 | 190 | =head1 DESCRIPTION 191 | 192 | 193 | 194 | =head1 SYNOPSIS 195 | 196 | script.pl -i [-h] 197 | 198 | [-help -h] Displays this basic usage information 199 | [-inlist -l] List of nodes in subgraph to extract. 200 | [-inconnections -c] Cytoscape connection file. 201 | 202 | =cut -------------------------------------------------------------------------------- /extract.fastq.pe.reads.using.single.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | ############################################################################### 3 | # 4 | # extract.pe.reads.using.single.pl 5 | # 6 | # Given a list of single reads it extracts the PE reads from 2 PE fastq files. 7 | # 8 | # Copyright (C) 2012 Mads Albertsen 9 | # 10 | # This program is free software: you can redistribute it and/or modify 11 | # it under the terms of the GNU General Public License as published by 12 | # the Free Software Foundation, either version 3 of the License, or 13 | # (at your option) any later version. 14 | # 15 | # This program is distributed in the hope that it will be useful, 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | # GNU General Public License for more details. 
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Turn on autoflush for STDERR and STDOUT.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inread      = overrideDefault("paired.fa", 'inread');
my $insingle    = overrideDefault("single.fa", 'insingle');
my $splitheader = overrideDefault(" ", 'splitheader');
my $splitfasta  = overrideDefault("_", 'splitfasta');

my $print     = 0;    # 1 while the current fastq record is being copied
my $count     = 0;    # position (0-3) inside the current 4-line fastq record
my $toextract = 0;    # number of fasta headers seen in the single read file
my $extracted = 0;    # number of fastq records written
my %reads;            # read ids wanted from the fastq file

######################################################################
# CODE HERE
######################################################################

# First collect the read ids from the fasta headers of the single read
# file. NOTE: both separators are interpolated into a pattern, so regex
# metacharacters in them keep their special meaning.
open(my $single_fh, '<', $insingle)
    or die("Cannot read file: $insingle: $!\n");

while (my $line = <$single_fh>) {
    chomp $line;
    next unless ($line =~ m/>/);

    # The id is the text between '>' and the first $splitfasta separator.
    my @splitline  = split(/$splitfasta/, $line);
    my @splitline1 = split(/>/, defined $splitline[0] ? $splitline[0] : '');

    # A header without an id would otherwise be stored as an empty key.
    $reads{$splitline1[1]} = 1 if defined $splitline1[1];
    $toextract++;
}
print "Found $toextract single reads.\n";
close($single_fh);

open(my $out_fh, '>', "paired.sub.fa")
    or die("Cannot create file: paired.sub.fa: $!\n");
open(my $read_fh, '<', $inread)
    or die("Cannot read file: $inread: $!\n");

# The fastq file is handled as fixed 4-line records: the first line of a
# record decides whether all four lines are copied to the output.
while (my $line = <$read_fh>) {
    chomp $line;
    if ($count == 0) {
        # The id is the text between '@' and the first $splitheader separator.
        my @splitline  = split(/$splitheader/, $line);
        my @splitline1 = split(/\@/, defined $splitline[0] ? $splitline[0] : '');
        if (defined $splitline1[1] and exists($reads{$splitline1[1]})) {
            $print = 1;
            $extracted++;
        }
        else {
            $print = 0;
        }
    }
    if ($print == 1) {
        print {$out_fh} "$line\n";
    }
    $count++;
    if ($count == 4) {
        $count = 0;
    }
}
print "Extracted $extracted reads.\n";
close($read_fh);
close($out_fh)
    or die("Cannot close file: paired.sub.fa: $!\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line and return a reference to the options hash.
    # The usage text is shown (via pod2usage) when no option at all was
    # given or when -help was requested.
    #
    my @standard_options = ( "help|h+", "inread|p:s","splitheader|x:s","insingle|s:s","splitfasta|y:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # List-form exec keeps the script path away from the shell; exec only
    # returns on failure, in which case we stop instead of running on.
    #
    if (!%options or $options{'help'}) {
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsosy items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when the option was
    # given, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    extract.fastq.pe.reads.using.single.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Reads the headers of a fasta file with single reads and copies every
   4-line fastq record whose id matches one of them from the paired read
   file to paired.sub.fa. The fasta id is the text between ">" and the
   first -splitfasta separator; the fastq id is the text between "@" and
   the first -splitheader separator.


=head1 SYNOPSIS

    extract.fastq.pe.reads.using.single.pl -p <paired.fastq> -s <single.fa> [-h -x -y]

      [-help -h]                  Displays this basic usage information
      [-inread -p]                Paired reads in fastq format (default: paired.fa).
      [-insingle -s]              Single reads in fasta format (default: single.fa).
      [-splitheader -x]           Code used to split the header of the fastq files (default: " ")
      [-splitfasta -y]            Code used to split the header of the fasta file (default: "_")

=cut

--------------------------------------------------------------------------------
/extract.fasta.pe.reads.using.single.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl
###############################################################################
#
#    extract.pe.reads.using.single.pl
#
#    Given a list of single reads it extracts the PE reads from 2 PE fastq files.
#
#    Copyright (C) 2012 Mads Albertsen
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Turn on autoflush for STDERR and STDOUT so the progress messages
    # are shown as soon as they are printed.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inread      = overrideDefault("paired.fa", 'inread');
my $insingle    = overrideDefault("single.fa", 'insingle');
my $splitheader = overrideDefault("_", 'splitheader');
my $splitfasta  = overrideDefault("_", 'splitfasta');

my $print      = 0;    # 1 while the current fasta record is being copied
my $linecount  = 0;    # number of headers scanned in the paired read file
my $printcount = 0;    # headers scanned since the last progress message
my $toextract  = 0;    # number of headers seen in the single read file
my $extracted  = 0;    # number of records written
my %reads;             # header prefixes (including '>') wanted from the paired file

######################################################################
# CODE HERE
######################################################################

# First collect the header prefixes of the single reads. The prefix is
# everything in front of the first $splitfasta separator and still
# contains the leading '>'. NOTE: both separators are interpolated into
# a pattern, so regex metacharacters in them keep their special meaning.
open(my $single_fh, '<', $insingle)
    or die("Cannot read file: $insingle: $!\n");

while (my $line = <$single_fh>) {
    chomp $line;
    next unless ($line =~ m/>/);
    my @splitline = split(/$splitfasta/, $line);

    # A header that yields no prefix would otherwise be stored as an
    # empty key.
    $reads{$splitline[0]} = 1 if defined $splitline[0];
    $toextract++;
}
print "Found $toextract single reads.\n";
close($single_fh);

open(my $out_fh, '>', "paired.sub.fa")
    or die("Cannot create file: paired.sub.fa: $!\n");
open(my $read_fh, '<', $inread)
    or die("Cannot read file: $inread: $!\n");

# Every header switches copying on or off; the sequence lines that
# follow it are copied (or skipped) together with the header.
while (my $line = <$read_fh>) {
    chomp $line;
    if ($line =~ m/>/) {
        $linecount++;
        $printcount++;
        my @splitline = split(/$splitheader/, $line);
        if (defined $splitline[0] and exists($reads{$splitline[0]})) {
            $print = 1;
            $extracted++;
        }
        else {
            $print = 0;
        }
    }
    if ($print == 1) {
        print {$out_fh} "$line\n";
    }
    if ($printcount == 1000000) {
        print "$linecount reads scanned - $extracted extracted\n";
        $printcount = 0;
    }
}
print "Extracted $extracted of $linecount reads.\n";
close($read_fh);
close($out_fh)
    or die("Cannot close file: paired.sub.fa: $!\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line and return a reference to the options hash.
    # The usage text is shown (via pod2usage) when no option at all was
    # given or when -help was requested.
    #
    my @standard_options = ( "help|h+", "inread|p:s","splitheader|x:s","insingle|s:s","splitfasta|y:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # List-form exec keeps the script path away from the shell; exec only
    # returns on failure, in which case we stop instead of running on.
    #
    if (!%options or $options{'help'}) {
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsosy items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when the option was
    # given, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    extract.fasta.pe.reads.using.single.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Reads the headers of a fasta file with single reads and copies every
   fasta record with a matching header from the paired read file to
   paired.sub.fa. Headers are compared on the part in front of the first
   separator (-splitfasta for the single reads, -splitheader for the
   paired reads).


=head1 SYNOPSIS

    extract.fasta.pe.reads.using.single.pl -p <paired.fa> -s <single.fa> [-h -x -y]

      [-help -h]                  Displays this basic usage information
      [-inread -p]                Paired reads in fasta format (default: paired.fa).
      [-insingle -s]              Single reads in fasta format (default: single.fa).
      [-splitheader -x]           Code used to split the header of the paired read file (default: "_")
      [-splitfasta -y]            Code used to split the header of the single read file (default: "_")

=cut
--------------------------------------------------------------------------------
/extract.read2.using.read1.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl
###############################################################################
#
#    extract.read2.using.read1.pl
#
#    Used in digital normalization. First read1 library is digital normalized
#    by khmer scripts and then read2 is extracted using the remaining read1 reads
#    using this scripts. This ensures proper use of PE reads.
#
#    Copyright (C) 2012 Mads Albertsen
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;

#locally-written modules
BEGIN {
    # Turn on autoflush for STDERR and STDOUT.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inread1     = overrideDefault("read1.fa", 'inread1');
my $inread2     = overrideDefault("read2.fa", 'inread2');
my $outputfile  = overrideDefault("read2.normalized.fa", 'outputfile');
my $splitheader = overrideDefault("\#", 'splitheader');

my $seq       = "";    # sequence collected for the current read 2 record
my $readfound = 0;     # 1 when the current read 2 record has a read 1 partner
my %read1;             # read 1 header prefixes (including '>')

######################################################################
# CODE HERE
######################################################################

# First read in all headers in the read 1 file that need to be matched
# in the read 2 file. Only the part in front of the first $splitheader
# separator is kept, so read 1 and read 2 headers become identical.
# NOTE: the separator is interpolated into a pattern, so regex
# metacharacters in it keep their special meaning.
open(my $read1_fh, '<', $inread1)
    or die("Cannot read file: $inread1: $!\n");

while (my $line = <$read1_fh>) {
    chomp $line;
    next unless ($line =~ m/>/);
    my @splitline = split(/$splitheader/, $line);

    # A header that yields no prefix would otherwise be stored as an
    # empty key.
    $read1{$splitline[0]} = 1 if defined $splitline[0];
}

close($read1_fh);

open(my $out_fh, '>', $outputfile)
    or die("Cannot create file: $outputfile: $!\n");
open(my $read2_fh, '<', $inread2)
    or die("Cannot read file: $inread2: $!\n");

# Look for matching read1 headers in the read2 file. A matching header is
# written at once; its sequence lines are joined and written as one line
# when the next header (or the end of the file) is reached.
while (my $line = <$read2_fh>) {
    chomp $line;
    if ($line =~ m/>/) {
        if ($readfound == 1) {
            print {$out_fh} "$seq\n";
        }
        $readfound = 0;
        my @splitline = split(/$splitheader/, $line);
        if (defined $splitline[0] and exists($read1{$splitline[0]})) {
            print {$out_fh} "$line\n";
            $readfound = 1;
        }
        $seq = "";
    }
    else {
        $seq = $seq.$line;
    }
}
if ($readfound == 1) {    #To catch the last sequence if needed
    print {$out_fh} "$seq\n";
}

close($read2_fh);
close($out_fh)
    or die("Cannot close file: $outputfile: $!\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line and return a reference to the options hash.
    # The usage text is shown (via pod2usage) when no option at all was
    # given or when -help was requested.
    #
    my @standard_options = ( "help|h+", "inread1|i:s", "inread2|p:s","outputfile|o:s","splitheader|s:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # List-form exec keeps the script path away from the shell; exec only
    # returns on failure, in which case we stop instead of running on.
    #
    if (!%options or $options{'help'}) {
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsosy items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when the option was
    # given, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    extract.read2.using.read1.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Used in digital normalization. First read1 library is digital normalized
   by khmer scripts and then read2 is extracted using the remaining read1 reads
   using this scripts. This ensures proper use of PE reads.


=head1 SYNOPSIS

    extract.read2.using.read1.pl -i <read1.fa> -p <read2.fa> [-h -o -s]

      [-help -h]                  Displays this basic usage information
      [-inread1 -i]               Normalized Read1.fa.
      [-inread2 -p]               Unnormalized Read2.fa.
      [-outputfile -o]            Outputfile, (default: read2.normalized.fa).
      [-splitheader -s]           Code used to split the header so read1 and 2 headers become identical (default: \#)

=cut
--------------------------------------------------------------------------------
/cytoscape.otu.cor.matrix.pl:
--------------------------------------------------------------------------------
#!/usr/bin/env perl
###############################################################################
#
#    cytoscape.otu.cor.matrix.pl
#
#    Short description
#
#    Copyright (C) 2012 Mads Albertsen
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

#pragmas
use strict;
use warnings;

#core Perl modules
use Getopt::Long;
use Scalar::Util qw(looks_like_number);

#locally-written modules
BEGIN {
    # Turn on autoflush for STDERR and STDOUT.
    select(STDERR);
    $| = 1;
    select(STDOUT);
    $| = 1;
}

# get input params
my $global_options = checkParams();

my $inputfile   = overrideDefault("inputfile.txt", 'inputfile');
my $outputfile  = overrideDefault("outputfile.txt", 'outputfile');
my $lowcor      = overrideDefault(0, 'lowcor');
my $highcor     = overrideDefault(0, 'highcor');
my $printnocorr = overrideDefault(0, 'printnocorr');

my @otus;          # column names taken from the first line of the matrix
my %cor;           # "row<TAB>0<TAB>column" => correlation, one entry per pair
my %printedcor;    # OTUs that occur in at least one written edge
my $linenr = 0;

######################################################################
# CODE HERE
######################################################################


open(my $in_fh, '<', $inputfile)
    or die("Cannot read file: $inputfile: $!\n");
open(my $out_fh, '>', $outputfile)
    or die("Cannot create file: $outputfile: $!\n");

# Read the whitespace separated matrix: the first line holds the OTU
# names, every following line a row name followed by one value per OTU.
while (my $line = <$in_fh>) {
    chomp $line;
    $linenr++;
    $line =~ s/"//g;    #Remove the "" as are exported by R by default
    if ($linenr == 1) {    #If it is the first line then extract all otu names
        @otus = split(" ", $line);
        next;
    }

    my @tempcor = split(" ", $line);
    next unless @tempcor;    # blank line: no row name, nothing to store

    my $count = 0;
    foreach my $key (@otus) {
        $count++;

        # A row shorter than the header has no value for this column.
        next unless defined $tempcor[$count];

        # Skip the diagonal, and skip a pair whose reverse direction is
        # already stored so every pair gives exactly one edge.
        my $revid = "$key\t0\t$tempcor[0]";
        if (($tempcor[0] ne $key) and !exists($cor{$revid})) { #Add min max criteria when this works on simple datasets.
            my $id = "$tempcor[0]\t0\t$key";
            $cor{$id} = $tempcor[$count];
        }
    }
}

# Write the edges that pass the cutoffs: negative correlations have to be
# below -lowcor, all other correlations above highcor. Keys are sorted so
# the output is reproducible.
print {$out_fh} "node1\tinteraction\tnode2\tcorrelation\tabs.correlation\n";
foreach my $key (sort keys %cor) {
    my $value = $cor{$key};

    # Cells such as NA cannot be compared with the cutoffs.
    next unless looks_like_number($value);

    my $keep = ($value < 0) ? ($value < -$lowcor) : ($value > $highcor);
    next unless $keep;

    my $abscor = abs($value);
    print {$out_fh} "$key\t$value\t$abscor\n";
    my @temp = split("\t", $key);
    $printedcor{$temp[0]} = 1;
    $printedcor{$temp[2]} = 1;
}

# Unless disabled, append the OTUs without any written edge as
# single-node lines.
if ($printnocorr == 0) {
    foreach my $key (@otus) {
        if (!exists($printedcor{$key})) {
            print {$out_fh} "$key\n";
        }
    }
}

close($in_fh);
close($out_fh)
    or die("Cannot close file: $outputfile: $!\n");

######################################################################
# TEMPLATE SUBS
######################################################################
sub checkParams {
    #-----
    # Parse the command line and return a reference to the options hash.
    # The usage text is shown (via pod2usage) when no option at all was
    # given or when -help was requested.
    #
    # "help" has no -h alias here: the spec used to declare -h for both
    # help and highcor, and Getopt::Long lets the later spec win, so -h
    # has always selected highcor. Dropping the dead alias keeps that
    # behaviour and removes the ambiguity.
    #
    my @standard_options = ( "help+", "inputfile|i:s", "outputfile|o:s", "lowcor|l:s", "highcor|h:s","printnocorr|p:+");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );

    # List-form exec keeps the script path away from the shell; exec only
    # returns on failure, in which case we stop instead of running on.
    #
    if (!%options or $options{'help'}) {
        exec('pod2usage', $0) or die("Cannot run pod2usage: $!\n");
    }

    # Compulsosy items
    #if(!exists $options{'infile'} ) { print "**ERROR: $0 : \n"; exec("pod2usage $0"); }

    return \%options;
}

sub overrideDefault
{
    #-----
    # Return the command line value of $option_name when the option was
    # given, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    if(exists $global_options->{$option_name})
    {
        return $global_options->{$option_name};
    }
    return $default_value;
}

__DATA__

=head1 NAME

    cytoscape.otu.cor.matrix.pl

=head1 COPYRIGHT

   copyright (C) 2012 Mads Albertsen

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

   Converts a whitespace separated correlation matrix (first line: OTU
   names, following lines: row name and one value per OTU) into a tab
   separated edge list (node1, interaction, node2, correlation,
   abs.correlation). Each pair of OTUs is written once, and only when its
   correlation passes the cutoffs.

=head1 SYNOPSIS

    cytoscape.otu.cor.matrix.pl -i <inputfile> [-help -o -l -h -p]

      [-help]                     Displays this basic usage information
      [-inputfile -i]             Inputfile (default: inputfile.txt)
      [-outputfile -o]            Outputfile (default: outputfile.txt)
      [-lowcor -l]                Negative correlation cutoff (0:1, default: 0)
      [-highcor -h]               Positive correlation cutoff (0:1, default: 0)
      [-printnocorr -p]           Flag to disable printing of nodes with no cor


=cut
--------------------------------------------------------------------------------