├── Makefile ├── README.md ├── facanon ├── facat ├── fachain ├── faclean ├── facmp ├── facstont ├── fadecimate ├── faget ├── fagrep ├── fakmer ├── falength ├── falint ├── famd5 ├── famd5seq ├── fanttocs ├── farand ├── farc ├── farenumber ├── faseperate-mates ├── fasplit-read ├── fastqtofa ├── fatoagp ├── fatofastq ├── fatoseq ├── faunamb └── faunscaffold /Makefile: -------------------------------------------------------------------------------- 1 | prefix=/usr/local 2 | bindir=$(prefix)/bin 3 | bin_SCRIPTS=\ 4 | facanon \ 5 | facat \ 6 | fachain \ 7 | faclean \ 8 | facmp \ 9 | facstont \ 10 | fadecimate \ 11 | faget \ 12 | fagrep \ 13 | fakmer \ 14 | falength \ 15 | falint \ 16 | famd5 \ 17 | famd5seq \ 18 | fanttocs \ 19 | farand \ 20 | farc \ 21 | farenumber \ 22 | faseperate-mates \ 23 | fasplit-read \ 24 | fastqtofa \ 25 | fatoagp \ 26 | fatofastq \ 27 | fatoseq \ 28 | faunamb \ 29 | faunscaffold 30 | 31 | all: 32 | 33 | clean: 34 | 35 | install: 36 | install -d $(DESTDIR)$(bindir) 37 | install $(bin_SCRIPTS) $(DESTDIR)$(bindir) 38 | 39 | uninstall: 40 | cd $(DESTDIR)$(bindir) && rm -f $(bin_SCRIPTS) 41 | 42 | .PHONY: all clean install uninstall 43 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | fastascripts – Manipulate FASTA files 2 | ===================================== 3 | 4 | These scripts were never meant to see the light of day. I apologize for the 5 | distasteful code. 6 | 7 | Utilities 8 | ========= 9 | 10 | * `facanon` 11 | Output the lexicographically smaller of the sequence and its reverse complement 12 | * `facat` 13 | Concatenate FASTA files and add a prefix to the identifier 14 | * `fachain` 15 | Merge overlapping sequences 16 | * `faclean` 17 | Reformat and optionally remove short contigs from a FASTA file 18 | * `facmp` 19 | Compare pairs of sequences 20 | * `facstont` 21 | Convert colour-space sequence to nucleotides 22 | * `fadecimate` 23 | Randomly keep 1 in every N pairs of reads 24 | * `faget` 25 | Select sequences from a FASTA file by identifier 26 | * `fagrep` 27 | Search a FASTA file using a regular expression 28 | * `fakmer` 29 | Generate tiled k-mers 30 | * `falength` 31 | Print the lengths of sequences 32 | * `falint` 33 | Check the syntax of a FASTA file 34 | * `famd5` 35 | Calculate a MD5 digest for a FASTA file 36 | * `famd5seq` 37 | Calculate a MD5 digest for each sequence 38 | * `fanttocs` 39 | Convert nucleotides to colour-space sequence 40 | * `farand` 41 | Generate a FASTA file with random sequence 42 | * `farc` 43 | Reverse and complement the sequences 44 | * `farenumber` 45 | Renumber the sequences 46 | * `faseperate-mates` 47 | Separate paired reads into two files 48 | * `fasplit-read` 49 | Split a read into two at the midpoint 50 | * `fastqtofa` 51 | Convert a FASTQ file to FASTA format 52 | * `fatoagp` 53 | Convert FASTA scaffolds to FASTA contigs and an AGP file 54 | * `fatofastq` 55 | Convert a FASTA file to a FASTQ file 56 | * `fatoseq` 57 | Remove FASTA headers 58 | * `faunamb` 59 | Convert IUPAC-IUB ambiguity codes to ACGT 60 | * `faunscaffold` 61 | Break scaffolds into contigs at Ns 62 | 63 | License 64 | ================================================================================ 65 | 66 | Copyright 2013 Shaun Jackman 67 | 68 | ### [ISC License][] 69 | 70 | Permission to use, copy, modify, and/or distribute this software for any 71 | purpose with or without fee is hereby granted, provided that the above 72 | copyright notice and this permission notice appear in all copies. 73 | 74 | [ISC License]: http://opensource.org/licenses/ISC 75 | -------------------------------------------------------------------------------- /facanon: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Output the lexicographically smaller of the sequence and its reverse complement 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | while (<>) { 8 | die unless /^>/; 9 | my $id = $_; 10 | chomp (my $seq = <>); 11 | (my $rev = reverse $seq) =~ tr/ACGTacgt/TGCAtgca/; 12 | print $id, ($seq cmp $rev) < 0 ? $seq : $rev, "\n"; 13 | } 14 | -------------------------------------------------------------------------------- /facat: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Concatenate FASTA files and add a prefix to the identifier 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | while (<>) { 8 | my $seq = <>; 9 | my $id = substr $_, 1; 10 | my $prefix = substr $ARGV, 0, 3; 11 | print '>', $prefix, '_', $id, $seq; 12 | } 13 | -------------------------------------------------------------------------------- /fachain: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Merge overlapping sequences 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | use Getopt::Std qw'getopts'; 7 | 8 | my %opt; 9 | getopts 'k:o:', \%opt; 10 | my $opt_o = 11 | exists $opt{'o'} ? -$opt{'o'} : 12 | exists $opt{'k'} ? -($opt{'k'} - 1) : 13 | undef; 14 | 15 | # Reverse complement. 16 | sub rc($) { 17 | my $seq = $_[0]; 18 | ($seq = reverse $seq) =~ tr/ACGTacgt/TGCAtgca/; 19 | return $seq; 20 | } 21 | 22 | my $id; 23 | my %comment; 24 | my %seq; 25 | my %multiplicity; 26 | while (<>) { 27 | chomp; 28 | next if /^#/ || /^$/; 29 | 30 | if (/^>/) { 31 | my $comment; 32 | ($id, $comment) = split ' ', (substr $_, 1), 2; 33 | die if exists $comment{$id}; 34 | if (defined $comment) { 35 | $comment{$id} = " $comment"; 36 | my (undef, $multiplicity) = split ' ', $comment; 37 | $multiplicity{$id} = $multiplicity; 38 | } 39 | 40 | next; 41 | } 42 | 43 | if (/^[ACGTacgtN]/) { 44 | die if exists $seq{$id}; 45 | $seq{$id} = $_ if !/N/; 46 | undef $id; 47 | next; 48 | } 49 | 50 | last; 51 | } 52 | 53 | my $contig_id = 0; 54 | my %seen; 55 | goto first; 56 | while (<>) { 57 | first: 58 | chomp; 59 | next if /^#/ || /^$/; 60 | 61 | my @x = split; 62 | 63 | my $contig_chain = shift @x; 64 | die unless $contig_chain =~ /[+-]$/; 65 | my $s = chop $contig_chain; 66 | die unless exists $seq{$contig_chain}; 67 | #next if $seen{$contig_chain}; 68 | $seen{$contig_chain} = 1; 69 | my $contig_seq = $seq{$contig_chain}; 70 | $contig_seq = rc $contig_seq if $s eq '-'; 71 | my $contig_multiplicity = $multiplicity{$contig_chain}; 72 | $contig_chain .= $s; 73 | 74 | while (@x > 0) { 75 | my $o = defined $opt_o ? $opt_o : shift @x; 76 | die unless $o < 0; 77 | $o = -$o; 78 | my $id = shift @x; 79 | die unless $id =~ /[+-]$/; 80 | my $s = chop $id; 81 | die unless exists $seq{$id}; 82 | my $seq = $seq{$id}; 83 | $seq = rc $seq if $s eq '-'; 84 | $seen{$id} = 1; 85 | 86 | my $l = $contig_seq; 87 | my $r = $seq; 88 | my $ol = substr $l, -$o, $o, ''; 89 | my $or = substr $r, 0, $o, ''; 90 | die "$ol\n$or" unless $ol eq $or; 91 | #print "$ida$as\t$idb$bs\t-$o\n"; 92 | #print ">$ida$as,$idb$bs $o\n$l$ol$r\n"; 93 | $contig_seq .= $r; 94 | $contig_chain .= ",$id$s"; 95 | $contig_multiplicity += $multiplicity{$id}; 96 | } 97 | 98 | my $l = length $contig_seq; 99 | print ">$contig_id $l $contig_multiplicity $contig_chain\n", 100 | "$contig_seq\n"; 101 | $contig_id++; 102 | } 103 | 104 | exit 0; 105 | 106 | for my $id (sort {$a <=> $b} keys %seq) { 107 | if (!$seen{$id}) { 108 | print ">$contig_id$comment{$id} $id\n$seq{$id}\n"; 109 | $contig_id++; 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /faclean: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Reformat and optionally remove short contigs from a FASTA file 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | use Getopt::Std qw'getopts'; 7 | 8 | my %opt; 9 | getopts 'c:l:L:', \%opt; 10 | my $opt_columns = defined $opt{'c'} ? $opt{'c'} : 0; 11 | my $opt_min_length = defined $opt{'l'} ? $opt{'l'} : 0; 12 | my $opt_max_length = defined $opt{'L'} ? $opt{'L'} : 2000000000; 13 | 14 | while (<>) { 15 | next if /^#/; 16 | die unless /^>/; 17 | chomp; 18 | #my $header = $_; 19 | my ($id, $comment) = split ' ', $_, 2; 20 | 21 | my $seq = ''; 22 | while (<>) { 23 | next if /^#/; 24 | last if /^>/; 25 | chomp; 26 | $seq .= $_; 27 | } 28 | 29 | # Skip short sequences. 30 | my $len = $seq =~ tr/ACGTacgt//; 31 | if ($opt_min_length <= $len && $len < $opt_max_length) { 32 | #print $header, "\n"; 33 | #print $id, ' ', $comment, "\n"; 34 | print $id, "\n"; 35 | if ($opt_columns > 0) { 36 | print substr($seq, 0, $opt_columns, ''), "\n" 37 | while length $seq > 0; 38 | } else { 39 | print $seq, "\n"; 40 | } 41 | } 42 | 43 | redo if /^>/; 44 | last if eof; 45 | } 46 | -------------------------------------------------------------------------------- /facmp: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Compare pairs of sequences 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | my $a; 8 | while (<>) { 9 | next if /^[>#]/; 10 | chomp; 11 | if (!defined $a) { 12 | $a = $_; 13 | next; 14 | } 15 | my $b = $_; 16 | 17 | # Compare sequence a to sequence b. 18 | print $a, "\n"; 19 | print map { ord == 0 ? '|' : ' ' } split '', $a ^ $b; 20 | print "\n"; 21 | print $b, "\n"; 22 | undef $a; 23 | } 24 | -------------------------------------------------------------------------------- /facstont: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Convert colour-space sequence to nucleotides 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | my %table = ( 8 | 'A' => ['A', 'C', 'G', 'T'], 9 | 'C' => ['C', 'A', 'T', 'G'], 10 | 'G' => ['G', 'T', 'A', 'C'], 11 | 'T' => ['T', 'G', 'C', 'A'] 12 | ); 13 | 14 | sub cs_to_base($) 15 | { 16 | my $read = shift; 17 | my $seed = substr($read, 0, 1); 18 | for (my $i = 1; $i < length $read; $i++) { 19 | my $p = \substr($read, $i, 1); 20 | $$p = $seed = $table{$seed}[$$p]; 21 | } 22 | return $read; 23 | } 24 | 25 | while (<>) { 26 | if (/^[ACGT]/) { 27 | chomp; 28 | print substr cs_to_base($_), 1; 29 | print "\n"; 30 | } else { 31 | print; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /fadecimate: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Randomly keep 1 in every N pairs of reads 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | my $p = 1 / shift; 8 | 9 | while (<>) { 10 | my $x = $_ . <> . <> . <>; 11 | print $x if rand() < $p; 12 | } 13 | -------------------------------------------------------------------------------- /faget: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Select sequences from a FASTA file by identifier 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | use Getopt::Std qw'getopts'; 7 | 8 | my %opt; 9 | getopts 'f:p:v', \%opt; 10 | my $opt_f = $opt{'f'}; 11 | my $opt_prefix = $opt{'p'}; 12 | my $opt_invert = $opt{'v'}; 13 | 14 | # Reverse complement. 15 | sub rc($) 16 | { 17 | my $seq = $_[0]; 18 | ($seq = reverse $seq) =~ tr/ACGTacgt/TGCAtgca/; 19 | return $seq; 20 | } 21 | 22 | my %f; 23 | sub insert($) 24 | { 25 | my $id = shift; 26 | if ($id =~ /[+-]$/) { 27 | my $sense = chop $id; 28 | $f{$id} = $sense; 29 | } else { 30 | $f{$id} = 1; 31 | } 32 | } 33 | 34 | if (defined $opt_f) { 35 | open F, "<$opt_f" or die; 36 | chomp, insert $_ while ; 37 | close F; 38 | } else { 39 | insert $_ for split ',| |\|', shift; 40 | } 41 | 42 | while (<>) { 43 | next if /^#/; 44 | die unless s/^>//; 45 | chomp; 46 | my ($id, $comment) = split ' ', $_, 2; 47 | chomp (my $seq = <>); 48 | if ($opt_invert) { 49 | print ">$_\n$seq\n" unless $f{$id}; 50 | } else { 51 | my $sense = $f{$id}; 52 | $seq = rc $seq if $sense eq '-'; 53 | next if !$f{$id}; 54 | if ($sense =~ /[+-]/) { 55 | print ">$opt_prefix$id$sense $comment\n$seq\n"; 56 | } else { 57 | print ">$opt_prefix$_\n$seq\n"; 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /fagrep: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Search a FASTA file using a regular expression 3 | # Copyright 2012 Shaun Jackman 4 | 5 | set -eu 6 | f="$1" 7 | shift 8 | grep --color=auto -B1 -Ff \ 9 | <(grep -v '^>' $f; grep -v '^>' $f |farc) "$@" 10 | -------------------------------------------------------------------------------- /fakmer: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Generate tiled k-mers 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | use Getopt::Std 'getopts'; 7 | 8 | my %opt; 9 | getopts('k:', \%opt); 10 | my $opt_k = $opt{'k'}; 11 | 12 | my $id; 13 | while (<>) { 14 | chomp; 15 | if (/^>/) { 16 | $id = $_; 17 | next; 18 | } 19 | my $l = length; 20 | for my $i (0..($l - $opt_k)) { 21 | #print '>', $., ':', $i, "\n", substr($_, $i, $opt_k), "\n"; 22 | #print $id, ':', $i, "\n", substr($_, $i, $opt_k), "\n"; 23 | 24 | #my $seq = substr($_, $i, $opt_k); 25 | #if ($seq =~ /N/) { 26 | # print STDERR "skipping N\n"; 27 | # next; 28 | #} 29 | #my ($chr, $rest) = split ':', substr $id, 1; 30 | #my ($pos, $comment) = split ' ', $rest; 31 | #my $kpos = $pos + $i; 32 | ##my $kid = "$chr:$kpos $comment:$i\n"; 33 | #my $kid = ">$comment:$i $chr:$kpos\n"; 34 | #print $kid, $seq, "\n"; 35 | 36 | my ($tag, $comment) = split ' ', $id; 37 | print $tag, ':', $i, , ' ', $comment, "\n", 38 | substr($_, $i, $opt_k), "\n"; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /falength: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Print the lengths of sequences 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | while (<>) { 8 | next if /^#/; 9 | die unless /^>/; 10 | chomp; 11 | my ($id, undef) = split ' ', $_, 2; 12 | 13 | my $seq = ''; 14 | while (<>) { 15 | next if /^#/; 16 | last if /^>/; 17 | chomp; 18 | $seq .= $_; 19 | } 20 | 21 | #my $len = length $seq; 22 | #my $len = $seq =~ tr/ACGT//; 23 | my $len = $seq =~ tr/ACGTacgt//; 24 | print substr($id, 1), "\t", $len, "\n"; 25 | 26 | redo if /^>/; 27 | last if eof; 28 | } 29 | -------------------------------------------------------------------------------- /falint: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Check the syntax of a FASTA file 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | my $n = 0; 8 | while (<>) { 9 | $n++; 10 | if (/^>/) { 11 | my $seq = <> or die; 12 | } elsif (/^@/) { 13 | my $seq = <> or die; 14 | my $header = <> or die; 15 | my $qual = <> or die; 16 | die unless $header =~ /^+/; 17 | die unless length $seq == length $qual; 18 | } else { 19 | die; 20 | } 21 | } 22 | 23 | print "$n\n"; 24 | -------------------------------------------------------------------------------- /famd5: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Calculate a MD5 digest for a FASTA file 3 | # Copyright 2012 Shaun Jackman 4 | 5 | set -eu 6 | for i; do 7 | echo `facanon $i |grep -v '^>' |tr a-z A-Z |sort |md5sum |cut -d' ' -f1` $i 8 | done 9 | -------------------------------------------------------------------------------- /famd5seq: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Calculate a MD5 digest for each sequence 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | use Digest::MD5 qw(md5_hex); 7 | 8 | my $id; 9 | while (<>) { 10 | chomp; 11 | if (/^>/) { 12 | $id = $_; 13 | next; 14 | } 15 | my $seq = $_; 16 | my $rc = reverse $seq; 17 | $rc =~ tr/ACGT/TGCA/; 18 | print md5_hex $seq lt $rc ? $seq : $rc; 19 | print "\t$id" if defined $id; 20 | print "\n"; 21 | undef $id; 22 | } 23 | -------------------------------------------------------------------------------- /fanttocs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Convert nucleotides to colour-space sequence 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | my %table = ( 8 | 'AA' => 0, 'CC' => 0, 'GG' => 0, 'TT' => 0, 9 | 'AC' => 1, 'CA' => 1, 'GT' => 1, 'TG' => 1, 10 | 'AG' => 2, 'GA' => 2, 'CT' => 2, 'TC' => 2, 11 | 'AT' => 3, 'TA' => 3, 'CG' => 3, 'GC' => 3, 12 | ); 13 | 14 | while (<>) { 15 | if (/^>/) { 16 | print; 17 | next; 18 | } 19 | chomp; 20 | my $seq = 'T' . $_; 21 | #my $seq = $_; 22 | print substr $seq, 0, 1; 23 | print map { $table{substr($seq, $_, 2)} } 0..length($seq)-2; 24 | print "\n"; 25 | } 26 | -------------------------------------------------------------------------------- /farand: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Generate a FASTA file with random sequence 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | srand 0; 8 | 9 | my $i = 0; 10 | for my $len (@ARGV) { 11 | print ">$i $len\n"; 12 | print map { qw'A C G T'[rand 4] } 1 .. $len; 13 | print "\n"; 14 | $i++; 15 | } 16 | -------------------------------------------------------------------------------- /farc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Reverse and complement the sequences 3 | # Copyright 2013 Shaun Jackman 4 | 5 | use strict; 6 | use Getopt::Std qw'getopts'; 7 | 8 | my %opt; 9 | getopts 'f:r:', \%opt; 10 | my $opt_f = $opt{'f'}; 11 | my $opt_r = defined $opt{'r'} ? $opt{'r'} : ''; 12 | 13 | while (<>) { 14 | next if /^#/; 15 | die unless /^>/; 16 | chomp; 17 | my ($id, $comment) = split ' ', $_, 2; 18 | 19 | my $seq = ''; 20 | while (<>) { 21 | next if /^#/; 22 | last if /^>/; 23 | chomp; 24 | $seq .= $_; 25 | } 26 | 27 | if (defined $opt_f) { 28 | print $id, $opt_f, "\n", $seq, "\n"; 29 | } 30 | 31 | if (defined $opt_r) { 32 | $seq = reverse($seq); 33 | $seq =~ tr/ACGTMRWSYKVHDBNacgtmrwsykvhdbn/TGCAKYWSRMBDHVNtgcakywsrmbdhvn/; 34 | print $id, $opt_r, "\n", $seq, "\n"; 35 | } 36 | 37 | redo if /^>/; 38 | last if eof; 39 | } 40 | -------------------------------------------------------------------------------- /farenumber: -------------------------------------------------------------------------------- 1 | #!/usr/bin/awk -f 2 | # Renumber the sequences 3 | # Copyright 2012 Shaun Jackman 4 | 5 | /^>/ { $1=">" i++ } 6 | { print } 7 | -------------------------------------------------------------------------------- /faseperate-mates: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Separate paired reads into two files 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | open A, '>a'; 8 | open B, '>b'; 9 | 10 | while (<>) { 11 | die unless /^@/; 12 | my $b = <>; 13 | my $c = <>; 14 | my $d = <>; 15 | if (/\/1$/) { 16 | print A $_, $b, $c, $d; 17 | } elsif (/\/2$/) { 18 | print B $_, $b, $c, $d; 19 | } else { 20 | die; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /fasplit-read: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Split a read into two at the midpoint 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | while (<>) { 8 | die unless /^>/; 9 | chomp (my $id = $_); 10 | chomp (my $seq = <>); 11 | my $l = (length $seq)/2; 12 | my $a = $seq; 13 | my $b = substr $a, $l, $l, ''; 14 | print "$id/1\n$a\n$id/2\n$b\n"; 15 | } 16 | -------------------------------------------------------------------------------- /fastqtofa: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Convert a FASTQ file to FASTA format 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | while (<>) { 8 | my $hdr = $_; 9 | my $seq = <>; 10 | my $qhdr = <>; 11 | my $qual = <>; 12 | die unless $hdr =~ /^@/; 13 | die unless $qhdr =~ /^+/; 14 | print '>', substr($hdr, 1), $seq; 15 | } 16 | -------------------------------------------------------------------------------- /fatoagp: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Convert FASTA scaffolds to FASTA contigs and an AGP file 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | use Getopt::Std qw'getopts'; 7 | 8 | my %opt; 9 | getopts 'f:s:', \%opt; 10 | my $opt_fasta = $opt{'f'}; 11 | my $opt_min_len = defined $opt{'s'} ? $opt{'s'} : 200; 12 | 13 | open FASTA, ">$opt_fasta" 14 | or die "error: `$opt_fasta': $!\n" 15 | if $opt_fasta; 16 | 17 | while (<>) { 18 | die unless /^>/; 19 | chomp; 20 | my ($scafid, undef) = split ' ', $_, 2; 21 | substr $scafid, 0, 1, ''; 22 | 23 | my $scafseq = <>; 24 | chomp $scafseq; 25 | my $scaflen = $scafseq =~ tr/ACGTacgt//; 26 | next if $scaflen < $opt_min_len; 27 | 28 | my @ctgseqs = split /([Nn]+)/, $scafseq; 29 | my $i = 0; 30 | my $x = 0; 31 | for my $ctgseq (@ctgseqs) { 32 | my $len = length $ctgseq; 33 | # object object_beg object_end part_number 34 | print 'scaffold', $scafid, "\t", 35 | $x + 1, "\t", 36 | $x + $len, "\t", 37 | $i + 1, "\t"; 38 | if ($ctgseq =~ /^[nN]/) { 39 | # component_type gap_length gap_type linkage 40 | print "N\t", $len, "\tscaffold\tyes\tpaired-ends\n"; 41 | } else { 42 | my $ctgid = 'contig' . $scafid . '_' . ($i / 2); 43 | # component_type component_id 44 | # component_beg component_end orientation 45 | print "W\t", $ctgid, "\t1\t", $len, "\t+\n"; 46 | print FASTA '>', $ctgid, "\n", $ctgseq, "\n" 47 | if $opt_fasta; 48 | } 49 | $i++; 50 | $x += $len; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /fatofastq: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Convert a FASTA file to a FASTQ file 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | my $opt_q = 'I'; 8 | 9 | while (<>) { 10 | die unless /^>/; 11 | my $id = $_; 12 | chomp (my $seq = <>); 13 | print '@', substr($id, 1), 14 | $seq, "\n+\n", 15 | $opt_q x length($seq), "\n"; 16 | } 17 | -------------------------------------------------------------------------------- /fatoseq: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Remove FASTA headers 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | while (<>) { 8 | die unless /^>/; 9 | my $seq = <>; 10 | print $seq; 11 | } 12 | -------------------------------------------------------------------------------- /faunamb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Convert IUPAC-IUB ambiguity codes to ACGT 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | my $opt_n = 0; 8 | my $opt_conserve_case = 0; 9 | 10 | while (<>) { 11 | if (/^>/) { 12 | print; 13 | next; 14 | } 15 | 16 | if ($opt_n) { 17 | tr/MRWSYKVHDBmrwsykvhdb/NNNNNNNNNNnnnnnnnnnn/; 18 | } elsif ($opt_conserve_case) { 19 | tr/MRWSYKVHDBmrwsykvhdb/AAACCGAAACaaaccgaaac/; 20 | } else { 21 | tr/MRWSYKVHDBmrwsykvhdb/aaaccgaaacaaaccgaaac/; 22 | } 23 | print; 24 | } 25 | -------------------------------------------------------------------------------- /faunscaffold: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | # Break scaffolds into contigs at Ns 3 | # Copyright 2012 Shaun Jackman 4 | 5 | use strict; 6 | 7 | my $opt_letters = 0; 8 | 9 | while (<>) { 10 | next if /^#/; 11 | die unless /^>/; 12 | chomp; 13 | my ($id, $comment) = split ' ', $_, 2; 14 | 15 | my $seq = ''; 16 | while (<>) { 17 | next if /^#/; 18 | last if /^>/; 19 | chomp; 20 | $seq .= $_; 21 | } 22 | 23 | #$seq =~ tr/MRWSYKVHDBmrwsykvhdb/AAACCGAAACaaaccgaaac/; 24 | #$seq =~ tr/acgtMRWSYKVHDBmrwsykvhdb/ACGTAAACCGAAACAAACCGAAAC/; 25 | my @contigs = split /NN*|nn*/, $seq; 26 | #my @contigs = split /nn*/, $seq; 27 | 28 | my $i = $opt_letters ? 'A' : '0'; 29 | for my $contig (@contigs) { 30 | print $id; 31 | print '_', $i++ if @contigs > 1; 32 | print ' ', $comment if length $comment > 0; 33 | print "\n", $contig, "\n"; 34 | } 35 | 36 | redo if /^>/; 37 | last if eof; 38 | } 39 | --------------------------------------------------------------------------------