├── Makefile
├── README.md
├── facanon
├── facat
├── fachain
├── faclean
├── facmp
├── facstont
├── fadecimate
├── faget
├── fagrep
├── fakmer
├── falength
├── falint
├── famd5
├── famd5seq
├── fanttocs
├── farand
├── farc
├── farenumber
├── faseperate-mates
├── fasplit-read
├── fastqtofa
├── fatoagp
├── fatofastq
├── fatoseq
├── faunamb
└── faunscaffold


/Makefile:
--------------------------------------------------------------------------------
 1 | prefix=/usr/local
 2 | bindir=$(prefix)/bin
 3 | bin_SCRIPTS=\
 4 | facanon \
 5 | facat \
 6 | fachain \
 7 | faclean \
 8 | facmp \
 9 | facstont \
10 | fadecimate \
11 | faget \
12 | fagrep \
13 | fakmer \
14 | falength \
15 | falint \
16 | famd5 \
17 | famd5seq \
18 | fanttocs \
19 | farand \
20 | farc \
21 | farenumber \
22 | faseperate-mates \
23 | fasplit-read \
24 | fastqtofa \
25 | fatoagp \
26 | fatofastq \
27 | fatoseq \
28 | faunamb \
29 | faunscaffold
30 | 
31 | all:
32 | 
33 | clean:
34 | 
35 | install:
36 | 	install -d $(DESTDIR)$(bindir)
37 | 	install $(bin_SCRIPTS) $(DESTDIR)$(bindir)
38 | 
39 | uninstall:
40 | 	cd $(DESTDIR)$(bindir) && rm -f $(bin_SCRIPTS)
41 | 
42 | .PHONY: all clean install uninstall
43 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | fastascripts – Manipulate FASTA files
 2 | =====================================
 3 | 
 4 | These scripts were never meant to see the light of day. I apologize for the
 5 | distasteful code.
 6 | 
 7 | Utilities
 8 | =========
 9 | 
10 | * `facanon`
11 | 	Output the lexicographically smaller of the sequence and its reverse complement
12 | * `facat`
13 | 	Concatenate FASTA files and add a prefix to the identifier
14 | * `fachain`
15 | 	Merge overlapping sequences
16 | * `faclean`
17 | 	Reformat and optionally remove short contigs from a FASTA file
18 | * `facmp`
19 | 	Compare pairs of sequences
20 | * `facstont`
21 | 	Convert colour-space sequence to nucleotides
22 | * `fadecimate`
23 | 	Randomly keep 1 in every N pairs of reads
24 | * `faget`
25 | 	Select sequences from a FASTA file by identifier
26 | * `fagrep`
27 | 	Search a FASTA file using a regular expression
28 | * `fakmer`
29 | 	Generate tiled k-mers
30 | * `falength`
31 | 	Print the lengths of sequences
32 | * `falint`
33 | 	Check the syntax of a FASTA file
34 | * `famd5`
35 | 	Calculate a MD5 digest for a FASTA file
36 | * `famd5seq`
37 | 	Calculate a MD5 digest for each sequence
38 | * `fanttocs`
39 | 	Convert nucleotides to colour-space sequence
40 | * `farand`
41 | 	Generate a FASTA file with random sequence
42 | * `farc`
43 | 	Reverse and complement the sequences
44 | * `farenumber`
45 | 	Renumber the sequences
46 | * `faseperate-mates`
47 | 	Separate paired reads into two files
48 | * `fasplit-read`
49 | 	Split a read into two at the midpoint
50 | * `fastqtofa`
51 | 	Convert a FASTQ file to FASTA format
52 | * `fatoagp`
53 | 	Convert FASTA scaffolds to FASTA contigs and an AGP file
54 | * `fatofastq`
55 | 	 Convert a FASTA file to a FASTQ file
56 | * `fatoseq`
57 | 	Remove FASTA headers
58 | * `faunamb`
59 | 	Convert IUPAC-IUB ambiguity codes to ACGT
60 | * `faunscaffold`
61 | 	Break scaffolds into contigs at Ns
62 | 
63 | License
64 | ================================================================================
65 | 
66 | Copyright 2013 Shaun Jackman
67 | 
68 | ### [ISC License][]
69 | 
70 | Permission to use, copy, modify, and/or distribute this software for any
71 | purpose with or without fee is hereby granted, provided that the above
72 | copyright notice and this permission notice appear in all copies.
73 | 
74 | [ISC License]: http://opensource.org/licenses/ISC
75 | 


--------------------------------------------------------------------------------
/facanon:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Output the lexicographically smaller of the sequence and its reverse complement
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | while (<>) {
 8 | 	die unless /^>/;
 9 | 	my $id = $_;
10 | 	chomp (my $seq = <>);
11 | 	(my $rev = reverse $seq) =~ tr/ACGTacgt/TGCAtgca/;
12 | 	print $id, ($seq cmp $rev) < 0 ? $seq : $rev, "\n";
13 | }
14 | 


--------------------------------------------------------------------------------
/facat:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Concatenate FASTA files and add a prefix to the identifier
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | while (<>) {
 8 | 	my $seq = <>;
 9 | 	my $id = substr $_, 1;
10 | 	my $prefix = substr $ARGV, 0, 3;
11 | 	print '>', $prefix, '_', $id, $seq;
12 | }
13 | 


--------------------------------------------------------------------------------
/fachain:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/perl
  2 | # Merge overlapping sequences
  3 | # Copyright 2012 Shaun Jackman
  4 | 
  5 | use strict;
  6 | use Getopt::Std qw'getopts';
  7 | 
  8 | my %opt;
  9 | getopts 'k:o:', \%opt;
 10 | my $opt_o =
 11 | 	exists $opt{'o'} ? -$opt{'o'} :
 12 | 	exists $opt{'k'} ? -($opt{'k'} - 1) :
 13 | 	undef;
 14 | 
 15 | # Reverse complement.
 16 | sub rc($) {
 17 | 	my $seq = $_[0];
 18 | 	($seq = reverse $seq) =~ tr/ACGTacgt/TGCAtgca/;
 19 | 	return $seq;
 20 | }
 21 | 
 22 | my $id;
 23 | my %comment;
 24 | my %seq;
 25 | my %multiplicity;
 26 | while (<>) {
 27 | 	chomp;
 28 | 	next if /^#/ || /^$/;
 29 | 
 30 | 	if (/^>/) {
 31 | 		my $comment;
 32 | 		($id, $comment) = split ' ', (substr $_, 1), 2;
 33 | 		die if exists $comment{$id};
 34 | 		if (defined $comment) {
 35 | 			$comment{$id} = " $comment";
 36 | 			my (undef, $multiplicity) = split ' ', $comment;
 37 | 			$multiplicity{$id} = $multiplicity;
 38 | 		}
 39 | 
 40 | 		next;
 41 | 	}
 42 | 
 43 | 	if (/^[ACGTacgtN]/) {
 44 | 		die if exists $seq{$id};
 45 | 		$seq{$id} = $_ if !/N/;
 46 | 		undef $id;
 47 | 		next;
 48 | 	}
 49 | 
 50 | 	last;
 51 | }
 52 | 
 53 | my $contig_id = 0;
 54 | my %seen;
 55 | goto first;
 56 | while (<>) {
 57 | first:
 58 | 	chomp;
 59 | 	next if /^#/ || /^$/;
 60 | 
 61 | 	my @x = split;
 62 | 
 63 | 	my $contig_chain = shift @x;
 64 | 	die unless $contig_chain =~ /[+-]$/;
 65 | 	my $s = chop $contig_chain;
 66 | 	die unless exists $seq{$contig_chain};
 67 | 	#next if $seen{$contig_chain};
 68 | 	$seen{$contig_chain} = 1;
 69 | 	my $contig_seq = $seq{$contig_chain};
 70 | 	$contig_seq = rc $contig_seq if $s eq '-';
 71 | 	my $contig_multiplicity = $multiplicity{$contig_chain};
 72 | 	$contig_chain .= $s;
 73 | 
 74 | 	while (@x > 0) {
 75 | 		my $o = defined $opt_o ? $opt_o : shift @x;
 76 | 		die unless $o < 0;
 77 | 		$o = -$o;
 78 | 		my $id = shift @x;
 79 | 		die unless $id =~ /[+-]$/;
 80 | 		my $s = chop $id;
 81 | 		die unless exists $seq{$id};
 82 | 		my $seq = $seq{$id};
 83 | 		$seq = rc $seq if $s eq '-';
 84 | 		$seen{$id} = 1;
 85 | 
 86 | 		my $l = $contig_seq;
 87 | 		my $r = $seq;
 88 | 		my $ol = substr $l, -$o, $o, '';
 89 | 		my $or = substr $r, 0, $o, '';
 90 | 		die "$ol\n$or" unless $ol eq $or;
 91 | 		#print "$ida$as\t$idb$bs\t-$o\n";
 92 | 		#print ">$ida$as,$idb$bs $o\n$l$ol$r\n";
 93 | 		$contig_seq .= $r;
 94 | 		$contig_chain .= ",$id$s";
 95 | 		$contig_multiplicity += $multiplicity{$id};
 96 | 	}
 97 | 
 98 | 	my $l = length $contig_seq;
 99 | 	print ">$contig_id $l $contig_multiplicity $contig_chain\n",
100 | 		"$contig_seq\n";
101 | 	$contig_id++;
102 | }
103 | 
104 | exit 0;
105 | 
106 | for my $id (sort {$a <=> $b} keys %seq) {
107 | 	if (!$seen{$id}) {
108 | 		print ">$contig_id$comment{$id} $id\n$seq{$id}\n";
109 | 		$contig_id++;
110 | 	}
111 | }
112 | 


--------------------------------------------------------------------------------
/faclean:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Reformat and optionally remove short contigs from a FASTA file
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | use Getopt::Std qw'getopts';
 7 | 
 8 | my %opt;
 9 | getopts 'c:l:L:', \%opt;
10 | my $opt_columns = defined $opt{'c'} ? $opt{'c'} : 0;
11 | my $opt_min_length = defined $opt{'l'} ? $opt{'l'} : 0;
12 | my $opt_max_length = defined $opt{'L'} ? $opt{'L'} : 2000000000;
13 | 
14 | while (<>) {
15 | 	next if /^#/;
16 | 	die unless /^>/;
17 | 	chomp;
18 | 	#my $header = $_;
19 | 	my ($id, $comment) = split ' ', $_, 2;
20 | 
21 | 	my $seq = '';
22 | 	while (<>) {
23 | 		next if /^#/;
24 | 		last if /^>/;
25 | 		chomp;
26 | 		$seq .= $_;
27 | 	}
28 | 
29 | 	# Skip short sequences.
30 | 	my $len = $seq =~ tr/ACGTacgt//;
31 | 	if ($opt_min_length <= $len && $len < $opt_max_length) {
32 | 		#print $header, "\n";
33 | 		#print $id, ' ', $comment, "\n";
34 | 		print $id, "\n";
35 | 		if ($opt_columns > 0) {
36 | 			print substr($seq, 0, $opt_columns, ''), "\n"
37 | 				while length $seq > 0;
38 | 		} else {
39 | 			print $seq, "\n";
40 | 		}
41 | 	}
42 | 
43 | 	redo if /^>/;
44 | 	last if eof;
45 | }
46 | 


--------------------------------------------------------------------------------
/facmp:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Compare pairs of sequences
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | my $a;
 8 | while (<>) {
 9 | 	next if /^[>#]/;
10 | 	chomp;
11 | 	if (!defined $a) {
12 | 		$a = $_;
13 | 		next;
14 | 	}
15 | 	my $b = $_;
16 | 
17 | 	# Compare sequence a to sequence b.
18 | 	print $a, "\n";
19 | 	print map { ord == 0 ? '|' : ' ' } split '', $a ^ $b;
20 | 	print "\n";
21 | 	print $b, "\n";
22 | 	undef $a;
23 | }
24 | 


--------------------------------------------------------------------------------
/facstont:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert colour-space sequence to nucleotides
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | my %table = (
 8 | 	'A' => ['A', 'C', 'G', 'T'],
 9 | 	'C' => ['C', 'A', 'T', 'G'],
10 | 	'G' => ['G', 'T', 'A', 'C'],
11 | 	'T' => ['T', 'G', 'C', 'A']
12 | );
13 | 
14 | sub cs_to_base($)
15 | {
16 | 	my $read = shift;
17 | 	my $seed = substr($read, 0, 1);
18 | 	for (my $i = 1; $i < length $read; $i++) {
19 | 		my $p = \substr($read, $i, 1);
20 | 		$$p = $seed = $table{$seed}[$$p];
21 | 	}
22 | 	return $read;
23 | }
24 | 
25 | while (<>) {
26 | 	if (/^[ACGT]/) {
27 | 		chomp;
28 | 		print substr cs_to_base($_), 1;
29 | 		print "\n";
30 | 	} else {
31 | 		print;
32 | 	}
33 | }
34 | 


--------------------------------------------------------------------------------
/fadecimate:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Randomly keep 1 in every N pairs of reads
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | my $p = 1 / shift;
 8 | 
 9 | while (<>) {
10 | 	my $x = $_ . <> . <> . <>;
11 | 	print $x if rand() < $p;
12 | }
13 | 


--------------------------------------------------------------------------------
/faget:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Select sequences from a FASTA file by identifier
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | use Getopt::Std qw'getopts';
 7 | 
 8 | my %opt;
 9 | getopts 'f:p:v', \%opt;
10 | my $opt_f = $opt{'f'};
11 | my $opt_prefix = $opt{'p'};
12 | my $opt_invert = $opt{'v'};
13 | 
14 | # Reverse complement.
15 | sub rc($)
16 | {
17 | 	my $seq = $_[0];
18 | 	($seq = reverse $seq) =~ tr/ACGTacgt/TGCAtgca/;
19 | 	return $seq;
20 | }
21 | 
22 | my %f;
23 | sub insert($)
24 | {
25 | 	my $id = shift;
26 | 	if ($id =~ /[+-]$/) {
27 | 		my $sense = chop $id;
28 | 		$f{$id} = $sense;
29 | 	} else {
30 | 		$f{$id} = 1;
31 | 	}
32 | }
33 | 
34 | if (defined $opt_f) {
35 | 	open F, "<$opt_f" or die;
36 | 	chomp, insert $_ while <F>;
37 | 	close F;
38 | } else {
39 | 	insert $_ for split ',| |\|', shift;
40 | }
41 | 
42 | while (<>) {
43 | 	next if /^#/;
44 | 	die unless s/^>//;
45 | 	chomp;
46 | 	my ($id, $comment) = split ' ', $_, 2;
47 | 	chomp (my $seq = <>);
48 | 	if ($opt_invert) {
49 | 		print ">$_\n$seq\n" unless $f{$id};
50 | 	} else {
51 | 		my $sense = $f{$id};
52 | 		$seq = rc $seq if $sense eq '-';
53 | 		next if !$f{$id};
54 | 		if ($sense =~ /[+-]/) {
55 | 			print ">$opt_prefix$id$sense $comment\n$seq\n";
56 | 		} else {
57 | 			print ">$opt_prefix$_\n$seq\n";
58 | 		}
59 | 	}
60 | }
61 | 


--------------------------------------------------------------------------------
/fagrep:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Search a FASTA file using a regular expression
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | set -eu
 6 | f="$1"
 7 | shift
 8 | grep --color=auto -B1 -Ff \
 9 | 	<(grep -v '^>' $f; grep -v '^>' $f |farc) "$@"
10 | 


--------------------------------------------------------------------------------
/fakmer:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Generate tiled k-mers
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | use Getopt::Std 'getopts';
 7 | 
 8 | my %opt;
 9 | getopts('k:', \%opt);
10 | my $opt_k = $opt{'k'};
11 | 
12 | my $id;
13 | while (<>) {
14 | 	chomp;
15 | 	if (/^>/) {
16 | 		$id = $_;
17 | 		next;
18 | 	}
19 | 	my $l = length;
20 | 	for my $i (0..($l - $opt_k)) {
21 | 		#print '>', $., ':', $i, "\n", substr($_, $i, $opt_k), "\n";
22 | 		#print $id, ':', $i, "\n", substr($_, $i, $opt_k), "\n";
23 | 
24 | 		#my $seq = substr($_, $i, $opt_k);
25 | 		#if ($seq =~ /N/) {
26 | 		#	print STDERR "skipping N\n";
27 | 		#	next;
28 | 		#}
29 | 		#my ($chr, $rest) = split ':', substr $id, 1;
30 | 		#my ($pos, $comment) = split ' ', $rest;
31 | 		#my $kpos = $pos + $i;
32 | 		##my $kid = "$chr:$kpos $comment:$i\n";
33 | 		#my $kid = ">$comment:$i $chr:$kpos\n";
34 | 		#print $kid, $seq, "\n";
35 | 
36 | 		my ($tag, $comment) = split ' ', $id;
37 | 		print $tag, ':', $i, , ' ', $comment, "\n",
38 | 			substr($_, $i, $opt_k), "\n";
39 | 	}
40 | }
41 | 


--------------------------------------------------------------------------------
/falength:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Print the lengths of sequences
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | while (<>) {
 8 | 	next if /^#/;
 9 | 	die unless /^>/;
10 | 	chomp;
11 | 	my ($id, undef) = split ' ', $_, 2;
12 | 
13 | 	my $seq = '';
14 | 	while (<>) {
15 | 		next if /^#/;
16 | 		last if /^>/;
17 | 		chomp;
18 | 		$seq .= $_;
19 | 	}
20 | 
21 | 	#my $len = length $seq;
22 | 	#my $len = $seq =~ tr/ACGT//;
23 | 	my $len = $seq =~ tr/ACGTacgt//;
24 | 	print substr($id, 1), "\t", $len, "\n";
25 | 
26 | 	redo if /^>/;
27 | 	last if eof;
28 | }
29 | 


--------------------------------------------------------------------------------
/falint:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Check the syntax of a FASTA file
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | my $n = 0;
 8 | while (<>) {
 9 | 	$n++;
10 | 	if (/^>/) {
11 | 		my $seq = <> or die;
12 | 	} elsif (/^@/) {
13 | 		my $seq = <> or die;
14 | 		my $header = <> or die;
15 | 		my $qual = <> or die;
16 | 		die unless $header =~ /^+/;
17 | 		die unless length $seq == length $qual;
18 | 	} else {
19 | 		die;
20 | 	}
21 | }
22 | 
23 | print "$n\n";
24 | 


--------------------------------------------------------------------------------
/famd5:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Calculate a MD5 digest for a FASTA file
3 | # Copyright 2012 Shaun Jackman
4 | 
5 | set -eu
6 | for i; do
7 | 	echo `facanon $i |grep -v '^>' |tr a-z A-Z |sort |md5sum |cut -d' ' -f1` $i
8 | done
9 | 


--------------------------------------------------------------------------------
/famd5seq:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Calculate a MD5 digest for each sequence
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | use Digest::MD5 qw(md5_hex);
 7 | 
 8 | my $id;
 9 | while (<>) {
10 | 	chomp;
11 | 	if (/^>/) {
12 | 		$id = $_;
13 | 		next;
14 | 	}
15 | 	my $seq = $_;
16 | 	my $rc = reverse $seq;
17 | 	$rc =~ tr/ACGT/TGCA/;
18 | 	print md5_hex $seq lt $rc ? $seq : $rc;
19 | 	print "\t$id" if defined $id;
20 | 	print "\n";
21 | 	undef $id;
22 | }
23 | 


--------------------------------------------------------------------------------
/fanttocs:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert nucleotides to colour-space sequence
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | my %table = (
 8 | 	'AA' => 0, 'CC' => 0, 'GG' => 0, 'TT' => 0,
 9 | 	'AC' => 1, 'CA' => 1, 'GT' => 1, 'TG' => 1,
10 | 	'AG' => 2, 'GA' => 2, 'CT' => 2, 'TC' => 2,
11 | 	'AT' => 3, 'TA' => 3, 'CG' => 3, 'GC' => 3,
12 | );
13 | 
14 | while (<>) {
15 | 	if (/^>/) {
16 | 		print;
17 | 		next;
18 | 	}
19 | 	chomp;
20 | 	my $seq = 'T' . $_;
21 | 	#my $seq = $_;
22 | 	print substr $seq, 0, 1;
23 | 	print map { $table{substr($seq, $_, 2)} } 0..length($seq)-2;
24 | 	print "\n";
25 | }
26 | 


--------------------------------------------------------------------------------
/farand:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Generate a FASTA file with random sequence
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | srand 0;
 8 | 
 9 | my $i = 0;
10 | for my $len (@ARGV) {
11 | 	print ">$i $len\n";
12 | 	print map { qw'A C G T'[rand 4] } 1 .. $len;
13 | 	print "\n";
14 | 	$i++;
15 | }
16 | 


--------------------------------------------------------------------------------
/farc:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Reverse and complement the sequences
 3 | # Copyright 2013 Shaun Jackman
 4 | 
 5 | use strict;
 6 | use Getopt::Std qw'getopts';
 7 | 
 8 | my %opt;
 9 | getopts 'f:r:', \%opt;
10 | my $opt_f = $opt{'f'};
11 | my $opt_r = defined $opt{'r'} ? $opt{'r'} : '';
12 | 
13 | while (<>) {
14 | 	next if /^#/;
15 | 	die unless /^>/;
16 | 	chomp;
17 | 	my ($id, $comment) = split ' ', $_, 2;
18 | 
19 | 	my $seq = '';
20 | 	while (<>) {
21 | 		next if /^#/;
22 | 		last if /^>/;
23 | 		chomp;
24 | 		$seq .= $_;
25 | 	}
26 | 
27 | 	if (defined $opt_f) {
28 | 		print $id, $opt_f, "\n", $seq, "\n";
29 | 	}
30 | 
31 | 	if (defined $opt_r) {
32 | 		$seq = reverse($seq);
33 | 		$seq =~ tr/ACGTMRWSYKVHDBNacgtmrwsykvhdbn/TGCAKYWSRMBDHVNtgcakywsrmbdhvn/;
34 | 		print $id, $opt_r, "\n", $seq, "\n";
35 | 	}
36 | 
37 | 	redo if /^>/;
38 | 	last if eof;
39 | }
40 | 


--------------------------------------------------------------------------------
/farenumber:
--------------------------------------------------------------------------------
1 | #!/usr/bin/awk -f
2 | # Renumber the sequences
3 | # Copyright 2012 Shaun Jackman
4 | 
5 | /^>/ { $1=">" i++ }
6 | { print }
7 | 


--------------------------------------------------------------------------------
/faseperate-mates:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Separate paired reads into two files
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | open A, '>a';
 8 | open B, '>b';
 9 | 
10 | while (<>) {
11 | 	die unless /^@/;
12 | 	my $b = <>;
13 | 	my $c = <>;
14 | 	my $d = <>;
15 | 	if (/\/1$/) {
16 | 		print A $_, $b, $c, $d;
17 | 	} elsif (/\/2$/) {
18 | 		print B $_, $b, $c, $d;
19 | 	} else {
20 | 		die;
21 | 	}
22 | }
23 | 


--------------------------------------------------------------------------------
/fasplit-read:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Split a read into two at the midpoint
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | while (<>) {
 8 | 	die unless /^>/;
 9 | 	chomp (my $id = $_);
10 | 	chomp (my $seq = <>);
11 | 	my $l = (length $seq)/2;
12 | 	my $a = $seq;
13 | 	my $b = substr $a, $l, $l, '';
14 | 	print "$id/1\n$a\n$id/2\n$b\n";
15 | }
16 | 


--------------------------------------------------------------------------------
/fastqtofa:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert a FASTQ file to FASTA format
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | while (<>) {
 8 | 	my $hdr = $_;
 9 | 	my $seq = <>;
10 | 	my $qhdr = <>;
11 | 	my $qual = <>;
12 | 	die unless $hdr =~ /^@/;
13 | 	die unless $qhdr =~ /^+/;
14 | 	print '>', substr($hdr, 1), $seq;
15 | }
16 | 


--------------------------------------------------------------------------------
/fatoagp:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert FASTA scaffolds to FASTA contigs and an AGP file
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | use Getopt::Std qw'getopts';
 7 | 
 8 | my %opt;
 9 | getopts 'f:s:', \%opt;
10 | my $opt_fasta = $opt{'f'};
11 | my $opt_min_len = defined $opt{'s'} ? $opt{'s'} : 200;
12 | 
13 | open FASTA, ">$opt_fasta"
14 | 	or die "error: `$opt_fasta': $!\n"
15 | 	if $opt_fasta;
16 | 
17 | while (<>) {
18 | 	die unless /^>/;
19 | 	chomp;
20 | 	my ($scafid, undef) = split ' ', $_, 2;
21 | 	substr $scafid, 0, 1, '';
22 | 
23 | 	my $scafseq = <>;
24 | 	chomp $scafseq;
25 | 	my $scaflen = $scafseq =~ tr/ACGTacgt//;
26 | 	next if $scaflen < $opt_min_len;
27 | 
28 | 	my @ctgseqs = split /([Nn]+)/, $scafseq;
29 | 	my $i = 0;
30 | 	my $x = 0;
31 | 	for my $ctgseq (@ctgseqs) {
32 | 		my $len = length $ctgseq;
33 | 		# object object_beg object_end part_number
34 | 		print 'scaffold', $scafid, "\t",
35 | 			$x + 1, "\t",
36 | 			$x + $len, "\t",
37 | 			$i + 1, "\t";
38 | 		if ($ctgseq =~ /^[nN]/) {
39 | 			# component_type gap_length gap_type linkage
40 | 			print "N\t", $len, "\tscaffold\tyes\tpaired-ends\n";
41 | 		} else {
42 | 			my $ctgid = 'contig' . $scafid . '_' . ($i / 2);
43 | 			# component_type component_id
44 | 			# component_beg component_end orientation
45 | 			print "W\t", $ctgid, "\t1\t", $len, "\t+\n";
46 | 			print FASTA '>', $ctgid, "\n", $ctgseq, "\n"
47 | 				if $opt_fasta;
48 | 		}
49 | 		$i++;
50 | 		$x += $len;
51 | 	}
52 | }
53 | 


--------------------------------------------------------------------------------
/fatofastq:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert a FASTA file to a FASTQ file
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | my $opt_q = 'I';
 8 | 
 9 | while (<>) {
10 | 	die unless /^>/;
11 | 	my $id = $_;
12 | 	chomp (my $seq = <>);
13 | 	print '@', substr($id, 1),
14 | 		$seq, "\n+\n",
15 | 		$opt_q x length($seq), "\n";
16 | }
17 | 


--------------------------------------------------------------------------------
/fatoseq:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Remove FASTA headers
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | while (<>) {
 8 | 	die unless /^>/;
 9 | 	my $seq = <>;
10 | 	print $seq;
11 | }
12 | 


--------------------------------------------------------------------------------
/faunamb:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Convert IUPAC-IUB ambiguity codes to ACGT
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | my $opt_n = 0;
 8 | my $opt_conserve_case = 0;
 9 | 
10 | while (<>) {
11 | 	if (/^>/) {
12 | 		print;
13 | 		next;
14 | 	}
15 | 
16 | 	if ($opt_n) {
17 | 		tr/MRWSYKVHDBmrwsykvhdb/NNNNNNNNNNnnnnnnnnnn/;
18 | 	} elsif ($opt_conserve_case) {
19 | 		tr/MRWSYKVHDBmrwsykvhdb/AAACCGAAACaaaccgaaac/;
20 | 	} else {
21 | 		tr/MRWSYKVHDBmrwsykvhdb/aaaccgaaacaaaccgaaac/;
22 | 	}
23 | 	print;
24 | }
25 | 


--------------------------------------------------------------------------------
/faunscaffold:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | # Break scaffolds into contigs at Ns
 3 | # Copyright 2012 Shaun Jackman
 4 | 
 5 | use strict;
 6 | 
 7 | my $opt_letters = 0;
 8 | 
 9 | while (<>) {
10 | 	next if /^#/;
11 | 	die unless /^>/;
12 | 	chomp;
13 | 	my ($id, $comment) = split ' ', $_, 2;
14 | 
15 | 	my $seq = '';
16 | 	while (<>) {
17 | 		next if /^#/;
18 | 		last if /^>/;
19 | 		chomp;
20 | 		$seq .= $_;
21 | 	}
22 | 
23 | 	#$seq =~ tr/MRWSYKVHDBmrwsykvhdb/AAACCGAAACaaaccgaaac/;
24 | 	#$seq =~ tr/acgtMRWSYKVHDBmrwsykvhdb/ACGTAAACCGAAACAAACCGAAAC/;
25 | 	my @contigs = split /NN*|nn*/, $seq;
26 | 	#my @contigs = split /nn*/, $seq;
27 | 
28 | 	my $i = $opt_letters ? 'A' : '0';
29 | 	for my $contig (@contigs) {
30 | 		print $id;
31 | 		print '_', $i++ if @contigs > 1;
32 | 		print ' ', $comment if length $comment > 0;
33 | 		print "\n", $contig, "\n";
34 | 	}
35 | 
36 | 	redo if /^>/;
37 | 	last if eof;
38 | }
39 | 


--------------------------------------------------------------------------------