├── .gitignore ├── .hgignore ├── AUTHORS ├── COPYING ├── ChangeLog ├── GALAXY-LICENSE.txt ├── LICENSE ├── Makefile.am ├── NEWS ├── README ├── THANKS ├── build_scripts ├── Makefile.am ├── build_on_linux.sh └── build_on_opensolaris_2009.6.sh ├── configure.ac ├── doc └── Makefile.am ├── galaxy ├── Makefile.am ├── README ├── fastx_toolkit_conf.xml ├── static │ ├── Makefile.am │ └── fastx_icons │ │ ├── Makefile.am │ │ ├── barcode_splitter_output_example.png │ │ ├── fasta_clipping_histogram_1.png │ │ ├── fasta_clipping_histogram_2.png │ │ ├── fasta_clipping_histogram_3.png │ │ ├── fasta_clipping_histogram_4.png │ │ ├── fastq_nucleotides_distribution_1.png │ │ ├── fastq_nucleotides_distribution_2.png │ │ ├── fastq_nucleotides_distribution_3.png │ │ ├── fastq_nucleotides_distribution_4.png │ │ ├── fastq_nucleotides_distribution_line_graph.png │ │ ├── fastq_quality_boxplot_1.png │ │ ├── fastq_quality_boxplot_2.png │ │ ├── fastq_quality_boxplot_3.png │ │ ├── fastx_clipper_example.png │ │ └── fastx_clipper_illustration.png ├── test-data │ ├── Makefile.am │ ├── fasta_collapser1.fasta │ ├── fasta_collapser1.out │ ├── fasta_formatter1.fasta │ ├── fasta_formatter1.out │ ├── fasta_formatter2.out │ ├── fasta_nuc_changer1.fasta │ ├── fasta_nuc_changer1.out │ ├── fasta_nuc_changer2.fasta │ ├── fasta_nuc_changer2.out │ ├── fasta_uncollapser1.fasta │ ├── fasta_uncollapser1.out │ ├── fastq_masker.fastq │ ├── fastq_masker.out │ ├── fastq_qual_conv1.fastq │ ├── fastq_qual_conv1.out │ ├── fastq_qual_conv1a.out │ ├── fastq_qual_conv2.fastq │ ├── fastq_qual_conv2.out │ ├── fastq_qual_conv2n.out │ ├── fastq_qual_filter1.fastq │ ├── fastq_qual_filter1a.out │ ├── fastq_qual_filter1b.out │ ├── fastq_quality_trimmer.fastq │ ├── fastq_quality_trimmer.out │ ├── fastq_stats1.fastq │ ├── fastq_stats1.out │ ├── fastq_to_fasta1.fastq │ ├── fastq_to_fasta1a.out │ ├── fastq_to_fasta1b.out │ ├── fastx_artifacts1.fasta │ ├── fastx_artifacts1.out │ ├── fastx_artifacts2.fastq │ ├── fastx_artifacts2.out │ ├── 
fastx_barcode_splitter1.fastq │ ├── fastx_barcode_splitter1.out │ ├── fastx_barcode_splitter1.txt │ ├── fastx_clipper1.fastq │ ├── fastx_clipper1a.out │ ├── fastx_renamer1.fastq │ ├── fastx_renamer1.out │ ├── fastx_rev_comp1.fasta │ ├── fastx_rev_comp2.fastq │ ├── fastx_reverse_complement1.out │ ├── fastx_reverse_complement2.out │ ├── fastx_seqid_uncollapse1.out │ ├── fastx_seqid_uncollapse1.psl │ ├── fastx_trimmer1.fasta │ ├── fastx_trimmer1.out │ ├── fastx_trimmer2.fastq │ ├── fastx_trimmer2.out │ ├── fastx_trimmer_from_end1.fasta │ └── fastx_trimmer_from_end1.out ├── tool-data │ ├── Makefile.am │ └── fastx_clipper_sequences.txt └── tools │ ├── Makefile.am │ └── fastx_toolkit │ ├── Makefile.am │ ├── fasta_clipping_histogram.xml │ ├── fasta_formatter.xml │ ├── fasta_nucleotide_changer.xml │ ├── fastq_masker.xml │ ├── fastq_quality_boxplot.xml │ ├── fastq_quality_converter.xml │ ├── fastq_quality_filter.xml │ ├── fastq_quality_trimmer.xml │ ├── fastq_to_fasta.xml │ ├── fastx_artifacts_filter.xml │ ├── fastx_barcode_splitter.xml │ ├── fastx_barcode_splitter_galaxy_wrapper.sh │ ├── fastx_clipper.xml │ ├── fastx_collapser.xml │ ├── fastx_nucleotides_distribution.xml │ ├── fastx_nucleotides_distribution_line.xml │ ├── fastx_quality_statistics.xml │ ├── fastx_quality_statistics_ng.xml │ ├── fastx_renamer.xml │ ├── fastx_reverse_complement.xml │ ├── fastx_trimmer.xml │ ├── fastx_trimmer_from_end.xml │ ├── fastx_uncollapser.xml │ └── seqid_uncollapser.xml ├── install_galaxy_files.sh ├── m4 ├── Makefile.am ├── ax_c_long_long.m4 ├── ax_cxx_compile_stdcxx_11.m4 └── ax_cxx_header_stdcxx_tr1.m4 ├── reconf ├── scripts ├── Makefile.am ├── fasta_clipping_histogram.pl ├── fastq_quality_boxplot_graph.sh ├── fastx_barcode_splitter.pl ├── fastx_nucleotide_distribution_graph.sh └── fastx_nucleotide_distribution_line_graph.sh └── src ├── Makefile.am ├── fasta_formatter ├── Makefile.am ├── fasta_formatter.cpp └── sequence_writers.h ├── fasta_nucleotide_changer ├── Makefile.am └── 
fasta_nucleotide_changer.c ├── fastq_masker ├── Makefile.am └── fastq_masker.c ├── fastq_quality_converter ├── Makefile.am └── fastq_quality_converter.c ├── fastq_quality_filter ├── Makefile.am └── fastq_quality_filter.c ├── fastq_quality_trimmer ├── Makefile.am └── fastq_quality_trimmer.c ├── fastq_to_fasta ├── Makefile.am └── fastq_to_fasta.c ├── fastx_artifacts_filter ├── Makefile.am └── fastx_artifacts_filter.c ├── fastx_clipper ├── Makefile.am └── fastx_clipper.cpp ├── fastx_collapser ├── Makefile.am ├── fastx_collapser.cpp └── std_hash.h ├── fastx_quality_stats ├── Makefile.am └── fastx_quality_stats.c ├── fastx_renamer ├── Makefile.am └── fastx_renamer.c ├── fastx_reverse_complement ├── Makefile.am └── fastx_reverse_complement.c ├── fastx_trimmer ├── Makefile.am └── fastx_trimmer.c ├── fastx_uncollapser ├── Makefile.am └── fastx_uncollapser.cpp ├── libfastx ├── Makefile.am ├── chomp.c ├── chomp.h ├── fastx.c ├── fastx.h ├── fastx_args.c ├── fastx_args.h ├── sequence_alignment.cpp └── sequence_alignment.h └── seqalign_test ├── Makefile.am └── seqalign_test.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | syntax: glob 2 | 3 | ## vim temporary files 4 | .*.swp 5 | 6 | ## GNU auto-tools generated files 7 | config.h 8 | config.h.in 9 | config.log 10 | config.status 11 | config/ltmain.sh 12 | config/compile 13 | 14 | INSTALL 15 | 16 | ## Distribution tarballs 17 | fastx*.tar.bz2 18 | 19 | ## Intermediate files 20 | *.o 21 | *.a 22 | *.Po 23 | Makefile 24 | Makefile.in 25 | libtool 26 | *.m4 27 | autom4te.cache/* 28 | configure 29 | stamp-h1 30 | 31 | ## The compiled binaries 32 | src/fastx_collapser/fastx_collapser 33 | src/fastx_reverse_complement/fastx_reverse_complement 34 | src/fastx_trimmer/fastx_trimmer 35 | src/fastq_quality_filter/fastq_quality_filter 36 | src/fastq_to_fasta/fastq_to_fasta 37 | src/fasta_nucleotide_changer/fasta_nucleotide_changer 38 | src/fastx_quality_stats/fastx_quality_stats 39 | 
src/fasta_formatter/fasta_formatter 40 | src/fastx_uncollapser/fastx_uncollapser 41 | src/fastx_renamer/fastx_renamer 42 | src/fastq_quality_trimmer/fastq_quality_trimmer 43 | src/fastx_artifacts_filter/fastx_artifacts_filter 44 | src/fastx_clipper/fastx_clipper 45 | src/fastq_quality_converter/fastq_quality_converter 46 | src/seqalign_test/seqalign_test 47 | src/fastq_masker/fastq_masker 48 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | syntax: glob 2 | 3 | ## vim temporary files 4 | .*.swp 5 | 6 | ## GNU auto-tools generated files 7 | config.h 8 | config.h.in 9 | config.log 10 | config.status 11 | config/ltmain.sh 12 | 13 | ## Distribution tarballs 14 | fastx*.tar.bz2 15 | 16 | ## Intermediate files 17 | *.o 18 | *.a 19 | *.Po 20 | Makefile 21 | Makefile.in 22 | libtool 23 | *.m4 24 | autom4te.cache/* 25 | configure 26 | stamp-h1 27 | 28 | ## The compiled binaries 29 | src/fastx_collapser/fastx_collapser 30 | src/fastx_reverse_complement/fastx_reverse_complement 31 | src/fastx_trimmer/fastx_trimmer 32 | src/fastq_quality_filter/fastq_quality_filter 33 | src/fastq_to_fasta/fastq_to_fasta 34 | src/fasta_nucleotide_changer/fasta_nucleotide_changer 35 | src/fastx_quality_stats/fastx_quality_stats 36 | src/fasta_formatter/fasta_formatter 37 | src/fastx_uncollapser/fastx_uncollapser 38 | src/fastx_renamer/fastx_renamer 39 | src/fastq_quality_trimmer/fastq_quality_trimmer 40 | src/fastx_artifacts_filter/fastx_artifacts_filter 41 | src/fastx_clipper/fastx_clipper 42 | src/fastq_quality_converter/fastq_quality_converter 43 | src/seqalign_test/seqalign_test 44 | src/fastq_masker/fastq_masker 45 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Authors of FASTX toolkit. 2 | See also the files THANKS and ChangeLog. 
3 | 4 | Assaf Gordon designed and implemented FASTX toolkit. 5 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/ChangeLog -------------------------------------------------------------------------------- /GALAXY-LICENSE.txt: -------------------------------------------------------------------------------- 1 | =============== 2 | Galaxy License 3 | 4 | For more information: 5 | Galaxy main web: 6 | https://usegalaxy.org/ 7 | 8 | Galaxy license: 9 | https://galaxyproject.org/admin/license/ 10 | 11 | All files under the ./galaxy sub-directory are 12 | distributed under the Galaxy license. 13 | =============== 14 | 15 | Copyright (c) 2005 Pennsylvania State University 16 | 17 | Permission is hereby granted, free of charge, to any person obtaining 18 | a copy of this software and associated documentation files (the 19 | "Software"), to deal in the Software without restriction, including 20 | without limitation the rights to use, copy, modify, merge, publish, 21 | distribute, sublicense, and/or sell copies of the Software, and to 22 | permit persons to whom the Software is furnished to do so, subject to 23 | the following conditions: 24 | 25 | The above copyright notice and this permission notice shall be 26 | included in all copies or substantial portions of the Software. 27 | 28 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
31 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 32 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 33 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 34 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | FASTX-Toolkit LICENSE 2 | ===================== 3 | 4 | FASTX-Toolkit is distributed under the Affero GPL (AGPL) version 3 or later. 5 | See the COPYING file, or https://www.gnu.org/licenses/. 6 | 7 | EXCEPT 8 | 9 | Files under the 'galaxy' sub-directory are distributed under the same license as 10 | Galaxy itself (which is a more permissive, MIT-style license). 11 | See the GALAXY-LICENSE.txt file. 12 | 13 | 14 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | EXTRA_DIST = reconf configure README install_galaxy_files.sh 12 | 13 | SUBDIRS = m4 src doc galaxy scripts build_scripts 14 | 15 | ACLOCAL_AMFLAGS = -I m4 16 | 17 | AUTOMAKE_OPTIONS = dist-bzip2 no-dist-gzip 18 | 19 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | FASTX toolkit -- History of visible changes. 
2 | 3 | Copyright (C) 2008-2013, Assaf Gordon 4 | See the end for copying conditions. 5 | 6 | Please send FASTX toolkit bug reports to assafgordon@gmail.com. 7 | 8 | Version 0.0.6 9 | 10 | * First public release 11 | 12 | ------------------------------------------------------- 13 | Copying information: 14 | 15 | Copyright (C) 2008-2013, Assaf Gordon 16 | 17 | Permission is granted to anyone to make or distribute verbatim copies 18 | of this document as received, in any medium, provided that the 19 | copyright notice and this permission notice are preserved, 20 | thus giving the recipient permission to redistribute in turn. 21 | 22 | Permission is granted to distribute modified versions 23 | of this document, or of portions of it, 24 | under the above conditions, provided also that they 25 | carry prominent notices stating who last changed them. 26 | 27 | -------------------------------------------------------------------------------- /THANKS: -------------------------------------------------------------------------------- 1 | FASTX toolkit THANKS file 2 | 3 | FASTX toolkit was originally written by Assaf Gordon. 4 | Many people have further contributed to FASTX-Toolkit by reporting problems, 5 | suggesting various improvements, or submitting actual code. Here is 6 | a list of these people. Help me keep it complete and exempt of errors. 7 | 8 | Many Hannon-lab members at CSHL (who preferred to remain anonymous). 9 | -------------------------------------------------------------------------------- /build_scripts/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 
6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | EXTRA_DIST = build_on_opensolaris_2009.6.sh \ 12 | build_on_linux.sh 13 | -------------------------------------------------------------------------------- /build_scripts/build_on_linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | #Tell pkg-config to look for libraries in /usr/local/lib, too. 4 | export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH 5 | 6 | # Configure, with static binaries and target directory 7 | ./configure --enable-static --enable-all-static --prefix="$PWD/build" || exit 1 8 | 9 | # build the programs 10 | make || exit 1 11 | 12 | # Install them to the target directory (doesn't require root) 13 | make install || exit 1 14 | 15 | # Create a package of the static binaries 16 | VERSION=$(grep '#define VERSION' config.h | sed 's/.*"\(.*\)".*/\1/') 17 | TARBALL=fastx_toolkit_${VERSION}_binaries_$(uname -s)_$(uname -r)_$(uname -m).tar.bz2 18 | cd build || exit 1 19 | tar -cjvf "../$TARBALL" ./bin/* || exit 1 20 | cd .. 21 | 22 | echo "Static Binaries are installed in:" 23 | echo " $PWD/build/bin" 24 | echo 25 | echo "Static binaries tarball:" 26 | echo " $TARBALL" 27 | 28 | -------------------------------------------------------------------------------- /build_scripts/build_on_opensolaris_2009.6.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export CC=gcc-4.3.2 4 | export CXX=g++-4.3.2 5 | 6 | #Tell pkg-config to look for libraries in /usr/local/lib, too. 
7 | export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH 8 | 9 | # Configure, with static binaries and target directory 10 | ./configure --enable-static --enable-all-static --prefix="$PWD/build" || exit 1 11 | 12 | # build the programs 13 | make || exit 1 14 | 15 | # Install them to the target directory (doesn't require root) 16 | make install || exit 1 17 | 18 | # Create a package of the static binaries 19 | VERSION=$(grep '#define VERSION' config.h | sed 's/.*"\(.*\)".*/\1/') 20 | TARBALL=fastx_toolkit_${VERSION}_binaries_$(uname -s)_$(uname -r)_$(uname -m).tar.bz2 21 | cd build || exit 1 22 | tar -cjvf "../$TARBALL" ./bin/* || exit 1 23 | cd .. 24 | 25 | echo "Static Binaries are installed in:" 26 | echo " $PWD/build/bin" 27 | echo 28 | echo "Static binaries tarball:" 29 | echo " $TARBALL" 30 | 31 | 32 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | AC_INIT([FASTX Toolkit], 12 | [0.0.14], 13 | [Assaf Gordon assafgordon@gmail.com], 14 | [fastx_toolkit]) 15 | AC_CONFIG_AUX_DIR(config) 16 | AC_CONFIG_MACRO_DIR([m4]) 17 | AM_CONFIG_HEADER(config.h) 18 | AM_INIT_AUTOMAKE([dist-bzip2]) 19 | 20 | AC_PROG_CC 21 | AC_PROG_CXX 22 | AC_PROG_LIBTOOL 23 | AX_C_LONG_LONG 24 | AX_CXX_HEADER_STDCXX_TR1 25 | AX_CXX_COMPILE_STDCXX_11([noext],[optional]) 26 | 27 | dnl if test "$ax_cv_cxx_stdcxx_tr1" != yes; then 28 | dnl AC_MSG_ERROR([Your version of gcc does not support the 'std::tr1' standard. Recommended gcc version is 4.1.2 or later. Please use a newer gcc version, or try to download the pre-compiled binaries from the fastx-toolkit website.]) 29 | dnl fi 30 | 31 | PKG_CHECK_MODULES([GTEXTUTILS],[gtextutils]) 32 | 33 | dnl --enable-wall 34 | EXTRA_CHECKS="-Wall -Wextra -Wformat-nonliteral -Wformat-security -Wswitch-default -Wswitch-enum -Wunused-parameter -Wfloat-equal -Werror" 35 | AC_ARG_ENABLE(wall, 36 | [ --enable-wall Enable many common GCC warnings (-Wall,-Wextra, -Werror etc., default enabled)], 37 | [case "${enableval}" in 38 | yes) wall=true ;; 39 | no) wall=false ;; 40 | *) AC_MSG_ERROR(bad value ${enableval} for --enable-wall) ;; 41 | esac],[wall=true]) 42 | if test "$wall" = "true" 43 | then 44 | CFLAGS="${CFLAGS} ${EXTRA_CHECKS}" 45 | CXXFLAGS="${CXXFLAGS} ${EXTRA_CHECKS}" 46 | fi 47 | 48 | dnl --enable-debug 49 | AC_ARG_ENABLE(debug, 50 | [ --enable-debug Enable debug mode (default enabled)], 51 | [case "${enableval}" in 52 | yes) debug=true ;; 53 | no) debug=false ;; 54 | *) AC_MSG_ERROR(bad value ${enableval} for --enable-debug) ;; 55 | esac],[debug=true]) 56 | if test "$debug" = "true" 57 | then 58 | CFLAGS="${CFLAGS} -DDEBUG -g -O1" 59 | CXXFLAGS="${CXXFLAGS} -DDEBUG -g -O1" 60 | else 61 | CFLAGS="${CFLAGS} -O3" 62 | CXXFLAGS="${CXXFLAGS} -O3" 63 | fi 64 | 65 | dnl 'all-static' macro copied from subversion's configure.ac 66 | dnl Check for --enable-all-static option 67 | AC_ARG_ENABLE(all-static, 
68 | AS_HELP_STRING([--enable-all-static], 69 | [Build completely static (standalone) binaries.]), 70 | [ 71 | if test "$enableval" = "yes" ; then 72 | LT_LDFLAGS="-all-static $LT_LDFLAGS" 73 | elif test "$enableval" != "no" ; then 74 | AC_MSG_ERROR([--enable-all-static doesn't accept argument]) 75 | fi 76 | ]) 77 | 78 | AC_SUBST(LT_LDFLAGS) 79 | 80 | 81 | 82 | AC_CONFIG_FILES([ 83 | Makefile 84 | doc/Makefile 85 | m4/Makefile 86 | src/Makefile 87 | src/libfastx/Makefile 88 | src/fastx_clipper/Makefile 89 | src/fastq_to_fasta/Makefile 90 | src/fastx_quality_stats/Makefile 91 | src/fastq_quality_converter/Makefile 92 | src/fastx_trimmer/Makefile 93 | src/fastq_quality_filter/Makefile 94 | src/fastq_quality_trimmer/Makefile 95 | src/fastx_artifacts_filter/Makefile 96 | src/fastx_reverse_complement/Makefile 97 | src/fastx_collapser/Makefile 98 | src/fastx_uncollapser/Makefile 99 | src/seqalign_test/Makefile 100 | src/fasta_formatter/Makefile 101 | src/fasta_nucleotide_changer/Makefile 102 | src/fastx_renamer/Makefile 103 | src/fastq_masker/Makefile 104 | galaxy/Makefile 105 | galaxy/tools/Makefile 106 | galaxy/tools/fastx_toolkit/Makefile 107 | galaxy/test-data/Makefile 108 | galaxy/static/Makefile 109 | galaxy/static/fastx_icons/Makefile 110 | galaxy/tool-data/Makefile 111 | scripts/Makefile 112 | build_scripts/Makefile 113 | ]) 114 | 115 | AC_OUTPUT 116 | -------------------------------------------------------------------------------- /doc/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 
6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | -------------------------------------------------------------------------------- /galaxy/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | SUBDIRS = tools test-data static tool-data 12 | 13 | EXTRA_DIST = README fastx_toolkit_conf.xml 14 | -------------------------------------------------------------------------------- /galaxy/README: -------------------------------------------------------------------------------- 1 | FASTX-Toolkit - Galaxy Files 2 | ============================ 3 | 4 | These files allow easy integration of the FASTX-toolkit tools in the 5 | Galaxy framework. 6 | 7 | Installation 8 | ============ 9 | See the README file. 10 | 11 | LICENSE 12 | ======= 13 | All files under the 'galaxy' sub-directory are licensed under 14 | Galaxy's license. See the GALAXY-LICENSE.txt file. 15 | (The rest of the FASTX-Toolkit files are licensed under AGPLv3 or later). 16 | -------------------------------------------------------------------------------- /galaxy/fastx_toolkit_conf.xml: -------------------------------------------------------------------------------- 1 | # 2 | # Add the following sections to your Galaxy server's tool_conf.xml file. 3 | # 4 |
5 | 6 |
7 | 8 |
9 | 10 | 11 | 12 | 13 |
14 | 15 |
16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 |
33 | -------------------------------------------------------------------------------- /galaxy/static/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | SUBDIRS = fastx_icons 12 | 13 | 14 | -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | EXTRA_DIST = fastx_clipper_example.png \ 12 | fastx_clipper_illustration.png \ 13 | barcode_splitter_output_example.png \ 14 | fastq_nucleotides_distribution_1.png \ 15 | fastq_nucleotides_distribution_2.png \ 16 | fastq_nucleotides_distribution_3.png \ 17 | fastq_nucleotides_distribution_4.png \ 18 | fastq_nucleotides_distribution_line_graph.png \ 19 | fasta_clipping_histogram_1.png \ 20 | fasta_clipping_histogram_2.png \ 21 | fasta_clipping_histogram_3.png \ 22 | fasta_clipping_histogram_4.png \ 23 | fastq_quality_boxplot_1.png \ 24 | fastq_quality_boxplot_2.png \ 25 | fastq_quality_boxplot_3.png 26 | -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/barcode_splitter_output_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/barcode_splitter_output_example.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fasta_clipping_histogram_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fasta_clipping_histogram_1.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fasta_clipping_histogram_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fasta_clipping_histogram_2.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fasta_clipping_histogram_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fasta_clipping_histogram_3.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fasta_clipping_histogram_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fasta_clipping_histogram_4.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastq_nucleotides_distribution_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastq_nucleotides_distribution_1.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastq_nucleotides_distribution_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastq_nucleotides_distribution_2.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastq_nucleotides_distribution_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastq_nucleotides_distribution_3.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastq_nucleotides_distribution_4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastq_nucleotides_distribution_4.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastq_nucleotides_distribution_line_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastq_nucleotides_distribution_line_graph.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastq_quality_boxplot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastq_quality_boxplot_1.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastq_quality_boxplot_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastq_quality_boxplot_2.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastq_quality_boxplot_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastq_quality_boxplot_3.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastx_clipper_example.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastx_clipper_example.png -------------------------------------------------------------------------------- /galaxy/static/fastx_icons/fastx_clipper_illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/agordon/fastx_toolkit/ea0ca83ba24dce80c20ca589b838a281fe5deb0c/galaxy/static/fastx_icons/fastx_clipper_illustration.png -------------------------------------------------------------------------------- /galaxy/test-data/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | EXTRA_DIST = fastq_qual_conv1a.out \ 12 | fastq_qual_conv1.fastq \ 13 | fastq_qual_conv1.out \ 14 | fastq_qual_conv2.fastq \ 15 | fastq_qual_conv2n.out \ 16 | fastq_qual_conv2.out \ 17 | fastq_qual_filter1a.out \ 18 | fastq_qual_filter1b.out \ 19 | fastq_qual_filter1.fastq \ 20 | fastq_stats1.fastq \ 21 | fastq_stats1.out \ 22 | fastq_to_fasta1a.out \ 23 | fastq_to_fasta1b.out \ 24 | fastq_to_fasta1.fastq \ 25 | fastx_artifacts1.fasta \ 26 | fastx_artifacts1.out \ 27 | fastx_artifacts2.fastq \ 28 | fastx_artifacts2.out \ 29 | fastx_clipper1a.out \ 30 | fastx_clipper1.fastq \ 31 | fastx_rev_comp1.fasta \ 32 | fastx_rev_comp2.fastq \ 33 | fastx_reverse_complement1.out \ 34 | fastx_reverse_complement2.out \ 35 | fastx_trimmer1.fasta \ 36 | fastx_trimmer1.out \ 37 | fastx_trimmer2.fastq \ 38 | fastx_trimmer2.out \ 39 | fasta_collapser1.fasta \ 40 | fasta_collapser1.out \ 41 | fastx_barcode_splitter1.fastq \ 42 | fastx_barcode_splitter1.txt \ 43 | fastx_barcode_splitter1.out \ 44 | fasta_formatter1.fasta \ 45 | fasta_formatter1.out \ 46 | fasta_formatter2.out \ 47 | fasta_nuc_changer1.fasta \ 48 | fasta_nuc_changer1.out \ 49 | fasta_nuc_changer2.fasta \ 50 | fasta_nuc_changer2.out 51 | 52 | 53 | -------------------------------------------------------------------------------- /galaxy/test-data/fasta_collapser1.fasta: -------------------------------------------------------------------------------- 1 | >1 2 | TGTATTTACAATGACTAGAAA 3 | >2 4 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 5 | >3 6 | AGTACAAGGACATGC 7 | >4 8 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 9 | >5 10 | AGTACAAGGACATGC 11 | >6 12 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 13 | >7 14 | AGTACAAGGACATGC 15 | >8 16 | AGTACAAGGACATGC 17 | >9 18 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 19 | >10 20 | AGTACAAGGACATGC 21 | >11 22 | AGTACAAGGACATGC 23 | >12 24 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 25 | >13 26 | CGATTGCCGAAGTCTACCA 27 | >14 28 | AGTACAAGGACATGC 29 | >15 30 | CCTTGTAGTGGATTCTGATGA 31 | >16 32 | AGTACAAGGACATGC 33 | >17 34 | 
AGTACAAGGACATGC 35 | >18 36 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 37 | >19 38 | AGTACAAGGACATGC 39 | >20 40 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 41 | >21 42 | AGTACAAGGACATGC 43 | >22 44 | AGTACAAGGACATGC 45 | >23 46 | CTGCTGCGATCGGTGTGC 47 | >24 48 | AGTACAAGGACATGC 49 | >25 50 | ACCATTCGAGCATAC 51 | >26 52 | AGTACAAGGACATGC 53 | >27 54 | TCAAATTCTAGATTTTTACGG 55 | >28 56 | AGTACAAGGACATGC 57 | >29 58 | TGATTTCCAGAGCCAAT 59 | >30 60 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 61 | >31 62 | TTACCTCACGATATTGTAATA 63 | >32 64 | ATGACTTCATCGTCCACCCTTTAGAACT 65 | >33 66 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 67 | >34 68 | TTCAACGCCGCCGTGAAC 69 | >35 70 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 71 | >36 72 | CTGCTGCGATCGGTGTGC 73 | >37 74 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 75 | >38 76 | TTCAACGCCGCCGTGAAC 77 | >39 78 | TTCAACGCCGCCGTGAAC 79 | >40 80 | CTGCTGCGATCGGTGTGC 81 | >41 82 | TTCAACGCCGCCGTGAAC 83 | >42 84 | TTCAACGCCGCCGTGAAC 85 | -------------------------------------------------------------------------------- /galaxy/test-data/fasta_collapser1.out: -------------------------------------------------------------------------------- 1 | >1-15 2 | AGTACAAGGACATGC 3 | >2-11 4 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 5 | >3-5 6 | TTCAACGCCGCCGTGAAC 7 | >4-3 8 | CTGCTGCGATCGGTGTGC 9 | >5-1 10 | TCAAATTCTAGATTTTTACGG 11 | >6-1 12 | ACCATTCGAGCATAC 13 | >7-1 14 | TGATTTCCAGAGCCAAT 15 | >8-1 16 | TTACCTCACGATATTGTAATA 17 | >9-1 18 | TGTATTTACAATGACTAGAAA 19 | >10-1 20 | CCTTGTAGTGGATTCTGATGA 21 | >11-1 22 | CGATTGCCGAAGTCTACCA 23 | >12-1 24 | ATGACTTCATCGTCCACCCTTTAGAACT 25 | -------------------------------------------------------------------------------- /galaxy/test-data/fasta_nuc_changer1.fasta: -------------------------------------------------------------------------------- 1 | >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 2 | TGAGGTAGTAGGTTGTATAGTT 3 | >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 4 | TCCCTGAGACCTCAAGTGTGA 5 | >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 6 | 
TGGAATGTAAAGAAGTATGTA 7 | >cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2 8 | TATCACAGCCAGCTTTGATGTGC 9 | >cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34 10 | AGGCAGTGTGGTTAGCTGGTTG 11 | >cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35 12 | TCACCGGGTGGAAACTAGCAGT 13 | >cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36 14 | TCACCGGGTGAAAATTCGCATG 15 | >cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37 16 | TCACCGGGTGAACACTTGCAGT 17 | >cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38 18 | TCACCGGGAGAAAAACTGGAGT 19 | >cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39 20 | TCACCGGGTGTAAATCAGCTTG 21 | >cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40 22 | TCACCGGGTGTACATCAGCTAA 23 | >cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41 24 | TCACCGGGTGAAAAATCACCTA 25 | >cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42 26 | TCACCGGGTTAACATCTACAGA 27 | >cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43 28 | TATCACAGTTTACTTGCTGTCGC 29 | >cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44 30 | TGACTAGAGACACATTCAGCT 31 | >cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45 32 | TGACTAGAGACACATTCAGCT 33 | >cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46 34 | TGTCATGGAGTCGCTCTCTTCA 35 | >cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47 36 | TGTCATGGAGGCGCTCTCTTCA 37 | >cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48 38 | TGAGGTAGGCTCAGTAGATGCGA 39 | >cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49 40 | AAGCACCACGAGAAGCTGCAGA 41 | >cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50 42 | TGATATGTCTGGTATTCTTGGG 43 | >cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51 44 | TACCCGTAGCTCCTATCCATGTT 45 | >cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52 46 | CACCCGTACATATGTTTCCGTGCT 47 | >cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53 48 | CACCCGTACATTTGTTTCCGTGCT 49 | >cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54 50 | TACCCGTAATCTTCATAATCCGAG 51 | 
-------------------------------------------------------------------------------- /galaxy/test-data/fasta_nuc_changer1.out: -------------------------------------------------------------------------------- 1 | >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 2 | UGAGGUAGUAGGUUGUAUAGUU 3 | >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 4 | UCCCUGAGACCUCAAGUGUGA 5 | >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 6 | UGGAAUGUAAAGAAGUAUGUA 7 | >cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2 8 | UAUCACAGCCAGCUUUGAUGUGC 9 | >cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34 10 | AGGCAGUGUGGUUAGCUGGUUG 11 | >cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35 12 | UCACCGGGUGGAAACUAGCAGU 13 | >cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36 14 | UCACCGGGUGAAAAUUCGCAUG 15 | >cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37 16 | UCACCGGGUGAACACUUGCAGU 17 | >cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38 18 | UCACCGGGAGAAAAACUGGAGU 19 | >cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39 20 | UCACCGGGUGUAAAUCAGCUUG 21 | >cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40 22 | UCACCGGGUGUACAUCAGCUAA 23 | >cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41 24 | UCACCGGGUGAAAAAUCACCUA 25 | >cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42 26 | UCACCGGGUUAACAUCUACAGA 27 | >cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43 28 | UAUCACAGUUUACUUGCUGUCGC 29 | >cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44 30 | UGACUAGAGACACAUUCAGCU 31 | >cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45 32 | UGACUAGAGACACAUUCAGCU 33 | >cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46 34 | UGUCAUGGAGUCGCUCUCUUCA 35 | >cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47 36 | UGUCAUGGAGGCGCUCUCUUCA 37 | >cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48 38 | UGAGGUAGGCUCAGUAGAUGCGA 39 | >cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49 40 | AAGCACCACGAGAAGCUGCAGA 41 | >cel-miR-50 MIMAT0000021 Caenorhabditis elegans 
miR-50 42 | UGAUAUGUCUGGUAUUCUUGGG 43 | >cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51 44 | UACCCGUAGCUCCUAUCCAUGUU 45 | >cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52 46 | CACCCGUACAUAUGUUUCCGUGCU 47 | >cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53 48 | CACCCGUACAUUUGUUUCCGUGCU 49 | >cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54 50 | UACCCGUAAUCUUCAUAAUCCGAG 51 | -------------------------------------------------------------------------------- /galaxy/test-data/fasta_nuc_changer2.fasta: -------------------------------------------------------------------------------- 1 | >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 2 | UGAGGUAGUAGGUUGUAUAGUU 3 | >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 4 | UCCCUGAGACCUCAAGUGUGA 5 | >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 6 | UGGAAUGUAAAGAAGUAUGUA 7 | >cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2 8 | UAUCACAGCCAGCUUUGAUGUGC 9 | >cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34 10 | AGGCAGUGUGGUUAGCUGGUUG 11 | >cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35 12 | UCACCGGGUGGAAACUAGCAGU 13 | >cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36 14 | UCACCGGGUGAAAAUUCGCAUG 15 | >cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37 16 | UCACCGGGUGAACACUUGCAGU 17 | >cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38 18 | UCACCGGGAGAAAAACUGGAGU 19 | >cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39 20 | UCACCGGGUGUAAAUCAGCUUG 21 | >cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40 22 | UCACCGGGUGUACAUCAGCUAA 23 | >cel-miR-41 MIMAT0000012 Caenorhabditis elegans miR-41 24 | UCACCGGGUGAAAAAUCACCUA 25 | >cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42 26 | UCACCGGGUUAACAUCUACAGA 27 | >cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43 28 | UAUCACAGUUUACUUGCUGUCGC 29 | >cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44 30 | UGACUAGAGACACAUUCAGCU 31 | >cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45 32 | UGACUAGAGACACAUUCAGCU 33 | 
>cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46 34 | UGUCAUGGAGUCGCUCUCUUCA 35 | >cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47 36 | UGUCAUGGAGGCGCUCUCUUCA 37 | >cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48 38 | UGAGGUAGGCUCAGUAGAUGCGA 39 | >cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49 40 | AAGCACCACGAGAAGCUGCAGA 41 | >cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50 42 | UGAUAUGUCUGGUAUUCUUGGG 43 | >cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51 44 | UACCCGUAGCUCCUAUCCAUGUU 45 | >cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52 46 | CACCCGUACAUAUGUUUCCGUGCU 47 | >cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53 48 | CACCCGUACAUUUGUUUCCGUGCU 49 | >cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54 50 | UACCCGUAAUCUUCAUAAUCCGAG 51 | -------------------------------------------------------------------------------- /galaxy/test-data/fasta_nuc_changer2.out: -------------------------------------------------------------------------------- 1 | >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 2 | TGAGGTAGTAGGTTGTATAGTT 3 | >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 4 | TCCCTGAGACCTCAAGTGTGA 5 | >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 6 | TGGAATGTAAAGAAGTATGTA 7 | >cel-miR-2 MIMAT0000004 Caenorhabditis elegans miR-2 8 | TATCACAGCCAGCTTTGATGTGC 9 | >cel-miR-34 MIMAT0000005 Caenorhabditis elegans miR-34 10 | AGGCAGTGTGGTTAGCTGGTTG 11 | >cel-miR-35 MIMAT0000006 Caenorhabditis elegans miR-35 12 | TCACCGGGTGGAAACTAGCAGT 13 | >cel-miR-36 MIMAT0000007 Caenorhabditis elegans miR-36 14 | TCACCGGGTGAAAATTCGCATG 15 | >cel-miR-37 MIMAT0000008 Caenorhabditis elegans miR-37 16 | TCACCGGGTGAACACTTGCAGT 17 | >cel-miR-38 MIMAT0000009 Caenorhabditis elegans miR-38 18 | TCACCGGGAGAAAAACTGGAGT 19 | >cel-miR-39 MIMAT0000010 Caenorhabditis elegans miR-39 20 | TCACCGGGTGTAAATCAGCTTG 21 | >cel-miR-40 MIMAT0000011 Caenorhabditis elegans miR-40 22 | TCACCGGGTGTACATCAGCTAA 23 | >cel-miR-41 MIMAT0000012 Caenorhabditis 
elegans miR-41 24 | TCACCGGGTGAAAAATCACCTA 25 | >cel-miR-42 MIMAT0000013 Caenorhabditis elegans miR-42 26 | TCACCGGGTTAACATCTACAGA 27 | >cel-miR-43 MIMAT0000014 Caenorhabditis elegans miR-43 28 | TATCACAGTTTACTTGCTGTCGC 29 | >cel-miR-44 MIMAT0000015 Caenorhabditis elegans miR-44 30 | TGACTAGAGACACATTCAGCT 31 | >cel-miR-45 MIMAT0000016 Caenorhabditis elegans miR-45 32 | TGACTAGAGACACATTCAGCT 33 | >cel-miR-46 MIMAT0000017 Caenorhabditis elegans miR-46 34 | TGTCATGGAGTCGCTCTCTTCA 35 | >cel-miR-47 MIMAT0000018 Caenorhabditis elegans miR-47 36 | TGTCATGGAGGCGCTCTCTTCA 37 | >cel-miR-48 MIMAT0000019 Caenorhabditis elegans miR-48 38 | TGAGGTAGGCTCAGTAGATGCGA 39 | >cel-miR-49 MIMAT0000020 Caenorhabditis elegans miR-49 40 | AAGCACCACGAGAAGCTGCAGA 41 | >cel-miR-50 MIMAT0000021 Caenorhabditis elegans miR-50 42 | TGATATGTCTGGTATTCTTGGG 43 | >cel-miR-51 MIMAT0000022 Caenorhabditis elegans miR-51 44 | TACCCGTAGCTCCTATCCATGTT 45 | >cel-miR-52 MIMAT0000023 Caenorhabditis elegans miR-52 46 | CACCCGTACATATGTTTCCGTGCT 47 | >cel-miR-53 MIMAT0000024 Caenorhabditis elegans miR-53 48 | CACCCGTACATTTGTTTCCGTGCT 49 | >cel-miR-54 MIMAT0000025 Caenorhabditis elegans miR-54 50 | TACCCGTAATCTTCATAATCCGAG 51 | -------------------------------------------------------------------------------- /galaxy/test-data/fasta_uncollapser1.fasta: -------------------------------------------------------------------------------- 1 | >1-15 2 | AGTACAAGGACATGC 3 | >2-11 4 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 5 | >3-5 6 | TTCAACGCCGCCGTGAAC 7 | >4-3 8 | CTGCTGCGATCGGTGTGC 9 | >5-1 10 | TCAAATTCTAGATTTTTACGG 11 | >6-1 12 | ACCATTCGAGCATAC 13 | >7-1 14 | TGATTTCCAGAGCCAAT 15 | >8-1 16 | TTACCTCACGATATTGTAATA 17 | >9-1 18 | TGTATTTACAATGACTAGAAA 19 | >10-1 20 | CCTTGTAGTGGATTCTGATGA 21 | >11-1 22 | CGATTGCCGAAGTCTACCA 23 | >12-1 24 | ATGACTTCATCGTCCACCCTTTAGAACT 25 | -------------------------------------------------------------------------------- /galaxy/test-data/fasta_uncollapser1.out: 
-------------------------------------------------------------------------------- 1 | >1 2 | AGTACAAGGACATGC 3 | >2 4 | AGTACAAGGACATGC 5 | >3 6 | AGTACAAGGACATGC 7 | >4 8 | AGTACAAGGACATGC 9 | >5 10 | AGTACAAGGACATGC 11 | >6 12 | AGTACAAGGACATGC 13 | >7 14 | AGTACAAGGACATGC 15 | >8 16 | AGTACAAGGACATGC 17 | >9 18 | AGTACAAGGACATGC 19 | >10 20 | AGTACAAGGACATGC 21 | >11 22 | AGTACAAGGACATGC 23 | >12 24 | AGTACAAGGACATGC 25 | >13 26 | AGTACAAGGACATGC 27 | >14 28 | AGTACAAGGACATGC 29 | >15 30 | AGTACAAGGACATGC 31 | >16 32 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 33 | >17 34 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 35 | >18 36 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 37 | >19 38 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 39 | >20 40 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 41 | >21 42 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 43 | >22 44 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 45 | >23 46 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 47 | >24 48 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 49 | >25 50 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 51 | >26 52 | ATTGCTGCTCGGATGGTCCGGCTGTGCACAC 53 | >27 54 | TTCAACGCCGCCGTGAAC 55 | >28 56 | TTCAACGCCGCCGTGAAC 57 | >29 58 | TTCAACGCCGCCGTGAAC 59 | >30 60 | TTCAACGCCGCCGTGAAC 61 | >31 62 | TTCAACGCCGCCGTGAAC 63 | >32 64 | CTGCTGCGATCGGTGTGC 65 | >33 66 | CTGCTGCGATCGGTGTGC 67 | >34 68 | CTGCTGCGATCGGTGTGC 69 | >35 70 | TCAAATTCTAGATTTTTACGG 71 | >36 72 | ACCATTCGAGCATAC 73 | >37 74 | TGATTTCCAGAGCCAAT 75 | >38 76 | TTACCTCACGATATTGTAATA 77 | >39 78 | TGTATTTACAATGACTAGAAA 79 | >40 80 | CCTTGTAGTGGATTCTGATGA 81 | >41 82 | CGATTGCCGAAGTCTACCA 83 | >42 84 | ATGACTTCATCGTCCACCCTTTAGAACT 85 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_masker.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 7 | 
+CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_masker.out: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACxCAxGACCGAxCCCCCxxxCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | xAATGCxTCxAxTxGxTTxxxxxCxxxxxxxxxxCT 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCxGCxxGCGCxTCAGAGAGCCCCCCCCxxxxxxxx 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTxATAATATxGGAGACxxx 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 
18 | GGAGAAATACxxACAATTGGTTAxxxxxxxTAxxxx 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AxxxxxxxCCxxxxxxxxxxxxxxxxxxxxxxxxxx 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGxxxxxxACCCCCCxCCCCCCCCCxxxx 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACxxCAxTTGGTTxxxxxCCxxxTATAGCGCTGTGG 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGCxxxxxxxxxxTxxxxxxx 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_qual_conv1.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCCCCATGTC 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCTACTCATCCCAGTAGAGGCCCGTGGCC 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACACACACTCATCGTCGTCCCCCG 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACCC 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | 
aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGGCGCTGTGGAGAGTGTCACACCCCCCCCCCCC 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGACGCGGCCGCTCGCGCTCT 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_qual_conv1.out: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCCCCATGTC 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | 33 33 34 30 22 30 33 21 29 32 33 33 30 33 26 33 33 33 34 34 24 5 26 33 34 33 33 33 33 33 33 33 33 29 29 32 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCTACTCATCCCAGTAGAGGCCCGTGGCC 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | 23 33 33 33 30 33 26 33 33 23 30 21 31 24 33 23 33 33 28 23 13 5 16 30 11 5 26 24 18 16 5 5 5 7 33 33 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | 33 31 13 30 33 28 21 33 33 33 31 13 31 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 22 28 26 21 7 21 21 18 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | 33 30 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 31 21 32 33 33 33 33 33 31 19 31 33 33 33 33 33 22 22 27 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACACACACTCATCGTCGTCCCCCG 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | 34 33 34 33 33 33 33 33 33 33 21 13 33 33 33 33 33 33 33 33 33 33 33 28 24 5 21 21 5 16 31 29 21 5 18 5 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACCC 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | 33 28 28 17 22 22 22 12 33 33 12 15 5 24 21 23 21 21 5 11 21 21 12 5 13 21 5 21 21 11 21 12 9 17 13 21 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | 33 
33 33 33 33 33 33 33 33 24 21 24 24 5 24 33 33 33 33 33 32 31 26 33 33 33 33 33 33 33 33 33 24 5 24 21 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGGCGCTGTGGAGAGTGTCACACCCCCCCCCCCC 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | 33 33 27 19 30 32 24 32 33 33 31 29 29 15 15 24 13 21 30 31 27 13 21 31 33 33 33 33 33 33 33 33 33 33 33 33 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGACGCGGCCGCTCGCGCTCT 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | 33 33 33 33 33 33 33 33 30 33 33 33 33 33 33 34 34 34 27 11 24 16 5 21 27 18 24 26 30 10 21 11 18 11 24 5 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_qual_conv1a.out: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCCCCATGTC 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCTACTCATCCCAGTAGAGGCCCGTGGCC 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACACACACTCATCGTCGTCCCCCG 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACCC 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGGCGCTGTGGAGAGTGTCACACCCCCCCCCCCC 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | 
@CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGACGCGGCCGCTCGCGCTCT 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_qual_conv2.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC0420AGLLKK:2:1:233:1674 2 | GTTAGAGGGAATACACCCACTCTGTAGGCACCATC 3 | +CSHL_3_FC0420AGLLKK:2:1:233:1674 4 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 5 | @CSHL_3_FC0420AGLLKK:2:1:136:448 6 | GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA 7 | +CSHL_3_FC0420AGLLKK:2:1:136:448 8 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 9 | @CSHL_3_FC0420AGLLKK:2:1:237:1037 10 | GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC 11 | +CSHL_3_FC0420AGLLKK:2:1:237:1037 12 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 13 | @CSHL_3_FC0420AGLLKK:2:1:1601:1525 14 | AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA 15 | +CSHL_3_FC0420AGLLKK:2:1:1601:1525 16 | 40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 12 40 40 30 30 40 40 40 12 36 23 17 24 18 22 25 15 10 34 14 17 | @CSHL_3_FC0420AGLLKK:2:1:1805:1464 18 | GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC 19 | +CSHL_3_FC0420AGLLKK:2:1:1805:1464 20 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 21 | @CSHL_3_FC0420AGLLKK:2:1:1713:528 22 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 23 | +CSHL_3_FC0420AGLLKK:2:1:1713:528 24 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 12 38 15 22 20 17 14 12 10 7 22 11 25 | @CSHL_3_FC0420AGLLKK:2:1:126:1087 26 | GAGATATTCGAATGCATCATCAGATGGCACCATCA 27 | +CSHL_3_FC0420AGLLKK:2:1:126:1087 28 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 29 | @CSHL_3_FC0420AGLLKK:2:1:1488:1323 30 | 
GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC 31 | +CSHL_3_FC0420AGLLKK:2:1:1488:1323 32 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 33 | @CSHL_3_FC0420AGLLKK:2:1:913:199 34 | GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC 35 | +CSHL_3_FC0420AGLLKK:2:1:913:199 36 | 40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 40 13 21 21 26 17 18 25 14 25 21 37 | @CSHL_3_FC0420AGLLKK:2:1:1236:1157 38 | AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA 39 | +CSHL_3_FC0420AGLLKK:2:1:1236:1157 40 | 40 40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 40 40 40 33 40 37 40 40 40 18 16 20 23 22 31 26 10 22 19 41 | @CSHL_3_FC0420AGLLKK:2:1:928:765 42 | GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC 43 | +CSHL_3_FC0420AGLLKK:2:1:928:765 44 | 40 40 40 40 40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 45 | @CSHL_3_FC0420AGLLKK:2:1:727:1020 46 | GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA 47 | +CSHL_3_FC0420AGLLKK:2:1:727:1020 48 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 24 18 38 33 26 16 23 22 16 18 49 | @CSHL_3_FC0420AGLLKK:2:1:758:1799 50 | GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC 51 | +CSHL_3_FC0420AGLLKK:2:1:758:1799 52 | 40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11 53 | @CSHL_3_FC0420AGLLKK:2:1:1818:550 54 | AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA 55 | +CSHL_3_FC0420AGLLKK:2:1:1818:550 56 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 36 32 40 33 40 40 38 37 40 28 29 27 22 13 20 19 17 17 13 33 18 57 | @CSHL_3_FC0420AGLLKK:2:1:1764:391 58 | CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 59 | +CSHL_3_FC0420AGLLKK:2:1:1764:391 60 | 40 40 40 40 40 40 40 40 40 40 40 33 40 40 40 40 40 24 40 40 40 40 40 12 40 24 14 9 22 15 29 18 11 40 22 61 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_qual_conv2.out: -------------------------------------------------------------------------------- 1 | 
@CSHL_3_FC0420AGLLKK:2:1:233:1674 2 | GTTAGAGGGAATACACCCACTCTGTAGGCACCATC 3 | +CSHL_3_FC0420AGLLKK:2:1:233:1674 4 | hhhhhhhhhhhhhhhhhh`hhhhPTYIUehhP]Z^ 5 | @CSHL_3_FC0420AGLLKK:2:1:136:448 6 | GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA 7 | +CSHL_3_FC0420AGLLKK:2:1:136:448 8 | hhhhhhhhhhhhhhhhhhhhhhh;MQ\hhHQ[HMJ 9 | @CSHL_3_FC0420AGLLKK:2:1:237:1037 10 | GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC 11 | +CSHL_3_FC0420AGLLKK:2:1:237:1037 12 | hhhhhhhhhhhhhhhDhhZchfhFhh@CZ`[NKZK 13 | @CSHL_3_FC0420AGLLKK:2:1:1601:1525 14 | AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA 15 | +CSHL_3_FC0420AGLLKK:2:1:1601:1525 16 | hhhhhhhhhhhhchhLhh^^hhhLdWQXRVYOJbN 17 | @CSHL_3_FC0420AGLLKK:2:1:1805:1464 18 | GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC 19 | +CSHL_3_FC0420AGLLKK:2:1:1805:1464 20 | hhhhhhhhhhhhhhhhhPW\hUhIeMTUGKNNFWJ 21 | @CSHL_3_FC0420AGLLKK:2:1:1713:528 22 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 23 | +CSHL_3_FC0420AGLLKK:2:1:1713:528 24 | hhhhhhhhhhhhhhhhhhhhh`hLfOVTQNLJGVK 25 | @CSHL_3_FC0420AGLLKK:2:1:126:1087 26 | GAGATATTCGAATGCATCATCAGATGGCACCATCA 27 | +CSHL_3_FC0420AGLLKK:2:1:126:1087 28 | hhhhhhhhhhhhhhYhhhhhhh_hhKJWhMLQeQV 29 | @CSHL_3_FC0420AGLLKK:2:1:1488:1323 30 | GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC 31 | +CSHL_3_FC0420AGLLKK:2:1:1488:1323 32 | hhhhhhhhhhhhhhhhhhgV_hhL]V@GLHRGCRI 33 | @CSHL_3_FC0420AGLLKK:2:1:913:199 34 | GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC 35 | +CSHL_3_FC0420AGLLKK:2:1:913:199 36 | hhghhhhhhhhhDhhXbTaUd`h;hMUUZQRYNYU 37 | @CSHL_3_FC0420AGLLKK:2:1:1236:1157 38 | AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA 39 | +CSHL_3_FC0420AGLLKK:2:1:1236:1157 40 | hhhhhhhhhhhhhchhhhhahehhhRPTWV_ZJVS 41 | @CSHL_3_FC0420AGLLKK:2:1:928:765 42 | GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC 43 | +CSHL_3_FC0420AGLLKK:2:1:928:765 44 | hhhhhhhhhhhhhY[hec[hhQh;dKSOSPKLLWK 45 | @CSHL_3_FC0420AGLLKK:2:1:727:1020 46 | GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA 47 | +CSHL_3_FC0420AGLLKK:2:1:727:1020 48 | hhhhhhhhhhhhhhhhhhhhhh^hhXRfaZPWVPR 49 | @CSHL_3_FC0420AGLLKK:2:1:758:1799 50 | GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC 51 | 
+CSHL_3_FC0420AGLLKK:2:1:758:1799 52 | hhhhhhhhchghh[ThQbOhhhhO\QDLJJRNCNK 53 | @CSHL_3_FC0420AGLLKK:2:1:1818:550 54 | AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA 55 | +CSHL_3_FC0420AGLLKK:2:1:1818:550 56 | hhhhhhhhhhhhhhd`hahhfeh\][VMTSQQMaR 57 | @CSHL_3_FC0420AGLLKK:2:1:1764:391 58 | CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 59 | +CSHL_3_FC0420AGLLKK:2:1:1764:391 60 | hhhhhhhhhhhahhhhhXhhhhhLhXNIVO]RKhV 61 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_qual_conv2n.out: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC0420AGLLKK:2:1:233:1674 2 | GTTAGAGGGAATACACCCACTCTGTAGGCACCATC 3 | +CSHL_3_FC0420AGLLKK:2:1:233:1674 4 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 5 | @CSHL_3_FC0420AGLLKK:2:1:136:448 6 | GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA 7 | +CSHL_3_FC0420AGLLKK:2:1:136:448 8 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 9 | @CSHL_3_FC0420AGLLKK:2:1:237:1037 10 | GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC 11 | +CSHL_3_FC0420AGLLKK:2:1:237:1037 12 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 13 | @CSHL_3_FC0420AGLLKK:2:1:1601:1525 14 | AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA 15 | +CSHL_3_FC0420AGLLKK:2:1:1601:1525 16 | 40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 12 40 40 30 30 40 40 40 12 36 23 17 24 18 22 25 15 10 34 14 17 | @CSHL_3_FC0420AGLLKK:2:1:1805:1464 18 | GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC 19 | +CSHL_3_FC0420AGLLKK:2:1:1805:1464 20 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 21 | @CSHL_3_FC0420AGLLKK:2:1:1713:528 22 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 23 | +CSHL_3_FC0420AGLLKK:2:1:1713:528 24 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 12 38 15 22 20 17 14 12 10 7 22 11 25 | @CSHL_3_FC0420AGLLKK:2:1:126:1087 
26 | GAGATATTCGAATGCATCATCAGATGGCACCATCA 27 | +CSHL_3_FC0420AGLLKK:2:1:126:1087 28 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 29 | @CSHL_3_FC0420AGLLKK:2:1:1488:1323 30 | GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC 31 | +CSHL_3_FC0420AGLLKK:2:1:1488:1323 32 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 33 | @CSHL_3_FC0420AGLLKK:2:1:913:199 34 | GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC 35 | +CSHL_3_FC0420AGLLKK:2:1:913:199 36 | 40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 40 13 21 21 26 17 18 25 14 25 21 37 | @CSHL_3_FC0420AGLLKK:2:1:1236:1157 38 | AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA 39 | +CSHL_3_FC0420AGLLKK:2:1:1236:1157 40 | 40 40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 40 40 40 33 40 37 40 40 40 18 16 20 23 22 31 26 10 22 19 41 | @CSHL_3_FC0420AGLLKK:2:1:928:765 42 | GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC 43 | +CSHL_3_FC0420AGLLKK:2:1:928:765 44 | 40 40 40 40 40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 45 | @CSHL_3_FC0420AGLLKK:2:1:727:1020 46 | GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA 47 | +CSHL_3_FC0420AGLLKK:2:1:727:1020 48 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 24 18 38 33 26 16 23 22 16 18 49 | @CSHL_3_FC0420AGLLKK:2:1:758:1799 50 | GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC 51 | +CSHL_3_FC0420AGLLKK:2:1:758:1799 52 | 40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11 53 | @CSHL_3_FC0420AGLLKK:2:1:1818:550 54 | AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA 55 | +CSHL_3_FC0420AGLLKK:2:1:1818:550 56 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 36 32 40 33 40 40 38 37 40 28 29 27 22 13 20 19 17 17 13 33 18 57 | @CSHL_3_FC0420AGLLKK:2:1:1764:391 58 | CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 59 | +CSHL_3_FC0420AGLLKK:2:1:1764:391 60 | 40 40 40 40 40 40 40 40 40 40 40 33 40 40 40 40 40 24 40 40 40 40 40 12 40 24 14 9 22 15 29 
18 11 40 22 61 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_qual_filter1.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaaaaaaaabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | aaaaaaaaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaZZZZZZUZUZaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_qual_filter1a.out: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:8:624 2 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 3 | +CSHL_3_FC042AGLLWW:1:2:8:624 4 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 5 | 
-------------------------------------------------------------------------------- /galaxy/test-data/fastq_qual_filter1b.out: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaaaaaaaabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:169 6 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 7 | +CSHL_3_FC042AGLLWW:1:2:7:169 8 | a_M^a\Uaaa_M_aaaZZZZZZUZUZaaV\ZUGUUR 9 | @CSHL_3_FC042AGLLWW:1:2:7:1436 10 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 11 | +CSHL_3_FC042AGLLWW:1:2:7:1436 12 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 13 | @CSHL_3_FC042AGLLWW:1:2:7:292 14 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 15 | +CSHL_3_FC042AGLLWW:1:2:7:292 16 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 17 | @CSHL_3_FC042AGLLWW:1:2:7:1875 18 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 19 | +CSHL_3_FC042AGLLWW:1:2:7:1875 20 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 21 | @CSHL_3_FC042AGLLWW:1:2:8:624 22 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 23 | +CSHL_3_FC042AGLLWW:1:2:8:624 24 | aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 25 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_quality_trimmer.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | 
GGAGAAATACACACAATTGGTTAATCCCCCTATATA 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_quality_trimmer.out: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaa 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGAC 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaa 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACAATTGGTTAATCCCCCT 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_ 21 | @CSHL_3_FC042AGLLWW:1:2:7:1875 22 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCC 23 | +CSHL_3_FC042AGLLWW:1:2:7:1875 24 | aaaaaaaaaXUXXEXaaaaa`_Zaaaaaaaaa 25 | @CSHL_3_FC042AGLLWW:1:2:8:624 26 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 27 | +CSHL_3_FC042AGLLWW:1:2:8:624 28 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 29 | 
@CSHL_3_FC042AGLLWW:1:2:8:250 30 | TGCCGCGCACACTGATGCAATTGGTTAAT 31 | +CSHL_3_FC042AGLLWW:1:2:8:250 32 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^ 33 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_stats1.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_stats1.out: -------------------------------------------------------------------------------- 1 | column count min max sum mean Q1 med Q3 IQR lW rW A_Count C_Count 
G_Count T_Count N_Count Max_count 2 | 1 9 23 34 288 32.00 33 33 33 0 33 33 3 1 4 1 0 9 3 | 2 9 28 33 287 31.89 31 33 33 2 28 33 3 3 2 1 0 9 4 | 3 9 13 34 268 29.78 28 33 33 5 21 34 5 1 0 3 0 9 5 | 4 9 17 33 261 29.00 30 33 33 3 26 33 1 2 3 3 0 9 6 | 5 9 22 33 269 29.89 30 33 33 3 26 33 3 3 3 0 0 9 7 | 6 9 22 33 277 30.78 30 33 33 3 26 33 5 3 0 1 0 9 8 | 7 9 21 33 258 28.67 24 33 33 9 21 33 4 1 3 1 0 9 9 | 8 9 12 33 263 29.22 32 33 33 1 31 33 2 1 1 5 0 9 10 | 9 9 29 33 290 32.22 33 33 33 0 33 33 3 3 2 1 0 9 11 | 10 9 23 33 277 30.78 32 33 33 1 31 33 1 4 2 2 0 9 12 | 11 9 12 33 245 27.22 21 31 33 12 12 33 5 2 1 1 0 9 13 | 12 9 13 33 214 23.78 15 24 33 18 13 33 2 4 2 1 0 9 14 | 13 9 5 33 249 27.67 29 31 33 4 23 33 2 1 1 5 0 9 15 | 14 9 5 33 233 25.89 24 33 33 9 11 33 3 3 2 1 0 9 16 | 15 9 15 33 251 27.89 24 33 33 9 15 33 5 1 1 2 0 9 17 | 16 9 23 34 269 29.89 24 33 33 9 23 34 3 1 2 3 0 9 18 | 17 9 13 34 266 29.56 33 33 33 0 33 33 2 3 1 3 0 9 19 | 18 9 21 34 272 30.22 31 33 33 2 28 34 0 5 1 3 0 9 20 | 19 9 5 34 244 27.11 27 30 33 6 18 34 4 4 1 0 0 9 21 | 20 9 11 34 241 26.78 23 32 33 10 11 34 3 4 2 0 0 9 22 | 21 9 13 33 240 26.67 24 27 33 9 13 33 1 4 0 4 0 9 23 | 22 9 5 33 190 21.11 13 21 33 20 5 33 1 4 0 3 1 9 24 | 23 9 5 33 205 22.78 16 26 33 17 5 33 4 4 1 0 0 9 25 | 24 9 5 33 247 27.44 28 31 33 5 21 33 1 5 1 2 0 9 26 | 25 9 11 34 241 26.78 24 33 33 9 11 34 3 4 0 2 0 9 27 | 26 9 5 33 212 23.56 18 31 33 15 5 33 0 6 0 3 0 9 28 | 27 9 5 33 227 25.22 21 26 33 12 5 33 3 4 1 1 0 9 29 | 28 9 21 33 255 28.33 24 31 33 9 21 33 2 4 3 0 0 9 30 | 29 9 5 33 228 25.33 21 30 33 12 5 33 2 4 1 2 0 9 31 | 30 9 10 33 213 23.67 16 28 33 17 10 33 3 4 2 0 0 9 32 | 31 9 5 33 236 26.22 21 31 33 12 5 33 1 4 1 3 0 9 33 | 32 9 5 33 210 23.33 12 29 33 21 5 33 3 3 0 3 0 9 34 | 33 9 5 33 183 20.33 9 21 33 24 5 33 1 4 2 2 0 9 35 | 34 9 5 33 150 16.67 7 17 22 15 5 33 3 4 1 1 0 9 36 | 35 9 13 33 217 24.11 21 24 29 8 13 33 1 4 1 3 0 9 37 | 36 9 5 33 195 21.67 18 21 32 14 5 33 3 2 1 3 0 9 38 | 
-------------------------------------------------------------------------------- /galaxy/test-data/fastq_to_fasta1.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_to_fasta1a.out: -------------------------------------------------------------------------------- 1 | >CSHL_3_FC042AGLLWW:1:2:7:33 2 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 3 | >CSHL_3_FC042AGLLWW:1:2:7:169 4 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 5 | >CSHL_3_FC042AGLLWW:1:2:7:1436 6 | 
AATTATTTATTAAATTTTAATAATATGGGAGACACT 7 | >CSHL_3_FC042AGLLWW:1:2:7:292 8 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 9 | >CSHL_3_FC042AGLLWW:1:2:7:1819 10 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 11 | >CSHL_3_FC042AGLLWW:1:2:7:1875 12 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 13 | >CSHL_3_FC042AGLLWW:1:2:8:624 14 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 15 | >CSHL_3_FC042AGLLWW:1:2:8:250 16 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 17 | -------------------------------------------------------------------------------- /galaxy/test-data/fastq_to_fasta1b.out: -------------------------------------------------------------------------------- 1 | >1 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | >2 4 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 5 | >3 6 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 7 | >4 8 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 9 | >5 10 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 11 | >6 12 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 13 | >7 14 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 15 | >8 16 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 17 | >9 18 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 19 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_artifacts1.fasta: -------------------------------------------------------------------------------- 1 | >CSHL_3_FC0420AGLLKK:2:1:233:1674 2 | GTTAGAGGGAATACACCCACTCTGTAGGCACCATC 3 | >CSHL_3_FC0420AGLLKK:2:1:237:1037 4 | GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC 5 | >CSHL_3_FC0420AGLLKK:2:1:1601:1525 6 | AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA 7 | >CSHL_3_FC0420AGLLKK:2:1:1805:1464 8 | GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC 9 | >CSHL_3_FC0420AGLLKK:2:1:1713:528 10 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 11 | >CSHL_3_FC0420AGLLKK:2:1:126:1087 12 | GAGATATTCGAATGCATCATCAGATGGCACCATCA 13 | >CSHL_3_FC0420AGLLKK:2:1:1488:1323 14 | GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC 15 | >CSHL_3_FC0420AGLLKK:2:1:1236:1157 16 | AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA 17 | >CSHL_3_FC0420AGLLKK:2:1:727:1020 18 | GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA 19 | 
>CSHL_3_FC0420AGLLKK:2:1:758:1799 20 | GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC 21 | >CSHL_3_FC0420AGLLKK:2:1:1818:550 22 | AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA 23 | >CSHL_3_FC0420AGLLKK:2:1:1764:391 24 | CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 25 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_artifacts1.out: -------------------------------------------------------------------------------- 1 | >CSHL_3_FC0420AGLLKK:2:1:233:1674 2 | GTTAGAGGGAATACACCCACTCTGTAGGCACCATC 3 | >CSHL_3_FC0420AGLLKK:2:1:237:1037 4 | GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC 5 | >CSHL_3_FC0420AGLLKK:2:1:1805:1464 6 | GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC 7 | >CSHL_3_FC0420AGLLKK:2:1:126:1087 8 | GAGATATTCGAATGCATCATCAGATGGCACCATCA 9 | >CSHL_3_FC0420AGLLKK:2:1:1488:1323 10 | GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC 11 | >CSHL_3_FC0420AGLLKK:2:1:727:1020 12 | GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA 13 | >CSHL_3_FC0420AGLLKK:2:1:758:1799 14 | GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC 15 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_artifacts2.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC0420AGLLKK:2:1:233:1674 2 | GTTAGAGGGAATACACCCACTCTGTAGGCACCATC 3 | +CSHL_3_FC0420AGLLKK:2:1:233:1674 4 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 5 | @CSHL_3_FC0420AGLLKK:2:1:136:448 6 | GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA 7 | +CSHL_3_FC0420AGLLKK:2:1:136:448 8 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 9 | @CSHL_3_FC0420AGLLKK:2:1:237:1037 10 | GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC 11 | +CSHL_3_FC0420AGLLKK:2:1:237:1037 12 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 13 | @CSHL_3_FC0420AGLLKK:2:1:1601:1525 14 | AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAA 15 | +CSHL_3_FC0420AGLLKK:2:1:1601:1525 16 | 
40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 12 40 40 30 30 40 40 40 12 36 23 17 24 18 22 25 15 10 34 14 17 | @CSHL_3_FC0420AGLLKK:2:1:1805:1464 18 | GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC 19 | +CSHL_3_FC0420AGLLKK:2:1:1805:1464 20 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 21 | @CSHL_3_FC0420AGLLKK:2:1:1713:528 22 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 23 | +CSHL_3_FC0420AGLLKK:2:1:1713:528 24 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 12 38 15 22 20 17 14 12 10 7 22 11 25 | @CSHL_3_FC0420AGLLKK:2:1:126:1087 26 | GAGATATTCGAATGCATCATCAGATGGCACCATCA 27 | +CSHL_3_FC0420AGLLKK:2:1:126:1087 28 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 29 | @CSHL_3_FC0420AGLLKK:2:1:1488:1323 30 | GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC 31 | +CSHL_3_FC0420AGLLKK:2:1:1488:1323 32 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 33 | @CSHL_3_FC0420AGLLKK:2:1:913:199 34 | GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC 35 | +CSHL_3_FC0420AGLLKK:2:1:913:199 36 | 40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 40 13 21 21 26 17 18 25 14 25 21 37 | @CSHL_3_FC0420AGLLKK:2:1:1236:1157 38 | AAAAAAAAAAAAAAAACAAAAAAAAAAAAAACAAA 39 | +CSHL_3_FC0420AGLLKK:2:1:1236:1157 40 | 40 40 40 40 40 40 40 40 40 40 40 40 40 35 40 40 40 40 40 33 40 37 40 40 40 18 16 20 23 22 31 26 10 22 19 41 | @CSHL_3_FC0420AGLLKK:2:1:928:765 42 | GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC 43 | +CSHL_3_FC0420AGLLKK:2:1:928:765 44 | 40 40 40 40 40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 45 | @CSHL_3_FC0420AGLLKK:2:1:727:1020 46 | GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA 47 | +CSHL_3_FC0420AGLLKK:2:1:727:1020 48 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 24 18 38 33 26 16 23 22 16 18 49 | @CSHL_3_FC0420AGLLKK:2:1:758:1799 50 | 
GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC 51 | +CSHL_3_FC0420AGLLKK:2:1:758:1799 52 | 40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11 53 | @CSHL_3_FC0420AGLLKK:2:1:1818:550 54 | AAAAAAAAAAAAAAAACAAAAACAAAAAAAACAAA 55 | +CSHL_3_FC0420AGLLKK:2:1:1818:550 56 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 36 32 40 33 40 40 38 37 40 28 29 27 22 13 20 19 17 17 13 33 18 57 | @CSHL_3_FC0420AGLLKK:2:1:1764:391 58 | CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 59 | +CSHL_3_FC0420AGLLKK:2:1:1764:391 60 | 40 40 40 40 40 40 40 40 40 40 40 33 40 40 40 40 40 24 40 40 40 40 40 12 40 24 14 9 22 15 29 18 11 40 22 61 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_artifacts2.out: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC0420AGLLKK:2:1:233:1674 2 | GTTAGAGGGAATACACCCACTCTGTAGGCACCATC 3 | +CSHL_3_FC0420AGLLKK:2:1:233:1674 4 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 32 40 40 40 40 16 20 25 9 21 37 40 40 16 29 26 30 5 | @CSHL_3_FC0420AGLLKK:2:1:136:448 6 | GTTCTCAGGACCCCTTCAGTAGTNGGCACCATCAA 7 | +CSHL_3_FC0420AGLLKK:2:1:136:448 8 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 -5 13 17 28 40 40 8 17 27 8 13 10 9 | @CSHL_3_FC0420AGLLKK:2:1:237:1037 10 | GTGATAGATTGTCTTGTTGTTCTGTAGGCACCATC 11 | +CSHL_3_FC0420AGLLKK:2:1:237:1037 12 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 4 40 40 26 35 40 38 40 6 40 40 0 3 26 32 27 14 11 26 11 13 | @CSHL_3_FC0420AGLLKK:2:1:1805:1464 14 | GATGCGTTCGAGATGGGTGCGCTGTAGGCACCATC 15 | +CSHL_3_FC0420AGLLKK:2:1:1805:1464 16 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 16 23 28 40 21 40 9 37 13 20 21 7 11 14 14 6 23 10 17 | @CSHL_3_FC0420AGLLKK:2:1:126:1087 18 | GAGATATTCGAATGCATCATCAGATGGCACCATCA 19 | +CSHL_3_FC0420AGLLKK:2:1:126:1087 20 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 25 40 40 40 40 40 40 40 31 40 40 11 10 23 40 13 12 17 37 17 22 21 | 
@CSHL_3_FC0420AGLLKK:2:1:1488:1323 22 | GTTTTTTCCCCTAATCTGAGTCTGTAGGCACCATC 23 | +CSHL_3_FC0420AGLLKK:2:1:1488:1323 24 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 39 22 31 40 40 12 29 22 0 7 12 8 18 7 3 18 9 25 | @CSHL_3_FC0420AGLLKK:2:1:913:199 26 | GTTCAGTGTTGGTGCACTGTGTTNTAGGCACCATC 27 | +CSHL_3_FC0420AGLLKK:2:1:913:199 28 | 40 40 39 40 40 40 40 40 40 40 40 40 4 40 40 24 34 20 33 21 36 32 40 -5 40 13 21 21 26 17 18 25 14 25 21 29 | @CSHL_3_FC0420AGLLKK:2:1:928:765 30 | GTTTTCAGTTCGAGGTTCGTGCTNTAGGCATTATC 31 | +CSHL_3_FC0420AGLLKK:2:1:928:765 32 | 40 40 40 40 40 40 40 40 40 40 40 40 40 25 27 40 37 35 27 40 40 17 40 -5 36 11 19 15 19 16 11 12 12 23 11 33 | @CSHL_3_FC0420AGLLKK:2:1:727:1020 34 | GTAATATAGTTGATAAGAATCTGCAGAGAGAATCA 35 | +CSHL_3_FC0420AGLLKK:2:1:727:1020 36 | 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 40 30 40 40 24 18 38 33 26 16 23 22 16 18 37 | @CSHL_3_FC0420AGLLKK:2:1:758:1799 38 | GTAGAGACCCCCTAATAGAGTCTGTAGGCACCATC 39 | +CSHL_3_FC0420AGLLKK:2:1:758:1799 40 | 40 40 40 40 40 40 40 40 35 40 39 40 40 27 20 40 17 34 15 40 40 40 40 15 28 17 4 12 10 10 18 14 3 14 11 41 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_barcode_splitter1.out: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 11 | 14 | 17 | 20 | 23 |

Copy these files to your local computer, as they will be soon deleted. 24 |

3 | BarcodeCountLocation 4 |
6 | BC111http://tango.cshl.edu/barcode_splits/2009-01-19_1719__fastx_barcode_splitter1_fastq__BC1.txt 7 |
9 | BC212http://tango.cshl.edu/barcode_splits/2009-01-19_1719__fastx_barcode_splitter1_fastq__BC2.txt 10 |
12 | BC39http://tango.cshl.edu/barcode_splits/2009-01-19_1719__fastx_barcode_splitter1_fastq__BC3.txt 13 |
15 | BC41http://tango.cshl.edu/barcode_splits/2009-01-19_1719__fastx_barcode_splitter1_fastq__BC4.txt 16 |
18 | unmatched9http://tango.cshl.edu/barcode_splits/2009-01-19_1719__fastx_barcode_splitter1_fastq__unmatched.txt 19 |
21 | total42 22 |
25 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_barcode_splitter1.txt: -------------------------------------------------------------------------------- 1 | BC1 GATCT 2 | BC2 ATCGT 3 | BC3 GTGAT 4 | BC4 TGTCT -------------------------------------------------------------------------------- /galaxy/test-data/fastx_clipper1.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_clipper1a.out: 
-------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCC 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaa 5 | @CSHL_3_FC042AGLLWW:1:2:8:250 6 | TGCCGCGCACACTGATG 7 | +CSHL_3_FC042AGLLWW:1:2:8:250 8 | aaaaaaaa^aaaaaabb 9 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_renamer1.fastq: -------------------------------------------------------------------------------- 1 | @CSHL_3_FC042AGLLWW:1:2:7:203 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +CSHL_3_FC042AGLLWW:1:2:7:203 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CSHL_3_FC042AGLLWW:1:2:7:33 6 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 7 | +CSHL_3_FC042AGLLWW:1:2:7:33 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @CSHL_3_FC042AGLLWW:1:2:7:169 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +CSHL_3_FC042AGLLWW:1:2:7:169 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @CSHL_3_FC042AGLLWW:1:2:7:1436 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +CSHL_3_FC042AGLLWW:1:2:7:1436 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @CSHL_3_FC042AGLLWW:1:2:7:292 18 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 19 | +CSHL_3_FC042AGLLWW:1:2:7:292 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @CSHL_3_FC042AGLLWW:1:2:7:1819 22 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 23 | +CSHL_3_FC042AGLLWW:1:2:7:1819 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @CSHL_3_FC042AGLLWW:1:2:7:1875 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +CSHL_3_FC042AGLLWW:1:2:7:1875 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @CSHL_3_FC042AGLLWW:1:2:8:624 30 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 31 | +CSHL_3_FC042AGLLWW:1:2:8:624 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | @CSHL_3_FC042AGLLWW:1:2:8:250 34 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 35 | +CSHL_3_FC042AGLLWW:1:2:8:250 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- 
/galaxy/test-data/fastx_renamer1.out: -------------------------------------------------------------------------------- 1 | @GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 2 | GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 3 | +GTACGCATGACCGAACCCCCCNCCCCCCAATTGGTT 4 | aab^V^aU]`aa^aZaaabbXEZabaaaaaaaa]]` 5 | @CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 6 | CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 7 | +CAATGCCTCCAATTGGTTAATCCCCCTATATATACT 8 | Waaa^aZaaW^U_XaWaa\WMEP^KEZXRPEEEGaa 9 | @GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 10 | GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 11 | +GCAGCAGGCGCGTCAGAGAGCCCCCCCCCCCCCCCC 12 | a_M^a\Uaaa_M_aaaaaaaaaaaaaaaV\ZUGUUR 13 | @AATTATTTATTAAATTTTAATAATATGGGAGACACT 14 | AATTATTTATTAAATTTTAATAATATGGGAGACACT 15 | +AATTATTTATTAAATTTTAATAATATGGGAGACACT 16 | a^aaaaaaaaaaaaaaa_U`aaaaa_S_aaaaaVV[ 17 | @GGAGAAATACACACAATTGGTTAATCCCCCTATATA 18 | GGAGAAATACACACAATTGGTTAATCCCCCTATATA 19 | +GGAGAAATACACACAATTGGTTAATCCCCCTATATA 20 | babaaaaaaaUMaaaaaaaaaaa\XEUUEP_]UERE 21 | @AATTCAAACCACCCCAACCCACACACAGAGATACAA 22 | AATTCAAACCACCCCAACCCACACACAGAGATACAA 23 | +AATTCAAACCACCCCAACCCACACACAGAGATACAA 24 | a\\QVVVLaaLOEXUWUUEKUULEMUEUUKULIQMU 25 | @GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 26 | GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 27 | +GCAAAAGAGTAGTGTACCCCCCCCCCCCCCCCCCCC 28 | aaaaaaaaaXUXXEXaaaaa`_ZaaaaaaaaaXEXU 29 | @ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 30 | ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 31 | +ACTGCAATTGGTTAATCCCCCTATATAGCGCTGTGG 32 | aa[S^`X`aa_]]OOXMU^_[MU_aaaaaaaaaaaa 33 | @TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 34 | TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 35 | +TGCCGCGCACACTGATGCAATTGGTTAATCCCCCTA 36 | aaaaaaaa^aaaaaabbb[KXPEU[RXZ^JUKRKXE 37 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_rev_comp1.fasta: -------------------------------------------------------------------------------- 1 | >CSHL__2_FC042NGABCD:8:1:120:202 2 | ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC 3 | >CSHL__2_FC042NGABCD:8:1:103:1185 4 | 
ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC 5 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_rev_comp2.fastq: -------------------------------------------------------------------------------- 1 | @CSHL__2_FC042NGABCD:8:1:120:202 2 | ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC 3 | +CSHL__2_FC042NGABCD:8:1:120:202 4 | 40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8 5 | @CSHL__2_FC042NGABCD:8:1:103:1185 6 | ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC 7 | +CSHL__2_FC042NGABCD:8:1:103:1185 8 | 40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2 9 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_reverse_complement1.out: -------------------------------------------------------------------------------- 1 | >CSHL__2_FC042NGABCD:8:1:120:202 2 | GCAGAAAACGGCATACTAGCTCTTCCGATCTATCGT 3 | >CSHL__2_FC042NGABCD:8:1:103:1185 4 | GAAGACGGTAAACGAGCTCTGCCGATCTATCGTGAT 5 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_reverse_complement2.out: -------------------------------------------------------------------------------- 1 | @CSHL__2_FC042NGABCD:8:1:120:202 2 | GCAGAAAACGGCATACTAGCTCTTCCGATCTATCGT 3 | +CSHL__2_FC042NGABCD:8:1:120:202 4 | 8 10 21 -1 10 11 -1 7 3 1 8 40 27 14 40 30 -1 40 20 40 25 40 40 28 40 40 6 40 40 40 40 20 40 40 40 40 5 | @CSHL__2_FC042NGABCD:8:1:103:1185 6 | GAAGACGGTAAACGAGCTCTGCCGATCTATCGTGAT 7 | +CSHL__2_FC042NGABCD:8:1:103:1185 8 | 2 30 25 4 2 8 0 10 3 23 12 22 34 15 36 8 14 17 22 9 0 40 22 30 32 40 40 40 31 33 35 40 40 40 40 40 9 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_seqid_uncollapse1.out: -------------------------------------------------------------------------------- 1 | 21 0 0 0 0 0 0 0 - 148-5 22 021 GALNT15 22 1 22 1 21, 1, 1 2 | 21 0 
0 0 0 0 0 0 - 148-5 22 021 GALNT15 22 1 22 1 21, 1, 1 3 | 21 0 0 0 0 0 0 0 - 148-5 22 021 GALNT15 22 1 22 1 21, 1, 1 4 | 21 0 0 0 0 0 0 0 - 148-5 22 021 GALNT15 22 1 22 1 21, 1, 1 5 | 21 0 0 0 0 0 0 0 - 148-5 22 021 GALNT15 22 1 22 1 21, 1, 1 6 | 21 1 0 0 0 0 0 0 - 190-2 22 022 ADDA2 22 0 22 1 22, 0, 0 7 | 21 1 0 0 0 0 0 0 - 190-2 22 022 ADDA2 22 0 22 1 22, 0, 0 8 | 21 0 0 0 0 0 0 0 - 253-1 22 021 FORRIL2 22 1 22 1 21, 1, 1 9 | 21 1 0 0 0 0 0 0 - 396-1 22 022 JMK446 22 0 22 1 22, 0, 0 10 | 21 1 0 0 0 0 0 0 - 463-7 22 022 OPTQTL5 22 0 22 1 22, 0, 0 11 | 21 1 0 0 0 0 0 0 - 463-7 22 022 OPTQTL5 22 0 22 1 22, 0, 0 12 | 21 1 0 0 0 0 0 0 - 463-7 22 022 OPTQTL5 22 0 22 1 22, 0, 0 13 | 21 1 0 0 0 0 0 0 - 463-7 22 022 OPTQTL5 22 0 22 1 22, 0, 0 14 | 21 1 0 0 0 0 0 0 - 463-7 22 022 OPTQTL5 22 0 22 1 22, 0, 0 15 | 21 1 0 0 0 0 0 0 - 463-7 22 022 OPTQTL5 22 0 22 1 22, 0, 0 16 | 21 1 0 0 0 0 0 0 - 463-7 22 022 OPTQTL5 22 0 22 1 22, 0, 0 17 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_seqid_uncollapse1.psl: -------------------------------------------------------------------------------- 1 | 21 0 0 0 0 0 0 0 - 148-5 22 021 GALNT15 22 1 22 1 21, 1, 1 2 | 21 1 0 0 0 0 0 0 - 190-2 22 022 ADDA2 22 0 22 1 22, 0, 0 3 | 21 0 0 0 0 0 0 0 - 253-1 22 021 FORRIL2 22 1 22 1 21, 1, 1 4 | 21 1 0 0 0 0 0 0 - 396-1 22 022 JMK446 22 0 22 1 22, 0, 0 5 | 21 1 0 0 0 0 0 0 - 463-7 22 022 OPTQTL5 22 0 22 1 22, 0, 0 6 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_trimmer1.fasta: -------------------------------------------------------------------------------- 1 | >CSHL__2_FC042NGABCD:8:1:120:202 2 | ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC 3 | >CSHL__2_FC042NGABCD:8:1:103:1185 4 | ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC 5 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_trimmer1.out: 
-------------------------------------------------------------------------------- 1 | >CSHL__2_FC042NGABCD:8:1:120:202 2 | TAGATCGGAAGAGCTAGTATGCCGTTTTCTGC 3 | >CSHL__2_FC042NGABCD:8:1:103:1185 4 | CGATAGATCGGCAGAGCTCGTTTACCGTCTTC 5 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_trimmer2.fastq: -------------------------------------------------------------------------------- 1 | @CSHL__2_FC042NGABCD:8:1:120:202 2 | ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC 3 | +CSHL__2_FC042NGABCD:8:1:120:202 4 | 40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8 5 | @CSHL__2_FC042NGABCD:8:1:103:1185 6 | ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC 7 | +CSHL__2_FC042NGABCD:8:1:103:1185 8 | 40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2 9 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_trimmer2.out: -------------------------------------------------------------------------------- 1 | @CSHL__2_FC042NGABCD:8:1:120:202 2 | ACGATAGATCGGAAGAGCTAGTATGCC 3 | +CSHL__2_FC042NGABCD:8:1:120:202 4 | 40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 5 | @CSHL__2_FC042NGABCD:8:1:103:1185 6 | ATCACGATAGATCGGCAGAGCTCGTTT 7 | +CSHL__2_FC042NGABCD:8:1:103:1185 8 | 40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 0 9 22 17 14 8 36 15 34 22 12 23 9 | -------------------------------------------------------------------------------- /galaxy/test-data/fastx_trimmer_from_end1.fasta: -------------------------------------------------------------------------------- 1 | >67-3461 2 | TGAGATACATTTGAACATTGAT 3 | >68-3451 4 | ACATAGCCTATAAGAA 5 | >69-3397 6 | TAGGGAAGTAACACACTGGATT 7 | >70-3391 8 | TGAGATCACAGTTTCCACACATTTGGAGGCAACAT 9 | >71-3291 10 | TCACAGGATTAGAGTCTGAGCTAT 11 | >72-3193 12 | TAGAAATTCAAGCTTTGATGTTTTT 13 | 
-------------------------------------------------------------------------------- /galaxy/test-data/fastx_trimmer_from_end1.out: -------------------------------------------------------------------------------- 1 | >67-3461 2 | TGAGATACATTTGAACATTG 3 | >69-3397 4 | TAGGGAAGTAACACACTGGA 5 | >70-3391 6 | TGAGATCACAGTTTCCACACATTTGGAGGCAAC 7 | >71-3291 8 | TCACAGGATTAGAGTCTGAGCT 9 | >72-3193 10 | TAGAAATTCAAGCTTTGATGTTT 11 | -------------------------------------------------------------------------------- /galaxy/tool-data/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | EXTRA_DIST = fastx_clipper_sequences.txt 12 | -------------------------------------------------------------------------------- /galaxy/tool-data/fastx_clipper_sequences.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Adapter/Linker sequences for FASTX-Clipper tool. 
3 | # 4 | # Format: 5 | # Adapter Sequence Descriptive name 6 | # 7 | # Example: 8 | # AAATTTGATAAGATA Our-Adapter 9 | # 10 | # Some adapters can be found here: 11 | # http://seqanswers.com/forums/showthread.php?t=198 12 | 13 | TGTAGGCC Dummy-Adapter (don't use me) 14 | -------------------------------------------------------------------------------- /galaxy/tools/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | SUBDIRS = fastx_toolkit 12 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | EXTRA_DIST = fastq_quality_converter.xml \ 12 | fastq_quality_filter.xml \ 13 | fastx_quality_statistics.xml \ 14 | fastq_to_fasta.xml \ 15 | fastx_artifacts_filter.xml \ 16 | fastx_clipper.xml \ 17 | fastx_reverse_complement.xml \ 18 | fastx_trimmer.xml \ 19 | fastx_renamer.xml \ 20 | fastx_barcode_splitter.xml fastx_barcode_splitter_galaxy_wrapper.sh \ 21 | fastx_nucleotides_distribution.xml \ 22 | fastq_quality_boxplot.xml \ 23 | fasta_clipping_histogram.xml \ 24 | fastx_collapser.xml \ 25 | fasta_formatter.xml \ 26 | fasta_nucleotide_changer.xml \ 27 | fastq_quality_trimmer.xml \ 28 | fastx_trimmer_from_end.xml \ 29 | fastx_uncollapser.xml \ 30 | seqid_uncollapser.xml \ 31 | fastx_nucleotides_distribution_line.xml \ 32 | fastq_masker.xml 33 | 34 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fasta_clipping_histogram.xml: -------------------------------------------------------------------------------- 1 | 2 | chart 3 | fasta_clipping_histogram.pl $input $outfile 4 | 5 | 6 | 7 | 8 | 9 | 10 | 12 | 13 | 14 | 15 | **What it does** 16 | 17 | This tool creates a histogram image of the sequence-length distribution in a given FASTA dataset file. 18 | 19 | **TIP:** Use this tool after clipping your library (with **FASTX Clipper tool**), to visualize the clipping results. 20 | 21 | ----- 22 | 23 | **Output Examples** 24 | 25 | In the following library, most sequences are 24-mers to 27-mers. 26 | This could indicate an abundance of endo-siRNAs (depending, of course, on what you've tried to sequence in the first place). 27 | 28 | .. image:: ./static/fastx_icons/fasta_clipping_histogram_1.png 29 | 30 | 31 | In the following library, most sequences are 19, 22 or 23-mers. 32 | This could indicate an abundance of miRNAs (depending, of course, on what you've tried to sequence in the first place). 33 | 34 | .. 
image:: ./static/fastx_icons/fasta_clipping_histogram_2.png 35 | 36 | 37 | ----- 38 | 39 | 40 | **Input Formats** 41 | 42 | This tool accepts short-reads FASTA files. The reads don't have to be short, but they do have to be on a single line, like so:: 43 | 44 | >sequence1 45 | AGTAGTAGGTGATGTAGAGAGAGAGAGAGTAG 46 | >sequence2 47 | GTGTGTGTGGGAAGTTGACACAGTA 48 | >sequence3 49 | CCTTGAGATTAACGCTAATCAAGTAAAC 50 | 51 | 52 | If the sequences span over multiple lines:: 53 | 54 | >sequence1 55 | CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG 56 | TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG 57 | aactggtctttacctTTAAGTTG 58 | 59 | Use the **FASTA Width Formatter** tool to re-format the FASTA into single-line sequences:: 60 | 61 | >sequence1 62 | CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG 63 | 64 | 65 | ----- 66 | 67 | 68 | 69 | **Multiplicity counts (a.k.a reads-count)** 70 | 71 | If the sequence identifier (the text after the '>') contains a dash and a number, it is treated as a multiplicity count value (i.e. how many times that individual sequence was repeated in the original FASTA file, before collapsing). 72 | 73 | Example 1 - The following FASTA file *does not* have multiplicity counts:: 74 | 75 | >seq1 76 | GGATCC 77 | >seq2 78 | GGTCATGGGTTTAAA 79 | >seq3 80 | GGGATATATCCCCACACACACACAC 81 | 82 | Each sequence counts as one, to produce the following chart: 83 | 84 | .. image:: ./static/fastx_icons/fasta_clipping_histogram_3.png 85 | 86 | 87 | Example 2 - The following FASTA file has multiplicity counts:: 88 | 89 | >seq1-2 90 | GGATCC 91 | >seq2-10 92 | GGTCATGGGTTTAAA 93 | >seq3-3 94 | GGGATATATCCCCACACACACACAC 95 | 96 | The first sequence counts as 2, the second as 10, the third as 3, to produce the following chart: 97 | 98 | .. 
image:: ./static/fastx_icons/fasta_clipping_histogram_4.png 99 | 100 | Use the **FASTA Collapser** tool to create FASTA files with multiplicity counts. 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fasta_formatter.xml: -------------------------------------------------------------------------------- 1 | 2 | formatter 3 | 12 | 13 | cat '$input' | 14 | fasta_formatter -w $width -o '$output' 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 39 | 40 | 41 | 42 | **What it does** 43 | 44 | This tool re-formats a FASTA file, changing the width of the nucleotides lines. 45 | 46 | **TIP:** Outputting a single line (with **width = 0**) can be useful for scripting (with **grep**, **awk**, and **perl**). Every odd line is a sequence identifier, and every even line is a nucleotides line. 47 | 48 | -------- 49 | 50 | **Example** 51 | 52 | Input FASTA file (each nucleotides line is 50 characters long):: 53 | 54 | >Scaffold3648 55 | AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTC 56 | CCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTG 57 | TTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACA 58 | ATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT 59 | >Scaffold9299 60 | CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAG 61 | TCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAG 62 | aactggtctttacctTTAAGTTG 63 | 64 | 65 | Output FASTA file (with width=80):: 66 | 67 | >Scaffold3648 68 | AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTT 69 | ATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCA 70 | ATTTTAATGAACATGTAGTAAAAACT 71 | >Scaffold9299 72 | CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTAC 73 | GTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG 74 | 75 | Output FASTA file (with width=0 => single line):: 76 | 77 | >Scaffold3648 78 | 
AGGAATGATGACTACAATGATCAACTTAACCTATCTATTTAATTTAGTTCCCTAATGTCAGGGACCTACCTGTTTTTGTTATGTTTGGGTTTTGTTGTTGTTGTTTTTTTAATCTGAAGGTATTGTGCATTATATGACCTGTAATACACAATTAAAGTCAATTTTAATGAACATGTAGTAAAAACT 79 | >Scaffold9299 80 | CAGCATCTACATAATATGATCGCTATTAAACTTAAATCTCCTTGACGGAGTCTTCGGTCATAACACAAACCCAGACCTACGTATATGACAAAGCTAATAGaactggtctttacctTTAAGTTG 81 | 82 | 83 | ------ 84 | 85 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 86 | 87 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fasta_nucleotide_changer.xml: -------------------------------------------------------------------------------- 1 | 2 | converter 3 | 4 | cat '$input' | 5 | fasta_nucleotide_changer $mode -v -o '$output' 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 34 | 35 | 36 | 37 | **What it does** 38 | 39 | This tool converts RNA FASTA files to DNA (and vice-versa). 40 | 41 | In **RNA-to-DNA** mode, U's are changed into T's. 42 | 43 | In **DNA-to-RNA** mode, T's are changed into U's. 44 | 45 | -------- 46 | 47 | **Example** 48 | 49 | Input RNA FASTA file ( from Sanger's mirBase ):: 50 | 51 | >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 52 | UGAGGUAGUAGGUUGUAUAGUU 53 | >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 54 | UCCCUGAGACCUCAAGUGUGA 55 | >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 56 | UGGAAUGUAAAGAAGUAUGUA 57 | 58 | Output DNA FASTA file (with RNA-to-DNA mode):: 59 | 60 | >cel-let-7 MIMAT0000001 Caenorhabditis elegans let-7 61 | TGAGGTAGTAGGTTGTATAGTT 62 | >cel-lin-4 MIMAT0000002 Caenorhabditis elegans lin-4 63 | TCCCTGAGACCTCAAGTGTGA 64 | >cel-miR-1 MIMAT0000003 Caenorhabditis elegans miR-1 65 | TGGAATGTAAAGAAGTATGTA 66 | 67 | ------ 68 | 69 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 70 | 71 | .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastq_masker.xml: -------------------------------------------------------------------------------- 1 | 2 | (based on quality) 3 | 4 | cat '$input' | 5 | fastq_masker 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | -v -q $cutoff -r '$maskchar' -o '$output' 12 | 13 | 14 | 15 | 16 | 17 | Nucleotides below this quality will be masked 18 | 19 | 20 | 21 | 22 | Replace low-quality nucleotides with this character. Common values: 'N' or '.' 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 38 | 39 | 40 | **What it does** 41 | 42 | This tool masks low-quality nucleotides in a FASTQ file, and replaces them with the specified mask character (**N** by default). 43 | 44 | -------- 45 | 46 | **Example** 47 | 48 | Input FASTQ file:: 49 | 50 | @1 51 | TATGGTCAGAAACCATATGC 52 | +1 53 | 40 40 40 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 54 | @2 55 | CAGCGAGGCTTTAATGCCAT 56 | +2 57 | 40 40 40 40 40 40 40 40 30 20 19 20 19 19 19 19 19 19 19 19 58 | @3 59 | CAGCGAGGCTTTAATGCCAT 60 | +3 61 | 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 19 19 19 62 | 63 | After masking nucleotides with quality lower than 20 with the character **N**:: 64 | 65 | @1 66 | TATGGTCAGAAANNNNNNNN 67 | +1 68 | 40 40 40 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 69 | @2 70 | CAGCGAGGCTNTNNNNNNNN 71 | +2 72 | 40 40 40 40 40 40 40 40 30 20 19 20 19 19 19 19 19 19 19 19 73 | @3 74 | CAGCGAGGCNNNNNNNNNNN 75 | +3 76 | 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 19 19 19 77 | 78 | 79 | ------ 80 | 81 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 82 | 83 | .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastq_quality_boxplot.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | fastq_quality_boxplot_graph.sh -t '$input.name' -i $input -o '$output' 5 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | 14 | 15 | 16 | **What it does** 17 | 18 | Creates a boxplot graph for the quality scores in the library. 19 | 20 | .. class:: infomark 21 | 22 | **TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool. 23 | 24 | ----- 25 | 26 | **Output Examples** 27 | 28 | * Black horizontal lines are medians 29 | * Rectangular red boxes show the Inter-quartile Range (IQR) (top value is Q3, bottom value is Q1) 30 | * Whiskers show outlier at max. 1.5*IQR 31 | 32 | 33 | An excellent quality library (median quality is 40 for almost all 36 cycles): 34 | 35 | .. image:: ../static/fastx_icons/fastq_quality_boxplot_1.png 36 | 37 | 38 | A relatively good quality library (median quality degrades towards later cycles): 39 | 40 | .. image:: ../static/fastx_icons/fastq_quality_boxplot_2.png 41 | 42 | A low quality library (median drops quickly): 43 | 44 | .. image:: ../static/fastx_icons/fastq_quality_boxplot_3.png 45 | 46 | ------ 47 | 48 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 49 | 50 | .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastq_quality_converter.xml: -------------------------------------------------------------------------------- 1 | 2 | (ASCII-Numeric) 3 | 4 | cat '$input' | 5 | fastq_quality_converter $QUAL_FORMAT -o '$output' -Q $offset 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 54 | 55 | 56 | 57 | **What it does** 58 | 59 | Converts a Solexa FASTQ file to/from numeric or ASCII quality format. 60 | 61 | .. class:: warningmark 62 | 63 | Re-scaling is **not** performed. (e.g. conversion from Phred scale to Solexa scale). 64 | 65 | 66 | ----- 67 | 68 | FASTQ with Numeric quality scores:: 69 | 70 | @CSHL__2_FC042AGWWWXX:8:1:120:202 71 | ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC 72 | +CSHL__2_FC042AGWWWXX:8:1:120:202 73 | 40 40 40 40 20 40 40 40 40 6 40 40 28 40 40 25 40 20 40 -1 30 40 14 27 40 8 1 3 7 -1 11 10 -1 21 10 8 74 | @CSHL__2_FC042AGWWWXX:8:1:103:1185 75 | ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC 76 | +CSHL__2_FC042AGWWWXX:8:1:103:1185 77 | 40 40 40 40 40 35 33 31 40 40 40 32 30 22 40 -0 9 22 17 14 8 36 15 34 22 12 23 3 10 -0 8 2 4 25 30 2 78 | 79 | 80 | FASTQ with ASCII quality scores:: 81 | 82 | @CSHL__2_FC042AGWWWXX:8:1:120:202 83 | ACGATAGATCGGAAGAGCTAGTATGCCGTTTTCTGC 84 | +CSHL__2_FC042AGWWWXX:8:1:120:202 85 | hhhhThhhhFhh\hhYhTh?^hN[hHACG?KJ?UJH 86 | @CSHL__2_FC042AGWWWXX:8:1:103:1185 87 | ATCACGATAGATCGGCAGAGCTCGTTTACCGTCTTC 88 | +CSHL__2_FC042AGWWWXX:8:1:103:1185 89 | hhhhhca_hhh`^Vh@IVQNHdObVLWCJ@HBDY^B 90 | 91 | ------ 92 | 93 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 94 | 95 | .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastq_quality_filter.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | cat '$input' | 6 | fastq_quality_filter 7 | #if $input.ext == "fastqsanger": 8 | -Q 33 9 | #elif $input.ext == "fastq": 10 | -Q 64 11 | #end if 12 | -q $quality -p $percent -v -o '$output' 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 46 | 47 | 48 | 49 | **What it does** 50 | 51 | This tool filters reads based on quality scores. 52 | 53 | .. class:: infomark 54 | 55 | Using **percent = 100** requires all cycles of all reads to be at least the quality cut-off value. 56 | 57 | .. class:: infomark 58 | 59 | Using **percent = 50** requires the median quality of the cycles (in each read) to be at least the quality cut-off value. 60 | 61 | -------- 62 | 63 | Quality score distribution (of all cycles) is calculated for each read. If it is lower than the quality cut-off value - the read is discarded. 64 | 65 | 66 | **Example**:: 67 | 68 | @CSHL_4_FC042AGOOII:1:2:214:584 69 | GACAATAAAC 70 | +CSHL_4_FC042AGOOII:1:2:214:584 71 | 30 30 30 30 30 30 30 30 20 10 72 | 73 | Using **percent = 50** and **cut-off = 30** - This read will not be discarded (the median quality is equal to / higher than 30). 74 | 75 | Using **percent = 90** and **cut-off = 30** - This read will be discarded (90% of the cycles do not have quality equal to / higher than 30). 76 | 77 | Using **percent = 100** and **cut-off = 20** - This read will be discarded (not all cycles have quality equal to / higher than 20). 78 | 79 | ------ 80 | 81 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 82 | 83 | .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastq_quality_trimmer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | cat '$input' | 5 | fastq_quality_trimmer 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | -v -t $cutoff -l $minlen -o '$output' 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | Nucleotides below this quality will be trimmed 20 | 21 | 22 | 23 | 24 | Sequences shorter than this length will be discarded. Leave at zero to keep all sequences 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 40 | 41 | 42 | **What it does** 43 | 44 | This tool scans the sequence from the end for the first nucleotide to possess the specified minimum quality score. It will then trim (remove nucleotides from) the sequence after this position. After trimming, sequences that are shorter than the minimum length are discarded. 
45 | 46 | -------- 47 | 48 | **Example** 49 | 50 | Input FASTQ file (with 20 bases in each sequence):: 51 | 52 | @1 53 | TATGGTCAGAAACCATATGC 54 | +1 55 | 40 40 40 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 56 | @2 57 | CAGCGAGGCTTTAATGCCAT 58 | +2 59 | 40 40 40 40 40 40 40 40 30 20 19 20 19 19 19 19 19 19 19 19 60 | @3 61 | CAGCGAGGCTTTAATGCCAT 62 | +3 63 | 40 40 40 40 40 40 40 40 20 19 19 19 19 19 19 19 19 19 19 19 64 | 65 | 66 | Trimming with a cutoff of 20, we get the following FASTQ file:: 67 | 68 | @1 69 | TATGGTCAGAAA 70 | +1 71 | 40 40 40 40 40 40 40 40 40 40 40 20 72 | @2 73 | CAGCGAGGCTTT 74 | +2 75 | 40 40 40 40 40 40 40 40 30 20 19 20 76 | @3 77 | CAGCGAGGC 78 | +3 79 | 40 40 40 40 40 40 40 40 20 80 | 81 | Trimming with a cutoff of 20 and a minimum length of 12, we get the following FASTQ file:: 82 | 83 | @1 84 | TATGGTCAGAAA 85 | +1 86 | 40 40 40 40 40 40 40 40 40 40 40 20 87 | @2 88 | CAGCGAGGCTTT 89 | +2 90 | 40 40 40 40 40 40 40 40 30 20 19 20 91 | 92 | ------ 93 | 94 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 95 | 96 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastq_to_fasta.xml: -------------------------------------------------------------------------------- 1 | 2 | converter 3 | 4 | cat '$input' | 5 | fastq_to_fasta 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | $SKIPN $RENAMESEQ -o '$output' -v 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 48 | 49 | 50 | 51 | 52 | **What it does** 53 | 54 | This tool converts data from Solexa format to FASTA format (scroll down for format description). 
55 | 56 | -------- 57 | 58 | **Example** 59 | 60 | The following data in Solexa-FASTQ format:: 61 | 62 | @CSHL_4_FC042GAMMII_2_1_517_596 63 | GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT 64 | +CSHL_4_FC042GAMMII_2_1_517_596 65 | 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 66 | 67 | Will be converted to FASTA (with 'rename sequence names' = NO):: 68 | 69 | >CSHL_4_FC042GAMMII_2_1_517_596 70 | GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT 71 | 72 | Will be converted to FASTA (with 'rename sequence names' = YES):: 73 | 74 | >1 75 | GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT 76 | 77 | ------ 78 | 79 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 80 | 81 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_artifacts_filter.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | cat '$input' | 5 | fastx_artifacts_filter 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | -v -o '$output' 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 35 | 36 | 37 | **What it does** 38 | 39 | This tool filters sequencing artifacts (reads with all but 3 identical bases). 
40 | 41 | -------- 42 | 43 | **The following is an example of sequences which will be filtered out**:: 44 | 45 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 46 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 47 | AAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 48 | AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 49 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 50 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 51 | AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAA 52 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 53 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 54 | AAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 55 | AAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 56 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 57 | CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC 58 | AAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 59 | AAAAAAAAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 60 | AAAAAAAAAAAAAAAAAAACACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA 61 | AAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 62 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAA 63 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 64 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAA 65 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACA 66 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAA 67 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAA 68 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAA 69 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAA 70 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAA 71 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAA 72 | AAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 73 | 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAA 74 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 75 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAA 76 | AAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 77 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAA 78 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 79 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAA 80 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAA 81 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAA 82 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 83 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 84 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAA 85 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAA 86 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAA 87 | 88 | ------ 89 | 90 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 91 | 92 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_barcode_splitter.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | fastx_barcode_splitter_galaxy_wrapper.sh $BARCODE $input "$input.name" "$output.files_path" --mismatches $mismatches --partial $partial $EOL > $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 35 | 36 | 37 | 38 | **What it does** 39 | 40 | This tool splits a FASTQ or FASTA file into several files, using barcodes as the split criteria. 41 | 42 | -------- 43 | 44 | **Barcode file Format** 45 | 46 | Barcode files are simple text files. 47 | Each line should contain an identifier (descriptive name for the barcode), and the barcode itself (A/C/G/T), separated by a TAB character. 
48 | Example:: 49 | 50 | #This line is a comment (starts with a 'number' sign) 51 | BC1 GATCT 52 | BC2 ATCGT 53 | BC3 GTGAT 54 | BC4 TGTCT 55 | 56 | For each barcode, a new FASTQ file will be created (with the barcode's identifier as part of the file name). 57 | Sequences matching the barcode will be stored in the appropriate file. 58 | 59 | One additional FASTQ file will be created (the 'unmatched' file), where sequences not matching any barcode will be stored. 60 | 61 | The output of this tool is an HTML file, displaying the split counts and the file locations. 62 | 63 | **Output Example** 64 | 65 | .. image:: ./static/fastx_icons/barcode_splitter_output_example.png 66 | 67 | ------ 68 | 69 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 70 | 71 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_barcode_splitter_galaxy_wrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # FASTX-toolkit - FASTA/FASTQ preprocessing tools. 4 | # Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) 5 | # 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU Affero General Public License as 8 | # published by the Free Software Foundation, either version 3 of the 9 | # License, or (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU Affero General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Affero General Public License 17 | # along with this program. If not, see . 18 | 19 | # 20 | #This is a shell script wrapper for 'fastx_barcode_splitter.pl' 21 | # 22 | # 1. 
Output files are saved at the dataset's files_path directory. 23 | # 24 | # 2. 'fastx_barcode_splitter.pl' outputs a textual table. 25 | # This script turns it into pretty HTML with working URL 26 | # (so lazy users can just click on the URLs and get their files) 27 | 28 | if [ "$1x" = "x" ]; then 29 | echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2 30 | exit 1 31 | fi 32 | 33 | BARCODE_FILE="$1" 34 | FASTQ_FILE="$2" 35 | LIBNAME="$3" 36 | OUTPUT_PATH="$4" 37 | shift 4 38 | # The rest of the parameters are passed to the split program 39 | 40 | if [ "${OUTPUT_PATH}x" = "x" ]; then 41 | echo "Usage: $0 [BARCODE FILE] [FASTQ FILE] [LIBRARY_NAME] [OUTPUT_PATH]" >&2 42 | exit 1 43 | fi 44 | 45 | #Sanitize library name, make sure we can create a file with this name 46 | LIBNAME=${LIBNAME%.gz} 47 | LIBNAME=${LIBNAME%.txt} 48 | LIBNAME=$(echo "$LIBNAME" | tr -cd '[:alnum:]') 49 | 50 | if [ ! -r "$FASTQ_FILE" ]; then 51 | echo "Error: Input file ($FASTQ_FILE) not found!" >&2 52 | exit 1 53 | fi 54 | if [ ! -r "$BARCODE_FILE" ]; then 55 | echo "Error: barcode file ($BARCODE_FILE) not found!" >&2 56 | exit 1 57 | fi 58 | mkdir -p "$OUTPUT_PATH" 59 | if [ ! -d "$OUTPUT_PATH" ]; then 60 | echo "Error: failed to create output path '$OUTPUT_PATH'" >&2 61 | exit 1 62 | fi 63 | 64 | PUBLICURL="" 65 | BASEPATH="$OUTPUT_PATH/" 66 | #PREFIX="$BASEPATH"`date "+%Y-%m-%d_%H%M__"`"${LIBNAME}__" 67 | PREFIX="$BASEPATH""${LIBNAME}__" 68 | SUFFIX=".txt" 69 | 70 | RESULTS=`gzip -cdf "$FASTQ_FILE" | fastx_barcode_splitter.pl --bcfile "$BARCODE_FILE" --prefix "$PREFIX" --suffix "$SUFFIX" "$@"` 71 | if [ $? != 0 ]; then 72 | echo "error" 73 | fi 74 | 75 | # 76 | # Convert the textual tab-separated table into simple HTML table, 77 | # with the local path replaces with a valid URL 78 | echo "" 79 | echo "$RESULTS" | sed -r "s|$BASEPATH(.*)|\\1|" | sed ' 80 | i
81 | s|\t||g 82 | a<\/td><\/tr> 83 | ' 84 | echo "

" 85 | echo "

" 86 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_clipper.xml: -------------------------------------------------------------------------------- 1 | 2 | adapter sequences 3 | 4 | cat '$input' | 5 | fastx_clipper 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | -l $minlength -a '$clip_source.clip_sequence' -d $keepdelta -o '$output' -v $KEEP_N $DISCARD_OPTIONS 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | use this for hairpin barcoding. keep at 0 unless you know what you're doing. 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 75 | 76 | 77 | 78 | **What it does** 79 | 80 | This tool clips adapters from the 3'-end of the sequences in a FASTA/FASTQ file. 81 | 82 | -------- 83 | 84 | 85 | **Clipping Illustration:** 86 | 87 | .. image:: ../static/fastx_icons/fastx_clipper_illustration.png 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | **Clipping Example:** 97 | 98 | .. image:: ../static/fastx_icons/fastx_clipper_example.png 99 | 100 | 101 | 102 | **In the above example:** 103 | 104 | * Sequence no. 1 was discarded since it wasn't clipped (i.e. didn't contain the adapter sequence). (**Output** parameter). 105 | * Sequence no. 5 was discarded --- it's length (after clipping) was shorter than 15 nt (**Minimum Sequence Length** parameter). 106 | 107 | 108 | ------ 109 | 110 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 111 | 112 | .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_collapser.xml: -------------------------------------------------------------------------------- 1 | 2 | sequences 3 | 4 | cat '$input' | 5 | fastx_collapser 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | -v -o '$output' 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 28 | 29 | 30 | 31 | **What it does** 32 | 33 | This tool collapses identical sequences in a FASTQ or FASTA file into a single sequence. 34 | 35 | -------- 36 | 37 | **Example** 38 | 39 | Example Input File (Sequence "ATAT" appears multiple times):: 40 | 41 | >CSHL_2_FC0042AGLLOO_1_1_605_414 42 | TGCG 43 | >CSHL_2_FC0042AGLLOO_1_1_537_759 44 | ATAT 45 | >CSHL_2_FC0042AGLLOO_1_1_774_520 46 | TGGC 47 | >CSHL_2_FC0042AGLLOO_1_1_742_502 48 | ATAT 49 | >CSHL_2_FC0042AGLLOO_1_1_781_514 50 | TGAG 51 | >CSHL_2_FC0042AGLLOO_1_1_757_487 52 | TTCA 53 | >CSHL_2_FC0042AGLLOO_1_1_903_769 54 | ATAT 55 | >CSHL_2_FC0042AGLLOO_1_1_724_499 56 | ATAT 57 | 58 | Example Output file:: 59 | 60 | >1-1 61 | TGCG 62 | >2-4 63 | ATAT 64 | >3-1 65 | TGGC 66 | >4-1 67 | TGAG 68 | >5-1 69 | TTCA 70 | 71 | .. class:: infomark 72 | 73 | Original Sequence Names / Lane descriptions (e.g. "CSHL_2_FC0042AGLLOO_1_1_742_502") are discarded. 74 | 75 | The output sequence name is composed of two numbers: the first is the sequence's number, the second is the multiplicity value. 76 | 77 | The following output:: 78 | 79 | >2-4 80 | ATAT 81 | 82 | means that the sequence "ATAT" is the second sequence in the file, and it appeared 4 times in the input FASTA file. 83 | 84 | ------ 85 | 86 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 87 | 88 | .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_nucleotides_distribution.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | fastx_nucleotide_distribution_graph.sh -t '$input.name' -i $input -o '$output' 4 | 5 | 6 | 7 | 8 | 9 | 10 | 12 | 13 | 14 | 15 | **What it does** 16 | 17 | Creates a stacked-histogram graph for the nucleotide distribution in the Solexa library. 18 | 19 | .. class:: infomark 20 | 21 | **TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool. 22 | 23 | ----- 24 | 25 | **Output Examples** 26 | 27 | The following chart clearly shows the barcode used at the 5'-end of the library: **GATCT** 28 | 29 | .. image:: ./static/fastx_icons/fastq_nucleotides_distribution_1.png 30 | 31 | In the following chart, one can almost 'read' the most abundant sequence by looking at the dominant values: **TGATA TCGTA TTGAT GACTG AA...** 32 | 33 | .. image:: ./static/fastx_icons/fastq_nucleotides_distribution_2.png 34 | 35 | The following chart shows a growing number of unknown (N) nucleotides towards later cycles (which might indicate a sequencing problem): 36 | 37 | .. image:: ./static/fastx_icons/fastq_nucleotides_distribution_3.png 38 | 39 | But most of the time, the chart will look rather random: 40 | 41 | .. image:: ./static/fastx_icons/fastq_nucleotides_distribution_4.png 42 | 43 | ------ 44 | 45 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 46 | 47 | .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_nucleotides_distribution_line.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | fastx_nucleotide_distribution_line_graph.sh -i '$input' -o '$output' 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | 14 | 15 | 16 | **What it does** 17 | 18 | Creates a line and points graph for the nucleotide distribution in the Solexa library. 19 | 20 | .. class:: infomark 21 | 22 | **TIP:** Use the **FASTQ Statistics** tool to generate the report file needed for this tool. 23 | 24 | ----- 25 | 26 | **Output Examples** 27 | 28 | .. image:: ../static/fastx_icons/fastq_nucleotides_distribution_line_graph.png 29 | 30 | ------ 31 | 32 | This tool was created by Oliver Tam, based on `FASTX-toolkit`__ by Assaf Gordon. 33 | 34 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_renamer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | cat '$input' | 5 | fastx_renamer 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | -n $TYPE -o '$output' -v 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | **What it does** 36 | 37 | This tool renames the sequence identifiers in a FASTQ/A file. 38 | 39 | .. class:: infomark 40 | 41 | Use this tool at the beginning of your workflow, as a way to keep the original sequence (before trimming, clipping, barcode-removal, etc). 
42 | 43 | -------- 44 | 45 | **Example** 46 | 47 | The following Solexa-FASTQ file:: 48 | 49 | @CSHL_4_FC042GAMMII_2_1_517_596 50 | GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT 51 | +CSHL_4_FC042GAMMII_2_1_517_596 52 | 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 53 | 54 | Renamed to **nucleotides sequence**:: 55 | 56 | @GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT 57 | GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT 58 | +GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT 59 | 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 60 | 61 | Renamed to **numeric counter**:: 62 | 63 | @1 64 | GGTCAATGATGAGTTGGCACTGTAGGCACCATCAAT 65 | +1 66 | 40 40 40 40 40 40 40 40 40 40 38 40 40 40 40 40 14 40 40 40 40 40 36 40 13 14 24 24 9 24 9 40 10 10 15 40 67 | 68 | ------ 69 | 70 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 71 | 72 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_reverse_complement.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | cat '$input' | 5 | fastx_reverse_complement 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | -v -o '$output' 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 33 | 34 | 35 | 36 | **What it does** 37 | 38 | This tool reverse-complements each sequence in a library. 39 | If the library is a FASTQ, the quality-scores are also reversed. 
40 | 41 | -------- 42 | 43 | **Example** 44 | 45 | Input FASTQ file:: 46 | 47 | @CSHL_1_FC42AGWWWXX:8:1:3:740 48 | TGTCTGTAGCCTCNTCCTTGTAATTCAAAGNNGGTA 49 | +CSHL_1_FC42AGWWWXX:8:1:3:740 50 | 33 33 33 34 33 33 33 33 33 33 33 33 27 5 27 33 33 33 33 33 33 27 21 27 33 32 31 29 26 24 5 5 15 17 27 26 51 | 52 | 53 | Output FASTQ file:: 54 | 55 | @CSHL_1_FC42AGWWWXX:8:1:3:740 56 | TACCNNCTTTGAATTACAAGGANGAGGCTACAGACA 57 | +CSHL_1_FC42AGWWWXX:8:1:3:740 58 | 26 27 17 15 5 5 24 26 29 31 32 33 27 21 27 33 33 33 33 33 33 27 5 27 33 33 33 33 33 33 33 33 34 33 33 33 59 | 60 | ------ 61 | 62 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 63 | 64 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_trimmer.xml: -------------------------------------------------------------------------------- 1 | 2 | to fixed length 3 | 4 | cat '$input' | 5 | fastx_trimmer 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | -v -f $first -l $last -o '$output' 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 45 | 46 | 47 | **What it does** 48 | 49 | This tool trims (cut nucleotides from) sequences in a FASTA/Q file. 
50 | 51 | -------- 52 | 53 | **Example** 54 | 55 | Input Fasta file (with 36 bases in each sequences):: 56 | 57 | >1-1 58 | TATGGTCAGAAACCATATGCAGAGCCTGTAGGCACC 59 | >2-1 60 | CAGCGAGGCTTTAATGCCATTTGGCTGTAGGCACCA 61 | 62 | 63 | Trimming with First=1 and Last=21, we get a FASTA file with 21 bases in each sequences (starting from the first base):: 64 | 65 | >1-1 66 | TATGGTCAGAAACCATATGCA 67 | >2-1 68 | CAGCGAGGCTTTAATGCCATT 69 | 70 | Trimming with First=6 and Last=10, will generate a FASTA file with 5 bases (bases 6,7,8,9,10) in each sequences:: 71 | 72 | >1-1 73 | TCAGA 74 | >2-1 75 | AGGCT 76 | 77 | ------ 78 | 79 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 80 | 81 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_trimmer_from_end.xml: -------------------------------------------------------------------------------- 1 | 2 | of sequences 3 | 4 | cat '$input' | 5 | fastx_trimmer 6 | #if $input.ext == "fastqsanger": 7 | -Q 33 8 | #elif $input.ext == "fastq": 9 | -Q 64 10 | #end if 11 | -v -t $trimnum -m $minlen -o '$output' 12 | 13 | 14 | 15 | 16 | 17 | 18 | This will trim from the end of the sequences 19 | 20 | 21 | 22 | 23 | Sequences shorter than this length will be discarded 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 39 | 40 | 41 | **What it does** 42 | 43 | This tool trims (cut nucleotides from) sequences in a FASTQ/FASTA file from the 3' end. 44 | 45 | .. class:: infomark 46 | 47 | When trimming a FASTQ file, the quality scores will be trimmed appropriately (to the same length of the corresponding sequence). 
48 | 49 | -------- 50 | 51 | **Example** 52 | 53 | Input Fasta file:: 54 | 55 | >1-1 56 | TATGGTCAGAAACCATATGCAGAGCCTGTAGGCACC 57 | >2-1 58 | CAGCGAGGCTTTAATGCCATT 59 | 60 | 61 | Trimming 5 nucleotides from the end, and discarding sequences shorter than 10, we get the following FASTA file:: 62 | 63 | >1-1 64 | TATGGTCAGAAACCATATGCAGAGCCTGTAG 65 | >2-1 66 | CAGCGAGGCTTTAATG 67 | 68 | Trimming 10 nucleotides from the end, and discarding sequences shorter than 15, we get the following FASTA file:: 69 | 70 | >1-1 71 | TATGGTCAGAAACCATATGCAGAGCC 72 | 73 | ------ 74 | 75 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 76 | 77 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/fastx_uncollapser.xml: -------------------------------------------------------------------------------- 1 | 2 | sequences 3 | 4 | cat '$input' | 5 | fastx_uncollapser -v -o '$output' 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 21 | 22 | 23 | 24 | **What it does** 25 | 26 | This tool uncollapses a previously-collapsed FASTA file. It reads each collapsed sequence and generates multiple sequences based on the collapsed read count. 27 | 28 | -------- 29 | 30 | **Example** 31 | 32 | Example Input - a collapsed FASTA file (Sequence "ATAT" has four collapsed reads):: 33 | 34 | >1-1 35 | TGCG 36 | >2-4 37 | ATAT 38 | 39 | Example Output - uncollapsed FASTA file (Sequence "ATAT" now appears as 4 separate sequences):: 40 | 41 | >1 42 | TGCG 43 | >2 44 | ATAT 45 | >3 46 | ATAT 47 | >4 48 | ATAT 49 | >5 50 | ATAT 51 | 52 | .. class:: infomark 53 | 54 | The original sequence ids (with the read counts) are discarded, and each output sequence is given a numerical name. 55 | 56 | ----- 57 | 58 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 59 | 60 | .. 
__: http://hannonlab.cshl.edu/fastx_toolkit/ 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /galaxy/tools/fastx_toolkit/seqid_uncollapser.xml: -------------------------------------------------------------------------------- 1 | 2 | containing collapsed sequence IDs 3 | 4 | cat '$input' | 5 | fastx_uncollapser -c $idcol -v -o '$output' 6 | 7 | 8 | 9 | 10 | This column contains the sequence id from a collapsed FASTA file in the form of "(seq number)-(read count)" (e.g. 15-4). Use 10 if you're analyzing BLAT output 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 24 | 25 | 26 | 27 | **What it does** 28 | 29 | This tool reads a row (in a table) containing a collapsed sequence ID, and duplicates the row once for each read counted in that ID. 30 | 31 | .. class:: warningmark 32 | 33 | You must specify the column containing the collapsed sequence ID (e.g. 15-4). 34 | 35 | -------- 36 | 37 | **Example Input File** 38 | 39 | The following input file contains two collapsed sequence identifiers at column 10: *84-2* and *87-5* 40 | 41 | (meaning the first has multiplicity-count of 2 and the second has multiplicity count of 5):: 42 | 43 | 44 | 23 0 0 0 0 0 0 0 + 84-2 ... 45 | 22 0 0 0 0 0 0 0 + 87-5 ... 46 | 47 | 48 | **Output Example** 49 | 50 | After **uncollapsing** (on column 10), the line of the first sequence-identifier is repeated *twice*, and the line of the second sequence-identifier is repeated *five* times:: 51 | 52 | 23 0 0 0 0 0 0 0 + 84-2 ... 53 | 23 0 0 0 0 0 0 0 + 84-2 ... 54 | 22 0 0 0 0 0 0 0 + 87-5 ... 55 | 22 0 0 0 0 0 0 0 + 87-5 ... 56 | 22 0 0 0 0 0 0 0 + 87-5 ... 57 | 22 0 0 0 0 0 0 0 + 87-5 ... 58 | 22 0 0 0 0 0 0 0 + 87-5 ... 59 | 60 | 61 | Uncollapsing a text file allows analysis of collapsed FASTA files to be used with any tool which doesn't 'understand' collapsed multiplicity counts. 62 | 63 | .. 
class:: infomark 64 | 65 | See the *Collapse* tool in the *FASTA Manipulation* category for more details about collapsing FASTA files. 66 | 67 | ----- 68 | 69 | This tool is based on `FASTX-toolkit`__ by Assaf Gordon. 70 | 71 | .. __: http://hannonlab.cshl.edu/fastx_toolkit/ 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /install_galaxy_files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # Arguments check and usage information 5 | # 6 | SRC="." 7 | DEST="$1" 8 | if [ -z "$DEST" ]; then 9 | cat< 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | # Install m4 macros in this directory 12 | m4datadir = $(datadir)/aclocal 13 | 14 | # List your m4 macros here 15 | m4macros = ax_c_long_long.m4 \ 16 | ax_cxx_compile_stdcxx_11.m4 \ 17 | ax_cxx_header_stdcxx_tr1.m4 18 | 19 | # The following is boilerplate 20 | m4data_DATA = $(m4macros) 21 | EXTRA_DIST = $(m4data_DATA) 22 | 23 | -------------------------------------------------------------------------------- /m4/ax_c_long_long.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.nongnu.org/autoconf-archive/ax_c_long_long.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_C_LONG_LONG 8 | # 9 | # DESCRIPTION 10 | # 11 | # Provides a test for the existence of the long long int type and defines 12 | # HAVE_LONG_LONG if it is found. 13 | # 14 | # LICENSE 15 | # 16 | # Copyright (c) 2008 Caolan McNamara 17 | # 18 | # Copying and distribution of this file, with or without modification, are 19 | # permitted in any medium without royalty provided the copyright notice 20 | # and this notice are preserved. This file is offered as-is, without any 21 | # warranty. 
22 | 23 | #serial 4 24 | 25 | AU_ALIAS([AC_C_LONG_LONG], [AX_C_LONG_LONG]) 26 | AC_DEFUN([AX_C_LONG_LONG], 27 | [AC_CACHE_CHECK(for long long int, ac_cv_c_long_long, 28 | [if test "$GCC" = yes; then 29 | ac_cv_c_long_long=yes 30 | else 31 | AC_TRY_COMPILE(,[long long int i;], 32 | ac_cv_c_long_long=yes, 33 | ac_cv_c_long_long=no) 34 | fi]) 35 | if test $ac_cv_c_long_long = yes; then 36 | AC_DEFINE(HAVE_LONG_LONG, 1, [compiler understands long long]) 37 | fi 38 | ]) 39 | -------------------------------------------------------------------------------- /m4/ax_cxx_header_stdcxx_tr1.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.nongnu.org/autoconf-archive/ax_cxx_header_stdcxx_tr1.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_CXX_HEADER_STDCXX_TR1 8 | # 9 | # DESCRIPTION 10 | # 11 | # Check for library coverage of the TR1 standard. 12 | # 13 | # LICENSE 14 | # 15 | # Copyright (c) 2008 Benjamin Kosnik 16 | # 17 | # Copying and distribution of this file, with or without modification, are 18 | # permitted in any medium without royalty provided the copyright notice 19 | # and this notice are preserved. This file is offered as-is, without any 20 | # warranty. 21 | ## 22 | ## Modified by A. 
Gordon (assafgordon@gmail.com), 1-Feb-2010 23 | ## Removed unused header files (which can't be found on some TR1 gcc-4.2.4 on CentOS 5.4) 24 | ## 25 | 26 | #serial 5 27 | 28 | AU_ALIAS([AC_CXX_HEADER_STDCXX_TR1], [AX_CXX_HEADER_STDCXX_TR1]) 29 | AC_DEFUN([AX_CXX_HEADER_STDCXX_TR1], [ 30 | AC_CACHE_CHECK(for ISO C++ TR1 include files, 31 | ax_cv_cxx_stdcxx_tr1, 32 | [AC_LANG_SAVE 33 | AC_LANG_CPLUSPLUS 34 | AC_TRY_COMPILE([ 35 | #include 36 | ],, 37 | ax_cv_cxx_stdcxx_tr1=yes, ax_cv_cxx_stdcxx_tr1=no) 38 | AC_LANG_RESTORE 39 | ]) 40 | if test "$ax_cv_cxx_stdcxx_tr1" = yes; then 41 | AC_DEFINE(STDCXX_TR1_HEADERS,,[Define if ISO C++ TR1 header files are present. ]) 42 | fi 43 | ]) 44 | -------------------------------------------------------------------------------- /reconf: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | #!/bin/sh 12 | rm -f config.cache 13 | mkdir -p config 14 | echo "- libtoolize." 15 | libtoolize -i 16 | echo "- aclocal." 17 | aclocal -I m4 18 | echo "- autoconf." 19 | autoconf 20 | echo "- autoheader." 21 | autoheader 22 | echo "- automake." 
23 | automake -a 24 | exit 25 | -------------------------------------------------------------------------------- /scripts/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | bin_SCRIPTS = fastx_barcode_splitter.pl \ 12 | fastx_nucleotide_distribution_graph.sh \ 13 | fastx_nucleotide_distribution_line_graph.sh \ 14 | fastq_quality_boxplot_graph.sh \ 15 | fasta_clipping_histogram.pl 16 | 17 | EXTRA_DIST = fastx_barcode_splitter.pl \ 18 | fastx_nucleotide_distribution_graph.sh \ 19 | fastx_nucleotide_distribution_line_graph.sh \ 20 | fastq_quality_boxplot_graph.sh \ 21 | fasta_clipping_histogram.pl 22 | -------------------------------------------------------------------------------- /scripts/fasta_clipping_histogram.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | # FASTX-toolkit - FASTA/FASTQ preprocessing tools. 4 | # Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) 5 | # 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU Affero General Public License as 8 | # published by the Free Software Foundation, either version 3 of the 9 | # License, or (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 14 | # GNU Affero General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Affero General Public License 17 | # along with this program. If not, see . 18 | 19 | use strict; 20 | use warnings; 21 | use GD::Graph::bars; 22 | use Data::Dumper; 23 | use PerlIO::gzip; 24 | 25 | if (scalar @ARGV==0) { 26 | print<$ARGV[1]") or die "Cannot create output file $ARGV[1]\n"; 44 | binmode OUT; 45 | 46 | my %histogram ; 47 | 48 | while (my $name = ) { 49 | my $sequence = ; 50 | chomp $sequence; 51 | 52 | my $sequence_length = length($sequence); 53 | 54 | my $count; 55 | 56 | if ( index($name, "-")==-1 ) { 57 | #Assume this file is not collapsed, just count each sequence as 1 58 | $count = 1 ; 59 | } else { 60 | #Assume file is collapsed (that is - sequence-ID has two numbers with a separating dash) 61 | ($count) = $name =~ /^\>[^-]+\-(\d+)$/ ; 62 | 63 | # If the match failed, treat this fasta as not collapsed; 64 | $count = 1 if not defined $count ; 65 | } 66 | 67 | $histogram{$sequence_length} += $count ; 68 | } 69 | 70 | #Textual Output 71 | if (0) { 72 | print "Length\tCount\n"; 73 | foreach my $length_key ( sort { $a <=> $b } keys %histogram ) { 74 | print $length_key,"\t", $histogram{$length_key},"\n"; 75 | } 76 | exit 0; 77 | } 78 | 79 | ## Build the data as required by GD::Graph::bars. 80 | ## Data list has two items (each item is itself a list) 81 | ## 1. a list of x-axis labels (these are the keys from the histogram) 82 | ## 2. 
a list of values 83 | my @data = ( 84 | [ sort { $a <=> $b } keys %histogram ], 85 | [ map { $histogram{$_} } sort { $a <=> $b } keys %histogram ] ) ; 86 | 87 | my $graph = new GD::Graph::bars (1000,800); 88 | 89 | $graph->set( 90 | x_label => 'Length', 91 | y_label => 'Amount', 92 | title => 'Sequences lengths Distribution (after clipping)', 93 | bar_spacing => 10, 94 | transparent => 0, 95 | t_margin => 10, 96 | y_tick_number => 20, 97 | y_long_ticks => 1, 98 | ) or die $graph->error; 99 | 100 | $graph->plot(\@data) or die $graph->error; 101 | print OUT $graph->gd->png; 102 | 103 | close IN; 104 | close OUT; 105 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | SUBDIRS = libfastx \ 12 | fastx_clipper \ 13 | fastx_trimmer \ 14 | fastx_quality_stats \ 15 | fastq_quality_converter \ 16 | fastq_to_fasta \ 17 | fastq_quality_filter \ 18 | fastq_quality_trimmer \ 19 | fastx_artifacts_filter \ 20 | fastx_reverse_complement \ 21 | fastx_collapser \ 22 | fastx_uncollapser \ 23 | seqalign_test \ 24 | fasta_formatter \ 25 | fasta_nucleotide_changer \ 26 | fastx_renamer \ 27 | fastq_masker 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /src/fasta_formatter/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fasta_formatter 13 | 14 | AM_CPPFLAGS = $(CC_WARNINGS) $(GTEXTUTILS_CFLAGS) \ 15 | -I$(top_srcdir)/src/libfastx 16 | 17 | LDADD = $(GTEXTUTILS_LIBS) ../libfastx/libfastx.a $(LT_LDFLAGS) 18 | 19 | fasta_formatter_SOURCES = fasta_formatter.cpp \ 20 | sequence_writers.h 21 | 22 | -------------------------------------------------------------------------------- /src/fasta_formatter/sequence_writers.h: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. 
Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #ifndef __SEQUENCE_WRITERS__ 19 | #define __SEQUENCE_WRITERS__ 20 | 21 | #include 22 | #include 23 | 24 | class SequencesWriter 25 | { 26 | public: 27 | virtual ~SequencesWriter() {} 28 | virtual void write ( const std::string& sequence_id, const std::string& sequence_bases ) = 0 ; 29 | }; 30 | 31 | class EmptySequencesFilter : public SequencesWriter 32 | { 33 | private: 34 | SequencesWriter* upstream ; 35 | 36 | public: 37 | EmptySequencesFilter ( SequencesWriter * _upstream ) : upstream(_upstream) {} 38 | 39 | ~EmptySequencesFilter() 40 | { 41 | delete upstream; 42 | } 43 | 44 | virtual void write ( const std::string & sequence_id, const std::string& sequence_bases) 45 | { 46 | if ( !sequence_bases.empty() ) 47 | upstream->write ( sequence_id, sequence_bases ) ; 48 | } 49 | }; 50 | 51 | class SingleLineFastaWriter : public SequencesWriter 52 | { 53 | private: 54 | std::ostream& ostrm ; 55 | public: 56 | SingleLineFastaWriter ( std::ostream& output_stream ) : ostrm ( output_stream ) { } 57 | 58 | virtual void write ( const std::string & sequence_id, const std::string& sequence_bases ) 59 | { 60 | ostrm << sequence_id << std::endl; 61 | if ( !sequence_bases.empty() ) 62 | ostrm << sequence_bases << std::endl ; 63 | } 64 | }; 65 | 66 | class MultiLineFastaWriter : public 
SequencesWriter 67 | { 68 | private: 69 | std::ostream& ostrm ; 70 | size_t max_width ; 71 | 72 | public: 73 | MultiLineFastaWriter ( std::ostream& output_stream, size_t _max_width ) : 74 | ostrm ( output_stream ), max_width ( _max_width ) 75 | { 76 | } 77 | 78 | virtual void write ( const std::string & sequence_id, const std::string& sequence_bases ) 79 | { 80 | ostrm << sequence_id << std::endl; 81 | if ( !sequence_bases.empty() ) { 82 | size_t start = 0 ; 83 | while ( (sequence_bases.length() - start) >= max_width ) { 84 | ostrm << sequence_bases.substr ( start, max_width ) << std::endl; 85 | start += max_width ; 86 | } 87 | if ( sequence_bases.length() - start > 0 ) 88 | ostrm << sequence_bases.substr ( start ) << std::endl ; 89 | } 90 | } 91 | }; 92 | 93 | class TabulatedFastaWriter : public SequencesWriter 94 | { 95 | private: 96 | std::ostream& ostrm ; 97 | public: 98 | TabulatedFastaWriter ( std::ostream& output_stream ) : ostrm ( output_stream ) { } 99 | 100 | virtual void write ( const std::string & sequence_id, const std::string& sequence_bases ) 101 | { 102 | ostrm << sequence_id.substr(1) ; 103 | if ( !sequence_bases.empty() ) { 104 | ostrm << "\t" ; 105 | ostrm << sequence_bases ; 106 | } 107 | ostrm << std::endl; 108 | } 109 | }; 110 | 111 | #endif 112 | 113 | -------------------------------------------------------------------------------- /src/fasta_nucleotide_changer/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | 12 | bin_PROGRAMS = fasta_nucleotide_changer 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fasta_nucleotide_changer_SOURCES = fasta_nucleotide_changer.c 19 | 20 | fasta_nucleotide_changer_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fasta_nucleotide_changer/fasta_nucleotide_changer.c: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include "fastx.h" 29 | #include "fastx_args.h" 30 | 31 | #define MAX_ADAPTER_LEN 100 32 | 33 | const char* usage= 34 | "usage: fasta_nucleotide_changer [-h] [-z] [-v] [-i INFILE] [-o OUTFILE] [-r] [-d]\n" \ 35 | "Part of " PACKAGE_STRING " by A. Gordon (assafgordon@gmail.com)\n" \ 36 | "\n" \ 37 | " [-h] = This helpful help screen.\n" \ 38 | " [-z] = Compress output with GZIP.\n" \ 39 | " [-v] = Verbose mode. 
Prints a short summary.\n" \ 40 | " with [-o], summary is printed to STDOUT.\n" \ 41 | " Otherwise, summary is printed to STDERR.\n" \ 42 | " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \ 43 | " [-o OUTFILE] = FASTA/Q output file. default is STDOUT.\n" \ 44 | " [-r] = DNA-to-RNA mode - change T's into U's.\n" \ 45 | " [-d] = RNA-to-DNA mode - change U's into T's.\n" \ 46 | "\n"; 47 | 48 | int flag_dna_mode = 0; 49 | int flag_rna_mode = 0; 50 | 51 | FASTX fastx; 52 | 53 | int parse_program_args(int __attribute__((unused)) optind, int optc, char __attribute__((unused))* optarg) 54 | { 55 | switch(optc) { 56 | case 'd': 57 | flag_dna_mode = 1 ; 58 | break; 59 | 60 | case 'r': 61 | flag_rna_mode = 1 ; 62 | break; 63 | 64 | default: 65 | errx(1, __FILE__ ":%d: Unknown argument (%c)", __LINE__, optc ) ; 66 | } 67 | return 1; 68 | } 69 | 70 | 71 | int main(int argc, char* argv[]) 72 | { 73 | size_t i; 74 | char nuc_from ; 75 | char nuc_to; 76 | size_t changes_count=0 ; 77 | 78 | fastx_parse_cmdline(argc, argv, "rd", parse_program_args); 79 | 80 | if ( !flag_dna_mode && !flag_rna_mode ) 81 | errx(1,"Please specify either RNA mode (-r) or DNA mode (-d)" ); 82 | 83 | if ( flag_dna_mode && flag_rna_mode ) 84 | errx(1,"RNA mode (-r) and DNA mode (-d) can not be used together." ); 85 | 86 | if ( flag_dna_mode ) { 87 | nuc_from = 'U'; 88 | nuc_to = 'T'; 89 | } 90 | if ( flag_rna_mode ) { 91 | nuc_from = 'T'; 92 | nuc_to = 'U'; 93 | } 94 | 95 | fastx_init_reader(&fastx, get_input_filename(), 96 | FASTA_OR_FASTQ, ALLOW_N | ALLOW_U, REQUIRE_UPPERCASE, 97 | get_fastq_ascii_quality_offset() ); 98 | 99 | fastx_init_writer(&fastx, get_output_filename(), OUTPUT_FASTA, compress_output_flag()); 100 | 101 | while ( fastx_read_next_record(&fastx) ) { 102 | 103 | for (i=0; i < strlen(fastx.nucleotides) ; i++) { 104 | if ( fastx.nucleotides[i] == nuc_to ) { 105 | errx(1,"Error: found '%c' nucleotide on line %lld. 
(input should not contain '%c' nucleotides in %s mode)", 106 | nuc_to, fastx.input_line_number, nuc_to,(flag_dna_mode)?"RNA-to-DNA":"DNA-to-RNA" ); 107 | } 108 | if ( fastx.nucleotides[i] == nuc_from ) { 109 | fastx.nucleotides[i] = nuc_to ; 110 | changes_count++; 111 | } 112 | } 113 | 114 | fastx_write_record(&fastx); 115 | } 116 | 117 | if ( verbose_flag() ) { 118 | fprintf(get_report_file(), "Mode: %s\n", (flag_dna_mode)?"RNA-to-DNA":"DNA-to-RNA" ) ; 119 | fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ; 120 | fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ; 121 | fprintf(get_report_file(), "Nucleotides changed: %zu\n", changes_count ) ; 122 | } 123 | return 0; 124 | } 125 | -------------------------------------------------------------------------------- /src/fastq_masker/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fastq_masker 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastq_masker_SOURCES = fastq_masker.c 19 | 20 | fastq_masker_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastq_masker/fastq_masker.c: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. 
Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include "fastx.h" 29 | #include "fastx_args.h" 30 | 31 | const char* usage= 32 | "usage: fastq_masker [-h] [-v] [-q N] [-r C] [-z] [-i INFILE] [-o OUTFILE]\n" \ 33 | "Part of " PACKAGE_STRING " by A. Gordon (assafgordon@gmail.com)\n" \ 34 | "\n" \ 35 | " [-h] = This helpful help screen.\n" \ 36 | " [-q N] = Quality threshold - nucleotides with lower quality will be masked\n" \ 37 | " Default is 10.\n" \ 38 | " [-r C] = Replace low-quality nucleotides with character C. Default is 'N'\n" \ 39 | " [-z] = Compress output with GZIP.\n" \ 40 | " [-i INFILE] = FASTQ input file. default is STDIN.\n" \ 41 | " [-o OUTFILE] = FASTQ output file. 
default is STDOUT.\n" \ 42 | " [-v] = Verbose - report number of sequences.\n" \ 43 | " If [-o] is specified, report will be printed to STDOUT.\n" \ 44 | " If [-o] is not specified (and output goes to STDOUT),\n" \ 45 | " report will be printed to STDERR.\n" \ 46 | "\n"; 47 | 48 | int min_quality_threshold=10; 49 | char mask_character='N'; 50 | 51 | FASTX fastx; 52 | 53 | int parse_program_args(int __attribute__((unused)) optind, int optc, char* optarg) 54 | { 55 | switch(optc) { 56 | case 'q': 57 | if (optarg==NULL) 58 | errx(1, "[-q] parameter requires an argument value"); 59 | min_quality_threshold = atoi(optarg); 60 | if (min_quality_threshold<-40) 61 | errx(1,"Invalid minimum length value (-q %s)", optarg); 62 | break; 63 | 64 | case 'r': 65 | if (optarg==NULL) 66 | errx(1, "[-r] parameter requires an argument value"); 67 | if (strlen(optarg)!=1) 68 | errx(1, "[-r] parameter requires a single character as value"); 69 | mask_character = optarg[0]; 70 | break; 71 | 72 | default: 73 | errx(1, __FILE__ ":%d: Unknown argument (%c)", __LINE__, optc ) ; 74 | } 75 | return 1; 76 | } 77 | 78 | int main(int argc, char* argv[]) 79 | { 80 | int i ; 81 | size_t masked_reads_count=0; 82 | size_t masked_nucleotides_count=0; 83 | 84 | fastx_parse_cmdline(argc, argv, "q:r:", parse_program_args); 85 | 86 | fastx_init_reader(&fastx, get_input_filename(), 87 | FASTQ_ONLY, ALLOW_N, REQUIRE_UPPERCASE, 88 | get_fastq_ascii_quality_offset() ); 89 | 90 | fastx_init_writer(&fastx, get_output_filename(), OUTPUT_SAME_AS_INPUT, compress_output_flag()); 91 | 92 | while ( fastx_read_next_record(&fastx) ) { 93 | 94 | int masked = 0; 95 | 96 | //Scan each sequence - backwards 97 | for ( i=0; i<(int)strlen(fastx.nucleotides); ++i ) { 98 | if ( fastx.quality[i] < min_quality_threshold ) { 99 | fastx.nucleotides[i] = mask_character ; 100 | masked = 1; 101 | ++masked_nucleotides_count; 102 | } 103 | } 104 | if (masked) 105 | masked_reads_count += get_reads_count(&fastx); 106 | 107 | 
fastx_write_record(&fastx); 108 | } 109 | // 110 | //Print verbose report 111 | if ( verbose_flag() ) { 112 | fprintf(get_report_file(), "Minimum Quality Threshold: %d\n", min_quality_threshold); 113 | fprintf(get_report_file(), "Low-quality nucleotides replaced with '%c'\n", mask_character); 114 | 115 | fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ; 116 | fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ; 117 | 118 | fprintf(get_report_file(), "Masked reads: %zu\n", masked_reads_count ) ; 119 | fprintf(get_report_file(), "Masked nucleotides: %zu\n", masked_nucleotides_count ) ; 120 | } 121 | 122 | return 0; 123 | } 124 | -------------------------------------------------------------------------------- /src/fastq_quality_converter/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fastq_quality_converter 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastq_quality_converter_SOURCES = fastq_quality_converter.c 19 | 20 | fastq_quality_converter_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastq_quality_converter/fastq_quality_converter.c: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. 
Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include "fastx.h" 29 | #include "fastx_args.h" 30 | 31 | const char* usage= 32 | "usage: fastq_quality_converter [-h] [-a] [-n] [-z] [-i INFILE] [-f OUTFILE]\n" \ 33 | "Part of " PACKAGE_STRING " by A. Gordon (assafgordon@gmail.com)\n" \ 34 | "\n" \ 35 | " [-h] = This helpful help screen.\n" \ 36 | " [-a] = Output ASCII quality scores (default).\n" \ 37 | " [-n] = Output numeric quality scores.\n" \ 38 | " [-z] = Compress output with GZIP.\n" \ 39 | " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \ 40 | " [-o OUTFILE] = FASTA output file. 
default is STDOUT.\n" \ 41 | "\n"; 42 | 43 | FASTX fastx; 44 | int flag_output_ascii = 1; 45 | 46 | int parse_program_args(int __attribute__((unused)) optind, int optc, char __attribute__((unused)) *optarg) 47 | { 48 | switch(optc) { 49 | case 'a': //this is the default, nothing to change 50 | break; 51 | 52 | case 'n': 53 | flag_output_ascii = 0 ; 54 | break; 55 | default: 56 | errx(1, __FILE__ ":%d: Unknown argument (%c)", __LINE__, optc ) ; 57 | } 58 | return 1; 59 | } 60 | 61 | 62 | int main(int argc, char* argv[]) 63 | { 64 | fastx_parse_cmdline(argc, argv, "an", parse_program_args); 65 | 66 | fastx_init_reader(&fastx, get_input_filename(), 67 | FASTQ_ONLY, ALLOW_N, REQUIRE_UPPERCASE, 68 | get_fastq_ascii_quality_offset() ); 69 | 70 | fastx_init_writer(&fastx, get_output_filename(), 71 | flag_output_ascii ? OUTPUT_FASTQ_ASCII_QUAL : OUTPUT_FASTQ_NUMERIC_QUAL, 72 | compress_output_flag()); 73 | 74 | while ( fastx_read_next_record(&fastx) ) { 75 | fastx_write_record(&fastx); 76 | } 77 | 78 | //Print verbose report 79 | if ( verbose_flag() ) { 80 | fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ; 81 | fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ; 82 | } 83 | return 0; 84 | } 85 | -------------------------------------------------------------------------------- /src/fastq_quality_filter/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | 12 | bin_PROGRAMS = fastq_quality_filter 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastq_quality_filter_SOURCES = fastq_quality_filter.c 19 | 20 | fastq_quality_filter_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastq_quality_trimmer/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fastq_quality_trimmer 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastq_quality_trimmer_SOURCES = fastq_quality_trimmer.c 19 | 20 | fastq_quality_trimmer_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastq_to_fasta/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | 12 | bin_PROGRAMS = fastq_to_fasta 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastq_to_fasta_SOURCES = fastq_to_fasta.c 19 | 20 | fastq_to_fasta_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastq_to_fasta/fastq_to_fasta.c: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include "fastx.h" 29 | #include "fastx_args.h" 30 | 31 | const char* usage= 32 | "usage: fastq_to_fasta [-h] [-r] [-n] [-v] [-z] [-i INFILE] [-o OUTFILE]\n" \ 33 | "Part of " PACKAGE_STRING " by A. 
Gordon (assafgordon@gmail.com)\n" \ 34 | "\n" \ 35 | " [-h] = This helpful help screen.\n" \ 36 | " [-r] = Rename sequence identifiers to numbers.\n" \ 37 | " [-n] = keep sequences with unknown (N) nucleotides.\n" \ 38 | " Default is to discard such sequences.\n" \ 39 | " [-v] = Verbose - report number of sequences.\n" \ 40 | " If [-o] is specified, report will be printed to STDOUT.\n" \ 41 | " If [-o] is not specified (and output goes to STDOUT),\n" \ 42 | " report will be printed to STDERR.\n" \ 43 | " [-z] = Compress output with GZIP.\n" \ 44 | " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \ 45 | " [-o OUTFILE] = FASTA output file. default is STDOUT.\n" \ 46 | "\n"; 47 | 48 | FASTX fastx; 49 | int flag_rename_seqid = 0; 50 | int flag_discard_N = 1 ; 51 | 52 | int parse_program_args(int __attribute__((unused)) optind, int optc, char __attribute__((unused)) *optarg) 53 | { 54 | switch(optc) { 55 | case 'n': 56 | flag_discard_N = 0 ; 57 | break; 58 | 59 | case 'r': 60 | flag_rename_seqid = 1; 61 | break; 62 | default: 63 | errx(1, __FILE__ ":%d: Unknown argument (%c)", __LINE__, optc ) ; 64 | } 65 | return 1; 66 | } 67 | 68 | 69 | int main(int argc, char* argv[]) 70 | { 71 | fastx_parse_cmdline(argc, argv, "rn", parse_program_args); 72 | 73 | fastx_init_reader(&fastx, get_input_filename(), 74 | FASTQ_ONLY, ALLOW_N, REQUIRE_UPPERCASE, 75 | get_fastq_ascii_quality_offset() ); 76 | 77 | fastx_init_writer(&fastx, get_output_filename(), OUTPUT_FASTA, compress_output_flag()); 78 | 79 | while ( fastx_read_next_record(&fastx) ) { 80 | //See if the input sequence contained 'N' nucleotides 81 | if ( flag_discard_N && (strchr(fastx.nucleotides,'N') != NULL)) 82 | continue; 83 | 84 | if ( flag_rename_seqid ) 85 | snprintf(fastx.name, sizeof(fastx.name), "%zu", num_output_reads(&fastx)+1) ; 86 | 87 | fastx_write_record(&fastx); 88 | } 89 | 90 | //Print verbose report 91 | if ( verbose_flag() ) { 92 | fprintf(get_report_file(), "Input: %zu reads.\n", 
num_input_reads(&fastx) ) ; 93 | fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ; 94 | 95 | if ( flag_discard_N ) { 96 | size_t discarded = num_input_reads(&fastx) - num_output_reads(&fastx) ; 97 | fprintf(get_report_file(), "discarded %zu (%zu%%) low-quality reads.\n", 98 | discarded, (discarded*100)/( num_input_reads(&fastx) ) ) ; 99 | } 100 | } 101 | 102 | return 0; 103 | } 104 | -------------------------------------------------------------------------------- /src/fastx_artifacts_filter/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fastx_artifacts_filter 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastx_artifacts_filter_SOURCES = fastx_artifacts_filter.c 19 | 20 | fastx_artifacts_filter_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastx_artifacts_filter/fastx_artifacts_filter.c: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. 
Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include "fastx.h" 29 | #include "fastx_args.h" 30 | 31 | #define MAX_ADAPTER_LEN 100 32 | 33 | const char* usage= 34 | "usage: fastx_artifacts_filter [-h] [-v] [-z] [-i INFILE] [-o OUTFILE]\n" \ 35 | "Part of " PACKAGE_STRING " by A. Gordon (assafgordon@gmail.com)\n" \ 36 | "\n" \ 37 | " [-h] = This helpful help screen.\n" \ 38 | " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \ 39 | " [-o OUTFILE] = FASTA/Q output file. 
default is STDOUT.\n" \ 40 | " [-z] = Compress output with GZIP.\n" \ 41 | " [-v] = Verbose - report number of processed reads.\n" \ 42 | " If [-o] is specified, report will be printed to STDOUT.\n" \ 43 | " If [-o] is not specified (and output goes to STDOUT),\n" \ 44 | " report will be printed to STDERR.\n" \ 45 | "\n"; 46 | 47 | #define DO_NOT_TRIM_LAST_BASE (0) 48 | 49 | FASTX fastx; 50 | 51 | int parse_commandline(int argc, char* argv[]) 52 | { 53 | return fastx_parse_cmdline(argc, argv, "", NULL); 54 | } 55 | 56 | int artifact_sequence(const FASTX *fastx) 57 | { 58 | int n_count=0; 59 | int a_count=0; 60 | int c_count=0; 61 | int t_count=0; 62 | int g_count=0; 63 | int total_count=0; 64 | 65 | int max_allowed_different_bases = 3 ; 66 | 67 | int i=0; 68 | 69 | while (1) { 70 | if (fastx->nucleotides[i]==0) 71 | break; 72 | 73 | total_count++; 74 | switch(fastx->nucleotides[i]) 75 | { 76 | case 'A': 77 | a_count++; 78 | break; 79 | case 'C': 80 | c_count++; 81 | break; 82 | case 'G': 83 | g_count++; 84 | break; 85 | case 'T': 86 | t_count++; 87 | break; 88 | case 'N': 89 | n_count++; 90 | break; 91 | default: 92 | errx(1, __FILE__":%d: invalid nucleotide value (%c) at position %d", 93 | __LINE__, fastx->nucleotides[i], i ) ; 94 | } 95 | i++; 96 | } 97 | 98 | //Rules for artifacts 99 | 100 | if ( a_count>=(total_count-max_allowed_different_bases) 101 | || 102 | c_count>=(total_count-max_allowed_different_bases) 103 | || 104 | g_count>=(total_count-max_allowed_different_bases) 105 | || 106 | t_count>=(total_count-max_allowed_different_bases) 107 | ) 108 | return 1; 109 | 110 | 111 | return 0; 112 | } 113 | 114 | int main(int argc, char* argv[]) 115 | { 116 | parse_commandline(argc, argv); 117 | 118 | fastx_init_reader(&fastx, get_input_filename(), 119 | FASTA_OR_FASTQ, ALLOW_N, REQUIRE_UPPERCASE, 120 | get_fastq_ascii_quality_offset() ); 121 | 122 | fastx_init_writer(&fastx, get_output_filename(), 123 | OUTPUT_SAME_AS_INPUT, compress_output_flag()); 124 | 125 | 
while ( fastx_read_next_record(&fastx) ) { 126 | 127 | if ( artifact_sequence(&fastx) ) { 128 | } else { 129 | fastx_write_record(&fastx); 130 | } 131 | } 132 | 133 | //Print verbose report 134 | if ( verbose_flag() ) { 135 | fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ; 136 | fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ; 137 | 138 | size_t discarded = num_input_reads(&fastx) - num_output_reads(&fastx) ; 139 | fprintf(get_report_file(), "discarded %zu (%zu%%) artifact reads.\n", 140 | discarded, (discarded*100)/( num_input_reads(&fastx) ) ) ; 141 | } 142 | 143 | return 0; 144 | } 145 | -------------------------------------------------------------------------------- /src/fastx_clipper/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | 12 | bin_PROGRAMS = fastx_clipper 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastx_clipper_SOURCES = fastx_clipper.cpp 19 | 20 | fastx_clipper_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastx_collapser/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fastx_collapser 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastx_collapser_SOURCES = fastx_collapser.cpp \ 19 | std_hash.h 20 | 21 | fastx_collapser_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 22 | 23 | -------------------------------------------------------------------------------- /src/fastx_collapser/std_hash.h: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #ifndef __STD_HASH__ 19 | #define __STD_HASH__ 20 | 21 | 22 | /* 23 | * Centralized place to load std::hash_map 24 | * 25 | * GCC needs the following hacks... 26 | * Other compilers/systems might require different hacks 27 | */ 28 | 29 | #include 30 | #include 31 | 32 | namespace std 33 | { 34 | using namespace __gnu_cxx; 35 | 36 | struct std_string_hash 37 | { 38 | size_t operator()( const std::string& x ) const 39 | { 40 | //printf("std_string_hash: hashing '%s'\n", x.c_str()); 41 | return hash< const char* >()( x.c_str() ); 42 | } 43 | }; 44 | 45 | /* 46 | * 'eqstr' and 'hash_map' usage is based on http://www.sgi.com/tech/stl/hash_map.html 47 | */ 48 | struct eqstr 49 | { 50 | bool operator()(const char* s1, const char* s2) const 51 | { 52 | return strcmp(s1, s2) == 0; 53 | } 54 | }; 55 | 56 | typedef hash_map< const char*, int, hash< const char* >, eqstr > hash_map_charptr_to_int; 57 | 58 | typedef hash_map< string, int, std_string_hash > hash_map_string_to_int; 59 | 60 | typedef hash_set < string, std_string_hash > hash_set_string ; 61 | } 62 | 63 | #endif 64 | 65 | -------------------------------------------------------------------------------- /src/fastx_quality_stats/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 
6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fastx_quality_stats 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastx_quality_stats_SOURCES = fastx_quality_stats.c 19 | 20 | fastx_quality_stats_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastx_renamer/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fastx_renamer 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastx_renamer_SOURCES = fastx_renamer.c 19 | 20 | fastx_renamer_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastx_renamer/fastx_renamer.c: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. 
Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include "fastx.h" 29 | #include "fastx_args.h" 30 | 31 | const char* usage= 32 | "usage: fastx_renamer [-n TYPE] [-h] [-z] [-v] [-i INFILE] [-o OUTFILE]\n" \ 33 | "Part of " PACKAGE_STRING " by A. Gordon (assafgordon@gmail.com)\n" \ 34 | "\n" \ 35 | " [-n TYPE] = rename type:\n" \ 36 | " SEQ - use the nucleotides sequence as the name.\n" \ 37 | " COUNT - use simply counter as the name.\n" \ 38 | " [-h] = This helpful help screen.\n" \ 39 | " [-z] = Compress output with GZIP.\n" \ 40 | " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \ 41 | " [-o OUTFILE] = FASTA/Q output file. 
default is STDOUT.\n" \ 42 | "\n"; 43 | 44 | enum RENAME_TYPE { 45 | SEQ, 46 | COUNT 47 | }; 48 | enum RENAME_TYPE rename_type; 49 | unsigned int counter = 1 ; 50 | FASTX fastx; 51 | 52 | int parse_program_args(int __attribute__((unused)) optind, int optc, char* optarg) 53 | { 54 | switch(optc) { 55 | case 'n': 56 | if (optarg==NULL) 57 | errx(1, "[-n] parameter requires an argument value"); 58 | if (strncmp(optarg,"SEQ",3)==0) { 59 | rename_type = SEQ ; 60 | } 61 | else 62 | if (strncmp(optarg,"COUNT",5)==0) { 63 | rename_type = COUNT ; 64 | counter = 1 ; 65 | } 66 | else 67 | errx(1,"Uknown rename type [-n]: '%s'", optarg); 68 | break; 69 | 70 | default: 71 | errx(1, __FILE__ ":%d: Unknown argument (%c)", __LINE__, optc ) ; 72 | 73 | } 74 | return 1; 75 | } 76 | 77 | 78 | int main(int argc, char* argv[]) 79 | { 80 | fastx_parse_cmdline(argc, argv, "n:", parse_program_args); 81 | 82 | fastx_init_reader(&fastx, get_input_filename(), 83 | FASTA_OR_FASTQ, ALLOW_N, REQUIRE_UPPERCASE, get_fastq_ascii_quality_offset() ); 84 | 85 | fastx_init_writer(&fastx, get_output_filename(), OUTPUT_SAME_AS_INPUT, compress_output_flag()); 86 | 87 | while ( fastx_read_next_record(&fastx) ) { 88 | 89 | switch(rename_type) 90 | { 91 | case SEQ: 92 | strncpy(fastx.name, fastx.nucleotides, sizeof(fastx.name)); 93 | strncpy(fastx.name2, fastx.nucleotides, sizeof(fastx.name2)); 94 | break; 95 | case COUNT: 96 | snprintf(fastx.name, sizeof(fastx.name),"%u",counter); 97 | strncpy(fastx.name2, fastx.name, sizeof(fastx.name2)); 98 | counter++; 99 | break; 100 | default: 101 | errx(1,"Internal error: rename_type = %d", (int)rename_type); 102 | } 103 | 104 | fastx_write_record(&fastx); 105 | } 106 | 107 | if ( verbose_flag() ) { 108 | fprintf(get_report_file(), "Renamed: %zu reads.\n", num_input_reads(&fastx) ) ; 109 | } 110 | return 0; 111 | } 112 | -------------------------------------------------------------------------------- /src/fastx_reverse_complement/Makefile.am: 
-------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fastx_reverse_complement 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastx_reverse_complement_SOURCES = fastx_reverse_complement.c 19 | 20 | fastx_reverse_complement_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastx_reverse_complement/fastx_reverse_complement.c: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 
17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | 28 | #include "fastx.h" 29 | #include "fastx_args.h" 30 | 31 | const char* usage= 32 | "usage: fastx_reverse_complement [-h] [-r] [-z] [-v] [-i INFILE] [-o OUTFILE]\n" \ 33 | "Part of " PACKAGE_STRING " by A. Gordon (assafgordon@gmail.com)\n" \ 34 | "\n" \ 35 | " [-h] = This helpful help screen.\n" \ 36 | " [-z] = Compress output with GZIP.\n" \ 37 | " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \ 38 | " [-o OUTFILE] = FASTA/Q output file. default is STDOUT.\n" \ 39 | "\n"; 40 | 41 | FASTX fastx; 42 | 43 | char reverse_complement_base ( const char input ) 44 | { 45 | switch(input) 46 | { 47 | case 'N': 48 | return 'N'; 49 | case 'n': 50 | return 'n'; 51 | case 'A': 52 | return 'T'; 53 | case 'T': 54 | return 'A'; 55 | case 'G': 56 | return 'C'; 57 | case 'C': 58 | return 'G'; 59 | case 'a': 60 | return 't'; 61 | case 't': 62 | return 'a'; 63 | case 'g': 64 | return 'c'; 65 | case 'c': 66 | return 'g'; 67 | default: 68 | errx(1,"Invalid nucleotide value (%c) in reverse_complement_base()", input ); 69 | } 70 | 71 | return '0'; //should not get here - just to please the compiler 72 | } 73 | 74 | void reverse_complement_fastx(FASTX* pFASTX) 75 | { 76 | int i,j ; 77 | int length = strlen(pFASTX->nucleotides); 78 | 79 | char temp_nuc; 80 | int temp_qual; 81 | 82 | for (i=0;inucleotides[i] = reverse_complement_base ( pFASTX->nucleotides[i] ) ; 84 | 85 | i = 0 ; 86 | j = length - 1 ; 87 | while ( i < j ) { 88 | //Swap the nucleotides 89 | temp_nuc = pFASTX->nucleotides[i] ; 90 | pFASTX->nucleotides[i] = pFASTX->nucleotides[j] ; 91 | pFASTX->nucleotides[j] = temp_nuc; 92 | 93 | //Swap the quality scores 94 | if (pFASTX->read_fastq) { 95 | temp_qual = pFASTX->quality[i]; 96 | pFASTX->quality[i] = pFASTX->quality[j]; 97 | pFASTX->quality[j] = temp_qual ; 98 | } 99 | 100 | //Advance to next position 101 | i++; 102 | j--; 103 | } 104 | } 
105 | 106 | 107 | int main(int argc, char* argv[]) 108 | { 109 | fastx_parse_cmdline(argc, argv, "", NULL); 110 | 111 | fastx_init_reader(&fastx, get_input_filename(), 112 | FASTA_OR_FASTQ, ALLOW_N, REQUIRE_UPPERCASE, 113 | get_fastq_ascii_quality_offset() ); 114 | 115 | fastx_init_writer(&fastx, get_output_filename(), OUTPUT_SAME_AS_INPUT, compress_output_flag()); 116 | 117 | while ( fastx_read_next_record(&fastx) ) { 118 | reverse_complement_fastx(&fastx); 119 | fastx_write_record(&fastx); 120 | } 121 | 122 | if ( verbose_flag() ) { 123 | fprintf(get_report_file(), "Printing Reverse-Complement Sequences.\n" ); 124 | fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ; 125 | fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ; 126 | } 127 | return 0; 128 | } 129 | -------------------------------------------------------------------------------- /src/fastx_trimmer/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | 12 | bin_PROGRAMS = fastx_trimmer 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | fastx_trimmer_SOURCES = fastx_trimmer.c 19 | 20 | fastx_trimmer_LDADD = ../libfastx/libfastx.a $(LT_LDFLAGS) 21 | 22 | -------------------------------------------------------------------------------- /src/fastx_uncollapser/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 10 | 11 | 12 | bin_PROGRAMS = fastx_uncollapser 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | $(GTEXTUTILS_CFLAGS) \ 17 | -I$(top_srcdir)/src/libfastx 18 | 19 | LDADD = $(GTEXTUTILS_LIBS) ../libfastx/libfastx.a $(LT_LDFLAGS) 20 | 21 | fastx_uncollapser_SOURCES = fastx_uncollapser.cpp 22 | 23 | -------------------------------------------------------------------------------- /src/libfastx/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | noinst_LIBRARIES = libfastx.a 3 | 4 | libfastx_a_SOURCES = chomp.c chomp.h \ 5 | fastx.c fastx.h \ 6 | fastx_args.c fastx_args.h \ 7 | sequence_alignment.h sequence_alignment.cpp 8 | 9 | -------------------------------------------------------------------------------- /src/libfastx/chomp.c: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. 
Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include "chomp.h" 19 | 20 | /* 21 | Chomp - 22 | Removes CR/LF from given string. 23 | 24 | Input - 25 | string - NULL terminated string. 26 | WILL BE MODIFIED! 27 | Output - 28 | None 29 | 30 | Remarks - 31 | The first CR (ASCII 13) or LF (ASCII 10) found in the string will be replaced with a NULL - 32 | Effectively chomping the string. 33 | */ 34 | void chomp(char *string) 35 | { 36 | while (*string != 0) { 37 | if (*string==13 || *string==10) { 38 | *string = 0 ; 39 | return; 40 | } 41 | string++; 42 | } 43 | return ; 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/libfastx/chomp.h: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 
9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #ifndef __CHOMP_H__ 19 | #define __CHOMP_H__ 20 | 21 | void chomp(char *string); 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src/libfastx/fastx_args.c: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 17 | */ 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "fastx_args.h" 27 | 28 | /* 29 | * Each program should specify its own usage string 30 | */ 31 | extern char* usage; 32 | 33 | 34 | /* 35 | * globals.. 
yuck 36 | * 37 | * some day this will be a stand alone class 38 | */ 39 | const char* input_filename = "-"; 40 | const char* output_filename = "-"; 41 | int verbose = 0; 42 | int compress_output = 0 ; 43 | int fastq_ascii_quality_offset = 33 ; 44 | FILE* report_file; 45 | 46 | int get_fastq_ascii_quality_offset() 47 | { 48 | return fastq_ascii_quality_offset; 49 | } 50 | 51 | const char* get_input_filename() 52 | { 53 | return input_filename; 54 | } 55 | 56 | const char* get_output_filename() 57 | { 58 | return output_filename; 59 | } 60 | 61 | int verbose_flag() 62 | { 63 | return verbose; 64 | } 65 | 66 | int compress_output_flag() 67 | { 68 | return compress_output ; 69 | } 70 | 71 | FILE* get_report_file() 72 | { 73 | return report_file; 74 | } 75 | 76 | int fastx_parse_cmdline( int argc, char* argv[], 77 | const char* program_options, 78 | parse_argument_func program_parse_args ) 79 | { 80 | int opt; 81 | 82 | char combined_options_string[100]; 83 | 84 | strcpy(combined_options_string, "Q:zhvi:o:"); 85 | strcat(combined_options_string, program_options); 86 | 87 | report_file = stderr ; //since the default output is STDOUT, the report goes by default to STDERR 88 | 89 | while ( (opt = getopt(argc, argv, combined_options_string) ) != -1 ) { 90 | 91 | // Parse the program's custom options 92 | if ( strchr(program_options, opt) != NULL ) { 93 | if (!program_parse_args(optind, opt, optarg)) 94 | return 0; 95 | continue; 96 | } 97 | 98 | //Parse the default options 99 | switch(opt) { 100 | case 'h': 101 | printf("%s", usage); 102 | exit(1); 103 | 104 | case 'v': 105 | verbose = 1 ; 106 | break ; 107 | 108 | case 'z': 109 | compress_output = 1 ; 110 | break ; 111 | 112 | 113 | case 'i': 114 | if (optarg==NULL) 115 | errx(1,"[-i] option requires FILENAME argument"); 116 | input_filename = optarg; 117 | break; 118 | 119 | case 'o': 120 | if (optarg==NULL) 121 | errx(1,"[-o] option requires FILENAME argument"); 122 | output_filename = optarg; 123 | 124 | //The user 
specified a specific output file, so the report can go to STDOUT 125 | report_file = stdout; 126 | break; 127 | 128 | case 'Q': 129 | if (optarg==NULL) 130 | errx(1,"[-Q] option requires VALUE argument"); 131 | fastq_ascii_quality_offset = atoi(optarg); 132 | break; 133 | 134 | default: 135 | printf("use '-h' for usage information.\n"); 136 | exit(1); 137 | break; 138 | 139 | } 140 | } 141 | 142 | return 1; 143 | } 144 | 145 | -------------------------------------------------------------------------------- /src/libfastx/fastx_args.h: -------------------------------------------------------------------------------- 1 | /* 2 | FASTX-toolkit - FASTA/FASTQ preprocessing tools. 3 | Copyright (C) 2009-2013 A. Gordon (assafgordon@gmail.com) 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU Affero General Public License as 7 | published by the Free Software Foundation, either version 3 of the 8 | License, or (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU Affero General Public License for more details. 14 | 15 | You should have received a copy of the GNU Affero General Public License 16 | along with this program. If not, see . 
17 | */ 18 | #ifndef __FASTX_ARGS__ 19 | #define __FASTX_ARGS__ 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | //One day this would all be OO :-) 26 | 27 | const char* get_input_filename(); 28 | const char* get_output_filename(); 29 | int verbose_flag(); 30 | int compress_output_flag(); 31 | int get_fastq_ascii_quality_offset(); 32 | FILE* get_report_file(); 33 | 34 | typedef int (*parse_argument_func)(int optind, int optc, char* optarg) ; 35 | 36 | int fastx_parse_cmdline( int argc, char* argv[], 37 | const char* program_options, 38 | parse_argument_func program_parse_arg ) ; 39 | 40 | 41 | #ifdef __cplusplus 42 | } 43 | #endif 44 | 45 | #endif 46 | 47 | -------------------------------------------------------------------------------- /src/seqalign_test/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2008-2013 Assaf Gordon 2 | # 3 | # This file is free software; as a special exception the author gives 4 | # unlimited permission to copy and/or distribute it, with or without 5 | # modifications, as long as this notice is preserved. 6 | # 7 | # This program is distributed in the hope that it will be useful, but 8 | # WITHOUT ANY WARRANTY, to the extent permitted by law; without even the 9 | # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
10 | 11 | 12 | noinst_PROGRAMS = seqalign_test 13 | 14 | AM_CPPFLAGS = \ 15 | $(CC_WARNINGS) \ 16 | -I$(top_srcdir)/src/libfastx 17 | 18 | seqalign_test_SOURCES = seqalign_test.cpp 19 | 20 | seqalign_test_LDADD = ../libfastx/libfastx.a 21 | 22 | -------------------------------------------------------------------------------- /src/seqalign_test/seqalign_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "sequence_alignment.h" 7 | 8 | 9 | int main( /*int argc, char* argv[] */) 10 | { 11 | HalfLocalSequenceAlignment lsa ; 12 | 13 | const SequenceAlignmentResults& results = lsa.align("AAAGGTTTCCC","AGGCTT" ); 14 | lsa.print_matrix(); 15 | results.print(); 16 | 17 | 18 | return 0; 19 | } 20 | --------------------------------------------------------------------------------