├── .flake8
├── .gitattributes
├── .github
├── styler.R
└── workflows
│ ├── ci.yaml
│ ├── pr.yaml
│ ├── pr_without_tool_change.yaml
│ └── slash.yaml
├── .gitignore
├── .tt_skip
├── CONTRIBUTING.md
├── README.rst
├── data_managers
├── README.rst
└── ncbi_blastdb
│ ├── README.rst
│ ├── blastdb.xml
│ ├── data_manager_conf.xml
│ ├── fetch_blast_db.py
│ └── tool_dependencies.xml
├── datatypes
├── README.rst
└── blast_datatypes
│ ├── .shed.yml
│ ├── README.rst
│ ├── blast.py
│ └── datatypes_conf.xml
├── packages
├── README.rst
├── package_blast_plus_2_2_26
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_2_27
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_2_28
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_2_29
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_2_30
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_2_31
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_3_0
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_4_0
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_5_0
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_6_0
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── package_blast_plus_2_7_0
│ ├── .shed.yml
│ └── tool_dependencies.xml
└── package_blast_plus_2_7_1
│ ├── .shed.yml
│ └── tool_dependencies.xml
├── test-data
├── README.rst
├── all_fasta.loc
├── blastdb.loc
├── blastdb_d.loc
├── blastdb_p.loc
├── blastn_arabidopsis.extended.tabular
├── blastn_arabidopsis.standard.tabular
├── blastn_arabidopsis.xml
├── blastn_chimera_vs_rhodopsin_db.tabular
├── blastn_chimera_vs_rhodopsin_db_max_hsps1.tabular
├── blastn_chimera_vs_three_human_and_rhodopsin_db.tabular
├── blastn_chimera_vs_three_human_db.tabular
├── blastn_chimera_vs_three_human_max1.tabular
├── blastn_chimera_vs_three_human_max1.txt
├── blastn_rhodopsin_vs_three_human.columns.tabular
├── blastn_rhodopsin_vs_three_human.tabular
├── blastn_rhodopsin_vs_three_human.xml
├── blastn_rhodopsin_vs_three_human_converted.tabular
├── blastp_four_human_vs_rhodopsin.tabular
├── blastp_four_human_vs_rhodopsin.xml
├── blastp_four_human_vs_rhodopsin_converted.tabular
├── blastp_four_human_vs_rhodopsin_converted_ext.tabular
├── blastp_four_human_vs_rhodopsin_ext.tabular
├── blastp_four_human_vs_rhodopsin_top3.tabular
├── blastp_four_human_vs_rhodopsin_top3_positive.tabular
├── blastp_human_vs_pdb_seg_no.xml
├── blastp_human_vs_pdb_seg_no_converted_ext.tabular
├── blastp_human_vs_pdb_seg_no_converted_std.tabular
├── blastp_rhodopsin_adv_vs_four_human.tabular
├── blastp_rhodopsin_peptides_vs_four_human.tabular
├── blastp_rhodopsin_vs_four_human.tabular
├── blastp_rhodopsin_vs_four_human_db.taxid.tabular
├── blastp_sample.blast2go.tabular
├── blastp_sample.xml
├── blastp_sample_converted.tabular
├── blastx_rhodopsin_adv_vs_four_human.tabular
├── blastx_rhodopsin_vs_four_human.tabular
├── blastx_rhodopsin_vs_four_human.xml
├── blastx_rhodopsin_vs_four_human_all.tabular
├── blastx_rhodopsin_vs_four_human_converted.tabular
├── blastx_rhodopsin_vs_four_human_converted_ext.tabular
├── blastx_rhodopsin_vs_four_human_ext.tabular
├── blastx_sample.xml
├── blastx_sample_converted.tabular
├── cd00003.smp
├── cd00003_and_cd00008.aux
├── cd00003_and_cd00008.freq
├── cd00003_and_cd00008.loo
├── cd00003_and_cd00008.phr
├── cd00003_and_cd00008.pin
├── cd00003_and_cd00008.psd
├── cd00003_and_cd00008.psi
├── cd00003_and_cd00008.psq
├── cd00003_and_cd00008.rps
├── cd00008.smp
├── chimera.fasta
├── chimera.fasta.gz
├── convert2blastmask_four_human_masked.maskinfo-asn1
├── convert2blastmask_four_human_masked.maskinfo-asn1-binary
├── deduplicate.nosortids.fasta
├── deduplicate.sortids.fasta
├── deltablast_four_human_vs_rhodopsin.tabular
├── deltablast_four_human_vs_rhodopsin.xml
├── deltablast_four_human_vs_rhodopsin_ext.tabular
├── deltablast_rhodopsin_vs_four_human.tabular
├── duplicates.fasta
├── duplicates.fasta.gz
├── duplicates.nr.fasta
├── dustmasker_three_human.fasta
├── dustmasker_three_human.maskinfo-asn1
├── dustmasker_three_human.maskinfo-asn1-binary
├── empty.fasta
├── empty_file.dat
├── est_out.json
├── four_human_proteins.dbinfo.txt
├── four_human_proteins.fasta
├── four_human_proteins.fasta.log.txt
├── four_human_proteins.fasta.phd
├── four_human_proteins.fasta.phi
├── four_human_proteins.fasta.phr
├── four_human_proteins.fasta.pin
├── four_human_proteins.fasta.pog
├── four_human_proteins.fasta.psd
├── four_human_proteins.fasta.psi
├── four_human_proteins.fasta.psq
├── four_human_proteins_masked.fasta
├── four_human_proteins_taxid.fasta.log.txt
├── four_human_proteins_taxid.fasta.phd
├── four_human_proteins_taxid.fasta.phi
├── four_human_proteins_taxid.fasta.phr
├── four_human_proteins_taxid.fasta.pin
├── four_human_proteins_taxid.fasta.pog
├── four_human_proteins_taxid.fasta.psd
├── four_human_proteins_taxid.fasta.psi
├── four_human_proteins_taxid.fasta.psq
├── k12_edited_proteins.fasta
├── k12_ten_proteins.fasta
├── makeprofiledb_input_cd00003.smp
├── makeprofiledb_input_cd00008.smp
├── more_duplicates.fasta
├── rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular
├── rbh_blastp_four_human_vs_rhodopsin_proteins.tabular
├── rbh_blastp_k12.tabular
├── rbh_blastp_k12_self.tabular
├── rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular
├── rbh_none.tabular
├── rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular
├── rhodopsin_bufo.fasta
├── rhodopsin_nucs.blastdbcmd.txt
├── rhodopsin_nucs.dbinfo.txt
├── rhodopsin_nucs.fasta
├── rhodopsin_nucs.fasta.gz
├── rhodopsin_nucs.fasta.log.txt
├── rhodopsin_nucs.fasta.nhd
├── rhodopsin_nucs.fasta.nhi
├── rhodopsin_nucs.fasta.nhr
├── rhodopsin_nucs.fasta.nin
├── rhodopsin_nucs.fasta.nnd
├── rhodopsin_nucs.fasta.nni
├── rhodopsin_nucs.fasta.nog
├── rhodopsin_nucs.fasta.nsd
├── rhodopsin_nucs.fasta.nsi
├── rhodopsin_nucs.fasta.nsq
├── rhodopsin_nucs.no_gi.fasta
├── rhodopsin_nucs.no_gi.region.fasta
├── rhodopsin_peptides.fasta
├── rhodopsin_proteins.fasta
├── segmasker_four_human.fasta
├── segmasker_four_human.maskinfo-asn1
├── segmasker_four_human.maskinfo-asn1-binary
├── tblastn_four_human_vs_rhodopsin.html
├── tblastn_four_human_vs_rhodopsin.tabular
├── tblastn_four_human_vs_rhodopsin.xml
├── tblastn_four_human_vs_rhodopsin_deflines.tabular
├── tblastn_four_human_vs_rhodopsin_ext.tabular
├── tblastx_rhodopsin_vs_three_human.tabular
├── three_human_mRNA.dbinfo.txt
├── three_human_mRNA.fasta
├── three_human_mRNA.fasta.gz
├── three_human_mRNA.fasta.log.txt
├── three_human_mRNA.fasta.nhd
├── three_human_mRNA.fasta.nhi
├── three_human_mRNA.fasta.nhr
├── three_human_mRNA.fasta.nin
├── three_human_mRNA.fasta.nog
├── three_human_mRNA.fasta.nsd
├── three_human_mRNA.fasta.nsi
├── three_human_mRNA.fasta.nsq
├── three_human_mRNA_and_rhodopsin_nucs.dbinfo.txt
└── tool_data_table_conf.xml.test
├── tool-data
├── README.rst
├── all_fasta.loc.sample
├── blast2go.loc.sample
├── blastdb.loc.sample
├── blastdb_d.loc.sample
├── blastdb_p.loc.sample
└── tool_data_table_conf.xml.sample
├── tools
├── README.rst
├── blast2go
│ ├── .shed.yml
│ ├── README.rst
│ ├── b2g_slim.py
│ ├── blast2go.py
│ ├── blast2go.xml
│ ├── go_categorize.py
│ ├── massage_xml_for_blast2go.py
│ └── test-data
├── blast_rbh
│ ├── .shed.yml
│ ├── README.rst
│ ├── best_hits.py
│ ├── blast_rbh.py
│ ├── blast_rbh.xml
│ ├── blast_rbh_report.py
│ ├── test-data
│ └── update_tests.sh
├── blastxml_to_top_descr
│ ├── .shed.yml
│ ├── README.rst
│ ├── blastxml_to_top_descr.py
│ ├── blastxml_to_top_descr.xml
│ └── test-data
├── make_nr
│ ├── .shed.yml
│ ├── README.rst
│ ├── make_nr.py
│ ├── make_nr.xml
│ └── test-data
├── ncbi_blast_plus
│ ├── .lint_skip
│ ├── .shed.yml
│ ├── README.rst
│ ├── blastxml_to_tabular.py
│ ├── blastxml_to_tabular.xml
│ ├── check_no_duplicates.py
│ ├── get_species_taxids.xml
│ ├── ncbi_blastdbcmd_info.xml
│ ├── ncbi_blastdbcmd_wrapper.xml
│ ├── ncbi_blastn_wrapper.xml
│ ├── ncbi_blastp_wrapper.xml
│ ├── ncbi_blastx_wrapper.xml
│ ├── ncbi_convert2blastmask_wrapper.xml
│ ├── ncbi_deltablast_wrapper.xml
│ ├── ncbi_dustmasker_wrapper.xml
│ ├── ncbi_macros.xml
│ ├── ncbi_makeblastdb.xml
│ ├── ncbi_makeprofiledb.xml
│ ├── ncbi_psiblast_wrapper.xml
│ ├── ncbi_rpsblast_wrapper.xml
│ ├── ncbi_rpstblastn_wrapper.xml
│ ├── ncbi_segmasker_wrapper.xml
│ ├── ncbi_tblastn_wrapper.xml
│ ├── ncbi_tblastx_wrapper.xml
│ ├── test-data
│ ├── tool-data
│ └── update_test_files.sh
└── reciprocal_best_hits
│ ├── README.rst
│ ├── reciprocal_best_hits.py
│ ├── reciprocal_best_hits.xml
│ └── test-data
└── workflows
├── README.rst
└── blast_top_hit_species
├── .shed.yml
├── N_abberans_piechart_mouseover.png
├── README.rst
├── blast_top_hit_species.ga
├── blast_top_hit_species.png
└── repository_dependencies.xml
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | # Recommend matching the black default line length of 88,
3 | # rather than the flake8 default of 79:
4 | max-line-length = 88
5 | extend-ignore =
6 | # See https://github.com/PyCQA/pycodestyle/issues/373
7 | E203,
8 | # B902 blind except Exception: statement
9 | # For now willing to ignore this as testing assorted
10 | # operating systems for the right exception is hard:
11 | B902,
12 |
13 | # For flake8-import-order, pycharm is like smarkets but case sensitive
14 | import-order-style = pycharm
15 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Binary files (no line-ending conversions)
2 | #
3 | # Enable hexdump-diff by adding this to .git/config
4 | #
5 | # [diff "hex"]
6 | # textconv = hexdump -v -C
7 | # binary = true
8 | #
9 | *.pin binary diff=hex
10 | *.nin binary diff=hex
11 |
--------------------------------------------------------------------------------
/.github/styler.R:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript

# Command-line wrapper around the styler package.
#
# Usage: styler.R [--dry {off,on}] DIR
#
# DIR may be a single file or a directory (directories are styled
# recursively). The --dry value is passed straight through to styler's
# `dry` argument. If styler reports any changed file, the script prints a
# summary when --dry is "off" and otherwise stops with an error, so it can
# be used as a lint gate.

library("argparse")
library("styler")

parser <- ArgumentParser(description = "Call styler")
parser$add_argument("dir",
    metavar = "DIR", type = "character",
    help = "File or directory to style"
)
parser$add_argument("--dry",
    choices = c("off", "on"), default = "on"
)
args <- parser$parse_args()

# file.info() yields NA for `isdir` when the path does not exist, which would
# make the `if` below fail with an obscure error; report the problem clearly.
if (!file.exists(args$dir)) {
    stop(paste("Path does not exist:", args$dir))
}

file_info <- file.info(args$dir)
is_directory <- file_info$isdir

# NOTE(review): captured_output is never used afterwards; capture.output()
# is presumably only here to keep styler's console output quiet - confirm.
if (is_directory) {
    captured_output <- capture.output({
        result <- style_dir(args$dir, indent_by = 4, dry = args$dry, recursive = TRUE)
    })
} else {
    captured_output <- capture.output({
        result <- style_file(args$dir, indent_by = 4, dry = args$dry)
    })
}

# Count the files styler flagged as changed.
n <- nrow(subset(result, changed == TRUE))
if (n > 0) {
    if (args$dry == "off") {
        print(paste("Changed", n, "files"))
    } else {
        stop(paste("Linting failed for", n, "files"))
    }
}
37 |
--------------------------------------------------------------------------------
/.github/workflows/pr_without_tool_change.yaml:
--------------------------------------------------------------------------------
1 | name: Fallback
2 | # Fallback workflow that provides a succeeding "Check workflow success" job
3 | # as this is a requirement for being able to merge a PR
4 | # see https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/defining-the-mergeability-of-pull-requests/troubleshooting-required-status-checks#handling-skipped-but-required-checks
5 | on:
6 | pull_request:
7 | concurrency:
8 | group: ${{ github.workflow }}-${{ github.ref }}
9 | cancel-in-progress: true
10 | jobs:
11 | determine-success:
12 | name: Check workflow success
13 | runs-on: ubuntu-latest
14 | steps:
15 | - run: 'echo "No tool tests required for this PR"'
16 |
--------------------------------------------------------------------------------
/.github/workflows/slash.yaml:
--------------------------------------------------------------------------------
1 | name: Slash Command Dispatch
2 | on:
3 | issue_comment:
4 | types: [created]
5 | jobs:
6 | slashCommandDispatch:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - name: Slash Command Dispatch
10 | # workaround for checking availability of secret https://github.com/actions/runner/issues/520
11 | env:
12 | PAT: ${{ secrets.PAT }}
13 | if: ${{ ( github.repository_owner == 'peterjc' ) && ( env.PAT != '' ) }}
14 | uses: peter-evans/slash-command-dispatch@v4
15 | with:
16 | token: ${{ secrets.PAT }}
17 | commands: |
18 | run-all-tool-tests
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | #Ignore backup files from some Unix editors,
2 | *~
3 | *.swp
4 | *.bak
5 | [#]*[#]
6 |
7 | #Ignore any tar-balls prepared to upload to Galaxy Tool Shed
8 | *.tar.gz
9 |
10 | #Ignore patches and any original files created by patch command
11 | *.diff
12 | *.patch
13 | *.orig
14 | *.rej
15 |
16 | #Ignore these hidden files from Mac OS X
17 | .DS_Store
18 |
19 | #Ignore hidden files from Dolphin file manager
20 | .directory
21 |
22 | #Ignore all compiled python files (e.g. from running the unit tests):
23 | *.pyc
24 | *.pyo
25 |
26 | #Ignore all Jython class files (present if using Jython)
27 | *.class
28 |
29 | #Ignore planemo test output
30 | tool_test_output.html
31 | tool_test_output.json
32 |
33 | #Ignore any NCBI BLAST taxonomy database present
34 | taxdb.btd
35 | taxdb.bti
36 |
37 | #Ignore any log files (e.g. from running makeblastdb etc)
38 | *.log
39 |
--------------------------------------------------------------------------------
/.tt_skip:
--------------------------------------------------------------------------------
1 | tools/reciprocal_best_hits/
2 | tools/blast2go/
3 | datatypes/
4 | workflows/
5 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 |
3 | This document describes how to contribute to this repository. Pull
4 | requests containing bug fixes, updates, and extensions to the existing
5 | tools in this repository will be considered for inclusion.
6 |
7 | To maximize the likelihood your contribution will be accepted, it is a
8 | good practice to file an
9 | [issue](https://github.com/peterjc/galaxy_blast/issues) first and
10 | discuss potential solutions before proceeding with development.
11 |
12 | ## How to Contribute
13 |
14 | * Make sure you have a [GitHub account](https://github.com/signup/free)
15 | * Make sure you have git [installed](https://help.github.com/articles/set-up-git)
16 | * Fork the repository on [GitHub](https://github.com/peterjc/galaxy_blast/fork)
17 | * Make the desired modifications - consider using a [feature branch](https://github.com/Kunena/Kunena-Forum/wiki/Create-a-new-branch-with-git-and-manage-branches).
18 | * Make sure you have added the necessary tests for your changes and they pass. See [TESTING](https://github.com/peterjc/galaxy_blast#testing) for more information.
19 | * Open a [pull request](https://help.github.com/articles/using-pull-requests) with these changes.
20 |
21 | ## Coding style
22 |
23 | Via continuous integration testing (GitHub Actions) we enforce various style
24 | checks, including running ``flake8`` on the Python code with this set
25 | of plugins:
26 |
27 | ```
28 | $ pip install flake8 flake8-blind-except flake8-docstrings flake8-rst-docstrings
29 | ```
30 |
31 | Additionally, we have adopted the command line tool ``black`` for the
32 | Python coding style - but this must be installed under Python 3, try:
33 |
34 | ```
35 | $ pip install black
36 | ```
37 |
38 | Or:
39 |
40 | ```
41 | $ python3 -m pip install black
42 | ```
43 |
44 | If you are using Python 3, then we also recommend:
45 |
46 | ```
47 | $ pip install flake8-black
48 | ```
49 |
50 | The reStructuredText markup is tested with ``restructuredtext-lint``:
51 |
52 | ```
53 | $ pip install restructuredtext-lint
54 | ```
55 |
--------------------------------------------------------------------------------
/data_managers/README.rst:
--------------------------------------------------------------------------------
1 | Galaxy Data Manager definitions
2 | ===============================
3 |
4 | Each sub-folder represents a different entry on the Galaxy ToolShed,
5 | for example ``ncbi_blastdb`` contains a Data Manager for fetching
6 | the NCBI BLAST databases.
7 |
8 | For general information, see the `main README file <../README.rst>`_.
9 |
--------------------------------------------------------------------------------
/data_managers/ncbi_blastdb/README.rst:
--------------------------------------------------------------------------------
1 | Galaxy Data Manager for NCBI BLAST databases
2 | ============================================
3 |
4 | Copyright 2014 by Daniel Blankenberg (Penn State University, PA 16802, USA),
5 | and additional contributors. All rights reserved. See the licence text below.
6 |
7 | Downloads and populates blastdb data table. This is just a simple example to
8 | demonstrate the use of Data Managers for processing BLAST databases, and
9 | uses the NCBI's ``update_blastdb.pl`` script internally. See:
10 |
11 | Blankenberg et al. (2014) Wrangling Galaxy's reference data
12 | https://doi.org/10.1093/bioinformatics/btu119
13 |
14 | This tool is currently available from the Galaxy Test Tool Shed at:
15 | http://testtoolshed.g2.bx.psu.edu/view/blankenberg/data_manager_example_blastdb_ncbi_update_blastdb
16 |
17 |
18 | History
19 | =======
20 |
21 | ======= ======================================================================
22 | Version Changes
23 | ------- ----------------------------------------------------------------------
24 | v0.0.1 - Initial release as an example Data Manager on the Test ToolShed.
25 | - Depends on ``package_blast_plus_2_2_28`` in ToolShed.
26 | v0.0.2 - Development moved to GitHub, https://github.com/peterjc/galaxy_blast
27 | - Updated citation information (Blankenberg et al. 2014).
28 | - Adopted standard MIT License.
29 | - Now depends on ``package_blast_plus_2_2_30`` in ToolShed.
30 | v0.0.3 - Reorder XML elements (internal change only).
31 | - Python 3 compatible syntax.
32 | ======= ======================================================================
33 |
34 |
35 | Bug Reports
36 | ===========
37 |
38 | You can file an issue here https://github.com/peterjc/galaxy_blast/issues or ask
39 | us on the Galaxy development list http://lists.bx.psu.edu/listinfo/galaxy-dev
40 |
41 |
42 | Developers
43 | ==========
44 |
45 | This data manager was originally developed as an example to accompany the
46 | paper Blankenberg et al. (2014), and posted on the Galaxy Test Tool Shed at:
47 | http://testtoolshed.g2.bx.psu.edu/view/blankenberg/data_manager_example_blastdb_ncbi_update_blastdb
48 |
49 | As of April 2014, development is continuing within the Galaxy BLAST+ wrapper
50 | repository on GitHub: https://github.com/peterjc/galaxy_blast
51 |
52 |
53 | Licence (MIT)
54 | =============
55 |
56 | Permission is hereby granted, free of charge, to any person obtaining a copy
57 | of this software and associated documentation files (the "Software"), to deal
58 | in the Software without restriction, including without limitation the rights
59 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
60 | copies of the Software, and to permit persons to whom the Software is
61 | furnished to do so, subject to the following conditions:
62 |
63 | The above copyright notice and this permission notice shall be included in
64 | all copies or substantial portions of the Software.
65 |
66 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
67 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
68 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
69 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
70 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
71 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
72 | THE SOFTWARE.
73 |
--------------------------------------------------------------------------------
/data_managers/ncbi_blastdb/blastdb.xml:
--------------------------------------------------------------------------------
1 |
2 | Downloader
3 |
4 | blast+
5 | python
6 |
7 |
8 |
9 |
10 | fetch_blast_db.py --filename "${out_file}" --tool_data_table_name "blastdb"
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 | **What it does**
38 |
39 | Downloads Blast DBs and updates blastdb tool data tables.
40 |
41 | ------
42 |
43 |
44 | .. class:: infomark
45 |
46 | **Notice:** This is a functional, but basic, tool for fetching preformatted blastdbs.
47 |
48 |
49 | -------
50 |
51 | **References**
52 |
53 | If you use this Galaxy tool in work leading to a scientific publication please
54 | cite the following paper:
55 |
56 | Blankenberg et al. (2014) Wrangling Galaxy's reference data
57 | https://doi.org/10.1093/bioinformatics/btu119
58 |
59 |
60 |
61 | 10.1093/bioinformatics/btu119
62 |
63 |
64 |
--------------------------------------------------------------------------------
/data_managers/ncbi_blastdb/data_manager_conf.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/data_managers/ncbi_blastdb/fetch_blast_db.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Dan Blankenberg
3 | """Script that calls update_blastdb.pl to download preformatted databases."""
4 |
5 | from __future__ import print_function
6 |
7 | import hashlib
8 | import optparse
9 | import os
10 | import subprocess
11 | import sys
12 |
13 | from galaxy.util.json import from_json_string, to_json_string
14 |
15 |
if sys.version_info[0] >= 3:
    # Python 3 has no ``basestring``; alias it so the isinstance check in
    # get_dir_hash works on both major versions.
    basestring = str

DEFAULT_ALGORITHM = hashlib.sha512
CHUNK_SIZE = 2**20  # 1 MiB


def _path_to_bytes(path):
    """Return a filesystem path as bytes, suitable for feeding to a hash."""
    if isinstance(path, bytes):
        # Python 2 ``str`` paths (and explicit bytes paths) are used as-is.
        return path
    if hasattr(os, "fsencode"):
        # Python 3: round-trips whatever bytes the OS gave us for the name.
        return os.fsencode(path)
    return path.encode("utf-8")


def get_dir_hash(directory, algorithm=None, followlinks=True, chunk_size=None):
    """Get hash of directory contents.

    The digest covers, in sorted order within each directory visited by
    ``os.walk``, the path of every sub-directory and file relative to
    ``directory``, followed by each file's contents.

    Arguments:
     - directory: root of the tree to hash.
     - algorithm: either a hash name accepted by ``hashlib.new`` or a
       callable returning a hash object; defaults to ``DEFAULT_ALGORITHM``.
     - followlinks: passed through to ``os.walk``.
     - chunk_size: number of bytes read from each file at a time; defaults
       to ``CHUNK_SIZE``.

    Returns the hexadecimal digest as a string.
    """
    chunk_size = chunk_size or CHUNK_SIZE
    algorithm = algorithm or DEFAULT_ALGORITHM
    if isinstance(algorithm, basestring):
        digest = hashlib.new(algorithm)
    else:
        digest = algorithm()
    # we hash a directory by taking names of directories, files and their
    # contents
    for dirpath, dirnames, filenames in os.walk(directory, followlinks=followlinks):
        # Sorting in place also fixes the order in which os.walk descends,
        # making the digest independent of directory listing order.
        dirnames.sort()
        filenames.sort()
        for name in dirnames:
            rel_dir = os.path.relpath(os.path.join(dirpath, name), directory)
            # hash objects only accept bytes on Python 3
            digest.update(_path_to_bytes(rel_dir))
        for name in filenames:
            filename = os.path.join(dirpath, name)
            digest.update(_path_to_bytes(os.path.relpath(filename, directory)))
            # ``with`` guarantees the handle is closed even if a read fails
            with open(filename, "rb") as fh:
                for data in iter(lambda: fh.read(chunk_size), b""):
                    digest.update(data)

    return digest.hexdigest()
50 |
51 |
def main():
    """Parse and execute the arguments from the command line.

    Reads the JSON parameter file given via ``--filename``, creates the
    ``extra_files_path`` directory of the first output dataset, and runs
    ``update_blastdb.pl --decompress <blastdb_name>`` inside it.

    The same JSON file is then overwritten with a data manager dictionary
    holding a single entry for the data table named by
    ``--tool_data_table_name``. If the ``advanced`` parameters provide no
    ``data_id``, one is built from the database name and a hash of the
    downloaded directory. If they provide no ``data_description``, the TITLE
    (and creation date, when present) from the ``<blastdb_name>.nal`` alias
    file is used, falling back to the ``data_id``.

    Exits with an error message if ``update_blastdb.pl`` does not return 1.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        "-f",
        "--filename",
        dest="filename",
        action="store",
        type="string",
        default=None,
        help="filename",
    )
    parser.add_option(
        "-t",
        "--tool_data_table_name",
        dest="tool_data_table_name",
        action="store",
        type="string",
        default=None,
        help="tool_data_table_name",
    )
    (options, args) = parser.parse_args()

    # Close the parameter file promptly; it is re-opened for writing below.
    with open(options.filename) as handle:
        params = from_json_string(handle.read())
    target_directory = params["output_data"][0]["extra_files_path"]
    os.mkdir(target_directory)

    blastdb_name = params["param_dict"]["blastdb_name"]  # value
    data_description = params["param_dict"]["advanced"].get("data_description", None)
    data_id = params["param_dict"]["advanced"].get("data_id", None)

    cmd_options = ["--decompress"]

    cmd = ["update_blastdb.pl"] + cmd_options + [blastdb_name]
    proc = subprocess.Popen(args=cmd, shell=False, cwd=target_directory)
    return_code = proc.wait()
    # NOTE(review): only exit status 1 is treated as success here. This
    # presumably relies on update_blastdb.pl returning 1 when files were
    # downloaded (and 0 when nothing was) - confirm against the BLAST+
    # version actually installed.
    if return_code != 1:
        sys.exit("Error obtaining blastdb (%s)" % return_code)

    if not data_id:
        data_id = "%s_%s" % (blastdb_name, get_dir_hash(target_directory))

    if not data_description:
        alias_date = None
        alias_path = os.path.join(target_directory, "%s.nal" % (blastdb_name))
        try:
            with open(alias_path) as alias_handle:
                for line in alias_handle:
                    if line.startswith("# Alias file created "):
                        alias_date = line.split("# Alias file created ", 1)[1].strip()
                    if line.startswith("TITLE"):
                        data_description = line.split(None, 1)[1].strip()
                        break
        except Exception as e:
            # Best effort only: a missing or odd alias file is not fatal, we
            # fall back to using the data_id as the description below.
            sys.stderr.write("Error Parsing Alias file for TITLE and date: %s\n" % e)
        if alias_date and data_description:
            data_description = "%s (%s)" % (data_description, alias_date)

    if not data_description:
        data_description = data_id

    data_table_entry = {
        "value": data_id,
        "name": data_description,
        "path": os.path.join(blastdb_name, data_id),
        "nucleotide_alias_name": blastdb_name,
    }
    data_manager_dict = {
        "data_tables": {options.tool_data_table_name: [data_table_entry]}
    }

    # save info to json file (text mode: the JSON is a str, which cannot be
    # written to a binary-mode handle on Python 3)
    with open(options.filename, "w") as fh:
        fh.write(to_json_string(data_manager_dict))
124 |
125 |
126 | if __name__ == "__main__":
127 | main()
128 |
--------------------------------------------------------------------------------
/data_managers/ncbi_blastdb/tool_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/datatypes/README.rst:
--------------------------------------------------------------------------------
1 | Galaxy datatype definitions (OBSOLETE)
2 | ======================================
3 |
4 | For a time, the Galaxy community shared additional datatypes on the
5 | Galaxy Tool Shed. Since the October 2016 release of Galaxy, our NCBI
6 | BLAST XML and database datatypes returned to the Galaxy core.
7 |
8 | Each sub-folder represented a different entry on the Galaxy ToolShed,
9 | for example ``blast_datatypes`` contains definitions for BLAST specific
10 | file types such as BLAST XML and BLAST databases.
11 |
12 | For general information, see the `main README file <../README.rst>`_.
13 |
--------------------------------------------------------------------------------
/datatypes/blast_datatypes/.shed.yml:
--------------------------------------------------------------------------------
1 | name: blast_datatypes
2 | owner: devteam
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/
5 | description: Datatypes for NCBI BLAST (blastxml, databases, etc)
6 | long_description: |
7 | This is a repository for blastxml and other NCBI BLAST related datatypes
8 | such as BLAST databases.
9 | categories:
10 | - Sequence Analysis
11 | type: unrestricted
12 |
--------------------------------------------------------------------------------
/datatypes/blast_datatypes/datatypes_conf.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/packages/README.rst:
--------------------------------------------------------------------------------
1 | Galaxy dependency definitions
2 | =============================
3 |
4 | These packages are now obsolete as Galaxy has transitioned to using
5 | Conda and the BioConda channel for packaging tool dependencies.
6 | Specifically, we now use https://anaconda.org/bioconda/blast for the
7 | NCBI BLAST+ binaries.
8 |
9 | Each sub-folder represents a different entry on the Galaxy ToolShed, for
10 | example ``package_blast_plus_2_2_31`` defines the BLAST+ 2.2.31 dependency
11 | which can be used by other Galaxy ToolShed entries via the IUC owned
12 | https://toolshed.g2.bx.psu.edu/view/iuc/package_blast_plus_2_2_31
13 |
14 | Each folder has a (hidden) special file ``.shed.yml`` for use with the
15 | command line tool Planemo to help automate pushing updates to the Galaxy
16 | Tool Shed, e.g.
17 |
18 | $ planemo shed_update --shed_target testtoolshed --check_diff package_blast_plus_2_2_31
19 | ...
20 | $ planemo shed_update --shed_target toolshed --check_diff package_blast_plus_2_2_31
21 | ...
22 |
23 | For general information, see the `main README file <../README.rst>`_.
24 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_2_26/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_2_26
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_26/
5 | description: NCBI BLAST+ 2.2.26 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers
8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally.
9 | categories:
10 | - Tool Dependency Packages
11 | type: tool_dependency_definition
12 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_2_27/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_2_27
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_27/
5 | description: NCBI BLAST+ 2.2.27 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers
8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally.
9 | categories:
10 | - Tool Dependency Packages
11 | type: tool_dependency_definition
12 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_2_28/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_2_28
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_28/
5 | description: NCBI BLAST+ 2.2.28 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers
8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally.
9 | categories:
10 | - Tool Dependency Packages
11 | type: tool_dependency_definition
12 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_2_29/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_2_29
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_29/
5 | description: NCBI BLAST+ 2.2.29 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers
8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally.
9 | categories:
10 | - Tool Dependency Packages
11 | type: tool_dependency_definition
12 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_2_30/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_2_30
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_30/
5 | description: NCBI BLAST+ 2.2.30 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers
8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally.
9 | categories:
10 | - Tool Dependency Packages
11 | type: tool_dependency_definition
12 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_2_31/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_2_31
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_31/
5 | description: NCBI BLAST+ 2.2.31 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers
8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally.
9 | categories:
10 | - Tool Dependency Packages
11 | type: tool_dependency_definition
12 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_2_31/tool_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.2.31_darwin_all.tar.gz
10 |
11 | bin
12 | $INSTALL_DIR
13 |
14 |
15 |
16 |
17 |
18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.2.31_darwin_all.tar.gz
19 |
20 | bin
21 | $INSTALL_DIR
22 |
23 |
24 |
25 |
26 |
27 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.2.31_linux_x64.tar.gz
28 |
29 | bin
30 | $INSTALL_DIR
31 |
32 |
33 |
34 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not yet supported."
35 | echo "Your machine details (the output from 'uname' and 'arch'):"
36 | uname
37 | arch
38 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!"
39 | false
40 |
41 |
42 |
43 |
44 | $INSTALL_DIR
45 | $INSTALL_DIR
46 |
47 |
48 |
49 |
50 | Downloads the precompiled 64 bit Linux or Mac OS X BLAST+ binaries from the NCBI,
51 | which is faster than performing a local compilation, avoids any issues with build
52 | dependencies, and is more reproducible between installations as there is no
53 | variability from the compiler or library versions.
54 |
55 | Note that as of BLAST+ 2.2.31 the NCBI do not provide 32 bit Linux binaries.
56 |
57 | For more details, see:
58 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_3_0/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_3_0
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_3_0/
5 | description: NCBI BLAST+ 2.3.0 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers
8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally.
9 | categories:
10 | - Tool Dependency Packages
11 | type: tool_dependency_definition
12 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_3_0/tool_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.3.0_darwin_all.tar.gz
10 |
11 | bin
12 | $INSTALL_DIR
13 |
14 |
15 |
16 |
17 |
18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.3.0_darwin_all.tar.gz
19 |
20 | bin
21 | $INSTALL_DIR
22 |
23 |
24 |
25 |
26 |
27 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.3.0_linux_x64.tar.gz
28 |
29 | bin
30 | $INSTALL_DIR
31 |
32 |
33 |
34 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not yet supported."
35 | echo "Your machine details (the output from 'uname' and 'arch'):"
36 | uname
37 | arch
38 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!"
39 | false
40 |
41 |
42 |
43 |
44 | $INSTALL_DIR
45 | $INSTALL_DIR
46 |
47 |
48 |
49 |
50 | Downloads the precompiled 64 bit Linux or Mac OS X BLAST+ binaries from the NCBI,
51 | which is faster than performing a local compilation, avoids any issues with build
52 | dependencies, and is more reproducible between installations as there is no
53 | variability from the compiler or library versions.
54 |
55 | Note that NCBI do not provide 32 bit Linux binaries anymore.
56 |
57 | For more details, see:
58 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_4_0/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_4_0
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_4_0/
5 | description: NCBI BLAST+ 2.4.0 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers
8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally.
9 | categories:
10 | - Tool Dependency Packages
11 | type: tool_dependency_definition
12 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_4_0/tool_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.4.0_darwin_all.tar.gz
10 |
11 | bin
12 | $INSTALL_DIR
13 |
14 |
15 |
16 |
17 |
18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.4.0_darwin_all.tar.gz
19 |
20 | bin
21 | $INSTALL_DIR
22 |
23 |
24 |
25 |
26 |
27 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.4.0_linux_x64.tar.gz
28 |
29 | bin
30 | $INSTALL_DIR
31 |
32 |
33 |
34 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not yet supported."
35 | echo "Your machine details (the output from 'uname' and 'arch'):"
36 | uname
37 | arch
38 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!"
39 | false
40 |
41 |
42 |
43 |
44 | $INSTALL_DIR
45 | $INSTALL_DIR
46 |
47 |
48 |
49 |
50 | Downloads the precompiled 64 bit Linux or Mac OS X BLAST+ binaries from the NCBI,
51 | which is faster than performing a local compilation, avoids any issues with build
52 | dependencies, and is more reproducible between installations as there is no
53 | variability from the compiler or library versions.
54 |
55 | Note that NCBI do not provide 32 bit Linux binaries anymore.
56 |
57 | For more details, see:
58 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_5_0/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_5_0
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_5_0/
5 | description: NCBI BLAST+ 2.5.0 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency
8 | of the Galaxy wrappers for NCBI BLAST+ and any other tools which
9 | call the BLAST+ binaries internally.
10 |
11 | Note that for compatibility with BioConda, internally this is now
12 | called "blast" rather than "blast+" as in the older Galaxy BLAST+
13 | packages.
14 | categories:
15 | - Tool Dependency Packages
16 | type: tool_dependency_definition
17 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_5_0/tool_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.5.0_darwin_x64.tar.gz
10 |
11 | bin
12 | $INSTALL_DIR
13 |
14 |
15 |
16 |
17 |
18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.5.0_linux_x64.tar.gz
19 |
20 | bin
21 | $INSTALL_DIR
22 |
23 |
24 |
25 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not supported."
26 | echo "Your machine details (the output from 'uname' and 'arch'):"
27 | uname
28 | arch
29 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!"
30 | false
31 |
32 |
33 |
34 |
35 | $INSTALL_DIR
36 | $INSTALL_DIR
37 |
38 |
39 |
40 |
41 | Downloads the precompiled 64 bit Linux or Mac OS X BLAST+ binaries from the NCBI,
42 | which is faster than performing a local compilation, avoids any issues with build
43 | dependencies, and is more reproducible between installations as there is no
44 | variability from the compiler or library versions.
45 |
46 | Note that NCBI do not provide any 32 bit binaries anymore.
47 |
48 | For more details, see:
49 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_6_0/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_6_0
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_6_0/
5 | description: NCBI BLAST+ 2.6.0 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency
8 | of the Galaxy wrappers for NCBI BLAST+ and any other tools which
9 | call the BLAST+ binaries internally.
10 |
11 | Note that for compatibility with BioConda, internally this is now
12 | called "blast" rather than "blast+" as in the older Galaxy BLAST+
13 | packages.
14 | categories:
15 | - Tool Dependency Packages
16 | type: tool_dependency_definition
17 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_6_0/tool_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.6.0_darwin_x64.tar.gz
10 |
11 | bin
12 | $INSTALL_DIR
13 |
14 |
15 |
16 |
17 |
18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.6.0_linux_x64.tar.gz
19 |
20 | bin
21 | $INSTALL_DIR
22 |
23 |
24 |
25 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not supported."
26 | echo "Your machine details (the output from 'uname' and 'arch'):"
27 | uname
28 | arch
29 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!"
30 | false
31 |
32 |
33 |
34 |
35 | $INSTALL_DIR
36 | $INSTALL_DIR
37 |
38 |
39 |
40 |
41 | Downloads the precompiled 64 bit Linux or Mac OS X BLAST+ binaries from the NCBI,
42 | which is faster than performing a local compilation, avoids any issues with build
43 | dependencies, and is more reproducible between installations as there is no
44 | variability from the compiler or library versions.
45 |
46 | Note that NCBI do not provide any 32 bit binaries anymore.
47 |
48 | For more details, see:
49 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_7_0/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_7_0
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_7_0/
5 | description: NCBI BLAST+ 2.7.0 (binaries only; deprecated)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency
8 | of the Galaxy wrappers for NCBI BLAST+ and any other tools which
9 | call the BLAST+ binaries internally.
10 |
11 | Note that for compatibility with BioConda, internally this is now
12 | called "blast" rather than "blast+" as in the older Galaxy BLAST+
13 | packages.
14 |
15 | Note that the NCBI withdrew BLAST+ 2.7.0 in favor of 2.7.1, released
16 | shortly afterwards.
17 | categories:
18 | - Tool Dependency Packages
19 | type: tool_dependency_definition
20 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_7_0/tool_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.7.0_darwin_x64.tar.gz
10 |
11 | bin
12 | $INSTALL_DIR
13 |
14 |
15 |
16 |
17 |
18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.7.0_linux_x64.tar.gz
19 |
20 | bin
21 | $INSTALL_DIR
22 |
23 |
24 |
25 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not supported."
26 | echo "Your machine details (the output from 'uname' and 'arch'):"
27 | uname
28 | arch
29 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!"
30 | false
31 |
32 |
33 |
34 |
35 | $INSTALL_DIR
36 | $INSTALL_DIR
37 |
38 |
39 |
40 |
41 | Downloads the precompiled 64 bit Linux or Mac OS X BLAST+ binaries from the NCBI,
42 | which is faster than performing a local compilation, avoids any issues with build
43 | dependencies, and is more reproducible between installations as there is no
44 | variability from the compiler or library versions.
45 |
46 | Note that NCBI do not provide any 32 bit binaries anymore.
47 |
48 | For more details, see:
49 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_7_1/.shed.yml:
--------------------------------------------------------------------------------
1 | name: package_blast_plus_2_7_1
2 | owner: iuc
3 | homepage_url: https://blast.ncbi.nlm.nih.gov/
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_7_1/
5 | description: NCBI BLAST+ 2.7.1 (binaries only)
6 | long_description: |
7 | This Tool Shed package is intended to be used as a dependency
8 | of the Galaxy wrappers for NCBI BLAST+ and any other tools which
9 | call the BLAST+ binaries internally.
10 |
11 | Note that for compatibility with BioConda, internally this is now
12 | called "blast" rather than "blast+" as in the older Galaxy BLAST+
13 | packages.
14 | categories:
15 | - Tool Dependency Packages
16 | type: tool_dependency_definition
17 |
--------------------------------------------------------------------------------
/packages/package_blast_plus_2_7_1/tool_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.7.1_darwin_x64.tar.gz
10 |
11 | bin
12 | $INSTALL_DIR
13 |
14 |
15 |
16 |
17 |
18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.7.1_linux_x64.tar.gz
19 |
20 | bin
21 | $INSTALL_DIR
22 |
23 |
24 |
25 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not supported."
26 | echo "Your machine details (the output from 'uname' and 'arch'):"
27 | uname
28 | arch
29 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!"
30 | false
31 |
32 |
33 |
34 |
35 | $INSTALL_DIR
36 | $INSTALL_DIR
37 |
38 |
39 |
40 |
41 | Downloads the precompiled 64 bit Linux or Mac OS X BLAST+ binaries from the NCBI,
42 | which is faster than performing a local compilation, avoids any issues with build
43 | dependencies, and is more reproducible between installations as there is no
44 | variability from the compiler or library versions.
45 |
46 | Note that NCBI do not provide any 32 bit binaries anymore.
47 |
48 | For more details, see:
49 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/test-data/README.rst:
--------------------------------------------------------------------------------
1 | Galaxy Tool test data
2 | =====================
3 |
4 | This folder contains sample files used in the functional tests of the
5 | Galaxy tools defined elsewhere in this repository.
6 |
7 | For general information, see the `main README file <../README.rst>`_.
8 |
--------------------------------------------------------------------------------
/test-data/all_fasta.loc:
--------------------------------------------------------------------------------
1 | #
2 | #
3 | three_human_mRNA thmRNA Three-Human-mRANs ${__HERE__}/three_human_mRNA.fasta
4 |
--------------------------------------------------------------------------------
/test-data/blastdb.loc:
--------------------------------------------------------------------------------
1 | # This is a test file distributed with the Galaxy BLAST+ wrapper for
2 | # defining a list of nucleotide BLAST databases used in functional
3 | # tests for blastn etc.
4 | #
5 | # See the file tool-data/blastdb.loc.sample for more information.
6 | #
7 | three_human_mRNA Three Human mRNAs ${__HERE__}/three_human_mRNA.fasta
8 | rhodopsin_nucs Rhodopsin nucleotides ${__HERE__}/rhodopsin_nucs.fasta
9 |
--------------------------------------------------------------------------------
/test-data/blastdb_d.loc:
--------------------------------------------------------------------------------
1 | # This is a test file distributed with the Galaxy BLAST+ wrapper for
2 | # defining a list of protein domain BLAST databases used in functional
3 | # tests of rpsblast etc.
4 | #
5 | # See the file tool-data/blastdb_d.loc.sample for more information.
6 | #
7 | cd00003_and_cd00008 Domains CD00003 (PNPsynthase) and CD00008 (PIN_53EXO-like) ${__HERE__}/cd00003_and_cd00008
8 |
--------------------------------------------------------------------------------
/test-data/blastdb_p.loc:
--------------------------------------------------------------------------------
1 | # This is a test file distributed with the Galaxy BLAST+ wrapper for
2 | # defining a list of protein BLAST databases used in functional tests
3 | # for blastp etc.
4 | #
5 | # See the file tool-data/blastdb_p.loc.sample for more information.
6 | #
7 | four_human_proteins Four Human Proteins (no taxid) ${__HERE__}/four_human_proteins.fasta
8 | four_human_proteins_taxid Four Human Proteins (with taxid) ${__HERE__}/four_human_proteins_taxid.fasta
9 |
--------------------------------------------------------------------------------
/test-data/blastn_arabidopsis.extended.tabular:
--------------------------------------------------------------------------------
1 | chunk_of_plant chrIII 100.000 630 0 0 1 630 4341 4970 0.0 1164 chrIII 630 630 630 0 100.00 1 1 GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT 630 23459830 gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence
2 |
--------------------------------------------------------------------------------
/test-data/blastn_arabidopsis.standard.tabular:
--------------------------------------------------------------------------------
1 | chunk_of_plant chrIII 100.000 630 0 0 1 630 4341 4970 0.0 1164
2 |
--------------------------------------------------------------------------------
/test-data/blastn_chimera_vs_rhodopsin_db.tabular:
--------------------------------------------------------------------------------
1 | chimera NM_001009242.1 92.308 1014 78 0 8881 9894 34 1047 0.0 1441
2 | chimera GQ290312.1 91.527 956 81 0 8881 9836 4 959 0.0 1317
3 | chimera AB062417.1 87.586 1015 124 2 8881 9894 34 1047 0.0 1175
4 | chimera GQ290303.1 91.515 330 28 0 8881 9210 4 333 8.28e-130 455
5 | chimera GQ290303.1 91.358 243 19 2 9542 9783 3127 3368 1.46e-92 331
6 | chimera GQ290303.1 94.220 173 10 0 9208 9380 1410 1582 1.50e-72 265
7 | chimera GQ290303.1 92.941 170 12 0 9375 9544 2854 3023 1.51e-67 248
8 | chimera GQ290303.1 95.588 68 3 0 9781 9848 4222 4289 7.43e-26 110
9 |
--------------------------------------------------------------------------------
/test-data/blastn_chimera_vs_rhodopsin_db_max_hsps1.tabular:
--------------------------------------------------------------------------------
1 | chimera NM_001009242.1 92.308 1014 78 0 8881 9894 34 1047 0.0 1441
2 | chimera GQ290312.1 91.527 956 81 0 8881 9836 4 959 0.0 1317
3 | chimera AB062417.1 87.586 1015 124 2 8881 9894 34 1047 0.0 1175
4 | chimera GQ290303.1 91.515 330 28 0 8881 9210 4 333 8.28e-130 455
5 |
--------------------------------------------------------------------------------
/test-data/blastn_chimera_vs_three_human_and_rhodopsin_db.tabular:
--------------------------------------------------------------------------------
1 | chimera ENA|AB011145|AB011145.1 100.000 4560 0 0 1 4560 121 4680 0.0 8421
2 | chimera ENA|M10051|M10051.1 99.931 4331 3 0 4560 8890 60 4390 0.0 7982
3 | chimera ENA|BC112106|BC112106.1 100.000 1093 0 0 8881 9973 121 1213 0.0 2019
4 | chimera NM_001009242.1 92.308 1014 78 0 8881 9894 34 1047 0.0 1441
5 | chimera GQ290312.1 91.527 956 81 0 8881 9836 4 959 0.0 1317
6 | chimera AB062417.1 87.586 1015 124 2 8881 9894 34 1047 0.0 1175
7 | chimera GQ290303.1 91.515 330 28 0 8881 9210 4 333 1.70e-129 455
8 | chimera GQ290303.1 91.358 243 19 2 9542 9783 3127 3368 2.98e-92 331
9 | chimera GQ290303.1 94.220 173 10 0 9208 9380 1410 1582 3.07e-72 265
10 | chimera GQ290303.1 92.941 170 12 0 9375 9544 2854 3023 3.09e-67 248
11 | chimera GQ290303.1 95.588 68 3 0 9781 9848 4222 4289 1.52e-25 110
12 |
--------------------------------------------------------------------------------
/test-data/blastn_chimera_vs_three_human_db.tabular:
--------------------------------------------------------------------------------
1 | chimera ENA|AB011145|AB011145.1 100.000 4560 0 0 1 4560 121 4680 0.0 8421
2 | chimera ENA|M10051|M10051.1 99.931 4331 3 0 4560 8890 60 4390 0.0 7982
3 | chimera ENA|BC112106|BC112106.1 100.000 1093 0 0 8881 9973 121 1213 0.0 2019
4 |
--------------------------------------------------------------------------------
/test-data/blastn_chimera_vs_three_human_max1.tabular:
--------------------------------------------------------------------------------
1 | chimera ENA|AB011145|AB011145.1 100.000 4560 0 0 1 4560 121 4680 0.0 8421
2 |
--------------------------------------------------------------------------------
/test-data/blastn_rhodopsin_vs_three_human.columns.tabular:
--------------------------------------------------------------------------------
1 | gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.073 1047 1213
2 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.592 4301 1213
3 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.358 4301 1213
4 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.220 4301 1213
5 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.941 4301 1213
6 | gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.554 983 1213
7 | gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.500 1047 1213
8 |
--------------------------------------------------------------------------------
/test-data/blastn_rhodopsin_vs_three_human.tabular:
--------------------------------------------------------------------------------
1 | gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.073 1047 83 0 1 1047 88 1134 0.0 1474
2 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.592 333 28 0 1 333 118 450 8.03e-132 460
3 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.358 243 19 2 3127 3368 782 1023 6.57e-93 331
4 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.220 173 10 0 1410 1582 448 620 6.76e-73 265
5 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.941 170 12 0 2854 3023 615 784 6.81e-68 248
6 | gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.554 959 81 0 1 959 118 1076 0.0 1323
7 | gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.500 1048 129 2 1 1047 88 1134 0.0 1208
8 |
--------------------------------------------------------------------------------
/test-data/blastn_rhodopsin_vs_three_human_converted.tabular:
--------------------------------------------------------------------------------
1 | gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.073 1047 83 0 1 1047 88 1134 0.0 1474
2 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.592 333 28 0 1 333 118 450 8e-132 460
3 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.358 243 19 2 3127 3368 782 1023 7e-93 331
4 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.220 173 10 0 1410 1582 448 620 7e-73 265
5 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.941 170 12 0 2854 3023 615 784 7e-68 248
6 | gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.554 959 81 0 1 959 118 1076 0.0 1323
7 | gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.500 1048 129 2 1 1047 88 1134 0.0 1208
8 |
--------------------------------------------------------------------------------
/test-data/blastp_four_human_vs_rhodopsin.tabular:
--------------------------------------------------------------------------------
1 | P08100 NP_001009242.1 96.552 348 12 0 1 348 1 348 0.0 701
2 | P08100 0811197A 93.103 348 23 1 1 348 1 347 0.0 673
3 | P08100 ADB45242.1 94.817 328 17 0 11 338 1 328 0.0 653
4 | P08100 ADB45229.1 94.817 328 17 0 11 338 1 328 0.0 631
5 | P08100 P56514.1 84.795 342 51 1 1 341 1 342 0.0 619
6 | P08100 BAB21486.1 82.164 342 60 1 1 341 1 342 0.0 599
7 |
--------------------------------------------------------------------------------
/test-data/blastp_four_human_vs_rhodopsin_converted.tabular:
--------------------------------------------------------------------------------
1 | P08100 gi|57163783|ref|NP_001009242.1| 96.552 348 12 0 1 348 1 348 0.0 701
2 | P08100 gi|223523|prf||0811197A 93.103 348 23 1 1 348 1 347 0.0 673
3 | P08100 gi|283855846|gb|ADB45242.1| 94.817 328 17 0 11 338 1 328 0.0 653
4 | P08100 gi|283855823|gb|ADB45229.1| 94.817 328 17 0 11 338 1 328 0.0 631
5 | P08100 gi|3024260|sp|P56514.1|OPSD_BUFBU 84.795 342 51 1 1 341 1 342 0.0 619
6 | P08100 gi|12583665|dbj|BAB21486.1| 82.164 342 60 1 1 341 1 342 0.0 599
7 |
--------------------------------------------------------------------------------
/test-data/blastp_four_human_vs_rhodopsin_ext.tabular:
--------------------------------------------------------------------------------
1 | P08100 NP_001009242.1 96.552 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 N/A
2 | P08100 0811197A 93.103 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 N/A
3 | P08100 ADB45242.1 94.817 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A
4 | P08100 ADB45229.1 94.817 328 17 0 11 338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A
5 | P08100 P56514.1 84.795 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 N/A
6 | P08100 BAB21486.1 82.164 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 N/A
7 |
--------------------------------------------------------------------------------
/test-data/blastp_four_human_vs_rhodopsin_top3.tabular:
--------------------------------------------------------------------------------
1 | #Query BLAST hit 1 BLAST hit 2 BLAST hit 3
2 | Q9BS26
3 | Q9NSY1
4 | P06213
5 | P08100 gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|223523|prf||0811197A rhodopsin [Bos taurus] gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
6 |
--------------------------------------------------------------------------------
/test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular:
--------------------------------------------------------------------------------
1 | #Query BLAST hit 1 BLAST hit 2 BLAST hit 3
2 | P08100 gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|223523|prf||0811197A rhodopsin [Bos taurus] gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
3 |
--------------------------------------------------------------------------------
/test-data/blastp_human_vs_pdb_seg_no_converted_std.tabular:
--------------------------------------------------------------------------------
1 | sp|Q9BS26|ERP44_HUMAN gi|193885198|pdb|2R2J|A 97.113 381 11 0 26 406 2 382 0.0 768
2 | sp|Q9BS26|ERP44_HUMAN gi|88192228|pdb|2B5E|A 25.172 290 193 8 25 306 10 283 4e-20 95.1
3 | sp|Q9NSY1|BMP2K_HUMAN gi|73536291|pdb|2BUJ|A 29.391 279 182 8 40 308 21 294 1e-22 105
4 | sp|Q9NSY1|BMP2K_HUMAN gi|270346335|pdb|2WQM|A 27.206 272 166 12 53 311 36 288 6e-17 86.3
5 | sp|P06213|INSR_HUMAN gi|116667097|pdb|2DTG|E 95.905 928 7 2 28 955 1 897 0.0 1846
6 | sp|P06213|INSR_HUMAN gi|114794482|pdb|2HR7|A 99.588 485 2 0 28 512 1 485 0.0 1016
7 | sp|P08100|OPSD_HUMAN gi|16975387|pdb|1JFP|A 93.391 348 23 0 1 348 1 348 0.0 681
8 | sp|P08100|OPSD_HUMAN gi|195927458|pdb|3C9M|A 93.103 348 24 0 1 348 1 348 0.0 674
9 |
--------------------------------------------------------------------------------
/test-data/blastp_rhodopsin_adv_vs_four_human.tabular:
--------------------------------------------------------------------------------
1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 348 1 348 0.0 701
2 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.333 354 53 2 1 354 1 348 0.0 605
3 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 0.0 630
4 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 0.0 630
5 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.103 348 23 1 1 347 1 348 0.0 651
6 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.089 349 65 1 1 349 1 348 0.0 587
7 |
--------------------------------------------------------------------------------
/test-data/blastp_rhodopsin_peptides_vs_four_human.tabular:
--------------------------------------------------------------------------------
1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 100.000 16 0 0 1 16 95 110 4.51e-14 52.4
2 | gi|57163783|ref|NP_001009242.1| sp|Q9NSY1|BMP2K_HUMAN 83.333 6 1 0 4 9 1107 1112 0.27 16.3
3 | gi|57163783|ref|NP_001009242.1| sp|Q9NSY1|BMP2K_HUMAN 100.000 2 0 0 11 12 266 267 53 10.0
4 | gi|57163783|ref|NP_001009242.1| sp|Q9BS26|ERP44_HUMAN 27.778 18 0 1 9 13 347 364 25 10.8
5 | gi|57163783|ref|NP_001009242.1| sp|P06213|INSR_HUMAN 100.000 2 0 0 14 15 816 817 95 9.1
6 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 67.857 28 8 1 1 28 319 345 1.31e-13 54.1
7 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 85.714 7 1 0 8 14 625 631 0.004 23.5
8 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 100.000 4 0 0 9 12 265 268 1.0 16.3
9 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 50.000 10 5 0 15 24 343 352 7.0 13.4
10 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 40.000 20 6 2 15 31 376 392 38 11.2
11 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 57.143 7 1 1 20 24 942 948 74 10.4
12 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 100.000 2 0 0 25 26 343 344 315 8.3
13 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P06213|INSR_HUMAN 75.000 8 0 1 27 32 630 637 1.0 16.3
14 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P06213|INSR_HUMAN 47.059 17 3 2 18 31 778 791 8.4 13.4
15 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P06213|INSR_HUMAN 66.667 6 0 1 22 27 745 748 101 10.0
16 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 93.333 15 1 0 1 15 11 25 4.81e-11 43.9
17 | gi|283855846|gb|ADB45242.1| sp|Q9NSY1|BMP2K_HUMAN 62.500 8 3 0 1 8 957 964 4.2 12.9
18 | gi|283855846|gb|ADB45242.1| sp|Q9NSY1|BMP2K_HUMAN 37.500 16 10 0 1 16 681 696 35 10.4
19 | gi|283855846|gb|ADB45242.1| sp|Q9NSY1|BMP2K_HUMAN 66.667 3 1 0 13 15 958 960 95 9.1
20 | gi|283855846|gb|ADB45242.1| sp|P06213|INSR_HUMAN 45.455 22 6 2 1 16 774 795 12 11.7
21 | gi|283855846|gb|ADB45242.1| sp|P06213|INSR_HUMAN 100.000 2 0 0 15 16 1357 1358 38 10.4
22 | gi|283855846|gb|ADB45242.1| sp|P06213|INSR_HUMAN 100.000 2 0 0 9 10 558 559 85 9.1
23 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 97.778 45 1 0 1 45 11 55 3.28e-27 95.6
24 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 100.000 2 0 0 40 41 328 329 1001 7.4
25 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 31.818 22 13 1 20 41 1011 1030 4.5 14.6
26 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 46.667 15 8 0 25 39 1273 1287 5.1 14.6
27 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 40.000 15 9 0 8 22 142 156 30 12.1
28 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 23.529 17 13 0 2 18 688 704 52 11.2
29 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 100.000 4 0 0 37 40 880 883 68 10.8
30 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 57.143 7 3 0 4 10 553 559 722 7.9
31 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 100.000 2 0 0 22 23 752 753 1254 7.0
32 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 62.500 8 3 0 1 8 957 964 7.9 13.8
33 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 55.556 9 4 0 20 28 564 572 17 12.9
34 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 50.000 8 2 1 8 15 955 960 51 11.2
35 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 100.000 2 0 0 23 24 152 153 171 9.5
36 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 100.000 2 0 0 31 32 347 348 517 8.3
37 | gi|283855823|gb|ADB45229.1| sp|Q9BS26|ERP44_HUMAN 100.000 2 0 0 44 45 294 295 152 10.0
38 | gi|283855823|gb|ADB45229.1| sp|Q9BS26|ERP44_HUMAN 100.000 2 0 0 21 22 390 391 448 8.3
39 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.333 30 2 0 1 30 1 30 5.63e-16 60.4
40 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 61.538 13 1 3 2 11 933 944 1.6 15.5
41 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 57.143 7 3 0 10 16 304 310 77 10.4
42 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 100.000 2 0 0 19 20 558 559 172 9.1
43 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 100.000 2 0 0 14 15 553 554 173 9.1
44 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 100.000 2 0 0 16 17 487 488 193 9.1
45 | gi|223523|prf||0811197A sp|Q9NSY1|BMP2K_HUMAN 55.556 9 4 0 10 18 956 964 9.4 12.9
46 | gi|223523|prf||0811197A sp|Q9NSY1|BMP2K_HUMAN 50.000 4 2 0 23 26 958 961 191 9.1
47 | gi|223523|prf||0811197A sp|Q9BS26|ERP44_HUMAN 100.000 2 0 0 12 13 262 263 73 10.4
48 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 100.000 8 0 0 1 8 1 8 2.14e-06 28.6
49 |
--------------------------------------------------------------------------------
/test-data/blastp_rhodopsin_vs_four_human.tabular:
--------------------------------------------------------------------------------
1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 348 1 348 0.0 701
2 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.333 354 53 2 1 354 1 348 0.0 605
3 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 0.0 630
4 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 0.0 630
5 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.103 348 23 1 1 347 1 348 0.0 651
6 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.089 349 65 1 1 349 1 348 0.0 587
7 |
--------------------------------------------------------------------------------
/test-data/blastp_rhodopsin_vs_four_human_db.taxid.tabular:
--------------------------------------------------------------------------------
1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.55 9606 Homo sapiens human Eukaryota primates
2 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.33 9606 Homo sapiens human Eukaryota primates
3 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.82 9606 Homo sapiens human Eukaryota primates
4 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.82 9606 Homo sapiens human Eukaryota primates
5 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.10 9606 Homo sapiens human Eukaryota primates
6 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.09 9606 Homo sapiens human Eukaryota primates
7 |
--------------------------------------------------------------------------------
/test-data/blastp_sample.blast2go.tabular:
--------------------------------------------------------------------------------
1 | Sample GO:0005488 tail tape measure protein
2 |
--------------------------------------------------------------------------------
/test-data/blastp_sample_converted.tabular:
--------------------------------------------------------------------------------
1 | Sample gi|119953746|ref|YP_950551.1| 96.899 516 16 0 1 516 27 542 0.0 949
2 | Sample gi|148986157|ref|ZP_01819143.1| 41.270 252 115 3 49 300 679 897 2e-41 174
3 | Sample gi|77411259|ref|ZP_00787609.1| 40.996 261 143 2 50 310 655 904 8e-39 165
4 | Sample gi|76786754|ref|YP_329383.1| 39.464 261 147 2 50 310 655 904 7e-37 159
5 | Sample gi|153811333|ref|ZP_01964001.1| 29.982 557 277 18 3 516 573 1059 2e-36 157
6 | Sample gi|56962696|ref|YP_174422.1| 28.792 389 228 8 48 433 123 465 3e-33 146
7 | Sample gi|50914476|ref|YP_060448.1| 43.820 178 100 0 50 227 655 832 5e-33 146
8 | Sample gi|29374987|ref|NP_814140.1| 25.463 432 244 8 73 482 545 920 7e-31 139
9 | Sample gi|163941333|ref|YP_001646217.1| 27.189 434 287 7 61 480 142 560 8e-31 138
10 |
--------------------------------------------------------------------------------
/test-data/blastx_rhodopsin_adv_vs_four_human.tabular:
--------------------------------------------------------------------------------
1 | gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 1044 1 348 0.0 639
2 | gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.241 332 49 0 42 1037 1 332 0.0 551
3 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.396 111 4 0 1 333 11 121 3.78e-67 220
4 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.308 65 5 0 3174 3368 248 312 4.13e-35 127
5 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.525 59 5 0 2855 3031 177 235 3.99e-33 121
6 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.220 59 4 0 1404 1580 119 177 7.46e-25 97.1
7 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.462 26 3 0 4222 4299 312 337 1.13e-11 57.0
8 | gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.092 326 16 0 1 978 11 336 0.0 589
9 | gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.391 348 23 0 1 1044 1 348 0.0 619
10 | gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.682 333 61 0 23 1021 1 333 0.0 532
11 |
--------------------------------------------------------------------------------
/test-data/blastx_rhodopsin_vs_four_human.tabular:
--------------------------------------------------------------------------------
1 | gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 1044 1 348 0.0 639
2 | gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.241 332 49 0 42 1037 1 332 0.0 551
3 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.396 111 4 0 1 333 11 121 3.78e-67 220
4 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.308 65 5 0 3174 3368 248 312 4.13e-35 127
5 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.525 59 5 0 2855 3031 177 235 3.99e-33 121
6 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.220 59 4 0 1404 1580 119 177 7.46e-25 97.1
7 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.462 26 3 0 4222 4299 312 337 1.13e-11 57.0
8 | gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.092 326 16 0 1 978 11 336 0.0 589
9 | gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.391 348 23 0 1 1044 1 348 0.0 619
10 | gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.682 333 61 0 23 1021 1 333 0.0 532
11 |
--------------------------------------------------------------------------------
/test-data/blastx_rhodopsin_vs_four_human_converted.tabular:
--------------------------------------------------------------------------------
1 | gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 1044 1 348 0.0 639
2 | gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.241 332 49 0 42 1037 1 332 0.0 551
3 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.396 111 4 0 1 333 11 121 4e-67 220
4 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.308 65 5 0 3174 3368 248 312 4e-35 127
5 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.525 59 5 0 2855 3031 177 235 4e-33 121
6 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.220 59 4 0 1404 1580 119 177 7e-25 97.1
7 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.462 26 3 0 4222 4299 312 337 1e-11 57.0
8 | gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.092 326 16 0 1 978 11 336 0.0 589
9 | gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.391 348 23 0 1 1044 1 348 0.0 619
10 | gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.682 333 61 0 23 1021 1 333 0.0 532
11 |
--------------------------------------------------------------------------------
/test-data/blastx_sample_converted.tabular:
--------------------------------------------------------------------------------
1 | phage_suis gi|119953746|ref|YP_950551.1| 100.000 518 0 0 336 1889 25 542 0.0 988
2 | phage_suis gi|289551554|ref|YP_003472458.1| 32.946 516 280 6 342 1889 657 1106 6e-66 256
3 | phage_suis gi|223044325|ref|ZP_03614360.1| 30.220 546 327 7 393 1889 655 1193 1e-64 252
4 | phage_suis gi|223044325|ref|ZP_03614360.1| 19.882 508 328 9 384 1796 844 1309 6e-28 130
5 | phage_suis gi|268611153|ref|ZP_06144880.1| 28.638 639 371 11 78 1847 440 1042 1e-60 239
6 | phage_suis gi|268611153|ref|ZP_06144880.1| 23.356 441 286 7 543 1856 547 938 4e-31 141
7 | phage_suis gi|268611153|ref|ZP_06144880.1| 25.272 459 266 11 522 1844 722 1121 8e-31 140
8 | phage_suis gi|268611153|ref|ZP_06144880.1| 24.631 406 267 8 501 1694 770 1144 3e-23 115
9 | phage_suis gi|268611153|ref|ZP_06144880.1| 27.801 241 145 3 492 1148 811 1044 6e-16 90.9
10 | phage_suis gi|268611153|ref|ZP_06144880.1| 19.763 253 168 6 1158 1883 547 775 3e-04 52.0
11 | phage_suis gi|268610688|ref|ZP_06144415.1| 28.951 639 369 11 78 1847 440 1042 3e-59 234
12 | phage_suis gi|268610688|ref|ZP_06144415.1| 24.644 491 316 9 501 1856 770 1245 4e-39 167
13 | phage_suis gi|268610688|ref|ZP_06144415.1| 23.791 517 319 9 492 1832 811 1322 3e-37 161
14 | phage_suis gi|268610688|ref|ZP_06144415.1| 21.907 493 322 11 510 1859 905 1377 1e-25 123
15 | phage_suis gi|268610688|ref|ZP_06144415.1| 20.548 292 197 5 486 1343 1138 1400 4e-10 71.6
16 | phage_suis gi|268610688|ref|ZP_06144415.1| 21.408 341 225 10 894 1883 467 775 8e-05 53.9
17 | phage_suis gi|153811333|ref|ZP_01964001.1| 28.341 621 364 16 108 1847 493 1073 8e-55 219
18 | phage_suis gi|153811333|ref|ZP_01964001.1| 29.673 428 250 9 519 1760 709 1099 2e-47 195
19 | phage_suis gi|153811333|ref|ZP_01964001.1| 29.412 391 226 7 498 1640 746 1096 1e-39 169
20 | phage_suis gi|153811333|ref|ZP_01964001.1| 26.493 268 174 3 492 1256 854 1111 3e-24 118
21 | phage_suis gi|153811333|ref|ZP_01964001.1| 27.124 306 198 4 510 1385 816 1110 1e-23 116
22 | phage_suis gi|262113750|emb|CAR95417.1| 38.462 286 169 1 384 1241 540 818 2e-54 218
23 | phage_suis gi|262113750|emb|CAR95417.1| 29.684 411 271 7 657 1871 460 858 3e-40 171
24 | phage_suis gi|77411259|ref|ZP_00787609.1| 37.193 285 172 1 387 1241 628 905 2e-53 215
25 | phage_suis gi|77411259|ref|ZP_00787609.1| 28.010 407 281 6 660 1871 548 945 1e-40 172
26 | phage_suis gi|77411259|ref|ZP_00787609.1| 22.817 355 207 7 978 1877 540 882 9e-14 83.6
27 | phage_suis gi|76786754|ref|YP_329383.1| 36.842 285 173 1 387 1241 628 905 8e-53 213
28 | phage_suis gi|76786754|ref|YP_329383.1| 27.273 407 284 6 660 1871 548 945 3e-38 164
29 | phage_suis gi|76786754|ref|YP_329383.1| 24.735 283 194 2 543 1391 637 900 3e-23 115
30 | phage_suis gi|76786754|ref|YP_329383.1| 22.910 323 204 6 978 1847 540 850 2e-13 82.4
31 | phage_suis gi|50914476|ref|YP_060448.1| 35.862 290 179 1 372 1241 623 905 4e-51 207
32 | phage_suis gi|50914476|ref|YP_060448.1| 27.007 411 280 7 660 1871 548 945 2e-35 155
33 | phage_suis gi|50914476|ref|YP_060448.1| 22.997 387 269 5 543 1673 637 1004 3e-25 121
34 |
--------------------------------------------------------------------------------
/test-data/cd00003_and_cd00008.aux:
--------------------------------------------------------------------------------
1 | BLOSUM62
2 | 11
3 | 1
4 | 0.000000e+00
5 | 0.000000e+00
6 | 0
7 | 0
8 | 100.000000
9 | 234
10 | 6.955024e-02
11 | 160
12 | 4.862535e-02
13 |
--------------------------------------------------------------------------------
/test-data/cd00003_and_cd00008.freq:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.freq
--------------------------------------------------------------------------------
/test-data/cd00003_and_cd00008.loo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.loo
--------------------------------------------------------------------------------
/test-data/cd00003_and_cd00008.phr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.phr
--------------------------------------------------------------------------------
/test-data/cd00003_and_cd00008.pin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.pin
--------------------------------------------------------------------------------
/test-data/cd00003_and_cd00008.psd:
--------------------------------------------------------------------------------
1 | gnl|cdd|1890191
2 | gnl|cdd|2379770
3 |
--------------------------------------------------------------------------------
/test-data/cd00003_and_cd00008.psi:
--------------------------------------------------------------------------------
1 | " @ " 4 Egnl|cdd|1890191
--------------------------------------------------------------------------------
/test-data/cd00003_and_cd00008.psq:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/test-data/cd00003_and_cd00008.rps:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.rps
--------------------------------------------------------------------------------
/test-data/chimera.fasta.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/chimera.fasta.gz
--------------------------------------------------------------------------------
/test-data/convert2blastmask_four_human_masked.maskinfo-asn1:
--------------------------------------------------------------------------------
1 | Blast-db-mask-info ::= {
2 | algo-id 0,
3 | algo-program seg,
4 | algo-options "window=12; locut=2.2; hicut=2.5",
5 | masks {
6 | masks {
7 | int {
8 | from 6,
9 | to 18,
10 | id swissprot {
11 | name "ERP44_HUMAN",
12 | accession "Q9BS26",
13 | release "reviewed"
14 | }
15 | },
16 | packed-int {
17 | {
18 | from 11,
19 | to 46,
20 | id swissprot {
21 | name "BMP2K_HUMAN",
22 | accession "Q9NSY1",
23 | release "reviewed"
24 | }
25 | },
26 | {
27 | from 325,
28 | to 332,
29 | id swissprot {
30 | name "BMP2K_HUMAN",
31 | accession "Q9NSY1",
32 | release "reviewed"
33 | }
34 | },
35 | {
36 | from 421,
37 | to 496,
38 | id swissprot {
39 | name "BMP2K_HUMAN",
40 | accession "Q9NSY1",
41 | release "reviewed"
42 | }
43 | },
44 | {
45 | from 501,
46 | to 516,
47 | id swissprot {
48 | name "BMP2K_HUMAN",
49 | accession "Q9NSY1",
50 | release "reviewed"
51 | }
52 | },
53 | {
54 | from 536,
55 | to 558,
56 | id swissprot {
57 | name "BMP2K_HUMAN",
58 | accession "Q9NSY1",
59 | release "reviewed"
60 | }
61 | },
62 | {
63 | from 636,
64 | to 648,
65 | id swissprot {
66 | name "BMP2K_HUMAN",
67 | accession "Q9NSY1",
68 | release "reviewed"
69 | }
70 | },
71 | {
72 | from 737,
73 | to 762,
74 | id swissprot {
75 | name "BMP2K_HUMAN",
76 | accession "Q9NSY1",
77 | release "reviewed"
78 | }
79 | },
80 | {
81 | from 789,
82 | to 806,
83 | id swissprot {
84 | name "BMP2K_HUMAN",
85 | accession "Q9NSY1",
86 | release "reviewed"
87 | }
88 | },
89 | {
90 | from 970,
91 | to 983,
92 | id swissprot {
93 | name "BMP2K_HUMAN",
94 | accession "Q9NSY1",
95 | release "reviewed"
96 | }
97 | },
98 | {
99 | from 999,
100 | to 1010,
101 | id swissprot {
102 | name "BMP2K_HUMAN",
103 | accession "Q9NSY1",
104 | release "reviewed"
105 | }
106 | }
107 | },
108 | packed-int {
109 | {
110 | from 3,
111 | to 26,
112 | id swissprot {
113 | name "INSR_HUMAN",
114 | accession "P06213",
115 | release "reviewed"
116 | }
117 | },
118 | {
119 | from 372,
120 | to 390,
121 | id swissprot {
122 | name "INSR_HUMAN",
123 | accession "P06213",
124 | release "reviewed"
125 | }
126 | },
127 | {
128 | from 766,
129 | to 791,
130 | id swissprot {
131 | name "INSR_HUMAN",
132 | accession "P06213",
133 | release "reviewed"
134 | }
135 | },
136 | {
137 | from 1312,
138 | to 1324,
139 | id swissprot {
140 | name "INSR_HUMAN",
141 | accession "P06213",
142 | release "reviewed"
143 | }
144 | }
145 | },
146 | int {
147 | from 230,
148 | to 246,
149 | id swissprot {
150 | name "OPSD_HUMAN",
151 | accession "P08100",
152 | release "reviewed"
153 | }
154 | }
155 | },
156 | more FALSE
157 | }
158 | }
159 |
--------------------------------------------------------------------------------
/test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary
--------------------------------------------------------------------------------
/test-data/deduplicate.nosortids.fasta:
--------------------------------------------------------------------------------
1 | >Quick;Brown;Fox;3;5 representing 5 records
2 | acgt
3 | >1 first entry
4 | act
5 | >2 The A-Team
6 | AAaa
7 | >4
8 | CCCC
9 | >6 last!
10 | GGGG
11 |
--------------------------------------------------------------------------------
/test-data/deduplicate.sortids.fasta:
--------------------------------------------------------------------------------
1 | >3;5;Brown;Fox;Quick representing 5 records
2 | acgt
3 | >1 first entry
4 | act
5 | >2 The A-Team
6 | AAaa
7 | >4
8 | CCCC
9 | >6 last!
10 | GGGG
11 |
--------------------------------------------------------------------------------
/test-data/deltablast_four_human_vs_rhodopsin.tabular:
--------------------------------------------------------------------------------
1 | P08100 NP_001009242.1 96.552 348 12 0 1 348 1 348 4.79e-146 406
2 | P08100 P56514.1 84.795 342 51 1 1 341 1 342 3.20e-128 361
3 | P08100 ADB45242.1 94.817 328 17 0 11 338 1 328 7.59e-130 364
4 | P08100 ADB45229.1 94.817 328 17 0 11 338 1 328 1.25e-122 346
5 | P08100 0811197A 93.103 348 23 1 1 348 1 347 4.22e-143 398
6 | P08100 BAB21486.1 82.164 342 60 1 1 341 1 342 9.21e-128 360
7 |
--------------------------------------------------------------------------------
/test-data/deltablast_four_human_vs_rhodopsin_ext.tabular:
--------------------------------------------------------------------------------
1 | P08100 NP_001009242.1 96.552 348 12 0 1 348 1 348 4.79e-146 406 gi|57163783|ref|NP_001009242.1| 1044 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 N/A
2 | P08100 P56514.1 84.795 342 51 1 1 341 1 342 3.20e-128 361 gi|3024260|sp|P56514.1|OPSD_BUFBU 927 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 N/A
3 | P08100 ADB45242.1 94.817 328 17 0 11 338 1 328 7.59e-130 364 gi|283855846|gb|ADB45242.1| 936 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A
4 | P08100 ADB45229.1 94.817 328 17 0 11 338 1 328 1.25e-122 346 gi|283855823|gb|ADB45229.1| 888 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A
5 | P08100 0811197A 93.103 348 23 1 1 348 1 347 4.22e-143 398 gi|223523|prf||0811197A 1025 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDY-TPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 N/A
6 | P08100 BAB21486.1 82.164 342 60 1 1 341 1 342 9.21e-128 360 gi|12583665|dbj|BAB21486.1| 924 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 N/A
7 |
--------------------------------------------------------------------------------
/test-data/deltablast_rhodopsin_vs_four_human.tabular:
--------------------------------------------------------------------------------
1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 348 1 348 1.71e-137 385
2 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.333 354 53 2 1 354 1 348 3.53e-130 367
3 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 7.52e-121 341
4 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 2.06e-121 343
5 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 92.816 348 24 1 1 347 1 348 1.05e-134 378
6 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.089 349 65 1 1 349 1 348 5.41e-127 358
7 |
--------------------------------------------------------------------------------
/test-data/duplicates.fasta:
--------------------------------------------------------------------------------
1 | >1 first entry
2 | act
3 | >2 The A-Team
4 | AAaa
5 | >3 not unique...
6 | ACgt
7 | >4
8 | CCCC
9 | >5 a duplicate
10 | acgt
11 | >6 last!
12 | GGGG
13 |
--------------------------------------------------------------------------------
/test-data/duplicates.fasta.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/duplicates.fasta.gz
--------------------------------------------------------------------------------
/test-data/duplicates.nr.fasta:
--------------------------------------------------------------------------------
1 | >1 first entry
2 | act
3 | >2 The A-Team
4 | AAaa
5 | >3;5 representing 2 records
6 | ACgt
7 | >4
8 | CCCC
9 | >6 last!
10 | GGGG
11 |
--------------------------------------------------------------------------------
/test-data/dustmasker_three_human.maskinfo-asn1:
--------------------------------------------------------------------------------
1 | Blast-db-mask-info ::= {
2 | algo-id 2,
3 | algo-program dust,
4 | algo-options "window=64; level=20; linker=1",
5 | masks {
6 | masks {
7 | packed-int {
8 | {
9 | from 1447,
10 | to 1495,
11 | id local id 1
12 | },
13 | {
14 | from 1540,
15 | to 1552,
16 | id local id 1
17 | },
18 | {
19 | from 1886,
20 | to 1892,
21 | id local id 1
22 | },
23 | {
24 | from 2278,
25 | to 2284,
26 | id local id 1
27 | },
28 | {
29 | from 4409,
30 | to 4415,
31 | id local id 1
32 | },
33 | {
34 | from 4635,
35 | to 4653,
36 | id local id 1
37 | },
38 | {
39 | from 4726,
40 | to 4734,
41 | id local id 1
42 | }
43 | },
44 | packed-int {
45 | {
46 | from 139,
47 | to 219,
48 | id local id 2
49 | },
50 | {
51 | from 4569,
52 | to 4584,
53 | id local id 2
54 | },
55 | {
56 | from 4621,
57 | to 4648,
58 | id local id 2
59 | }
60 | }
61 | },
62 | more FALSE
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/test-data/dustmasker_three_human.maskinfo-asn1-binary:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/dustmasker_three_human.maskinfo-asn1-binary
--------------------------------------------------------------------------------
/test-data/empty.fasta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/empty.fasta
--------------------------------------------------------------------------------
/test-data/empty_file.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/empty_file.dat
--------------------------------------------------------------------------------
/test-data/est_out.json:
--------------------------------------------------------------------------------
1 | {"data_tables": {"blastdb": [{"path": "est/est_a3aebb9941bff066cfbd40ebab14c3992f7aadabb64999f3e3b53d783c06f08033ba9066e5efd9380c6bbf9dcec808a281b7a6e9138087cc207c93f2e3ae3f67", "nucleotide_alias_name": "est", "name": "Database of GenBank+EMBL+DDBJ sequences from EST Divisions (12/05/2013 07:12:35)", "value": "est_a3aebb9941bff066cfbd40ebab14c3992f7aadabb64999f3e3b53d783c06f08033ba9066e5efd9380c6bbf9dcec808a281b7a6e9138087cc207c93f2e3ae3f67"}]}}
--------------------------------------------------------------------------------
/test-data/four_human_proteins.dbinfo.txt:
--------------------------------------------------------------------------------
1 | Database: Just 4 human proteins
2 | 4 sequences; 3,297 total residues
3 |
4 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta:
--------------------------------------------------------------------------------
1 | >sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
2 | MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFP
3 | NENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSK
4 | RNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
5 | CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPV
6 | IAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLL
7 | RDRDEL
8 | >sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
9 | MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEGGFSTVFLVRTHGGIRCALKR
10 | MYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFC
11 | DTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
12 | KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQVSYFAFKFAK
13 | KDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDTIGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLA
14 | PGEFGNHRPKGALRPGNGPEILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ
15 | QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQYQQAFFQQQMLAQHQPSQQQA
16 | SPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSVADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEEL
17 | LDREFDLLRSNRLEERASSDKNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
18 | QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPENLGHRPLLMDSEDEEEEEKH
19 | SSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSAQLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNL
20 | PQHRFPAAGLEQEEFDVFTKAPFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
21 | EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARRHKKVGRRDSQSSNEFLTISD
22 | SKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLSWHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKM
23 | DDFGAVPFTELVVQSITPHQSQQSQPVELDPFGAAPFPSKQ
24 | >sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
25 | MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPK
26 | LIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDW
27 | SRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
28 | GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYT
29 | MNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRS
30 | YALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
31 | RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVD
32 | IDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIIL
33 | KWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
34 | KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEK
35 | VVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGL
36 | IVLYEVSYRRYGDEELHLCVSRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
37 | PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSREKITLLRELGQGSFGMVYEG
38 | NARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRP
39 | EAENNPGRPPPTLQEMIQMAAEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
40 | RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDNCPERVTDLMRMCWQFNPKMR
41 | PTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEMEFEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIP
42 | YTHMNGGKKNGRILTLPRSNPS
43 | >sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
44 | MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
45 | VADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFT
46 | WVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES
47 | ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTT
48 | ICCGKNPLGDDEASATVSKTETSQVAPA
49 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta.log.txt:
--------------------------------------------------------------------------------
1 | New DB title: Just 4 human proteins
2 | Sequence type: Protein
3 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta.phd:
--------------------------------------------------------------------------------
1 | 11117184492
2 | 29249033410
3 | 36665887501
4 | 5392473183
5 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta.phi:
--------------------------------------------------------------------------------
1 | 3 @ 3 4 A11117184492
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta.phr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins.fasta.phr
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta.pin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins.fasta.pin
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta.pog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins.fasta.pog
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta.psd:
--------------------------------------------------------------------------------
1 | gnl|bl_ord_id|00
2 | gnl|bl_ord_id|11
3 | gnl|bl_ord_id|22
4 | gnl|bl_ord_id|33
5 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta.psi:
--------------------------------------------------------------------------------
1 | H @ H 4 Fgnl|bl_ord_id|00
--------------------------------------------------------------------------------
/test-data/four_human_proteins.fasta.psq:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins_masked.fasta:
--------------------------------------------------------------------------------
1 | >sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
2 | MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
3 | SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
4 | REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
5 | VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
6 | CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
7 | CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
8 | HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
9 | >sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
10 | MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
11 | GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
12 | DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
13 | LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
14 | KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
15 | DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
16 | IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
17 | Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
18 | qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
19 | qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
20 | ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
21 | KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
22 | QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
23 | NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
24 | QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
25 | APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
26 | EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
27 | HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
28 | WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
29 | SQQSQPVELDPFGAAPFPSKQ
30 | >sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
31 | MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
32 | QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
33 | VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
34 | ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
35 | GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
36 | CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
37 | TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
38 | EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
39 | RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
40 | NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
41 | DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
42 | RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
43 | KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
44 | pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
45 | SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
46 | SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
47 | PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
48 | EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
49 | FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
50 | AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
51 | RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
52 | CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
53 | fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
54 | PS
55 | >sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
56 | MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
57 | VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
58 | GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
59 | EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
60 | attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
61 | YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
62 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins_taxid.fasta.log.txt:
--------------------------------------------------------------------------------
1 | New DB title: Just 4 human proteins
2 | Sequence type: Protein
3 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins_taxid.fasta.phd:
--------------------------------------------------------------------------------
1 | 11117184492
2 | 29249033410
3 | 36665887501
4 | 5392473183
5 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins_taxid.fasta.phi:
--------------------------------------------------------------------------------
1 | 3 @ 3 4 A11117184492
--------------------------------------------------------------------------------
/test-data/four_human_proteins_taxid.fasta.phr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins_taxid.fasta.phr
--------------------------------------------------------------------------------
/test-data/four_human_proteins_taxid.fasta.pin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins_taxid.fasta.pin
--------------------------------------------------------------------------------
/test-data/four_human_proteins_taxid.fasta.pog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins_taxid.fasta.pog
--------------------------------------------------------------------------------
/test-data/four_human_proteins_taxid.fasta.psd:
--------------------------------------------------------------------------------
1 | gnl|bl_ord_id|00
2 | gnl|bl_ord_id|11
3 | gnl|bl_ord_id|22
4 | gnl|bl_ord_id|33
5 |
--------------------------------------------------------------------------------
/test-data/four_human_proteins_taxid.fasta.psi:
--------------------------------------------------------------------------------
1 | H @ H 4 Fgnl|bl_ord_id|00
--------------------------------------------------------------------------------
/test-data/four_human_proteins_taxid.fasta.psq:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
--------------------------------------------------------------------------------
/test-data/k12_ten_proteins.fasta:
--------------------------------------------------------------------------------
1 | >gi|16127995|ref|NP_414542.1| thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655]
2 | MKRISTTITTTITITTGNGAG
3 | >gi|16127996|ref|NP_414543.1| fused aspartokinase I and homoserine dehydrogenase I [Escherichia coli str. K-12 substr. MG1655]
4 | MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERI
5 | FAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEA
6 | RGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYS
7 | AAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPC
8 | LIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLIT
9 | QSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL
10 | ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSW
11 | LKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAV
12 | ADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELM
13 | KFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIE
14 | IEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFK
15 | VKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV
16 | >gi|16127997|ref|NP_414544.1| homoserine kinase [Escherichia coli str. K-12 substr. MG1655]
17 | MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWE
18 | RFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHY
19 | DNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGF
20 | IHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVA
21 | DWLGKNYLQNQEGFVHICRLDTAGARVLEN
22 | >gi|16127998|ref|NP_414545.1| threonine synthase [Escherichia coli str. K-12 substr. MG1655]
23 | MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQE
24 | ILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAA
25 | VAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNS
26 | ANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVP
27 | RFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTS
28 | EPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL
29 | RKLMMNHQ
30 | >gi|16127999|ref|NP_414546.1| hypothetical protein b0005 [Escherichia coli str. K-12 substr. MG1655]
31 | MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHL
32 | HGPPPPPRHHKKAPHDHHGGHGPGKHHR
33 | >gi|16128000|ref|NP_414547.1| peroxide resistance protein, lowers intracellular iron [Escherichia coli str. K-12 substr. MG1655]
34 | MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPPQISTLMRISDKLAGINAARFHDWQPD
35 | FTPANARQAILAFKGDVYTGLQAETFSEDDFDFAQQHLRMLSGLYGVLRPLDLMQPYRLEMGIRLENARG
36 | KDLYQFWGDIITNKLNEALAAQGDNVVINLASDEYFKSVKPKKLNAEIIKPVFLDEKNGKFKIISFYAKK
37 | ARGLMSRFIIENRLTKPEQLTGFNSEGYFFDEDSSSNGELVFKRYEQR
38 | >gi|16128001|ref|NP_414548.1| putative transporter [Escherichia coli str. K-12 substr. MG1655]
39 | MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFGKSLKNSIHPQPGGLTSFQSLCTSLAA
40 | RVGSGNLAGVALAITAGGPGAVFWMWVAAFIGMATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMR
41 | WMGVLFAVFLLIAYGIIFSGVQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPL
42 | MAIIWVLTSLVICVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPN
43 | AAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQLIQKAMRVLMGSWGAE
44 | FVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIGGTLLSLPLMWQLADIIMACMA
45 | ITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRYPDIGRQLSPDAWDDVSQE
46 | >gi|16128002|ref|NP_414549.1| transaldolase B [Escherichia coli str. K-12 substr. MG1655]
47 | MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQIPEYRKLIDDAVAWAKQQSNDRAQQI
48 | VDATDKLAVNIGLEILKLVPGRISTEVDARLSYDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGI
49 | RAAEQLEKEGINCNLTLLFSFAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIY
50 | QYYKEHGYETVVMGASFRNIGEILELAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESE
51 | FLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMIGDLL
52 | >gi|16128003|ref|NP_414550.1| molybdochelatase incorporating molybdenum into molybdopterin [Escherichia coli str. K-12 substr. MG1655]
53 | MNTLRIGLVSISDRASSGVYQDKGIPALEEWLTSALTTPFELETRLIPDEQAIIEQTLCELVDEMSCHLV
54 | LTTGGTGPARRDVTPDATLAVADREMPGFGEQMRQISLHFVPTAILSRQVGVIRKQALILNLPGQPKSIK
55 | ETLEGVKDAEGNVVVHGIFASVPYCIQLLEGPYVETAPEVVAAFRPKSARRDVSE
56 | >gi|16128004|ref|NP_414551.1| inner membrane protein, Grp1_Fun34_YaaH family [Escherichia coli str. K-12 substr. MG1655]
57 | MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIFYGGIAQIFAGLLEYKKGNTFGLTAFT
58 | SYGSFWLTLVAILLMPKLGLTDAPNAQFLGVYLGLWGVFTLFMFFGTLKGARVLQFVFFSLTVLFALLAI
59 | GNIAGNAAIIHFAGWIGLICGASAIYLAMGEVLNEQFGRTVLPIGESH
60 |
61 |
--------------------------------------------------------------------------------
/test-data/more_duplicates.fasta:
--------------------------------------------------------------------------------
1 | >Quick
2 | acgt
3 | >Brown
4 | ACGT
5 | >Fox
6 | ACGT
7 |
--------------------------------------------------------------------------------
/test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular:
--------------------------------------------------------------------------------
1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore
2 | ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1213 1047 86 100 1047 92.073 1515
3 |
--------------------------------------------------------------------------------
/test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular:
--------------------------------------------------------------------------------
1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore
2 | sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 348 348 100 100 348 96.552 701
3 |
--------------------------------------------------------------------------------
/test-data/rbh_blastp_k12.tabular:
--------------------------------------------------------------------------------
1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore
2 | gi|16127995|ref|NP_414542.1| gi|16127995|ref|NP_414542.1| 21 21 100 100 21 100.000 38.1
3 | gi|16127996|ref|NP_414543.1| gi|16127996|ref|NP_414543.1| 820 820 100 100 820 100.000 1687
4 | gi|16127997|ref|NP_414544.1| gi|16127997|ref|NP_414544.1| 310 310 100 100 310 100.000 642
5 | gi|16127998|ref|NP_414545.1| gi|16127998|ref|NP_414545.1| 428 428 100 100 428 100.000 882
6 | gi|16128000|ref|NP_414547.1| gi|16128000|ref|NP_414547.1| 258 258 100 100 258 100.000 531
7 | gi|16128001|ref|NP_414548.1| gi|16128001|ref|NP_414548.1| 476 476 100 100 476 100.000 959
8 | gi|16128002|ref|NP_414549.1| gi|16128002|ref|NP_414549.1| 317 317 100 100 317 100.000 648
9 | gi|16128003|ref|NP_414550.1| gi|16128003|ref|NP_414550.1| 195 195 100 100 195 100.000 397
10 | gi|16128004|ref|NP_414551.1| gi|16128004|ref|NP_414551.1| 188 188 100 100 188 100.000 365
11 |
--------------------------------------------------------------------------------
/test-data/rbh_blastp_k12_self.tabular:
--------------------------------------------------------------------------------
1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore
2 | gi|16127997|ref|NP_414544.1| NP_414544_near_copy 310 309 99 100 309 99.676 638
3 | NP_414544_near_copy gi|16127997|ref|NP_414544.1| 309 310 100 99 309 99.676 638
4 | NP_414546_near_copy_1 NP_414546_near_copy_2 99 100 99 98 98 100.000 197
5 | NP_414546_near_copy_2 NP_414546_near_copy_1 100 99 98 99 98 100.000 197
6 |
--------------------------------------------------------------------------------
/test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular:
--------------------------------------------------------------------------------
1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore
2 | gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 100 86 1047 92.073 1474
3 |
--------------------------------------------------------------------------------
/test-data/rbh_none.tabular:
--------------------------------------------------------------------------------
1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore
2 |
--------------------------------------------------------------------------------
/test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular:
--------------------------------------------------------------------------------
1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore
2 | gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 22 19 230 97.391 559
3 |
--------------------------------------------------------------------------------
/test-data/rhodopsin_bufo.fasta:
--------------------------------------------------------------------------------
1 | >U59921.1 Bufo bufo rhodopsin mRNA, complete cds
2 | TCTTTCTAGTTTGGGGGGGGGGACTTTAAAGAGCCGCCAATATGAACGGAACAGAAGGCCCAAACTTTTACATACCCATG
3 | TCCAACAAGACTGGGGTGGTGCGAAGCCCCTTTGAATACCCTCAGTATTACCTGGCAGAGCCATGGCAATATTCCATTCT
4 | GTGCGCGTACATGTTCCTGCTCATTCTACTTGGGTTCCCAATCAACTTCATGACCTTGTACGTCACCATCCAGCACAAGA
5 | AGCTCCGGACACCCTTAAACTATATCCTGCTGAATTTGGCCTTTGCCAACCACTTCATGGTCCTGTGTGGATTCACGGTG
6 | ACAATGTACTCCTCAATGAACGGATACTTCATCCTCGGAGCCACCGGTTGCTATGTTGAAGGCTTCTTCGCTACCCTTGG
7 | TGGTGAAATCGCCCTTTGGTCCCTGGTGGTCTTGGCCATTGAACGATACGTGGTCGTCTGTAAGCCCATGAGCAACTTCC
8 | GATTTAGTGAGAACCATGCCGTCATGGGCGTAGCGTTCACCTGGATAATGGCTTTGTCCTGTGCTGTTCCTCCACTCCTT
9 | GGATGGTCCAGGTACATCCCCGAGGGCATGCAGTGCTCCTGCGGAGTCGACTACTACACCCTGAAGCCCGAGGTCAACAA
10 | CGAGTCCTTCGTCATCTACATGTTCGTCGTCCACTTCACCATCCCCCTGATTATCATTTTCTTCTGCTATGGCCGCCTGG
11 | TGTGCACTGTGAAAGAGGCTGCAGCTCAACAGCAAGAGTCCGCCACCACCCAGAAGGCCGAGAAAGAGGTGACCAGGATG
12 | GTGATCATCATGGTGGTCTTCTTCCTTATCTGTTGGGTCCCCTACGCCTCTGTCGCTTTCTTCATCTTCAGCAATCAGGG
13 | CTCTGAGTTCGGCCCCATCTTCATGACCGTCCCAGCTTTCTTTGCCAAGAGTTCTTCCATCTACAACCCCGTCATCTACA
14 | TCATGCTCAACAAGCAGTTCCGTAACTGCATGATCACCACCCTGTGCTGCGGCAAGAATCCCTTTGGAGAAGACGATGCC
15 | TCCTCTGCCGCCACCTCCAAGACAGAGGCTTCTTCTGTTTCTTCCAGCCAGGTGTCTCCTGCATAAGACCTTCCACCAGG
16 | CCTGTCTCAGGGTCCGCTGCCTCACACAGCTCCCACCGCCCCAACTCCGTCTCCTGCTCGCTAAGGCGGCGAAGTTCCCC
17 | TTCCATTACATAAAACGTATCTGTTCAAGAAAGGCGACGACGAAGGAGAAGAAGAGGAGCCCCCCCGAACCCCTTCGCTG
18 | CTGCTGAAAACGACTTGATTGCTTCTGCAACGCAACGGGGCCTTACGGCAGCGAAGGGGTTGTCATCCGGACGCGCCAAG
19 | AATTCCTTCGAGACTGTAAATATCTTAAAGGAACCGTCCTGCTAGTTACCGACGCCGCTCCTGTAGCCGCCGTTCCCCCG
20 | CACTCCGGCCGGTTCATACCTCTTATTTTTTTGCAATGCAACAGAAAATAATATTTTTGTTCCCACGGCTTTTCCCGGTC
21 | AGGTCTGGTAGTGGCGGAGATTGGCCGACCCCTCGCACCTGTAATAAAGCGCAG
22 |
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.blastdbcmd.txt:
--------------------------------------------------------------------------------
1 | gi|57163782|ref|NM_001009242.1|
2 | gi|2734705|gb|U59921.1|BBU59921 "1 -" +
3 | gi|283855845|gb|GQ290303.1| 1-4301 +
4 | gi|283855822|gb|GQ290312.1| "1-983"
5 | gi|18148870|dbj|AB062417.1| "1 -" +
6 | gi|12583664|dbj|AB043817.1| "1--"
7 |
8 |
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.dbinfo.txt:
--------------------------------------------------------------------------------
1 | Database: Rhodopsin nucleotides
2 | 6 sequences; 10,296 total bases
3 |
4 |
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.gz
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.log.txt:
--------------------------------------------------------------------------------
1 | New DB title: Rhodopsin nucleotides
2 | Sequence type: Nucleotide
3 |
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nhd:
--------------------------------------------------------------------------------
1 | 12397459091
2 | 20759409394
3 | 22689758313
4 | 28815213262
5 | 36620822910
6 | 40074407105
7 |
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nhi:
--------------------------------------------------------------------------------
1 | N @ N 4 A12397459091
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nhr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nhr
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nin
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nnd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nnd
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nni:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nni
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nog
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nsd:
--------------------------------------------------------------------------------
1 | ab0438175
2 | ab043817.15
3 | ab0624174
4 | ab062417.14
5 | bbu599211
6 | gq2903032
7 | gq290303.12
8 | gq2903123
9 | gq290312.13
10 | nm_0010092420
11 | nm_001009242.10
12 | u599211
13 | u59921.11
14 |
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nsi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nsi
--------------------------------------------------------------------------------
/test-data/rhodopsin_nucs.fasta.nsq:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nsq
--------------------------------------------------------------------------------
/test-data/rhodopsin_peptides.fasta:
--------------------------------------------------------------------------------
1 | >gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]
2 | LYTSLHGYFVFGPTGC
3 |
4 | >gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin
5 | TTLCCGKNPFGEDDASSAATSKTEASSVSSSQ
6 |
7 | >gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
8 | VPFSNKTGVVRSPFEH
9 |
10 | >gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus]
11 | VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPIN
12 |
13 | >gi|223523|prf||0811197A rhodopsin [Bos taurus]
14 | MNGTEGPNFYVPFSNKTGVVRSPFEAPQYY
15 |
16 | >gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster]
17 | MNGTEGPN
18 |
--------------------------------------------------------------------------------
/test-data/rhodopsin_proteins.fasta:
--------------------------------------------------------------------------------
1 | >gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus]
2 | MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT
3 | PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC
4 | KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVV
5 | HFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG
6 | SNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA
7 |
8 | >gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin
9 | MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRT
10 | PLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVC
11 | KPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVV
12 | HFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQG
13 | SEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQ
14 | VSPA
15 |
16 | >gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis]
17 | VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
18 | VADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE
19 | NHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF
20 | FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTL
21 | PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS
22 |
23 | >gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus]
24 | VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA
25 | VANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE
26 | NHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF
27 | FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTI
28 | PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS
29 |
30 | >gi|223523|prf||0811197A rhodopsin [Bos taurus]
31 | MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRT
32 | PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC
33 | KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVH
34 | FIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGS
35 | DFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA
36 |
37 | >gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster]
38 | MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRT
39 | PLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVC
40 | KPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTC
41 | HFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQG
42 | STFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSS
43 | VSPA
44 |
--------------------------------------------------------------------------------
/test-data/segmasker_four_human.fasta:
--------------------------------------------------------------------------------
1 | >sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1
2 | MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF
3 | SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK
4 | REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER
5 | VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK
6 | CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD
7 | CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF
8 | HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL
9 | >sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2
10 | MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG
11 | GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS
12 | DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD
13 | LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG
14 | KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP
15 | DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT
16 | IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE
17 | Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq
18 | qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy
19 | qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV
20 | ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD
21 | KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD
22 | QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE
23 | NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA
24 | QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK
25 | APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD
26 | EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR
27 | HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS
28 | WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ
29 | SQQSQPVELDPFGAAPFPSKQ
30 | >sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4
31 | MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL
32 | QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL
33 | VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE
34 | ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL
35 | GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG
36 | CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC
37 | TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL
38 | EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE
39 | RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ
40 | NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS
41 | DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE
42 | RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL
43 | KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf
44 | pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV
45 | SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV
46 | SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG
47 | PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR
48 | EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG
49 | FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA
50 | AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV
51 | RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN
52 | CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme
53 | fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN
54 | PS
55 | >sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1
56 | MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY
57 | VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG
58 | GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP
59 | EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes
60 | attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI
61 | YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA
62 |
--------------------------------------------------------------------------------
/test-data/segmasker_four_human.maskinfo-asn1:
--------------------------------------------------------------------------------
1 | Blast-db-mask-info ::= {
2 | algo-id 1,
3 | algo-program seg,
4 | algo-options "window=12; locut=2.2; hicut=2.5",
5 | masks {
6 | masks {
7 | int {
8 | from 6,
9 | to 18,
10 | id local id 1
11 | },
12 | packed-int {
13 | {
14 | from 11,
15 | to 46,
16 | id local id 2
17 | },
18 | {
19 | from 325,
20 | to 332,
21 | id local id 2
22 | },
23 | {
24 | from 421,
25 | to 443,
26 | id local id 2
27 | },
28 | {
29 | from 437,
30 | to 450,
31 | id local id 2
32 | },
33 | {
34 | from 447,
35 | to 496,
36 | id local id 2
37 | },
38 | {
39 | from 501,
40 | to 516,
41 | id local id 2
42 | },
43 | {
44 | from 536,
45 | to 554,
46 | id local id 2
47 | },
48 | {
49 | from 545,
50 | to 558,
51 | id local id 2
52 | },
53 | {
54 | from 636,
55 | to 648,
56 | id local id 2
57 | },
58 | {
59 | from 737,
60 | to 762,
61 | id local id 2
62 | },
63 | {
64 | from 789,
65 | to 806,
66 | id local id 2
67 | },
68 | {
69 | from 970,
70 | to 983,
71 | id local id 2
72 | },
73 | {
74 | from 999,
75 | to 1010,
76 | id local id 2
77 | }
78 | },
79 | packed-int {
80 | {
81 | from 3,
82 | to 26,
83 | id local id 3
84 | },
85 | {
86 | from 372,
87 | to 390,
88 | id local id 3
89 | },
90 | {
91 | from 766,
92 | to 782,
93 | id local id 3
94 | },
95 | {
96 | from 780,
97 | to 791,
98 | id local id 3
99 | },
100 | {
101 | from 1312,
102 | to 1324,
103 | id local id 3
104 | }
105 | },
106 | int {
107 | from 230,
108 | to 246,
109 | id local id 4
110 | }
111 | },
112 | more FALSE
113 | }
114 | }
115 |
--------------------------------------------------------------------------------
/test-data/segmasker_four_human.maskinfo-asn1-binary:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/segmasker_four_human.maskinfo-asn1-binary
--------------------------------------------------------------------------------
/test-data/tblastn_four_human_vs_rhodopsin.tabular:
--------------------------------------------------------------------------------
1 | sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.552 348 12 0 1 348 1 1044 0.0 732
2 | sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.391 348 23 0 1 348 1 1044 0.0 711
3 | sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.092 326 16 0 11 336 1 978 0.0 682
4 | sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.795 342 51 1 1 341 42 1067 0.0 646
5 | sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.164 342 60 1 1 341 23 1048 0.0 626
6 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.243 74 5 0 239 312 3147 3368 1.34e-71 151
7 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.525 59 5 0 177 235 2855 3031 1.34e-71 126
8 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.396 111 4 0 11 121 1 333 3.31e-67 229
9 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.220 59 4 0 119 177 1404 1580 2.31e-32 122
10 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.462 26 3 0 312 337 4222 4299 3.61e-12 57.7
11 |
--------------------------------------------------------------------------------
/test-data/tblastn_four_human_vs_rhodopsin_deflines.tabular:
--------------------------------------------------------------------------------
1 | P08100 NM_001009242.1 96.552 348 12 0 1 348 1 1044 0.0 732
2 | P08100 AB062417.1 93.391 348 23 0 1 348 1 1044 0.0 711
3 | P08100 GQ290312.1 95.092 326 16 0 11 336 1 978 0.0 682
4 | P08100 U59921.1 84.795 342 51 1 1 341 42 1067 0.0 646
5 | P08100 AB043817.1 82.164 342 60 1 1 341 23 1048 0.0 626
6 | P08100 GQ290303.1 93.243 74 5 0 239 312 3147 3368 1.34e-71 151
7 | P08100 GQ290303.1 91.525 59 5 0 177 235 2855 3031 1.34e-71 126
8 | P08100 GQ290303.1 96.396 111 4 0 11 121 1 333 3.31e-67 229
9 | P08100 GQ290303.1 93.220 59 4 0 119 177 1404 1580 2.31e-32 122
10 | P08100 GQ290303.1 88.462 26 3 0 312 337 4222 4299 3.61e-12 57.7
11 |
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.dbinfo.txt:
--------------------------------------------------------------------------------
1 | Database: Just 3 human mRNA sequences
2 | 3 sequences; 10,732 total bases
3 |
4 |
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.gz
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.log.txt:
--------------------------------------------------------------------------------
1 | New DB title: Just 3 human mRNA sequences
2 | Sequence type: Nucleotide
3 |
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.nhd:
--------------------------------------------------------------------------------
1 | 12956943350
2 | 13082197871
3 | 19180330422
4 |
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.nhi:
--------------------------------------------------------------------------------
1 | ' @ ' 4 A12956943350
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.nhr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.nhr
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.nin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.nin
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.nog:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.nog
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.nsd:
--------------------------------------------------------------------------------
1 | gnl|bl_ord_id|00
2 | gnl|bl_ord_id|11
3 | gnl|bl_ord_id|22
4 |
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.nsi:
--------------------------------------------------------------------------------
1 | 6 @ 6 4 Fgnl|bl_ord_id|00
--------------------------------------------------------------------------------
/test-data/three_human_mRNA.fasta.nsq:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.nsq
--------------------------------------------------------------------------------
/test-data/three_human_mRNA_and_rhodopsin_nucs.dbinfo.txt:
--------------------------------------------------------------------------------
1 | Database: Just 3 human mRNA sequences; Rhodopsin nucleotides
2 | 9 sequences; 21,028 total bases
3 |
4 |
--------------------------------------------------------------------------------
/test-data/tool_data_table_conf.xml.test:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | value, name, path
5 |
6 |
7 |
8 | value, name, path
9 |
10 |
11 |
12 | value, name, path
13 |
14 |
15 |
16 | value, dbkey, name, path
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/tool-data/README.rst:
--------------------------------------------------------------------------------
1 | Galaxy Tool sample data
2 | =======================
3 |
4 | This folder contains sample files used by Galaxy tools defined elsewhere
5 | in this repository. For example, ``blastdb_p.loc.sample`` is a sample
6 | file used to generate the default ``blastdb_p.loc`` file describing any
7 | system-level protein BLAST databases available within Galaxy.
8 |
9 | For general information, see the `main README file <../README.rst>`_.
10 |
--------------------------------------------------------------------------------
/tool-data/all_fasta.loc.sample:
--------------------------------------------------------------------------------
1 | #This file lists the locations and dbkeys of all the fasta files
2 | #under the "genome" directory (a directory that contains a directory
3 | #for each build). The script extract_fasta.py will generate the file
4 | #all_fasta.loc. This file has the format (white space characters are
5 | #TAB characters):
6 | #
7 | #<unique_build_id>	<dbkey>	<display_name>	<file_path>
8 | #
9 | #So, all_fasta.loc could look something like this:
10 | #
11 | #apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
12 | #hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
13 | #hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
14 | #
15 | #Your all_fasta.loc file should contain an entry for each individual
16 | #fasta file. So there will be multiple fasta files for each build,
17 | #such as with hg19 above.
18 | #
19 |
--------------------------------------------------------------------------------
/tool-data/blast2go.loc.sample:
--------------------------------------------------------------------------------
1 | # This is a three column tab separated file to define the properties
2 | # file (settings) to be offered for Blast2GO for Pipelines (b2g4pipe).
3 | #
4 | # Column 1 - ID, string that Galaxy will save in its database
5 | # Column 2 - Human readable name, Galaxy will show this in the UI
6 | # Column 3 - Filename, Galaxy will use this when calling the tool
7 | #
8 | # Probably the most important setting in the properties file is the
9 | # Blast2GO database to use. Currently b2g4pipe v2.5 ships with an
10 | # old configuration so consult http://www.blast2go.com for the latest
11 | # public database they host in Spain (or find this by running the GUI
12 | # version of Blast2GO via Java Web Start under the menu entry "Tools",
13 | # "General Settings", "DataAccess setting"). We also strongly recommend
14 | # configuring a local Blast2GO database.
15 | #
16 | # The property filenames can be fully qualified paths like
17 | # /opt/b2g4pipe/Spain_2012_August.properties or provided they are
18 | # in the same folder as the Blast2GO JAR file, just the filename
19 | # like Spain_2012_August.properties instead. This is intended to
20 | # make migrating between future versions of Blast2GO easier (as the
21 | # property files change between versions), and simpler overall.
22 | #
23 | #Local_2011_May Local database (May 2011) Local_2011_May.properties
24 | #Spain_2010_May Database in Spain (May 2010) Spain_2010_May.properties
25 | Spain_2012_August Database in Spain (August 2012) Spain_2012_August.properties
26 | Spain_2011_June Database in Spain (June 2011) Spain_2011_June.properties
27 | #default Default settings b2gPipe.properties
28 |
--------------------------------------------------------------------------------
/tool-data/blastdb.loc.sample:
--------------------------------------------------------------------------------
1 | # This is a sample file distributed with Galaxy that is used to define a
2 | # list of nucleotide BLAST databases, using three columns tab separated:
3 | #
4 | # <unique_id>{tab}<database_caption>{tab}<base_name_path>
5 | #
6 | # The captions typically contain spaces and might end with the build date.
7 | # It is important that the actual database name does not have a space in
8 | # it, and that there are only two tabs on each line.
9 | #
10 | # You can download the NCBI provided nucleotide databases like NT from here:
11 | # ftp://ftp.ncbi.nlm.nih.gov/blast/db/
12 | #
13 | # For simplicity, many Galaxy servers are configured to offer just a live
14 | # version of each NCBI BLAST database (updated with the NCBI provided
15 | # Perl scripts or similar). In this case, we recommend using the case
16 | # sensitive base-name of the NCBI BLAST databases as the unique id.
17 | # Consistent naming is important for sharing workflows between Galaxy
18 | # servers.
19 | #
20 | # For example, consider the NCBI partially non-redundant nucleotide
21 | # nt BLAST database, where you have downloaded and decompressed the
22 | # files under /data/blastdb/ meaning at the command line BLAST+
23 | # would look at the files /data/blastdb/nt.n* when run with:
24 | #
25 | # $ blastn -db /data/blastdb/nt -query ...
26 | #
27 | # In this case use nt (lower case to match the NCBI file naming) as the
28 | # unique id in the first column of blastdb.loc, giving an entry like
29 | # this:
30 | #
31 | # nt{tab}NCBI partially non-redundant (nt){tab}/data/blastdb/nt
32 | #
33 | # Alternatively, rather than a "live" mirror of the NCBI databases which
34 | # are updated automatically, for full reproducibility the Galaxy Team
35 | # recommend saving date-stamped copies of the databases. In this case
36 | # your blastdb.loc file should include an entry per line for each
37 | # version you have stored. For example:
38 | #
39 | # nt_05Jun2010{tab}NCBI nt (partially non-redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nt
40 | # nt_15Aug2010{tab}NCBI nt (partially non-redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nt
41 | # ...etc...
42 | #
43 | # See also blastdb_p.loc which is for any protein BLAST database, and
44 | # blastdb_d.loc which is for any protein domains databases (like CDD).
45 |
--------------------------------------------------------------------------------
/tool-data/blastdb_d.loc.sample:
--------------------------------------------------------------------------------
1 | # This is a sample file distributed with Galaxy that is used to define a
2 | # list of protein domain databases, using three columns tab separated
3 | # (longer whitespace are TAB characters):
4 | #
5 | # <unique_id>{tab}<database_caption>{tab}<base_name_path>
6 | #
7 | # The captions typically contain spaces and might end with the build date.
8 | # It is important that the actual database name does not have a space in
9 | # it, and that there are only two tabs on each line.
10 | #
11 | # You can download the NCBI provided databases as tar-balls from here:
12 | # ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/
13 | #
14 | # For simplicity, many Galaxy servers are configured to offer just a live
15 | # version of each NCBI BLAST database (updated with the NCBI provided
16 | # Perl scripts or similar). In this case, we recommend using the case
17 | # sensitive base-name of the NCBI BLAST databases as the unique id.
18 | # Consistent naming is important for sharing workflows between Galaxy
19 | # servers.
20 | #
21 | # For example, consider the NCBI Conserved Domains Database (CDD), where
22 | # you have downloaded and decompressed the files under the directory
23 | # /data/blastdb/domains/ meaning at the command line BLAST+ would be
24 | # run as follows and would look at the files /data/blastdb/domains/Cdd.*:
25 | #
26 | # $ rpsblast -db /data/blastdb/domains/Cdd -query ...
27 | #
28 | # In this case use Cdd (title case to match the NCBI file naming) as the
29 | # unique id in the first column of blastdb_d.loc, giving an entry like
30 | # this:
31 | #
32 | # Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/domains/Cdd
33 | #
34 | # Your blastdb_d.loc file should include an entry per line for each "base name"
35 | # you have stored. For example:
36 | #
37 | # Cdd{tab}NCBI CDD{tab}/data/blastdb/domains/Cdd
38 | # Kog{tab}KOG (eukaryotes){tab}/data/blastdb/domains/Kog
39 | # Cog{tab}COG (prokaryotes){tab}/data/blastdb/domains/Cog
40 | # Pfam{tab}Pfam-A{tab}/data/blastdb/domains/Pfam
41 | # Smart{tab}SMART{tab}/data/blastdb/domains/Smart
42 | # Tigr{tab}TIGR{tab}/data/blastdb/domains/Tigr
43 | # Prk{tab}Protein Clusters database{tab}/data/blastdb/domains/Prk
44 | # ...etc...
45 | #
46 | # Alternatively, rather than a "live" mirror of the NCBI databases which
47 | # are updated automatically, for full reproducibility the Galaxy Team
48 | # recommend saving date-stamped copies of the databases. In this case
49 | # your blastdb_d.loc file should include an entry per line for each
50 | # version you have stored. For example:
51 | #
52 | # Cdd_05Jun2010{tab}NCBI CDD 05 Jun 2010{tab}/data/blastdb/domains/05Jun2010/Cdd
53 | # Cdd_15Aug2010{tab}NCBI CDD 15 Aug 2010{tab}/data/blastdb/domains/15Aug2010/Cdd
54 | # ...etc...
55 | #
56 | # See also blastdb.loc which is for any nucleotide BLAST database, and
57 | # blastdb_p.loc which is for any protein BLAST databases.
58 |
--------------------------------------------------------------------------------
/tool-data/blastdb_p.loc.sample:
--------------------------------------------------------------------------------
1 | # This is a sample file distributed with Galaxy that is used to define a
2 | # list of protein BLAST databases, using three columns tab separated:
3 | #
4 | # <unique_id>{tab}<database_caption>{tab}<base_name_path>
5 | #
6 | # The captions typically contain spaces and might end with the build date.
7 | # It is important that the actual database name does not have a space in
8 | # it, and that there are only two tabs on each line.
9 | #
10 | # You can download the NCBI provided protein databases like NR from here:
11 | # ftp://ftp.ncbi.nlm.nih.gov/blast/db/
12 | #
13 | # For simplicity, many Galaxy servers are configured to offer just a live
14 | # version of each NCBI BLAST database (updated with the NCBI provided
15 | # Perl scripts or similar). In this case, we recommend using the case
16 | # sensitive base-name of the NCBI BLAST databases as the unique id.
17 | # Consistent naming is important for sharing workflows between Galaxy
18 | # servers.
19 | #
20 | # For example, consider the NCBI "non-redundant" protein BLAST database
21 | # where you have downloaded and decompressed the files under /data/blastdb/
22 | # meaning at the command line BLAST+ would look at the files
23 | # /data/blastdb/nr.p* when run with something like:
24 | #
25 | # $ blastp -db /data/blastdb/nr -query ...
26 | #
27 | # In this case use nr (lower case to match the NCBI file naming) as the
28 | # unique id in the first column of blastdb_p.loc, giving an entry like
29 | # this:
30 | #
31 | # nr{tab}NCBI non-redundant (nr){tab}/data/blastdb/nr
32 | #
33 | # Alternatively, rather than a "live" mirror of the NCBI databases which
34 | # are updated automatically, for full reproducibility the Galaxy Team
35 | # recommend saving date-stamped copies of the databases. In this case
36 | # your blastdb_p.loc file should include an entry per line for each
37 | # version you have stored. For example:
38 | #
39 | # nr_05Jun2010{tab}NCBI NR (non redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nr
40 | # nr_15Aug2010{tab}NCBI NR (non redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nr
41 | # ...etc...
42 | #
43 | # See also blastdb.loc which is for any nucleotide BLAST database, and
44 | # blastdb_d.loc which is for any protein domains databases (like CDD).
45 |
--------------------------------------------------------------------------------
/tool-data/tool_data_table_conf.xml.sample:
--------------------------------------------------------------------------------
1 |
2 |
3 | value, name, path
4 |
5 |
6 |
7 | value, name, path
8 |
9 |
10 |
11 | value, name, path
12 |
13 |
14 |
15 | value, dbkey, name, path
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/tools/README.rst:
--------------------------------------------------------------------------------
1 | Galaxy Tool definitions
2 | =======================
3 |
4 | Each sub-folder represents a different entry on the Galaxy Tool Shed,
5 | for example ``ncbi_blast_plus`` contains wrappers for the BLAST+ suite.
6 |
7 | For general information, see the `main README file <../README.rst>`_.
8 |
--------------------------------------------------------------------------------
/tools/blast2go/.shed.yml:
--------------------------------------------------------------------------------
1 | name: blast2go
2 | owner: peterjc
3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blast2go
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blast2go
5 | description: Maps BLAST results to GO annotation terms
6 | long_description: |
7 | Galaxy wrapper for Java command line tool Blast2GO for pipelines, b2g4pipe v2.5,
8 | available from http://blast2go.org/
9 |
10 | The tool takes a single BLAST XML file as input, searched against a protein database
11 | such as the NCBI non redundant database (NR). The tool gives a single tabular output
12 | file, the annotation file which can be opened with the Blast2GO GUI.
13 |
14 | The wrapper uses a Galaxy loc file to allow the use of one or more Blast2GO property
15 | files (e.g. different versions of the database, or different servers, or different
16 | evidence weighting settings). We use this to offer both a local Blast2GO database
17 | (fast) and the public database hosted in Valencia, Spain.
18 | categories:
19 | - Ontology Manipulation
20 | - Sequence Analysis
21 | type: unrestricted
22 | include:
23 | - strip_components: 2
24 | source:
25 | - ../../test-data/blastp_sample.blast2go.tabular
26 | - ../../test-data/blastp_sample.xml
27 | - ../../tool-data/blast2go.loc.sample
28 | - ../../tools/blast2go/README.rst
29 | - ../../tools/blast2go/blast2go.py
30 | - ../../tools/blast2go/blast2go.xml
31 | - ../../tools/blast2go/massage_xml_for_blast2go.py
32 |
--------------------------------------------------------------------------------
/tools/blast2go/go_categorize.py:
--------------------------------------------------------------------------------
1 | """Categorize GO terms."""
2 |
3 | from __future__ import print_function
4 |
5 | import gzip
6 | import sys
7 |
# First two bytes of every gzip stream. This must be a bytes literal: the file
# is probed in binary mode, so read() returns bytes under Python 3 and would
# never compare equal to a text string.
_gzip_magic = b"\x1f\x8b"


def gzip_open(filename, mode="rb"):
    """Open a possibly gzipped file for binary reading.

    The first two bytes of ``filename`` are inspected; if they match the gzip
    magic number the file is reopened through ``gzip.open``, otherwise the
    plain binary handle (rewound to the start) is returned.

    Only ``mode="rb"`` is supported (enforced with an assertion). A short
    note saying which case was detected is written to stderr.
    """
    assert mode == "rb", mode
    h = open(filename, "rb")
    magic = h.read(2)
    if magic == _gzip_magic:
        # Compressed: drop the raw handle and let gzip do the decoding.
        h.close()
        sys.stderr.write("%s is gzipped\n" % filename)
        return gzip.open(filename, "rb")
    # Plain file: rewind so the caller sees the two probed bytes as well.
    h.seek(0)
    sys.stderr.write("%s isn't compressed\n" % filename)
    return h
24 |
25 |
def get_term_class(go, alias, is_a):
    """Find the class (BP, CC or MF) of the given GO term.

    Arguments:
     - go: the GO identifier to classify.
     - alias: dict mapping alternative identifiers to their primary one.
     - is_a: dict mapping a term to its parent term.

    The term is first resolved through ``alias`` and then followed up the
    ``is_a`` chain until one of the three root terms (or their obsolete
    placeholders) is reached.

    Returns "BP", "CC" or "MF", or "??" when the chain ends without reaching
    a known root (missing parent, empty term, or a cycle in ``is_a``).
    """
    roots = {
        "GO:0008150": "BP",
        "obsolete_biological_process": "BP",
        "GO:0005575": "CC",
        "obsolete_cellular_component": "CC",
        "GO:0003674": "MF",
        "obsolete_molecular_function": "MF",
    }
    term = alias.get(go, go)
    visited = set()
    # Stop on a falsy term or on a term already seen; the latter guards
    # against a malformed ontology whose is_a links form a loop.
    while term and term not in visited:
        if term in roots:
            return roots[term]
        visited.add(term)
        term = is_a.get(term)
    return "??"
40 |
41 |
42 | def load_go_mapping(rdf_xml):
43 | """Quick and dirty GO RDF-XML parser."""
44 | sys.stderr.write("Loading %s\n" % rdf_xml)
45 | h = gzip_open(rdf_xml, "rb")
46 |
47 | names = dict()
48 | alias = dict()
49 | is_a = dict()
50 |
51 | go = None
52 | for line in h:
53 | # sys.stderr.write("... %r\n" % line)
54 | if "" in line:
55 | assert go is None, line
56 | go = line[line.find("") + 14 :]
57 | assert "" in line, line
58 | go = go[: go.find("")]
59 | elif "" in line:
60 | assert go is not None
61 | name = line[line.find("") + 9 :]
62 | assert "" in name, name
63 | name = name[: name.find("")]
64 | names[go] = name
65 | elif "GO:" in line:
66 | assert go is not None
67 | go2 = line[line.find("GO:") + 12 :]
68 | assert "" in line, line
69 | go2 = go2[: go2.find("")]
70 | alias[go2] = go
71 | elif '
74 | thing = line[
75 | line.find('
87 | # or
90 | thing = line[
91 | line.find('" in line:
97 | go = None
98 | h.close()
99 | sys.stderr.write(
100 | "%i names, %i aliases, %i parents\n" % (len(names), len(alias), len(is_a))
101 | )
102 |
103 | if "all" in names:
104 | del names["all"]
105 |
106 | for go in names:
107 | yield go, names[go], get_term_class(go, alias, is_a)
108 |
109 |
110 | for go, name, term_class in load_go_mapping(sys.argv[1]):
111 | print(go, term_class, name)
112 |
--------------------------------------------------------------------------------
/tools/blast2go/massage_xml_for_blast2go.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Script for reformatting Blast XML to suit Blast2GO.
3 |
4 | This script takes exactly two command line arguments:
5 | * Input BLAST XML filename
6 | * Output BLAST XML filename
7 |
8 | Sadly b2g4pipe (at least v2.3.5 to v2.5.0) cannot cope with current
9 | style large BLAST XML files (e.g. from BLAST 2.2.25+), so we reformat
10 | these to avoid it crashing with a Java heap space OutOfMemoryError.
11 |
12 | As part of this reformatting, we check for BLASTP or BLASTX output
13 | (otherwise raise an error), and print the query count.
14 |
15 | This script is called from my Galaxy wrapper for Blast2GO for pipelines,
16 | available from the Galaxy Tool Shed here:
17 | http://toolshed.g2.bx.psu.edu/view/peterjc/blast2go
18 |
19 | This script is under version control here:
20 | https://github.com/peterjc/galaxy_blast/tree/master/blast2go
21 | """
22 | import os
23 | import sys
24 |
25 |
26 | def prepare_xml(original_xml, mangled_xml):
27 | """Reformat BLAST XML to suit Blast2GO.
28 |
29 | Blast2GO can't cope with 1000s of tags within a
30 | single tag, so instead split this into one
31 | full XML record per interation (i.e. per query). This gives
32 | a concatenated XML file mimicing old versions of BLAST.
33 |
34 | This also checks for BLASTP or BLASTX output, and outputs
35 | the number of queries. Galaxy will show this as "info".
36 | """
37 | in_handle = open(original_xml)
38 | footer = " \n\n"
39 | header = ""
40 | while True:
41 | line = in_handle.readline()
42 | if not line:
43 | # No hits?
44 | sys.exit("Problem with XML file?")
45 | if line.strip() == "":
46 | break
47 | header += line
48 |
49 | if "blastx" in header:
50 | print("BLASTX output identified")
51 | elif "blastp" in header:
52 | print("BLASTP output identified")
53 | else:
54 | in_handle.close()
55 | sys.exit("Expect BLASTP or BLASTX output")
56 |
57 | out_handle = open(mangled_xml, "w")
58 | out_handle.write(header)
59 | out_handle.write(line)
60 | count = 1
61 | while True:
62 | line = in_handle.readline()
63 | if not line:
64 | break
65 | elif line.strip() == "":
66 | # Insert footer/header
67 | out_handle.write(footer)
68 | out_handle.write(header)
69 | count += 1
70 | out_handle.write(line)
71 |
72 | out_handle.close()
73 | in_handle.close()
74 | print("Input has %i queries" % count)
75 |
76 |
77 | if __name__ == "__main__":
78 | # Run the conversion...
79 | if len(sys.argv) != 3:
80 | sys.exit("Require two arguments: XML input filename, XML output filename")
81 |
82 | xml_file, out_xml_file = sys.argv[1:]
83 |
84 | if not os.path.isfile(xml_file):
85 | sys.exit("Input BLAST XML file not found: %s" % xml_file)
86 |
87 | prepare_xml(xml_file, out_xml_file)
88 |
--------------------------------------------------------------------------------
/tools/blast2go/test-data:
--------------------------------------------------------------------------------
1 | ../../test-data
--------------------------------------------------------------------------------
/tools/blast_rbh/.shed.yml:
--------------------------------------------------------------------------------
1 | name: blast_rbh
2 | owner: peterjc
3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh
5 | description: BLAST Reciprocal Best Hits (RBH) from two FASTA files
6 | long_description: |
7 | Builds BLAST databases and runs reciprocal searches, filters them,
8 | and then identifies and reports any reciprocal best hits (RBH).
9 | categories:
10 | - Fasta Manipulation
11 | - Sequence Analysis
12 | type: unrestricted
13 | include:
14 | - strip_components: 2
15 | source:
16 | - ../../test-data/four_human_proteins.fasta
17 | - ../../test-data/k12_edited_proteins.fasta
18 | - ../../test-data/k12_ten_proteins.fasta
19 | - ../../test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular
20 | - ../../test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular
21 | - ../../test-data/rbh_blastp_k12.tabular
22 | - ../../test-data/rbh_blastp_k12_self.tabular
23 | - ../../test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular
24 | - ../../test-data/rbh_none.tabular
25 | - ../../test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular
26 | - ../../test-data/rhodopsin_nucs.fasta
27 | - ../../test-data/rhodopsin_proteins.fasta
28 | - ../../test-data/three_human_mRNA.fasta
29 | - ../../tools/blast_rbh/README.rst
30 | - ../../tools/blast_rbh/best_hits.py
31 | - ../../tools/blast_rbh/blast_rbh.py
32 | - ../../tools/blast_rbh/blast_rbh.xml
33 |
--------------------------------------------------------------------------------
/tools/blast_rbh/test-data:
--------------------------------------------------------------------------------
1 | ../../test-data
--------------------------------------------------------------------------------
/tools/blast_rbh/update_tests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 | echo "This will update test files using the current version of BLAST+"
4 |
5 | if [ -f "tools/blast_rbh/update_tests.sh" ]
6 | then
7 | echo "Good, in the expected directory"
8 | else
9 | echo "ERROR. Run this from the GitHub repository root directory."
10 | exit 1
11 | fi
12 |
13 | cd test-data
14 |
15 | echo rbh_none.tabular
16 | ../tools/blast_rbh/blast_rbh.py rhodopsin_nucs.fasta three_human_mRNA.fasta -a nucl -t megablast -i 100 -c 100 -o rbh_none.tabular
17 |
18 | echo rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular
19 | ../tools/blast_rbh/blast_rbh.py three_human_mRNA.fasta rhodopsin_nucs.fasta -a nucl -t blastn -i 0 -c 0 -o rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular
20 |
21 | echo rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular
22 | ../tools/blast_rbh/blast_rbh.py rhodopsin_nucs.fasta three_human_mRNA.fasta -a nucl -t megablast -i 0 -c 0 -o rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular
23 |
24 | echo rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular
25 | ../tools/blast_rbh/blast_rbh.py rhodopsin_nucs.fasta three_human_mRNA.fasta -a nucl -t tblastx -i 0 -c 0 -o rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular
26 |
27 | echo rbh_blastp_four_human_vs_rhodopsin_proteins.tabular
28 | ../tools/blast_rbh/blast_rbh.py four_human_proteins.fasta rhodopsin_proteins.fasta -a prot -t blastp -i 0 -c 0 -o rbh_blastp_four_human_vs_rhodopsin_proteins.tabular
29 |
30 | echo rbh_blastp_k12.tabular
31 | ../tools/blast_rbh/blast_rbh.py k12_edited_proteins.fasta k12_ten_proteins.fasta -a prot -t blastp -i 0 -c 0 -o rbh_blastp_k12.tabular
32 |
33 | echo rbh_blastp_k12_self.tabular
34 | ../tools/blast_rbh/blast_rbh.py k12_edited_proteins.fasta k12_edited_proteins.fasta -a prot -t blastp-fast -i 80 -c 80 -o rbh_blastp_k12_self.tabular
35 |
--------------------------------------------------------------------------------
/tools/blastxml_to_top_descr/.shed.yml:
--------------------------------------------------------------------------------
1 | name: blastxml_to_top_descr
2 | owner: peterjc
3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blastxml_to_top_descr
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blastxml_to_top_descr
5 | description: Make table of top BLAST match descriptions
6 | long_description: |
7 | NCBI BLAST+ (and the older NCBI "legacy" BLAST) can output in a range of formats
8 | including text, tabular and a more detailed XML format. You can do a lot of things
9 | with tabular files in Galaxy (sorting, filtering, joins, etc), however until BLAST+
10 | 2.2.28 the tabular output never included the hit descriptions (titles) found in
11 | the other output formats.
12 |
13 | This tool turns a BLAST XML file into a simple tabular file containing one row per
14 | query sequence, containing the query identifier and then the three (by default)
15 | top hit descriptions (i.e. the first three). If a query doesn't have that many
16 | hits, then these entries are left blank.
17 |
18 | This tool can also be used with the tabular output from BLAST+ instead, provided
19 | the relevant columns are provided. The default settings will work with the default
20 | 25 column extended output from the BLAST+ tools wrapped in Galaxy. Note if a query
21 | has no hits, it does not appear in the BLAST tabular output.
22 | categories:
23 | - Convert Formats
24 | - Sequence Analysis
25 | - Text Manipulation
26 | type: unrestricted
27 | include:
28 | - strip_components: 2
29 | source:
30 | - ../../test-data/blastp_four_human_vs_rhodopsin.xml
31 | - ../../test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular
32 | - ../../test-data/blastp_four_human_vs_rhodopsin_top3.tabular
33 | - ../../test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular
34 | - ../../tools/blastxml_to_top_descr/README.rst
35 | - ../../tools/blastxml_to_top_descr/blastxml_to_top_descr.py
36 | - ../../tools/blastxml_to_top_descr/blastxml_to_top_descr.xml
37 |
--------------------------------------------------------------------------------
/tools/blastxml_to_top_descr/test-data:
--------------------------------------------------------------------------------
1 | ../../test-data
--------------------------------------------------------------------------------
/tools/make_nr/.shed.yml:
--------------------------------------------------------------------------------
1 | name: make_nr
2 | owner: peterjc
3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/make_nr
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/make_nr
5 | description: Make a FASTA file non-redundant
6 | long_description: |
7 | Python script intended to be run prior to calling the NCBI BLAST+
8 | command line tool ``makeblastdb`` or in other settings where you
9 | want to collapse duplicated sequences in a FASTA file to a single
10 | representative.
11 | categories:
12 | - Fasta Manipulation
13 | - Sequence Analysis
14 | type: unrestricted
15 | include:
16 | - strip_components: 2
17 | source:
18 | - ../../tools/make_nr/README.rst
19 | - ../../tools/make_nr/make_nr.py
20 | - ../../tools/make_nr/make_nr.xml
21 | - ../../test-data/duplicates.fasta
22 | - ../../test-data/duplicates.fasta.gz
23 | - ../../test-data/duplicates.nr.fasta
24 | - ../../test-data/more_duplicates.fasta
25 | - ../../test-data/deduplicate.nosortids.fasta
26 | - ../../test-data/deduplicate.sortids.fasta
27 | - ../../test-data/empty.fasta
28 |
--------------------------------------------------------------------------------
/tools/make_nr/test-data:
--------------------------------------------------------------------------------
1 | ../../test-data
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/.lint_skip:
--------------------------------------------------------------------------------
1 | # delta and psiblast miss tests
2 | TestsMissing
3 |
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/check_no_duplicates.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Check for duplicate sequence identifiers in FASTA files.
3 |
4 | This is run as a pre-check before makeblastdb, in order to avoid
5 | a regression bug in BLAST+ 2.2.28 which fails to catch this. See:
6 | http://blastedbio.blogspot.co.uk/2012/10/my-ids-not-good-enough-for-ncbi-blast.html
7 |
8 | This script takes one or more FASTA filenames as input, and
9 | will return a non-zero error if any duplicate identifiers
10 | are found.
11 | """
12 | import gzip
13 | import os
14 | import sys
15 |
16 |
# Report the script version and stop if asked, before touching any files.
if "-v" in sys.argv or "--version" in sys.argv:
    print("v0.0.23")
    sys.exit(0)

# Every sequence identifier seen so far, across all the input files.
identifiers = set()
files = 0
for filename in sys.argv[1:]:
    if not os.path.isfile(filename):
        sys.stderr.write("Missing FASTA file %r\n" % filename)
        sys.exit(2)
    files += 1

    # Peek at the first two bytes to decide how the file must be opened.
    with open(filename, "rb") as binary_handle:
        magic = binary_handle.read(2)
    if not magic:
        # Empty file, special case
        continue
    elif magic == b"\x1f\x8b":
        # Gzipped
        handle = gzip.open(filename, "rt")
    elif magic[0:1] == b">":
        # Not gzipped, should be plain FASTA
        handle = open(filename, "r")
    else:
        # Without this branch ``handle`` would be undefined (first file) or
        # would still refer to the previous, already closed, file.
        sys.exit("Not a FASTA file (or gzipped FASTA): %r" % filename)

    # The context manager closes the handle even when sys.exit() is raised.
    with handle:
        for line in handle:
            if line.startswith(">"):
                # The split will also take care of the new line character,
                # e.g. ">test\n" and ">test description here\n" both give "test"
                fields = line[1:].split(None, 1)
                if not fields:
                    # A bare ">" header has no identifier to check at all.
                    sys.exit("Missing identifier in FASTA file %r" % filename)
                seq_id = fields[0]
                if seq_id in identifiers:
                    sys.exit("Repeated identifiers, e.g. %r" % seq_id)
                identifiers.add(seq_id)
if not files:
    sys.stderr.write("No FASTA files given to check for duplicates\n")
    sys.exit(3)
elif files == 1:
    print("%i sequences" % len(identifiers))
else:
    print("%i sequences in %i FASTA files" % (len(identifiers), files))
58 |
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/get_species_taxids.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | ncbi_macros.xml
5 |
6 |
7 | echo "@TOOL_VERSION@"
8 | &2 echo "could not find taxid for $name" && exit 1;
14 | else
15 | echo " $name -> \$taxid";
16 | fi &&
17 | get_species_taxids.sh -t "\$taxid" >> species_ids.txt &&
18 | #end for
19 | #else
20 | #for taxid in $type_cond.ids.split(',')
21 | get_species_taxids.sh -t "$taxid" >> species_ids.txt &&
22 | #end for
23 | #end if
24 | sort -n -u species_ids.txt > '$output'
25 | ]]>
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | [a-zA-Z ,]+$
35 |
36 |
37 |
38 |
39 | [0-9,]+$
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
57 |
58 |
59 |
60 |
61 |
67 |
68 |
69 |
70 |
71 |
78 |
79 |
80 |
81 |
82 |
89 |
90 |
91 |
92 |
93 | **What it does**
94 |
95 | Returns a list of species taxids for a taxon. It relies on the get_species_taxids.sh script of the BLAST+ package https://www.ncbi.nlm.nih.gov/books/NBK546209/
96 |
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml:
--------------------------------------------------------------------------------
1 |
2 | Show BLAST database information from blastdbcmd
3 |
4 | blastdbcmd
5 | ncbi_macros.xml
6 |
7 |
8 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 | **What it does**
44 |
45 | Calls the NCBI BLAST+ blastdbcmd command line tool with the -info
46 | switch to give summary information about a BLAST database, such as
47 | the size (number of sequences and total length) and date.
48 |
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml:
--------------------------------------------------------------------------------
1 |
2 | Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb
3 |
4 | convert2blastmask
5 | ncbi_macros.xml
6 |
7 |
8 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 | **What it does**
72 |
73 | Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb.
74 |
75 | More information about convert2blastmask can be found in the `BLAST Command Line Applications User Manual`_.
76 |
77 | .. _BLAST Command Line Applications User Manual: https://www.ncbi.nlm.nih.gov/books/NBK279690/
78 |
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml:
--------------------------------------------------------------------------------
1 |
2 | Search protein domain database (PSSMs) with protein query sequence(s)
3 |
4 | deltablast
5 | ncbi_macros.xml
6 |
7 |
8 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 | @SEARCH_TIME_WARNING@
69 |
70 | **What it does**
71 |
72 | Search a *protein domain database* using a *protein query*,
73 | using the NCBI BLAST+ rpsblast command line tool.
74 |
75 | The protein domain databases use position-specific scoring matrices
76 | (PSSMs) and are available for a number of domain collections including:
77 |
78 | *CDD* - NCBI curated meta-collection of domains, see
79 | https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains
80 |
81 | *Kog* - PSSMs from automatically aligned sequences and sequence
82 | fragments classified in the KOGs resource, the eukaryotic
83 | counterpart to COGs, see https://www.ncbi.nlm.nih.gov/COG/
84 |
85 | *Cog* - PSSMs from automatically aligned sequences and sequence
86 | fragments classified in the COGs resource, which focuses primarily
87 | on prokaryotes, see https://www.ncbi.nlm.nih.gov/COG/
88 |
89 | *Pfam* - PSSMs from Pfam-A seed alignment database, see
90 | http://xfam.org/
91 |
92 | *Smart* - PSSMs from SMART domain alignment database, see
93 | http://smart.embl-heidelberg.de/
94 |
95 | *Tigr* - PSSMs from TIGRFAM database of protein families, see
96 | ftp://ftp.jcvi.org/data/TIGRFAMs/
97 |
98 | *Prk* - PSSMs from automatically aligned stable clusters in the
99 | Protein Clusters database, see
100 | https://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&db=proteinclusters
101 |
102 | The exact list of domain databases offered will depend on how your
103 | local Galaxy has been configured.
104 |
105 | -----
106 |
107 | @OUTPUT_FORMAT@
108 |
109 | -------
110 |
111 | @CLI_OPTIONS@
112 |
113 |
114 |
115 |
116 |
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml:
--------------------------------------------------------------------------------
1 |
2 | Search protein domain database (PSSMs) with translated nucleotide query sequence(s)
3 |
4 | rpstblastn
5 | ncbi_macros.xml
6 |
7 |
8 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 | @SEARCH_TIME_WARNING@
69 |
70 | **What it does**
71 |
72 | Search a *protein domain database* using a *nucleotide query*,
73 | using the NCBI BLAST+ rpstblastn command line tool.
74 |
75 | The protein domain databases use position-specific scoring matrices
76 | (PSSMs) and are available for a number of domain collections including:
77 |
78 | *CDD* - NCBI curated meta-collection of domains, see
79 | https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains
80 |
81 | *Kog* - PSSMs from automatically aligned sequences and sequence
82 | fragments classified in the KOGs resource, the eukaryotic
83 | counterpart to COGs, see https://www.ncbi.nlm.nih.gov/COG/
84 |
85 | *Cog* - PSSMs from automatically aligned sequences and sequence
86 | fragments classified in the COGs resource, which focuses primarily
87 | on prokaryotes, see https://www.ncbi.nlm.nih.gov/COG/
88 |
89 | *Pfam* - PSSMs from Pfam-A seed alignment database, see
90 | http://xfam.org/
91 |
92 | *Smart* - PSSMs from SMART domain alignment database, see
93 | http://smart.embl-heidelberg.de/
94 |
95 | *Tigr* - PSSMs from TIGRFAM database of protein families, see
96 | ftp://ftp.jcvi.org/data/TIGRFAMs/
97 |
98 | *Prk* - PSSMs from automatically aligned stable clusters in the
99 | Protein Clusters database, see
100 | https://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&db=proteinclusters
101 |
102 | The exact list of domain databases offered will depend on how your
103 | local Galaxy has been configured.
104 |
105 | -----
106 |
107 | @OUTPUT_FORMAT@
108 |
109 | -------
110 |
111 | @CLI_OPTIONS@
112 |
113 |
114 |
115 |
116 |
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml:
--------------------------------------------------------------------------------
1 |
2 | Search translated nucleotide database with translated nucleotide query sequence(s)
3 |
4 | tblastx
5 | ncbi_macros.xml
6 |
7 |
8 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 | @SEARCH_TIME_WARNING@
81 |
82 | **What it does**
83 |
84 | Search a *translated nucleotide database* using a *translated nucleotide query*,
85 | using the NCBI BLAST+ tblastx command line tool.
86 |
87 | @FASTA_WARNING@
88 |
89 | -----
90 |
91 | @OUTPUT_FORMAT@
92 |
93 | -------
94 |
95 | @CLI_OPTIONS@
96 |
97 |
98 |
99 |
100 |
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/test-data:
--------------------------------------------------------------------------------
1 | ../../test-data
--------------------------------------------------------------------------------
/tools/ncbi_blast_plus/tool-data:
--------------------------------------------------------------------------------
1 | ../../tool-data
--------------------------------------------------------------------------------
/tools/reciprocal_best_hits/README.rst:
--------------------------------------------------------------------------------
1 | Galaxy tool to find Reciprocal Best Hits (RBH) from BLAST etc
2 | =============================================================
3 |
4 | This tool is copyright 2011-2015 by Peter Cock, The James Hutton Institute
5 | (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
6 | See the licence text below.
7 |
8 | This tool is a short Python script to parse a pair of BLAST tabular files
9 | (or similar), and extract the reciprocal best hits.
10 |
11 | This was an experiment. I was also considering supporting BLAST XML as input,
12 | which could require extensions to Galaxy ideally so that the current column
13 | selection parameters can be conditional on tabular input. This would make
14 | it possible to integrate BLAST filtering into this tool - although that
15 | might be better done as a separate tool instead.
16 |
17 | This tool has been superseded by an integrated BLAST RBH tool taking two
18 | FASTA files as input instead, see:
19 |
20 | * https://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh
21 | * https://testtoolshed.g2.bx.psu.edu/view/peterjc/blast_rbh
22 | * https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh
23 |
24 |
25 | Licence (MIT)
26 | =============
27 |
28 | Permission is hereby granted, free of charge, to any person obtaining a copy
29 | of this software and associated documentation files (the "Software"), to deal
30 | in the Software without restriction, including without limitation the rights
31 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
32 | copies of the Software, and to permit persons to whom the Software is
33 | furnished to do so, subject to the following conditions:
34 |
35 | The above copyright notice and this permission notice shall be included in
36 | all copies or substantial portions of the Software.
37 |
38 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
39 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
40 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
41 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
42 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
43 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
44 | THE SOFTWARE.
45 |
--------------------------------------------------------------------------------
/tools/reciprocal_best_hits/reciprocal_best_hits.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Reciprocal Best Hit (RBH) using BLAST style tabular input.
3 |
4 | Takes seven command line options,
5 | 1. Tabular filename of A against B
6 | 2. Tabular filename of B against A
7 | 3. Query ID column number (assumed to be same for both input files), e.g. c1
8 | 4. Match ID column number (assumed to be same for both input files), e.g. c2
9 | 5. Score column number (assumed to be same for both input files), e.g. c12
10 | 6. Want highest or lowest score? (use string high or low)
11 | 7. Output filename
12 |
13 | """
14 | from __future__ import print_function
15 |
16 | import sys
17 |
18 | if "--version" in sys.argv[1:]:
19 | print("RBH v0.0.4")
20 | sys.exit(0)
21 |
22 | # Parse Command Line
23 | try:
24 | a_vs_b, b_vs_a, c_query, c_match, c_score, sort_order, out_file = sys.argv[1:]
25 | except ValueError:
26 | sys.exit("Expect 7 arguments: two input files, column settings, output file")
27 |
28 |
29 | want_highest = want_lowest = False
30 | if sort_order == "high":
31 | want_highest = True
32 | elif sort_order == "low":
33 | want_lowest = True
34 | else:
35 | sys.exit("Sort order argument should be high or low")
36 |
37 | if out_file in [a_vs_b, b_vs_a]:
38 | sys.exit("Output file would overwrite an input file")
39 |
40 | if "None" in [c_query, c_match, c_score]:
41 | sys.exit("Three distinct column numbers must be chosen")
42 |
43 |
def get_col_index(col_str):
    """Return the zero-based integer index for a column name string.

    Accepts either a Galaxy style column name such as "c12" or a plain
    one-based number such as "12"; both give 11.

    Raises ValueError if the remaining text is not an integer. An empty
    string is reported the same way rather than as an IndexError.
    """
    # startswith() is safe on an empty string, unlike indexing col_str[0].
    if col_str.startswith("c"):
        col_str = col_str[1:]
    return int(col_str) - 1
49 |
50 |
51 | c_query = get_col_index(c_query)
52 | c_match = get_col_index(c_match)
53 | c_score = get_col_index(c_score)
54 | if len(set([c_query, c_match, c_score])) < 3:
55 | sys.exit("Need three different column numbers!")
56 |
# Best hit in B for each A query: {a_id: (b_id, score, score_text)}.
# The original score text is kept so it can be written out unchanged.
best_a_vs_b = dict()
with open(a_vs_b) as handle:
    for line in handle:
        if line.startswith("#"):
            continue
        parts = line.rstrip("\n").split("\t")
        a = parts[c_query]
        b = parts[c_match]
        score = float(parts[c_score])
        # Strict comparisons mean the first hit seen wins any tie.
        if (
            (a not in best_a_vs_b)
            or (want_highest and score > best_a_vs_b[a][1])
            or (want_lowest and score < best_a_vs_b[a][1])
        ):
            best_a_vs_b[a] = (b, score, parts[c_score])
# Only B entries which are the best hit of some A can be part of an RBH.
b_short_list = set(b for (b, score, score_str) in best_a_vs_b.values())

# Best hit in A for each B query, restricted to candidates from above:
# {b_id: (a_id, score, score_text)}.
best_b_vs_a = dict()
with open(b_vs_a) as handle:
    for line in handle:
        if line.startswith("#"):
            continue
        parts = line.rstrip("\n").split("\t")
        b = parts[c_query]
        a = parts[c_match]
        if a not in best_a_vs_b:
            # This A had no hits in the A-vs-B file, so cannot be reciprocal.
            continue
        if b not in b_short_list:
            continue
        score = float(parts[c_score])
        if (
            (b not in best_b_vs_a)
            or (want_highest and score > best_b_vs_a[b][1])
            or (want_lowest and score < best_b_vs_a[b][1])
        ):
            best_b_vs_a[b] = (a, score, parts[c_score])
# TODO - Preserve order from A vs B?
a_short_list = sorted(set(a for (a, score, score_str) in best_b_vs_a.values()))

# Write one row for each pair where each is the best hit of the other.
count = 0
with open(out_file, "w") as outfile:
    outfile.write("#A_id\tB_id\tA_vs_B\tB_vs_A\n")
    for a in a_short_list:
        b = best_a_vs_b[a][0]
        if b in best_b_vs_a and a == best_b_vs_a[b][0]:
            outfile.write(
                "%s\t%s\t%s\t%s\n" % (a, b, best_a_vs_b[a][2], best_b_vs_a[b][2])
            )
            count += 1
print("Done, %i RBH found" % count)
105 |
--------------------------------------------------------------------------------
/tools/reciprocal_best_hits/test-data:
--------------------------------------------------------------------------------
1 | ../../test-data
--------------------------------------------------------------------------------
/workflows/README.rst:
--------------------------------------------------------------------------------
1 | Galaxy Workflows
2 | ================
3 |
4 | Each sub-folder represents a different entry on the Galaxy Tool Shed,
5 | for example ``blast_top_hit_species`` contains a workflow which runs
6 | BLASTX and counts the species of each top hit.
7 |
8 | For general information, see the `main README file <../README.rst>`_.
9 |
--------------------------------------------------------------------------------
/workflows/blast_top_hit_species/.shed.yml:
--------------------------------------------------------------------------------
1 | name: blast_top_hit_species
2 | owner: peterjc
3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/workflows/blast_top_hit_species
4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/workflows/blast_top_hit_species
5 | description: Workflow to count species of top nr BLASTX hits of a transcriptome
6 | long_description: |
7 | This is a non-trivial example workflow using the NCBI BLAST+ wrappers, intended only
8 | for crude crude contamination assessment of a transcriptome assembly.
9 |
10 | This would ideally include a visualisation of the final tally table as a Pie Chart,
11 | currently not possible with the Galaxy Visualization Framework.
12 | categories:
13 | - Sequence Analysis
14 | type: unrestricted
15 |
--------------------------------------------------------------------------------
/workflows/blast_top_hit_species/N_abberans_piechart_mouseover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/workflows/blast_top_hit_species/N_abberans_piechart_mouseover.png
--------------------------------------------------------------------------------
/workflows/blast_top_hit_species/blast_top_hit_species.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/workflows/blast_top_hit_species/blast_top_hit_species.png
--------------------------------------------------------------------------------
/workflows/blast_top_hit_species/repository_dependencies.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
9 |
10 |
--------------------------------------------------------------------------------