├── .flake8 ├── .gitattributes ├── .github ├── styler.R └── workflows │ ├── ci.yaml │ ├── pr.yaml │ ├── pr_without_tool_change.yaml │ └── slash.yaml ├── .gitignore ├── .tt_skip ├── CONTRIBUTING.md ├── README.rst ├── data_managers ├── README.rst └── ncbi_blastdb │ ├── README.rst │ ├── blastdb.xml │ ├── data_manager_conf.xml │ ├── fetch_blast_db.py │ └── tool_dependencies.xml ├── datatypes ├── README.rst └── blast_datatypes │ ├── .shed.yml │ ├── README.rst │ ├── blast.py │ └── datatypes_conf.xml ├── packages ├── README.rst ├── package_blast_plus_2_2_26 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_2_27 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_2_28 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_2_29 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_2_30 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_2_31 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_3_0 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_4_0 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_5_0 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_6_0 │ ├── .shed.yml │ └── tool_dependencies.xml ├── package_blast_plus_2_7_0 │ ├── .shed.yml │ └── tool_dependencies.xml └── package_blast_plus_2_7_1 │ ├── .shed.yml │ └── tool_dependencies.xml ├── test-data ├── README.rst ├── all_fasta.loc ├── blastdb.loc ├── blastdb_d.loc ├── blastdb_p.loc ├── blastn_arabidopsis.extended.tabular ├── blastn_arabidopsis.standard.tabular ├── blastn_arabidopsis.xml ├── blastn_chimera_vs_rhodopsin_db.tabular ├── blastn_chimera_vs_rhodopsin_db_max_hsps1.tabular ├── blastn_chimera_vs_three_human_and_rhodopsin_db.tabular ├── blastn_chimera_vs_three_human_db.tabular ├── blastn_chimera_vs_three_human_max1.tabular ├── blastn_chimera_vs_three_human_max1.txt ├── blastn_rhodopsin_vs_three_human.columns.tabular ├── 
blastn_rhodopsin_vs_three_human.tabular ├── blastn_rhodopsin_vs_three_human.xml ├── blastn_rhodopsin_vs_three_human_converted.tabular ├── blastp_four_human_vs_rhodopsin.tabular ├── blastp_four_human_vs_rhodopsin.xml ├── blastp_four_human_vs_rhodopsin_converted.tabular ├── blastp_four_human_vs_rhodopsin_converted_ext.tabular ├── blastp_four_human_vs_rhodopsin_ext.tabular ├── blastp_four_human_vs_rhodopsin_top3.tabular ├── blastp_four_human_vs_rhodopsin_top3_positive.tabular ├── blastp_human_vs_pdb_seg_no.xml ├── blastp_human_vs_pdb_seg_no_converted_ext.tabular ├── blastp_human_vs_pdb_seg_no_converted_std.tabular ├── blastp_rhodopsin_adv_vs_four_human.tabular ├── blastp_rhodopsin_peptides_vs_four_human.tabular ├── blastp_rhodopsin_vs_four_human.tabular ├── blastp_rhodopsin_vs_four_human_db.taxid.tabular ├── blastp_sample.blast2go.tabular ├── blastp_sample.xml ├── blastp_sample_converted.tabular ├── blastx_rhodopsin_adv_vs_four_human.tabular ├── blastx_rhodopsin_vs_four_human.tabular ├── blastx_rhodopsin_vs_four_human.xml ├── blastx_rhodopsin_vs_four_human_all.tabular ├── blastx_rhodopsin_vs_four_human_converted.tabular ├── blastx_rhodopsin_vs_four_human_converted_ext.tabular ├── blastx_rhodopsin_vs_four_human_ext.tabular ├── blastx_sample.xml ├── blastx_sample_converted.tabular ├── cd00003.smp ├── cd00003_and_cd00008.aux ├── cd00003_and_cd00008.freq ├── cd00003_and_cd00008.loo ├── cd00003_and_cd00008.phr ├── cd00003_and_cd00008.pin ├── cd00003_and_cd00008.psd ├── cd00003_and_cd00008.psi ├── cd00003_and_cd00008.psq ├── cd00003_and_cd00008.rps ├── cd00008.smp ├── chimera.fasta ├── chimera.fasta.gz ├── convert2blastmask_four_human_masked.maskinfo-asn1 ├── convert2blastmask_four_human_masked.maskinfo-asn1-binary ├── deduplicate.nosortids.fasta ├── deduplicate.sortids.fasta ├── deltablast_four_human_vs_rhodopsin.tabular ├── deltablast_four_human_vs_rhodopsin.xml ├── deltablast_four_human_vs_rhodopsin_ext.tabular ├── deltablast_rhodopsin_vs_four_human.tabular ├── 
duplicates.fasta ├── duplicates.fasta.gz ├── duplicates.nr.fasta ├── dustmasker_three_human.fasta ├── dustmasker_three_human.maskinfo-asn1 ├── dustmasker_three_human.maskinfo-asn1-binary ├── empty.fasta ├── empty_file.dat ├── est_out.json ├── four_human_proteins.dbinfo.txt ├── four_human_proteins.fasta ├── four_human_proteins.fasta.log.txt ├── four_human_proteins.fasta.phd ├── four_human_proteins.fasta.phi ├── four_human_proteins.fasta.phr ├── four_human_proteins.fasta.pin ├── four_human_proteins.fasta.pog ├── four_human_proteins.fasta.psd ├── four_human_proteins.fasta.psi ├── four_human_proteins.fasta.psq ├── four_human_proteins_masked.fasta ├── four_human_proteins_taxid.fasta.log.txt ├── four_human_proteins_taxid.fasta.phd ├── four_human_proteins_taxid.fasta.phi ├── four_human_proteins_taxid.fasta.phr ├── four_human_proteins_taxid.fasta.pin ├── four_human_proteins_taxid.fasta.pog ├── four_human_proteins_taxid.fasta.psd ├── four_human_proteins_taxid.fasta.psi ├── four_human_proteins_taxid.fasta.psq ├── k12_edited_proteins.fasta ├── k12_ten_proteins.fasta ├── makeprofiledb_input_cd00003.smp ├── makeprofiledb_input_cd00008.smp ├── more_duplicates.fasta ├── rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular ├── rbh_blastp_four_human_vs_rhodopsin_proteins.tabular ├── rbh_blastp_k12.tabular ├── rbh_blastp_k12_self.tabular ├── rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular ├── rbh_none.tabular ├── rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular ├── rhodopsin_bufo.fasta ├── rhodopsin_nucs.blastdbcmd.txt ├── rhodopsin_nucs.dbinfo.txt ├── rhodopsin_nucs.fasta ├── rhodopsin_nucs.fasta.gz ├── rhodopsin_nucs.fasta.log.txt ├── rhodopsin_nucs.fasta.nhd ├── rhodopsin_nucs.fasta.nhi ├── rhodopsin_nucs.fasta.nhr ├── rhodopsin_nucs.fasta.nin ├── rhodopsin_nucs.fasta.nnd ├── rhodopsin_nucs.fasta.nni ├── rhodopsin_nucs.fasta.nog ├── rhodopsin_nucs.fasta.nsd ├── rhodopsin_nucs.fasta.nsi ├── rhodopsin_nucs.fasta.nsq ├── rhodopsin_nucs.no_gi.fasta ├── 
rhodopsin_nucs.no_gi.region.fasta ├── rhodopsin_peptides.fasta ├── rhodopsin_proteins.fasta ├── segmasker_four_human.fasta ├── segmasker_four_human.maskinfo-asn1 ├── segmasker_four_human.maskinfo-asn1-binary ├── tblastn_four_human_vs_rhodopsin.html ├── tblastn_four_human_vs_rhodopsin.tabular ├── tblastn_four_human_vs_rhodopsin.xml ├── tblastn_four_human_vs_rhodopsin_deflines.tabular ├── tblastn_four_human_vs_rhodopsin_ext.tabular ├── tblastx_rhodopsin_vs_three_human.tabular ├── three_human_mRNA.dbinfo.txt ├── three_human_mRNA.fasta ├── three_human_mRNA.fasta.gz ├── three_human_mRNA.fasta.log.txt ├── three_human_mRNA.fasta.nhd ├── three_human_mRNA.fasta.nhi ├── three_human_mRNA.fasta.nhr ├── three_human_mRNA.fasta.nin ├── three_human_mRNA.fasta.nog ├── three_human_mRNA.fasta.nsd ├── three_human_mRNA.fasta.nsi ├── three_human_mRNA.fasta.nsq ├── three_human_mRNA_and_rhodopsin_nucs.dbinfo.txt └── tool_data_table_conf.xml.test ├── tool-data ├── README.rst ├── all_fasta.loc.sample ├── blast2go.loc.sample ├── blastdb.loc.sample ├── blastdb_d.loc.sample ├── blastdb_p.loc.sample └── tool_data_table_conf.xml.sample ├── tools ├── README.rst ├── blast2go │ ├── .shed.yml │ ├── README.rst │ ├── b2g_slim.py │ ├── blast2go.py │ ├── blast2go.xml │ ├── go_categorize.py │ ├── massage_xml_for_blast2go.py │ └── test-data ├── blast_rbh │ ├── .shed.yml │ ├── README.rst │ ├── best_hits.py │ ├── blast_rbh.py │ ├── blast_rbh.xml │ ├── blast_rbh_report.py │ ├── test-data │ └── update_tests.sh ├── blastxml_to_top_descr │ ├── .shed.yml │ ├── README.rst │ ├── blastxml_to_top_descr.py │ ├── blastxml_to_top_descr.xml │ └── test-data ├── make_nr │ ├── .shed.yml │ ├── README.rst │ ├── make_nr.py │ ├── make_nr.xml │ └── test-data ├── ncbi_blast_plus │ ├── .lint_skip │ ├── .shed.yml │ ├── README.rst │ ├── blastxml_to_tabular.py │ ├── blastxml_to_tabular.xml │ ├── check_no_duplicates.py │ ├── get_species_taxids.xml │ ├── ncbi_blastdbcmd_info.xml │ ├── ncbi_blastdbcmd_wrapper.xml │ ├── 
ncbi_blastn_wrapper.xml │ ├── ncbi_blastp_wrapper.xml │ ├── ncbi_blastx_wrapper.xml │ ├── ncbi_convert2blastmask_wrapper.xml │ ├── ncbi_deltablast_wrapper.xml │ ├── ncbi_dustmasker_wrapper.xml │ ├── ncbi_macros.xml │ ├── ncbi_makeblastdb.xml │ ├── ncbi_makeprofiledb.xml │ ├── ncbi_psiblast_wrapper.xml │ ├── ncbi_rpsblast_wrapper.xml │ ├── ncbi_rpstblastn_wrapper.xml │ ├── ncbi_segmasker_wrapper.xml │ ├── ncbi_tblastn_wrapper.xml │ ├── ncbi_tblastx_wrapper.xml │ ├── test-data │ ├── tool-data │ └── update_test_files.sh └── reciprocal_best_hits │ ├── README.rst │ ├── reciprocal_best_hits.py │ ├── reciprocal_best_hits.xml │ └── test-data └── workflows ├── README.rst └── blast_top_hit_species ├── .shed.yml ├── N_abberans_piechart_mouseover.png ├── README.rst ├── blast_top_hit_species.ga ├── blast_top_hit_species.png └── repository_dependencies.xml /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | # Recommend matching the black default line length of 88, 3 | # rather than the flake8 default of 79: 4 | max-line-length = 88 5 | extend-ignore = 6 | # See https://github.com/PyCQA/pycodestyle/issues/373 7 | E203, 8 | # B902 blind except Exception: statement 9 | # For now willing to ignore this as testing assorted 10 | # operating systems for the right exception is hard: 11 | B902, 12 | 13 | # For flake8-import-order, pycharm is like smarkets but case sensitive 14 | import-order-style = pycharm 15 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Binary files (no line-ending conversions) 2 | # 3 | # Enable hexdump-diff by adding this to .git/config 4 | # 5 | # [diff "hex"] 6 | # textconv = hexdump -v -C 7 | # binary = true 8 | # 9 | *.pin binary diff=hex 10 | *.nin binary diff=hex 11 | -------------------------------------------------------------------------------- /.github/styler.R: 
#!/usr/bin/env Rscript

# Command line wrapper around the styler package.
#
# Styles a single R file, or every R file under a directory (recursively),
# using a four space indent. With --dry on (the default) nothing is rewritten
# and the script fails if any file would change, which makes it usable as a
# lint step. With --dry off the files are rewritten in place.

library("argparse")
library("styler")

parser <- ArgumentParser(description = "Call styler")
parser$add_argument("dir",
    metavar = "DIR", type = "character",
    help = "File or directory to style"
)
parser$add_argument("--dry",
    choices = c("off", "on"), default = "on",
    help = "on: only report files needing changes (default); off: rewrite them"
)
args <- parser$parse_args()

# file.info() reports isdir = NA for a path that does not exist, and
# `if (NA)` aborts with an unhelpful message, so check existence first.
if (!file.exists(args$dir)) {
    stop(paste("No such file or directory:", args$dir))
}
is_directory <- isTRUE(file.info(args$dir)$isdir)

# capture.output() keeps styler's progress output off the console; only the
# summary produced below is shown.
if (is_directory) {
    captured_output <- capture.output({
        result <- style_dir(args$dir, indent_by = 4, dry = args$dry, recursive = TRUE)
    })
} else {
    captured_output <- capture.output({
        result <- style_file(args$dir, indent_by = 4, dry = args$dry)
    })
}

# Count the files styler flagged as changed (rows where changed is NA are
# not counted, matching subset() semantics).
n <- nrow(subset(result, changed == TRUE))
if (n > 0) {
    if (args$dry == "off") {
        print(paste("Changed", n, "files"))
    } else {
        stop(paste("Linting failed for", n, "files"))
    }
}
-------------------------------------------------------------------------------- /.github/workflows/slash.yaml: -------------------------------------------------------------------------------- 1 | name: Slash Command Dispatch 2 | on: 3 | issue_comment: 4 | types: [created] 5 | jobs: 6 | slashCommandDispatch: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Slash Command Dispatch 10 | # workaround for checking availability of secret https://github.com/actions/runner/issues/520 11 | env: 12 | PAT: ${{ secrets.PAT }} 13 | if: ${{ ( github.repository_owner == 'peterjc' ) && ( env.PAT != '' ) }} 14 | uses: peter-evans/slash-command-dispatch@v4 15 | with: 16 | token: ${{ secrets.PAT }} 17 | commands: | 18 | run-all-tool-tests 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #Ignore backup files from some Unix editors, 2 | *~ 3 | *.swp 4 | *.bak 5 | [#]*[#] 6 | 7 | #Ignore any tar-balls prepared to upload to Galaxy Tool Shed 8 | *.tar.gz 9 | 10 | #Ignore patches and any original files created by patch command 11 | *.diff 12 | *.patch 13 | *.orig 14 | *.rej 15 | 16 | #Ignore these hidden files from Mac OS X 17 | .DS_Store 18 | 19 | #Ignore hidden files from Dolphin window manager 20 | .directory 21 | 22 | #Ignore all compiled python files (e.g. from running the unit tests): 23 | *.pyc 24 | *.pyo 25 | 26 | #Ignore all Jython class files (present if using Jython) 27 | *.class 28 | 29 | #Ignore planemo test output 30 | tool_test_output.html 31 | tool_test_output.json 32 | 33 | #Ignore any NCBI BLAST taxonomy database present 34 | taxdb.btd 35 | taxdb.bti 36 | 37 | #Ignore any log files (e.g. 
from running makeblastdb etc) 38 | *.log 39 | -------------------------------------------------------------------------------- /.tt_skip: -------------------------------------------------------------------------------- 1 | tools/reciprocal_best_hits/ 2 | tools/blast2go/ 3 | datatypes/ 4 | workflows/ 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | This document describes how to contribute to this repository. Pull 4 | requests containing bug fixes, updates, and extensions to the existing 5 | tools in this repository will be considered for inclusion. 6 | 7 | To maximize the likelihood your contribution will be accepted, it is a 8 | good practice to file an 9 | [issue](https://github.com/peterjc/galaxy_blast/issues) first and 10 | discuss potential solutions before proceeding with development. 11 | 12 | ## How to Contribute 13 | 14 | * Make sure you have a [GitHub account](https://github.com/signup/free) 15 | * Make sure you have git [installed](https://help.github.com/articles/set-up-git) 16 | * Fork the repository on [GitHub](https://github.com/peterjc/galaxy_blast/fork) 17 | * Make the desired modifications - consider using a [feature branch](https://github.com/Kunena/Kunena-Forum/wiki/Create-a-new-branch-with-git-and-manage-branches). 18 | * Make sure you have added the necessary tests for your changes and they pass. See [TESTING](https://github.com/peterjc/galaxy_blast#testing) for more information. 19 | * Open a [pull request](https://help.github.com/articles/using-pull-requests) with these changes. 
20 | 21 | ## Coding style 22 | 23 | Via the Travis continuous integration testing we enforce various style 24 | checks, including running ``flake8`` on the Python code with this set 25 | of plugins: 26 | 27 | ``` 28 | $ pip install flake8 flake8-blind-except flake8-docstrings flake8-rst-docstrings 29 | ``` 30 | 31 | Additionally, we have adopted the command line tool ``black`` for the 32 | Python coding style - but this must be installed under Python 3, try: 33 | 34 | ``` 35 | $ pip install black 36 | ``` 37 | 38 | Or: 39 | 40 | ``` 41 | $ python3 -m pip install black 42 | ``` 43 | 44 | If you are using Python 3, then we also recommend: 45 | 46 | ``` 47 | $ pip install flake8-black 48 | ``` 49 | 50 | The reStructuredText markup is tested with ``restructuredtext-lint``: 51 | 52 | ``` 53 | $ pip install restructuredtext-lint 54 | ``` 55 | -------------------------------------------------------------------------------- /data_managers/README.rst: -------------------------------------------------------------------------------- 1 | Galaxy Data Manager definitions 2 | =============================== 3 | 4 | Each sub-folder represents a different entry on the Galaxy ToolShed, 5 | for example ``ncbi_blastdb`` contains a Data Manager for fetching 6 | the NCBI BLAST databases. 7 | 8 | For general information, see the `main README file <../README.rst>`_. 9 | -------------------------------------------------------------------------------- /data_managers/ncbi_blastdb/README.rst: -------------------------------------------------------------------------------- 1 | Galaxy Data Manager for NCBI BLAST databases 2 | ============================================ 3 | 4 | Copyright 2014 by Daniel Blankenberg (Penn State University, PA 16802, USA), 5 | and additional contributors. All rights reserved. See the licence text below. 6 | 7 | Downloads and populates blastdb data table. 
This is just a simple example to 8 | demonstrate the use of Data Managers for processing BLAST databases, and 9 | uses the NCBI's ``update_blastdb.pl`` script internally. See: 10 | 11 | Blankenberg et al. (2014) Wrangling Galaxy's reference data 12 | https://doi.org/10.1093/bioinformatics/btu119 13 | 14 | This tool is currently available from the Galaxy Test Tool Shed at: 15 | http://testtoolshed.g2.bx.psu.edu/view/blankenberg/data_manager_example_blastdb_ncbi_update_blastdb 16 | 17 | 18 | History 19 | ======= 20 | 21 | ======= ====================================================================== 22 | Version Changes 23 | ------- ---------------------------------------------------------------------- 24 | v0.0.1 - Initial release as an example Data Manager on the Test ToolShed. 25 | - Depends on ``package_blast_plus_2_2_28`` in ToolShed. 26 | v0.0.2 - Development moved to GitHub, https://github.com/peterjc/galaxy_blast 27 | - Updated citation information (Blankenberg et al. 2014). 28 | - Adopted standard MIT License. 29 | - Now depends on ``package_blast_plus_2_2_30`` in ToolShed. 30 | v0.0.3 - Reorder XML elements (internal change only). 31 | - Python 3 compatible syntax. 32 | ======= ====================================================================== 33 | 34 | 35 | Bug Reports 36 | =========== 37 | 38 | You can file an issue here https://github.com/peterjc/galaxy_blast/issues or ask 39 | us on the Galaxy development list http://lists.bx.psu.edu/listinfo/galaxy-dev 40 | 41 | 42 | Developers 43 | ========== 44 | 45 | This data manager was originally developed as an example to accompany the 46 | paper Blankenberg et al. 
(2014), and posted on the Galaxy Test Tool Shed at: 47 | http://testtoolshed.g2.bx.psu.edu/view/blankenberg/data_manager_example_blastdb_ncbi_update_blastdb 48 | 49 | As of April 2014, development is continuing within the Galaxy BLAST+ wrapper 50 | repository on GitHub: https://github.com/peterjc/galaxy_blast 51 | 52 | 53 | Licence (MIT) 54 | ============= 55 | 56 | Permission is hereby granted, free of charge, to any person obtaining a copy 57 | of this software and associated documentation files (the "Software"), to deal 58 | in the Software without restriction, including without limitation the rights 59 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 60 | copies of the Software, and to permit persons to whom the Software is 61 | furnished to do so, subject to the following conditions: 62 | 63 | The above copyright notice and this permission notice shall be included in 64 | all copies or substantial portions of the Software. 65 | 66 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 67 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 68 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 69 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 70 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 71 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 72 | THE SOFTWARE. 
73 | -------------------------------------------------------------------------------- /data_managers/ncbi_blastdb/blastdb.xml: -------------------------------------------------------------------------------- 1 | 2 | Downloader 3 | 4 | blast+ 5 | python 6 | 7 | 8 | 9 | 10 | fetch_blast_db.py --filename "${out_file}" --tool_data_table_name "blastdb" 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | **What it does** 38 | 39 | Downloads Blast DBs and updates blastdb tool data tables. 40 | 41 | ------ 42 | 43 | 44 | .. class:: infomark 45 | 46 | **Notice:** This is a functional, but basic, tool for fetching preformatted blastdbs. 47 | 48 | 49 | ------- 50 | 51 | **References** 52 | 53 | If you use this Galaxy tool in work leading to a scientific publication please 54 | cite the following paper: 55 | 56 | Blankenberg et al. (2014) Wrangling Galaxy's reference data 57 | https://doi.org/10.1093/bioinformatics/btu119 58 | 59 | 60 | 61 | 10.1093/bioinformatics/btu119 62 | 63 | 64 | -------------------------------------------------------------------------------- /data_managers/ncbi_blastdb/data_manager_conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | blastdb/${path} 11 | 12 | ${GALAXY_DATA_MANAGER_DATA_PATH}/blastdb/${path}/${nucleotide_alias_name} 13 | abspath 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /data_managers/ncbi_blastdb/fetch_blast_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Dan Blankenberg 3 | """Script that calls update_blastdb.pl to download preformatted databases.""" 4 | 5 | from __future__ import print_function 6 | 7 | import hashlib 8 | import optparse 9 | import os 10 | import subprocess 11 | import sys 12 | 13 | from galaxy.util.json import 
if sys.version_info[0] >= 3:
    # Python 3 has no ``basestring``; alias it so the isinstance check in
    # get_dir_hash works on both major versions.
    basestring = str

DEFAULT_ALGORITHM = hashlib.sha512
CHUNK_SIZE = 2**20  # 1 MiB


def _as_bytes(text):
    """Return ``text`` as bytes, UTF-8 encoding it if it is not bytes already.

    hashlib objects only accept bytes. Under Python 2 a native ``str`` is
    already bytes and is returned untouched; under Python 3 the ``str`` paths
    produced by ``os.path`` are encoded.
    """
    if isinstance(text, bytes):
        return text
    return text.encode("utf-8")


def get_dir_hash(directory, algorithm=None, followlinks=True, chunk_size=None):
    """Get hash of directory contents.

    The digest covers, in sorted order at each level of the walk, the path
    (relative to ``directory``) of every sub-directory and file, plus the
    bytes of every file.

    Arguments:
     - directory - path of the directory to hash.
     - algorithm - hash name understood by ``hashlib.new`` or a hashlib
       constructor; defaults to ``DEFAULT_ALGORITHM``.
     - followlinks - passed to ``os.walk``.
     - chunk_size - bytes read per call when hashing file contents;
       defaults to ``CHUNK_SIZE``.

    Returns the hexadecimal digest as a string.
    """
    chunk_size = chunk_size or CHUNK_SIZE
    algorithm = algorithm or DEFAULT_ALGORITHM
    if isinstance(algorithm, basestring):
        digest = hashlib.new(algorithm)
    else:
        digest = algorithm()
    # we hash a directory by taking names of directories, files and their
    # contents
    for dirpath, dirnames, filenames in os.walk(directory, followlinks=followlinks):
        # Sorting in place makes the digest reproducible and also fixes the
        # order in which os.walk descends into the sub-directories.
        dirnames.sort()
        filenames.sort()
        for name in dirnames:
            relative = os.path.relpath(os.path.join(dirpath, name), directory)
            digest.update(_as_bytes(relative))
        for name in filenames:
            filename = os.path.join(dirpath, name)
            digest.update(_as_bytes(os.path.relpath(filename, directory)))
            # Context manager so the handle is closed even if a read fails.
            with open(filename, "rb") as handle:
                while True:
                    data = handle.read(chunk_size)
                    if not data:
                        break
                    digest.update(data)

    return digest.hexdigest()


def main():
    """Parse and execute the arguments from the command line.

    Reads the JSON parameter file named by ``--filename``, runs
    ``update_blastdb.pl --decompress`` for the requested database inside the
    output dataset's extra files directory, then overwrites the same JSON
    file with a data table entry for the table named by
    ``--tool_data_table_name``.
    """
    parser = optparse.OptionParser()
    parser.add_option(
        "-f",
        "--filename",
        dest="filename",
        action="store",
        type="string",
        default=None,
        help="filename",
    )
    parser.add_option(
        "-t",
        "--tool_data_table_name",
        dest="tool_data_table_name",
        action="store",
        type="string",
        default=None,
        help="tool_data_table_name",
    )
    (options, args) = parser.parse_args()

    with open(options.filename) as handle:
        params = from_json_string(handle.read())
    target_directory = params["output_data"][0]["extra_files_path"]
    os.mkdir(target_directory)

    blastdb_name = params["param_dict"]["blastdb_name"]  # value
    data_description = params["param_dict"]["advanced"].get("data_description", None)
    data_id = params["param_dict"]["advanced"].get("data_id", None)

    cmd_options = ["--decompress"]

    args = ["update_blastdb.pl"] + cmd_options + [blastdb_name]
    proc = subprocess.Popen(args=args, shell=False, cwd=target_directory)
    return_code = proc.wait()
    # NOTE(review): only exit status 1 is treated as success here. That
    # presumably matches update_blastdb.pl releases which return 1 after
    # downloading files; confirm against the BLAST+ version actually
    # installed, as a script that returns 0 on success would be rejected.
    if return_code != 1:
        sys.exit("Error obtaining blastdb (%s)" % return_code)

    if not data_id:
        data_id = "%s_%s" % (blastdb_name, get_dir_hash(target_directory))

    if not data_description:
        alias_date = None
        try:
            alias_file = os.path.join(target_directory, "%s.nal" % (blastdb_name))
            with open(alias_file) as handle:
                for line in handle:
                    if line.startswith("# Alias file created "):
                        alias_date = line.split("# Alias file created ", 1)[1].strip()
                    if line.startswith("TITLE"):
                        data_description = line.split(None, 1)[1].strip()
                        break
        except Exception as e:
            # Best effort only: a missing or malformed alias file just means
            # the description falls back to the data id below.
            sys.stderr.write("Error Parsing Alias file for TITLE and date: %s\n" % e)
        if alias_date and data_description:
            data_description = "%s (%s)" % (data_description, alias_date)

    if not data_description:
        data_description = data_id

    data_table_entry = {
        "value": data_id,
        "name": data_description,
        "path": os.path.join(blastdb_name, data_id),
        "nucleotide_alias_name": blastdb_name,
    }
    data_manager_dict = {
        "data_tables": {options.tool_data_table_name: [data_table_entry]}
    }

    # save info to json file (text mode: the JSON is a str, which a binary
    # mode handle rejects under Python 3)
    with open(options.filename, "w") as fh:
        fh.write(to_json_string(data_manager_dict))


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /datatypes/README.rst: -------------------------------------------------------------------------------- 1 | Galaxy datatype definitions (OBSOLETE) 2 | ====================================== 3 | 4 | For a time, the Galaxy community shared additional datatypes on the 5 | Galaxy Tool Shed. Since the October 2016 release of Galaxy, our NCBI 6 | BLAST XML and database datatypes returned to the Galaxy core. 7 | 8 | Each sub-folder represented a different entry on the Galaxy ToolShed, 9 | for example ``blast_datatypes`` contains definitions for BLAST specific 10 | file types such as BLAST XML and BLAST databases. 11 | 12 | For general information, see the `main README file <../README.rst>`_. 13 | -------------------------------------------------------------------------------- /datatypes/blast_datatypes/.shed.yml: -------------------------------------------------------------------------------- 1 | name: blast_datatypes 2 | owner: devteam 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/datatypes/blast_datatypes/ 5 | description: Datatypes for NCBI BLAST (blastxml, databases, etc) 6 | long_description: | 7 | This is a repository for blastxml and other NCBI BLAST related datatypes 8 | such as BLAST databases. 
9 | categories: 10 | - Sequence Analysis 11 | type: unrestricted 12 | -------------------------------------------------------------------------------- /datatypes/blast_datatypes/datatypes_conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /packages/README.rst: -------------------------------------------------------------------------------- 1 | Galaxy dependency definitions 2 | ============================= 3 | 4 | These packages are now obsolete as Galaxy has transitioned to using 5 | Conda and the BioConda channel for packaging tool dependencies. 6 | Specifically, we now use https://anaconda.org/bioconda/blast for the 7 | NCBI BLAST+ binaries. 8 | 9 | Each sub-folder represents a different entry on the Galaxy ToolShed, for 10 | example ``package_blast_plus_2_2_31`` defines the BLAST+ 2.2.31 dependency 11 | which can be used by other Galaxy ToolShed entries via the IUC owned 12 | https://toolshed.g2.bx.psu.edu/view/iuc/package_blast_plus_2_2_31 13 | 14 | Each folder has a (hidden) special file ``.shed.yml`` for use with the 15 | command line tool Planemo to help automate pushing updates to the Galaxy 16 | Tool Shed, e.g. 17 | 18 | $ planemo shed_update --shed_target testtoolshed --check_diff package_blast_plus_2_2_31 19 | ... 20 | $ planemo shed_update --shed_target toolshed --check_diff package_blast_plus_2_2_31 21 | ... 22 | 23 | For general information, see the `main README file <../README.rst>`_. 
24 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_2_26/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_2_26 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_26/ 5 | description: NCBI BLAST+ 2.2.26 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers 8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally. 9 | categories: 10 | - Tool Dependency Packages 11 | type: tool_dependency_definition 12 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_2_27/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_2_27 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_27/ 5 | description: NCBI BLAST+ 2.2.27 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers 8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally. 
9 | categories: 10 | - Tool Dependency Packages 11 | type: tool_dependency_definition 12 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_2_28/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_2_28 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_28/ 5 | description: NCBI BLAST+ 2.2.28 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers 8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally. 9 | categories: 10 | - Tool Dependency Packages 11 | type: tool_dependency_definition 12 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_2_29/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_2_29 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_29/ 5 | description: NCBI BLAST+ 2.2.29 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers 8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally. 
9 | categories: 10 | - Tool Dependency Packages 11 | type: tool_dependency_definition 12 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_2_30/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_2_30 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_30/ 5 | description: NCBI BLAST+ 2.2.30 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers 8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally. 9 | categories: 10 | - Tool Dependency Packages 11 | type: tool_dependency_definition 12 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_2_31/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_2_31 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_2_31/ 5 | description: NCBI BLAST+ 2.2.31 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers 8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally. 
9 | categories: 10 | - Tool Dependency Packages 11 | type: tool_dependency_definition 12 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_2_31/tool_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.2.31_darwin_all.tar.gz 10 | 11 | bin 12 | $INSTALL_DIR 13 | 14 | 15 | 16 | 17 | 18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.2.31_darwin_all.tar.gz 19 | 20 | bin 21 | $INSTALL_DIR 22 | 23 | 24 | 25 | 26 | 27 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.2.31_linux_x64.tar.gz 28 | 29 | bin 30 | $INSTALL_DIR 31 | 32 | 33 | 34 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not yet supported." 35 | echo "Your machine details (the output from 'uname' and 'arch'):" 36 | uname 37 | arch 38 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!" 39 | false 40 | 41 | 42 | 43 | 44 | $INSTALL_DIR 45 | $INSTALL_DIR 46 | 47 | 48 | 49 | 50 | Downloads the precompiled 64 bit Linux, or Mac OS X BLAST+ binaries from the NCBI, 51 | which is faster than performing a local compilation, avoids any issues with build 52 | dependencies, and is more reproducible between installations as there is no 53 | variability from the compiler or library versions. 54 | 55 | Note that as of BLAST+ 2.2.31 the NCBI do not provide 32 bit Linux binaries. 
56 | 57 | For more details, see: 58 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_3_0/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_3_0 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_3_0/ 5 | description: NCBI BLAST+ 2.3.0 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers 8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally. 9 | categories: 10 | - Tool Dependency Packages 11 | type: tool_dependency_definition 12 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_3_0/tool_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.3.0_darwin_all.tar.gz 10 | 11 | bin 12 | $INSTALL_DIR 13 | 14 | 15 | 16 | 17 | 18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.3.0_darwin_all.tar.gz 19 | 20 | bin 21 | $INSTALL_DIR 22 | 23 | 24 | 25 | 26 | 27 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.3.0_linux_x64.tar.gz 28 | 29 | bin 30 | $INSTALL_DIR 31 | 32 | 33 | 34 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not yet supported." 35 | echo "Your machine details (the output from 'uname' and 'arch'):" 36 | uname 37 | arch 38 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!" 
39 | false 40 | 41 | 42 | 43 | 44 | $INSTALL_DIR 45 | $INSTALL_DIR 46 | 47 | 48 | 49 | 50 | Downloads the precompiled 64 bit Linux, or Mac OS X BLAST+ binaries from the NCBI, 51 | which is faster than performing a local compilation, avoids any issues with build 52 | dependencies, and is more reproducible between installations as there is no 53 | variability from the compiler or library versions. 54 | 55 | Note that NCBI do not provide 32 bit Linux binaries anymore. 56 | 57 | For more details, see: 58 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_4_0/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_4_0 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_4_0/ 5 | description: NCBI BLAST+ 2.4.0 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency of the Galaxy wrappers 8 | for NCBI BLAST+ and any other tools which call the BLAST+ binaries internally. 
9 | categories: 10 | - Tool Dependency Packages 11 | type: tool_dependency_definition 12 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_4_0/tool_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.4.0_darwin_all.tar.gz 10 | 11 | bin 12 | $INSTALL_DIR 13 | 14 | 15 | 16 | 17 | 18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.4.0_darwin_all.tar.gz 19 | 20 | bin 21 | $INSTALL_DIR 22 | 23 | 24 | 25 | 26 | 27 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.4.0_linux_x64.tar.gz 28 | 29 | bin 30 | $INSTALL_DIR 31 | 32 | 33 | 34 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not yet supported." 35 | echo "Your machine details (the output from 'uname' and 'arch'):" 36 | uname 37 | arch 38 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!" 39 | false 40 | 41 | 42 | 43 | 44 | $INSTALL_DIR 45 | $INSTALL_DIR 46 | 47 | 48 | 49 | 50 | Downloads the precompiled 64 bit Linux, or Mac OS X BLAST+ binaries from the NCBI, 51 | which is faster than performing a local compilation, avoids any issues with build 52 | dependencies, and is more reproducible between installations as there is no 53 | variability from the compiler or library versions. 54 | 55 | Note that NCBI do not provide 32 bit Linux binaries anymore. 
56 | 57 | For more details, see: 58 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_5_0/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_5_0 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_5_0/ 5 | description: NCBI BLAST+ 2.5.0 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency 8 | of the Galaxy wrappers for NCBI BLAST+ and any other tools which 9 | call the BLAST+ binaries internally. 10 | 11 | Note that for compatibility with BioConda, internally this is now 12 | called "blast" rather than "blast+" as in the older Galaxy BLAST+ 13 | packages. 14 | categories: 15 | - Tool Dependency Packages 16 | type: tool_dependency_definition 17 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_5_0/tool_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.5.0_darwin_x64.tar.gz 10 | 11 | bin 12 | $INSTALL_DIR 13 | 14 | 15 | 16 | 17 | 18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.5.0_linux_x64.tar.gz 19 | 20 | bin 21 | $INSTALL_DIR 22 | 23 | 24 | 25 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not supported." 26 | echo "Your machine details (the output from 'uname' and 'arch'):" 27 | uname 28 | arch 29 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!" 
30 | false 31 | 32 | 33 | 34 | 35 | $INSTALL_DIR 36 | $INSTALL_DIR 37 | 38 | 39 | 40 | 41 | Downloads the precompiled 64 bit Linux, or Mac OS X BLAST+ binaries from the NCBI, 42 | which is faster than performing a local compilation, avoids any issues with build 43 | dependencies, and is more reproducible between installations as there is no 44 | variability from the compiler or library versions. 45 | 46 | Note that NCBI do not provide any 32 bit binaries anymore. 47 | 48 | For more details, see: 49 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_6_0/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_6_0 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_6_0/ 5 | description: NCBI BLAST+ 2.6.0 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency 8 | of the Galaxy wrappers for NCBI BLAST+ and any other tools which 9 | call the BLAST+ binaries internally. 10 | 11 | Note that for compatibility with BioConda, internally this is now 12 | called "blast" rather than "blast+" as in the older Galaxy BLAST+ 13 | packages. 
14 | categories: 15 | - Tool Dependency Packages 16 | type: tool_dependency_definition 17 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_6_0/tool_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.6.0_darwin_x64.tar.gz 10 | 11 | bin 12 | $INSTALL_DIR 13 | 14 | 15 | 16 | 17 | 18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.6.0_linux_x64.tar.gz 19 | 20 | bin 21 | $INSTALL_DIR 22 | 23 | 24 | 25 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not supported." 26 | echo "Your machine details (the output from 'uname' and 'arch'):" 27 | uname 28 | arch 29 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!" 30 | false 31 | 32 | 33 | 34 | 35 | $INSTALL_DIR 36 | $INSTALL_DIR 37 | 38 | 39 | 40 | 41 | Downloads the precompiled 64 bit Linux, or Mac OS X BLAST+ binaries from the NCBI, 42 | which is faster than performing a local compilation, avoids any issues with build 43 | dependencies, and is more reproducible between installations as there is no 44 | variability from the compiler or library versions. 45 | 46 | Note that NCBI do not provide any 32 bit binaries anymore. 
47 | 48 | For more details, see: 49 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_7_0/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_7_0 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_7_0/ 5 | description: NCBI BLAST+ 2.7.0 (binaries only; deprecated) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency 8 | of the Galaxy wrappers for NCBI BLAST+ and any other tools which 9 | call the BLAST+ binaries internally. 10 | 11 | Note that for compatibility with BioConda, internally this is now 12 | called "blast" rather than "blast+" as in the older Galaxy BLAST+ 13 | packages. 14 | 15 | Note the NCBI withdrew BLAST+ 2.7.0 in favor of 2.7.1 released 16 | shortly afterwards. 17 | categories: 18 | - Tool Dependency Packages 19 | type: tool_dependency_definition 20 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_7_0/tool_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.7.0_darwin_x64.tar.gz 10 | 11 | bin 12 | $INSTALL_DIR 13 | 14 | 15 | 16 | 17 | 18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.7.0_linux_x64.tar.gz 19 | 20 | bin 21 | $INSTALL_DIR 22 | 23 | 24 | 25 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not supported." 
26 | echo "Your machine details (the output from 'uname' and 'arch'):" 27 | uname 28 | arch 29 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!" 30 | false 31 | 32 | 33 | 34 | 35 | $INSTALL_DIR 36 | $INSTALL_DIR 37 | 38 | 39 | 40 | 41 | Downloads the precompiled 64 bit Linux, or Mac OS X BLAST+ binaries from the NCBI, 42 | which is faster than performing a local compilation, avoids any issues with build 43 | dependencies, and is more reproducible between installations as there is no 44 | variability from the compiler or library versions. 45 | 46 | Note that NCBI do not provide any 32 bit binaries anymore. 47 | 48 | For more details, see: 49 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_7_1/.shed.yml: -------------------------------------------------------------------------------- 1 | name: package_blast_plus_2_7_1 2 | owner: iuc 3 | homepage_url: https://blast.ncbi.nlm.nih.gov/ 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/packages/package_blast_plus_2_7_1/ 5 | description: NCBI BLAST+ 2.7.1 (binaries only) 6 | long_description: | 7 | This Tool Shed package is intended to be used as a dependency 8 | of the Galaxy wrappers for NCBI BLAST+ and any other tools which 9 | call the BLAST+ binaries internally. 10 | 11 | Note that for compatibility with BioConda, internally this is now 12 | called "blast" rather than "blast+" as in the older Galaxy BLAST+ 13 | packages. 
14 | categories: 15 | - Tool Dependency Packages 16 | type: tool_dependency_definition 17 | -------------------------------------------------------------------------------- /packages/package_blast_plus_2_7_1/tool_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.7.1_darwin_x64.tar.gz 10 | 11 | bin 12 | $INSTALL_DIR 13 | 14 | 15 | 16 | 17 | 18 | https://depot.galaxyproject.org/software/blast_plus/blast_plus_2.7.1_linux_x64.tar.gz 19 | 20 | bin 21 | $INSTALL_DIR 22 | 23 | 24 | 25 | echo "ERROR: Automated installation on your operating system and CPU architecture combination is not supported." 26 | echo "Your machine details (the output from 'uname' and 'arch'):" 27 | uname 28 | arch 29 | echo "Please report this via https://github.com/peterjc/galaxy_blast/issues - thank you!" 30 | false 31 | 32 | 33 | 34 | 35 | $INSTALL_DIR 36 | $INSTALL_DIR 37 | 38 | 39 | 40 | 41 | Downloads the precompiled 64 bit Linux, or Mac OS X BLAST+ binaries from the NCBI, 42 | which is faster than performing a local compilation, avoids any issues with build 43 | dependencies, and is more reproducible between installations as there is no 44 | variability from the compiler or library versions. 45 | 46 | Note that NCBI do not provide any 32 bit binaries anymore. 47 | 48 | For more details, see: 49 | http://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /test-data/README.rst: -------------------------------------------------------------------------------- 1 | Galaxy Tool test data 2 | ===================== 3 | 4 | This folder contains sample files used in the functional tests of the 5 | Galaxy tools defined elsewhere in this repository. 6 | 7 | For general information, see the `main README file <../README.rst>`_. 
8 | -------------------------------------------------------------------------------- /test-data/all_fasta.loc: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | three_human_mRNA thmRNA Three-Human-mRNAs ${__HERE__}/three_human_mRNA.fasta 4 | -------------------------------------------------------------------------------- /test-data/blastdb.loc: -------------------------------------------------------------------------------- 1 | # This is a test file distributed with the Galaxy BLAST+ wrapper for 2 | # defining a list of nucleotide BLAST databases used in functional 3 | # tests for blastn etc. 4 | # 5 | # See the file tool-data/blastdb.loc.sample for more information. 6 | # 7 | three_human_mRNA Three Human mRNAs ${__HERE__}/three_human_mRNA.fasta 8 | rhodopsin_nucs Rhodopsin nucleotides ${__HERE__}/rhodopsin_nucs.fasta 9 | -------------------------------------------------------------------------------- /test-data/blastdb_d.loc: -------------------------------------------------------------------------------- 1 | # This is a test file distributed with the Galaxy BLAST+ wrapper for 2 | # defining a list of protein domain BLAST databases used in functional 3 | # tests of rpsblast etc. 4 | # 5 | # See the file tool-data/blastdb_d.loc.sample for more information. 6 | # 7 | cd00003_and_cd00008 Domains CD00003 (PNPsynthase) and CD00008 (PIN_53EXO-like) ${__HERE__}/cd00003_and_cd00008 8 | -------------------------------------------------------------------------------- /test-data/blastdb_p.loc: -------------------------------------------------------------------------------- 1 | # This is a test file distributed with the Galaxy BLAST+ wrapper for 2 | # defining a list of protein BLAST databases used in functional tests 3 | # for blastp etc. 4 | # 5 | # See the file tool-data/blastdb_p.loc.sample for more information.
6 | # 7 | four_human_proteins Four Human Proteins (no taxid) ${__HERE__}/four_human_proteins.fasta 8 | four_human_proteins_taxid Four Human Proteins (with taxid) ${__HERE__}/four_human_proteins_taxid.fasta 9 | -------------------------------------------------------------------------------- /test-data/blastn_arabidopsis.extended.tabular: -------------------------------------------------------------------------------- 1 | chunk_of_plant chrIII 100.000 630 0 0 1 630 4341 4970 0.0 1164 chrIII 630 630 630 0 100.00 1 1 GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT GATGTTCAATACTGTTTCCAACAAAAAGATTGTTGTCCTCGAGTTCGCCTTCAAGAAAGACACGAGAGAGACTCCAGCCATTGACGTCTGCAAAGGTTTGTTAGGAGACAAGGCCCGAATCAGCATCTATGATCCACAAGTCACGGAAGAACAAATCCAAAGAGACTTAACCATGAACACATTCGACTGGGACCATCCACTTCACCTCCAACCCATGAGTCCAACCACTGTGAAACAAGTCTCAGTTGCTTGGGACGCTTACGCTGCCACCAAAGACGCCCACGGAATCTGCTTGTTAACCGAGTGGGACGAGTATAAGACGCTTGACTATGAGCGGATTTTTGAAAACATGCAGAAACCAGCGTTTGTCTTCGATGGCAGAAATGTTTTTGATGCAGAGAAGCTGAGGAAGATAGGGTTTATTGTTTACTCTATTGGTAAGCCGTTGGACCAGTGGCACATGCCTGCTCTTGCTTAGCTCAGACTCTTTGCCCTTTCTCAAGATTTGGATTGTTTTTCTCTCTGTTGCTTATATCAAATAATTTGTTCTGTTTCTTCTTGACGAGATATTTTCCTATACTTATTATGTTGGTTAGAACAAGAGACTAGGTTTGGTTATTATTGCTAACT 630 23459830 gi|240255695|ref|NC_003074.8| Arabidopsis thaliana chromosome 3, complete sequence 2 | -------------------------------------------------------------------------------- 
/test-data/blastn_arabidopsis.standard.tabular: -------------------------------------------------------------------------------- 1 | chunk_of_plant chrIII 100.000 630 0 0 1 630 4341 4970 0.0 1164 2 | -------------------------------------------------------------------------------- /test-data/blastn_chimera_vs_rhodopsin_db.tabular: -------------------------------------------------------------------------------- 1 | chimera NM_001009242.1 92.308 1014 78 0 8881 9894 34 1047 0.0 1441 2 | chimera GQ290312.1 91.527 956 81 0 8881 9836 4 959 0.0 1317 3 | chimera AB062417.1 87.586 1015 124 2 8881 9894 34 1047 0.0 1175 4 | chimera GQ290303.1 91.515 330 28 0 8881 9210 4 333 8.28e-130 455 5 | chimera GQ290303.1 91.358 243 19 2 9542 9783 3127 3368 1.46e-92 331 6 | chimera GQ290303.1 94.220 173 10 0 9208 9380 1410 1582 1.50e-72 265 7 | chimera GQ290303.1 92.941 170 12 0 9375 9544 2854 3023 1.51e-67 248 8 | chimera GQ290303.1 95.588 68 3 0 9781 9848 4222 4289 7.43e-26 110 9 | -------------------------------------------------------------------------------- /test-data/blastn_chimera_vs_rhodopsin_db_max_hsps1.tabular: -------------------------------------------------------------------------------- 1 | chimera NM_001009242.1 92.308 1014 78 0 8881 9894 34 1047 0.0 1441 2 | chimera GQ290312.1 91.527 956 81 0 8881 9836 4 959 0.0 1317 3 | chimera AB062417.1 87.586 1015 124 2 8881 9894 34 1047 0.0 1175 4 | chimera GQ290303.1 91.515 330 28 0 8881 9210 4 333 8.28e-130 455 5 | -------------------------------------------------------------------------------- /test-data/blastn_chimera_vs_three_human_and_rhodopsin_db.tabular: -------------------------------------------------------------------------------- 1 | chimera ENA|AB011145|AB011145.1 100.000 4560 0 0 1 4560 121 4680 0.0 8421 2 | chimera ENA|M10051|M10051.1 99.931 4331 3 0 4560 8890 60 4390 0.0 7982 3 | chimera ENA|BC112106|BC112106.1 100.000 1093 0 0 8881 9973 121 1213 0.0 2019 4 | chimera NM_001009242.1 92.308 1014 78 0 8881 9894 34 1047 
0.0 1441 5 | chimera GQ290312.1 91.527 956 81 0 8881 9836 4 959 0.0 1317 6 | chimera AB062417.1 87.586 1015 124 2 8881 9894 34 1047 0.0 1175 7 | chimera GQ290303.1 91.515 330 28 0 8881 9210 4 333 1.70e-129 455 8 | chimera GQ290303.1 91.358 243 19 2 9542 9783 3127 3368 2.98e-92 331 9 | chimera GQ290303.1 94.220 173 10 0 9208 9380 1410 1582 3.07e-72 265 10 | chimera GQ290303.1 92.941 170 12 0 9375 9544 2854 3023 3.09e-67 248 11 | chimera GQ290303.1 95.588 68 3 0 9781 9848 4222 4289 1.52e-25 110 12 | -------------------------------------------------------------------------------- /test-data/blastn_chimera_vs_three_human_db.tabular: -------------------------------------------------------------------------------- 1 | chimera ENA|AB011145|AB011145.1 100.000 4560 0 0 1 4560 121 4680 0.0 8421 2 | chimera ENA|M10051|M10051.1 99.931 4331 3 0 4560 8890 60 4390 0.0 7982 3 | chimera ENA|BC112106|BC112106.1 100.000 1093 0 0 8881 9973 121 1213 0.0 2019 4 | -------------------------------------------------------------------------------- /test-data/blastn_chimera_vs_three_human_max1.tabular: -------------------------------------------------------------------------------- 1 | chimera ENA|AB011145|AB011145.1 100.000 4560 0 0 1 4560 121 4680 0.0 8421 2 | -------------------------------------------------------------------------------- /test-data/blastn_rhodopsin_vs_three_human.columns.tabular: -------------------------------------------------------------------------------- 1 | gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.073 1047 1213 2 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.592 4301 1213 3 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.358 4301 1213 4 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.220 4301 1213 5 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.941 4301 1213 6 | gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.554 983 1213 7 | gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.500 1047 1213 8 | 
-------------------------------------------------------------------------------- /test-data/blastn_rhodopsin_vs_three_human.tabular: -------------------------------------------------------------------------------- 1 | gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.073 1047 83 0 1 1047 88 1134 0.0 1474 2 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.592 333 28 0 1 333 118 450 8.03e-132 460 3 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.358 243 19 2 3127 3368 782 1023 6.57e-93 331 4 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.220 173 10 0 1410 1582 448 620 6.76e-73 265 5 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.941 170 12 0 2854 3023 615 784 6.81e-68 248 6 | gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.554 959 81 0 1 959 118 1076 0.0 1323 7 | gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.500 1048 129 2 1 1047 88 1134 0.0 1208 8 | -------------------------------------------------------------------------------- /test-data/blastn_rhodopsin_vs_three_human_converted.tabular: -------------------------------------------------------------------------------- 1 | gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 92.073 1047 83 0 1 1047 88 1134 0.0 1474 2 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.592 333 28 0 1 333 118 450 8e-132 460 3 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 91.358 243 19 2 3127 3368 782 1023 7e-93 331 4 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 94.220 173 10 0 1410 1582 448 620 7e-73 265 5 | gi|283855845|gb|GQ290303.1| ENA|BC112106|BC112106.1 92.941 170 12 0 2854 3023 615 784 7e-68 248 6 | gi|283855822|gb|GQ290312.1| ENA|BC112106|BC112106.1 91.554 959 81 0 1 959 118 1076 0.0 1323 7 | gi|18148870|dbj|AB062417.1| ENA|BC112106|BC112106.1 87.500 1048 129 2 1 1047 88 1134 0.0 1208 8 | -------------------------------------------------------------------------------- /test-data/blastp_four_human_vs_rhodopsin.tabular: 
-------------------------------------------------------------------------------- 1 | P08100 NP_001009242.1 96.552 348 12 0 1 348 1 348 0.0 701 2 | P08100 0811197A 93.103 348 23 1 1 348 1 347 0.0 673 3 | P08100 ADB45242.1 94.817 328 17 0 11 338 1 328 0.0 653 4 | P08100 ADB45229.1 94.817 328 17 0 11 338 1 328 0.0 631 5 | P08100 P56514.1 84.795 342 51 1 1 341 1 342 0.0 619 6 | P08100 BAB21486.1 82.164 342 60 1 1 341 1 342 0.0 599 7 | -------------------------------------------------------------------------------- /test-data/blastp_four_human_vs_rhodopsin_converted.tabular: -------------------------------------------------------------------------------- 1 | P08100 gi|57163783|ref|NP_001009242.1| 96.552 348 12 0 1 348 1 348 0.0 701 2 | P08100 gi|223523|prf||0811197A 93.103 348 23 1 1 348 1 347 0.0 673 3 | P08100 gi|283855846|gb|ADB45242.1| 94.817 328 17 0 11 338 1 328 0.0 653 4 | P08100 gi|283855823|gb|ADB45229.1| 94.817 328 17 0 11 338 1 328 0.0 631 5 | P08100 gi|3024260|sp|P56514.1|OPSD_BUFBU 84.795 342 51 1 1 341 1 342 0.0 619 6 | P08100 gi|12583665|dbj|BAB21486.1| 82.164 342 60 1 1 341 1 342 0.0 599 7 | -------------------------------------------------------------------------------- /test-data/blastp_four_human_vs_rhodopsin_ext.tabular: -------------------------------------------------------------------------------- 1 | P08100 NP_001009242.1 96.552 348 12 0 1 348 1 348 0.0 701 gi|57163783|ref|NP_001009242.1| 1808 336 343 0 98.56 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 
MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 N/A 2 | P08100 0811197A 93.103 348 23 1 1 348 1 347 0.0 673 gi|223523|prf||0811197A 1736 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGID-YTPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 N/A 3 | P08100 ADB45242.1 94.817 328 17 0 11 338 1 328 0.0 653 gi|283855846|gb|ADB45242.1| 1684 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A 4 | P08100 ADB45229.1 94.817 328 17 0 11 
338 1 328 0.0 631 gi|283855823|gb|ADB45229.1| 1627 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A 5 | P08100 P56514.1 84.795 342 51 1 1 341 1 342 0.0 619 gi|3024260|sp|P56514.1|OPSD_BUFBU 1595 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 N/A 6 | P08100 BAB21486.1 82.164 342 60 1 1 341 1 342 0.0 599 gi|12583665|dbj|BAB21486.1| 1544 281 314 1 91.81 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 N/A 7 | -------------------------------------------------------------------------------- /test-data/blastp_four_human_vs_rhodopsin_top3.tabular: -------------------------------------------------------------------------------- 1 | #Query BLAST hit 1 BLAST hit 2 BLAST hit 3 2 | Q9BS26 3 | Q9NSY1 4 | P06213 5 | P08100 gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|223523|prf||0811197A rhodopsin [Bos taurus] gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] 6 | -------------------------------------------------------------------------------- /test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular: -------------------------------------------------------------------------------- 1 | #Query BLAST hit 1 BLAST hit 2 BLAST hit 3 2 | P08100 gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] gi|223523|prf||0811197A rhodopsin [Bos taurus] gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] 3 | -------------------------------------------------------------------------------- /test-data/blastp_human_vs_pdb_seg_no_converted_std.tabular: -------------------------------------------------------------------------------- 1 | sp|Q9BS26|ERP44_HUMAN gi|193885198|pdb|2R2J|A 97.113 381 11 0 26 406 2 382 0.0 768 2 | sp|Q9BS26|ERP44_HUMAN gi|88192228|pdb|2B5E|A 25.172 
290 193 8 25 306 10 283 4e-20 95.1 3 | sp|Q9NSY1|BMP2K_HUMAN gi|73536291|pdb|2BUJ|A 29.391 279 182 8 40 308 21 294 1e-22 105 4 | sp|Q9NSY1|BMP2K_HUMAN gi|270346335|pdb|2WQM|A 27.206 272 166 12 53 311 36 288 6e-17 86.3 5 | sp|P06213|INSR_HUMAN gi|116667097|pdb|2DTG|E 95.905 928 7 2 28 955 1 897 0.0 1846 6 | sp|P06213|INSR_HUMAN gi|114794482|pdb|2HR7|A 99.588 485 2 0 28 512 1 485 0.0 1016 7 | sp|P08100|OPSD_HUMAN gi|16975387|pdb|1JFP|A 93.391 348 23 0 1 348 1 348 0.0 681 8 | sp|P08100|OPSD_HUMAN gi|195927458|pdb|3C9M|A 93.103 348 24 0 1 348 1 348 0.0 674 9 | -------------------------------------------------------------------------------- /test-data/blastp_rhodopsin_adv_vs_four_human.tabular: -------------------------------------------------------------------------------- 1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 348 1 348 0.0 701 2 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.333 354 53 2 1 354 1 348 0.0 605 3 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 0.0 630 4 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 0.0 630 5 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.103 348 23 1 1 347 1 348 0.0 651 6 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.089 349 65 1 1 349 1 348 0.0 587 7 | -------------------------------------------------------------------------------- /test-data/blastp_rhodopsin_peptides_vs_four_human.tabular: -------------------------------------------------------------------------------- 1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 100.000 16 0 0 1 16 95 110 4.51e-14 52.4 2 | gi|57163783|ref|NP_001009242.1| sp|Q9NSY1|BMP2K_HUMAN 83.333 6 1 0 4 9 1107 1112 0.27 16.3 3 | gi|57163783|ref|NP_001009242.1| sp|Q9NSY1|BMP2K_HUMAN 100.000 2 0 0 11 12 266 267 53 10.0 4 | gi|57163783|ref|NP_001009242.1| sp|Q9BS26|ERP44_HUMAN 27.778 18 0 1 9 13 347 364 25 10.8 5 | gi|57163783|ref|NP_001009242.1| sp|P06213|INSR_HUMAN 100.000 2 0 0 
14 15 816 817 95 9.1 6 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 67.857 28 8 1 1 28 319 345 1.31e-13 54.1 7 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 85.714 7 1 0 8 14 625 631 0.004 23.5 8 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 100.000 4 0 0 9 12 265 268 1.0 16.3 9 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 50.000 10 5 0 15 24 343 352 7.0 13.4 10 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 40.000 20 6 2 15 31 376 392 38 11.2 11 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 57.143 7 1 1 20 24 942 948 74 10.4 12 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|Q9NSY1|BMP2K_HUMAN 100.000 2 0 0 25 26 343 344 315 8.3 13 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P06213|INSR_HUMAN 75.000 8 0 1 27 32 630 637 1.0 16.3 14 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P06213|INSR_HUMAN 47.059 17 3 2 18 31 778 791 8.4 13.4 15 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P06213|INSR_HUMAN 66.667 6 0 1 22 27 745 748 101 10.0 16 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 93.333 15 1 0 1 15 11 25 4.81e-11 43.9 17 | gi|283855846|gb|ADB45242.1| sp|Q9NSY1|BMP2K_HUMAN 62.500 8 3 0 1 8 957 964 4.2 12.9 18 | gi|283855846|gb|ADB45242.1| sp|Q9NSY1|BMP2K_HUMAN 37.500 16 10 0 1 16 681 696 35 10.4 19 | gi|283855846|gb|ADB45242.1| sp|Q9NSY1|BMP2K_HUMAN 66.667 3 1 0 13 15 958 960 95 9.1 20 | gi|283855846|gb|ADB45242.1| sp|P06213|INSR_HUMAN 45.455 22 6 2 1 16 774 795 12 11.7 21 | gi|283855846|gb|ADB45242.1| sp|P06213|INSR_HUMAN 100.000 2 0 0 15 16 1357 1358 38 10.4 22 | gi|283855846|gb|ADB45242.1| sp|P06213|INSR_HUMAN 100.000 2 0 0 9 10 558 559 85 9.1 23 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 97.778 45 1 0 1 45 11 55 3.28e-27 95.6 24 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 100.000 2 0 0 40 41 328 329 1001 7.4 25 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 31.818 22 13 1 20 41 1011 1030 4.5 14.6 26 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 46.667 15 8 0 25 39 1273 1287 5.1 14.6 27 | 
gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 40.000 15 9 0 8 22 142 156 30 12.1 28 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 23.529 17 13 0 2 18 688 704 52 11.2 29 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 100.000 4 0 0 37 40 880 883 68 10.8 30 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 57.143 7 3 0 4 10 553 559 722 7.9 31 | gi|283855823|gb|ADB45229.1| sp|P06213|INSR_HUMAN 100.000 2 0 0 22 23 752 753 1254 7.0 32 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 62.500 8 3 0 1 8 957 964 7.9 13.8 33 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 55.556 9 4 0 20 28 564 572 17 12.9 34 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 50.000 8 2 1 8 15 955 960 51 11.2 35 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 100.000 2 0 0 23 24 152 153 171 9.5 36 | gi|283855823|gb|ADB45229.1| sp|Q9NSY1|BMP2K_HUMAN 100.000 2 0 0 31 32 347 348 517 8.3 37 | gi|283855823|gb|ADB45229.1| sp|Q9BS26|ERP44_HUMAN 100.000 2 0 0 44 45 294 295 152 10.0 38 | gi|283855823|gb|ADB45229.1| sp|Q9BS26|ERP44_HUMAN 100.000 2 0 0 21 22 390 391 448 8.3 39 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.333 30 2 0 1 30 1 30 5.63e-16 60.4 40 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 61.538 13 1 3 2 11 933 944 1.6 15.5 41 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 57.143 7 3 0 10 16 304 310 77 10.4 42 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 100.000 2 0 0 19 20 558 559 172 9.1 43 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 100.000 2 0 0 14 15 553 554 173 9.1 44 | gi|223523|prf||0811197A sp|P06213|INSR_HUMAN 100.000 2 0 0 16 17 487 488 193 9.1 45 | gi|223523|prf||0811197A sp|Q9NSY1|BMP2K_HUMAN 55.556 9 4 0 10 18 956 964 9.4 12.9 46 | gi|223523|prf||0811197A sp|Q9NSY1|BMP2K_HUMAN 50.000 4 2 0 23 26 958 961 191 9.1 47 | gi|223523|prf||0811197A sp|Q9BS26|ERP44_HUMAN 100.000 2 0 0 12 13 262 263 73 10.4 48 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 100.000 8 0 0 1 8 1 8 2.14e-06 28.6 49 | 
-------------------------------------------------------------------------------- /test-data/blastp_rhodopsin_vs_four_human.tabular: -------------------------------------------------------------------------------- 1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 348 1 348 0.0 701 2 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.333 354 53 2 1 354 1 348 0.0 605 3 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 0.0 630 4 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 0.0 630 5 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.103 348 23 1 1 347 1 348 0.0 651 6 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.089 349 65 1 1 349 1 348 0.0 587 7 | -------------------------------------------------------------------------------- /test-data/blastp_rhodopsin_vs_four_human_db.taxid.tabular: -------------------------------------------------------------------------------- 1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.55 9606 Homo sapiens human Eukaryota primates 2 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.33 9606 Homo sapiens human Eukaryota primates 3 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.82 9606 Homo sapiens human Eukaryota primates 4 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.82 9606 Homo sapiens human Eukaryota primates 5 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 93.10 9606 Homo sapiens human Eukaryota primates 6 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.09 9606 Homo sapiens human Eukaryota primates 7 | -------------------------------------------------------------------------------- /test-data/blastp_sample.blast2go.tabular: -------------------------------------------------------------------------------- 1 | Sample GO:0005488 tail tape measure protein 2 | -------------------------------------------------------------------------------- /test-data/blastp_sample_converted.tabular: 
-------------------------------------------------------------------------------- 1 | Sample gi|119953746|ref|YP_950551.1| 96.899 516 16 0 1 516 27 542 0.0 949 2 | Sample gi|148986157|ref|ZP_01819143.1| 41.270 252 115 3 49 300 679 897 2e-41 174 3 | Sample gi|77411259|ref|ZP_00787609.1| 40.996 261 143 2 50 310 655 904 8e-39 165 4 | Sample gi|76786754|ref|YP_329383.1| 39.464 261 147 2 50 310 655 904 7e-37 159 5 | Sample gi|153811333|ref|ZP_01964001.1| 29.982 557 277 18 3 516 573 1059 2e-36 157 6 | Sample gi|56962696|ref|YP_174422.1| 28.792 389 228 8 48 433 123 465 3e-33 146 7 | Sample gi|50914476|ref|YP_060448.1| 43.820 178 100 0 50 227 655 832 5e-33 146 8 | Sample gi|29374987|ref|NP_814140.1| 25.463 432 244 8 73 482 545 920 7e-31 139 9 | Sample gi|163941333|ref|YP_001646217.1| 27.189 434 287 7 61 480 142 560 8e-31 138 10 | -------------------------------------------------------------------------------- /test-data/blastx_rhodopsin_adv_vs_four_human.tabular: -------------------------------------------------------------------------------- 1 | gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 1044 1 348 0.0 639 2 | gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.241 332 49 0 42 1037 1 332 0.0 551 3 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.396 111 4 0 1 333 11 121 3.78e-67 220 4 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.308 65 5 0 3174 3368 248 312 4.13e-35 127 5 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.525 59 5 0 2855 3031 177 235 3.99e-33 121 6 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.220 59 4 0 1404 1580 119 177 7.46e-25 97.1 7 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.462 26 3 0 4222 4299 312 337 1.13e-11 57.0 8 | gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.092 326 16 0 1 978 11 336 0.0 589 9 | gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.391 348 23 0 1 1044 1 348 0.0 619 10 | gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.682 333 61 0 23 1021 1 333 0.0 532 
11 | -------------------------------------------------------------------------------- /test-data/blastx_rhodopsin_vs_four_human.tabular: -------------------------------------------------------------------------------- 1 | gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 1044 1 348 0.0 639 2 | gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.241 332 49 0 42 1037 1 332 0.0 551 3 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.396 111 4 0 1 333 11 121 3.78e-67 220 4 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.308 65 5 0 3174 3368 248 312 4.13e-35 127 5 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.525 59 5 0 2855 3031 177 235 3.99e-33 121 6 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.220 59 4 0 1404 1580 119 177 7.46e-25 97.1 7 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 88.462 26 3 0 4222 4299 312 337 1.13e-11 57.0 8 | gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.092 326 16 0 1 978 11 336 0.0 589 9 | gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.391 348 23 0 1 1044 1 348 0.0 619 10 | gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.682 333 61 0 23 1021 1 333 0.0 532 11 | -------------------------------------------------------------------------------- /test-data/blastx_rhodopsin_vs_four_human_converted.tabular: -------------------------------------------------------------------------------- 1 | gi|57163782|ref|NM_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 1044 1 348 0.0 639 2 | gi|2734705|gb|U59921.1|BBU59921 sp|P08100|OPSD_HUMAN 85.241 332 49 0 42 1037 1 332 0.0 551 3 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 96.396 111 4 0 1 333 11 121 4e-67 220 4 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 92.308 65 5 0 3174 3368 248 312 4e-35 127 5 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 91.525 59 5 0 2855 3031 177 235 4e-33 121 6 | gi|283855845|gb|GQ290303.1| sp|P08100|OPSD_HUMAN 93.220 59 4 0 1404 1580 119 177 7e-25 97.1 7 | gi|283855845|gb|GQ290303.1| 
sp|P08100|OPSD_HUMAN 88.462 26 3 0 4222 4299 312 337 1e-11 57.0 8 | gi|283855822|gb|GQ290312.1| sp|P08100|OPSD_HUMAN 95.092 326 16 0 1 978 11 336 0.0 589 9 | gi|18148870|dbj|AB062417.1| sp|P08100|OPSD_HUMAN 93.391 348 23 0 1 1044 1 348 0.0 619 10 | gi|12583664|dbj|AB043817.1| sp|P08100|OPSD_HUMAN 81.682 333 61 0 23 1021 1 333 0.0 532 11 | -------------------------------------------------------------------------------- /test-data/blastx_sample_converted.tabular: -------------------------------------------------------------------------------- 1 | phage_suis gi|119953746|ref|YP_950551.1| 100.000 518 0 0 336 1889 25 542 0.0 988 2 | phage_suis gi|289551554|ref|YP_003472458.1| 32.946 516 280 6 342 1889 657 1106 6e-66 256 3 | phage_suis gi|223044325|ref|ZP_03614360.1| 30.220 546 327 7 393 1889 655 1193 1e-64 252 4 | phage_suis gi|223044325|ref|ZP_03614360.1| 19.882 508 328 9 384 1796 844 1309 6e-28 130 5 | phage_suis gi|268611153|ref|ZP_06144880.1| 28.638 639 371 11 78 1847 440 1042 1e-60 239 6 | phage_suis gi|268611153|ref|ZP_06144880.1| 23.356 441 286 7 543 1856 547 938 4e-31 141 7 | phage_suis gi|268611153|ref|ZP_06144880.1| 25.272 459 266 11 522 1844 722 1121 8e-31 140 8 | phage_suis gi|268611153|ref|ZP_06144880.1| 24.631 406 267 8 501 1694 770 1144 3e-23 115 9 | phage_suis gi|268611153|ref|ZP_06144880.1| 27.801 241 145 3 492 1148 811 1044 6e-16 90.9 10 | phage_suis gi|268611153|ref|ZP_06144880.1| 19.763 253 168 6 1158 1883 547 775 3e-04 52.0 11 | phage_suis gi|268610688|ref|ZP_06144415.1| 28.951 639 369 11 78 1847 440 1042 3e-59 234 12 | phage_suis gi|268610688|ref|ZP_06144415.1| 24.644 491 316 9 501 1856 770 1245 4e-39 167 13 | phage_suis gi|268610688|ref|ZP_06144415.1| 23.791 517 319 9 492 1832 811 1322 3e-37 161 14 | phage_suis gi|268610688|ref|ZP_06144415.1| 21.907 493 322 11 510 1859 905 1377 1e-25 123 15 | phage_suis gi|268610688|ref|ZP_06144415.1| 20.548 292 197 5 486 1343 1138 1400 4e-10 71.6 16 | phage_suis gi|268610688|ref|ZP_06144415.1| 21.408 341 225 10 
894 1883 467 775 8e-05 53.9 17 | phage_suis gi|153811333|ref|ZP_01964001.1| 28.341 621 364 16 108 1847 493 1073 8e-55 219 18 | phage_suis gi|153811333|ref|ZP_01964001.1| 29.673 428 250 9 519 1760 709 1099 2e-47 195 19 | phage_suis gi|153811333|ref|ZP_01964001.1| 29.412 391 226 7 498 1640 746 1096 1e-39 169 20 | phage_suis gi|153811333|ref|ZP_01964001.1| 26.493 268 174 3 492 1256 854 1111 3e-24 118 21 | phage_suis gi|153811333|ref|ZP_01964001.1| 27.124 306 198 4 510 1385 816 1110 1e-23 116 22 | phage_suis gi|262113750|emb|CAR95417.1| 38.462 286 169 1 384 1241 540 818 2e-54 218 23 | phage_suis gi|262113750|emb|CAR95417.1| 29.684 411 271 7 657 1871 460 858 3e-40 171 24 | phage_suis gi|77411259|ref|ZP_00787609.1| 37.193 285 172 1 387 1241 628 905 2e-53 215 25 | phage_suis gi|77411259|ref|ZP_00787609.1| 28.010 407 281 6 660 1871 548 945 1e-40 172 26 | phage_suis gi|77411259|ref|ZP_00787609.1| 22.817 355 207 7 978 1877 540 882 9e-14 83.6 27 | phage_suis gi|76786754|ref|YP_329383.1| 36.842 285 173 1 387 1241 628 905 8e-53 213 28 | phage_suis gi|76786754|ref|YP_329383.1| 27.273 407 284 6 660 1871 548 945 3e-38 164 29 | phage_suis gi|76786754|ref|YP_329383.1| 24.735 283 194 2 543 1391 637 900 3e-23 115 30 | phage_suis gi|76786754|ref|YP_329383.1| 22.910 323 204 6 978 1847 540 850 2e-13 82.4 31 | phage_suis gi|50914476|ref|YP_060448.1| 35.862 290 179 1 372 1241 623 905 4e-51 207 32 | phage_suis gi|50914476|ref|YP_060448.1| 27.007 411 280 7 660 1871 548 945 2e-35 155 33 | phage_suis gi|50914476|ref|YP_060448.1| 22.997 387 269 5 543 1673 637 1004 3e-25 121 34 | -------------------------------------------------------------------------------- /test-data/cd00003_and_cd00008.aux: -------------------------------------------------------------------------------- 1 | BLOSUM62 2 | 11 3 | 1 4 | 0.000000e+00 5 | 0.000000e+00 6 | 0 7 | 0 8 | 100.000000 9 | 234 10 | 6.955024e-02 11 | 160 12 | 4.862535e-02 13 | -------------------------------------------------------------------------------- 
/test-data/cd00003_and_cd00008.freq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.freq -------------------------------------------------------------------------------- /test-data/cd00003_and_cd00008.loo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.loo -------------------------------------------------------------------------------- /test-data/cd00003_and_cd00008.phr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.phr -------------------------------------------------------------------------------- /test-data/cd00003_and_cd00008.pin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.pin -------------------------------------------------------------------------------- /test-data/cd00003_and_cd00008.psd: -------------------------------------------------------------------------------- 1 | gnl|cdd|1890191 2 | gnl|cdd|2379770 3 | -------------------------------------------------------------------------------- /test-data/cd00003_and_cd00008.psi: -------------------------------------------------------------------------------- 1 | "@"4Egnl|cdd|1890191 -------------------------------------------------------------------------------- /test-data/cd00003_and_cd00008.psq: -------------------------------------------------------------------------------- 1 |        2 |            3 |   4 |    5 | 6 |   7 |      8 |    9 |    10 |  11 |       12 |   13 |    
   14 |    15 |   16 | 17 | 18 |  19 |    20 |    21 |  22 |    23 |   24 |  25 |   26 |   27 |     28 | 29 |    30 |  -------------------------------------------------------------------------------- /test-data/cd00003_and_cd00008.rps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/cd00003_and_cd00008.rps -------------------------------------------------------------------------------- /test-data/chimera.fasta.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/chimera.fasta.gz -------------------------------------------------------------------------------- /test-data/convert2blastmask_four_human_masked.maskinfo-asn1: -------------------------------------------------------------------------------- 1 | Blast-db-mask-info ::= { 2 | algo-id 0, 3 | algo-program seg, 4 | algo-options "window=12; locut=2.2; hicut=2.5", 5 | masks { 6 | masks { 7 | int { 8 | from 6, 9 | to 18, 10 | id swissprot { 11 | name "ERP44_HUMAN", 12 | accession "Q9BS26", 13 | release "reviewed" 14 | } 15 | }, 16 | packed-int { 17 | { 18 | from 11, 19 | to 46, 20 | id swissprot { 21 | name "BMP2K_HUMAN", 22 | accession "Q9NSY1", 23 | release "reviewed" 24 | } 25 | }, 26 | { 27 | from 325, 28 | to 332, 29 | id swissprot { 30 | name "BMP2K_HUMAN", 31 | accession "Q9NSY1", 32 | release "reviewed" 33 | } 34 | }, 35 | { 36 | from 421, 37 | to 496, 38 | id swissprot { 39 | name "BMP2K_HUMAN", 40 | accession "Q9NSY1", 41 | release "reviewed" 42 | } 43 | }, 44 | { 45 | from 501, 46 | to 516, 47 | id swissprot { 48 | name "BMP2K_HUMAN", 49 | accession "Q9NSY1", 50 | release "reviewed" 51 | } 52 | }, 53 | { 54 | from 536, 55 | to 558, 56 | id swissprot { 57 | name "BMP2K_HUMAN", 58 | accession "Q9NSY1", 59 | release "reviewed" 60 | } 61 
| }, 62 | { 63 | from 636, 64 | to 648, 65 | id swissprot { 66 | name "BMP2K_HUMAN", 67 | accession "Q9NSY1", 68 | release "reviewed" 69 | } 70 | }, 71 | { 72 | from 737, 73 | to 762, 74 | id swissprot { 75 | name "BMP2K_HUMAN", 76 | accession "Q9NSY1", 77 | release "reviewed" 78 | } 79 | }, 80 | { 81 | from 789, 82 | to 806, 83 | id swissprot { 84 | name "BMP2K_HUMAN", 85 | accession "Q9NSY1", 86 | release "reviewed" 87 | } 88 | }, 89 | { 90 | from 970, 91 | to 983, 92 | id swissprot { 93 | name "BMP2K_HUMAN", 94 | accession "Q9NSY1", 95 | release "reviewed" 96 | } 97 | }, 98 | { 99 | from 999, 100 | to 1010, 101 | id swissprot { 102 | name "BMP2K_HUMAN", 103 | accession "Q9NSY1", 104 | release "reviewed" 105 | } 106 | } 107 | }, 108 | packed-int { 109 | { 110 | from 3, 111 | to 26, 112 | id swissprot { 113 | name "INSR_HUMAN", 114 | accession "P06213", 115 | release "reviewed" 116 | } 117 | }, 118 | { 119 | from 372, 120 | to 390, 121 | id swissprot { 122 | name "INSR_HUMAN", 123 | accession "P06213", 124 | release "reviewed" 125 | } 126 | }, 127 | { 128 | from 766, 129 | to 791, 130 | id swissprot { 131 | name "INSR_HUMAN", 132 | accession "P06213", 133 | release "reviewed" 134 | } 135 | }, 136 | { 137 | from 1312, 138 | to 1324, 139 | id swissprot { 140 | name "INSR_HUMAN", 141 | accession "P06213", 142 | release "reviewed" 143 | } 144 | } 145 | }, 146 | int { 147 | from 230, 148 | to 246, 149 | id swissprot { 150 | name "OPSD_HUMAN", 151 | accession "P08100", 152 | release "reviewed" 153 | } 154 | } 155 | }, 156 | more FALSE 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/convert2blastmask_four_human_masked.maskinfo-asn1-binary 
-------------------------------------------------------------------------------- /test-data/deduplicate.nosortids.fasta: -------------------------------------------------------------------------------- 1 | >Quick;Brown;Fox;3;5 representing 5 records 2 | acgt 3 | >1 first entry 4 | act 5 | >2 The A-Team 6 | AAaa 7 | >4 8 | CCCC 9 | >6 last! 10 | GGGG 11 | -------------------------------------------------------------------------------- /test-data/deduplicate.sortids.fasta: -------------------------------------------------------------------------------- 1 | >3;5;Brown;Fox;Quick representing 5 records 2 | acgt 3 | >1 first entry 4 | act 5 | >2 The A-Team 6 | AAaa 7 | >4 8 | CCCC 9 | >6 last! 10 | GGGG 11 | -------------------------------------------------------------------------------- /test-data/deltablast_four_human_vs_rhodopsin.tabular: -------------------------------------------------------------------------------- 1 | P08100 NP_001009242.1 96.552 348 12 0 1 348 1 348 4.79e-146 406 2 | P08100 P56514.1 84.795 342 51 1 1 341 1 342 3.20e-128 361 3 | P08100 ADB45242.1 94.817 328 17 0 11 338 1 328 7.59e-130 364 4 | P08100 ADB45229.1 94.817 328 17 0 11 338 1 328 1.25e-122 346 5 | P08100 0811197A 93.103 348 23 1 1 348 1 347 4.22e-143 398 6 | P08100 BAB21486.1 82.164 342 60 1 1 341 1 342 9.21e-128 360 7 | -------------------------------------------------------------------------------- /test-data/deltablast_four_human_vs_rhodopsin_ext.tabular: -------------------------------------------------------------------------------- 1 | P08100 NP_001009242.1 96.552 348 12 0 1 348 1 348 4.79e-146 406 gi|57163783|ref|NP_001009242.1| 1044 336 343 0 98.56 1 1 
MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 348 348 N/A 2 | P08100 P56514.1 84.795 342 51 1 1 341 1 342 3.20e-128 361 gi|3024260|sp|P56514.1|OPSD_BUFBU 927 290 322 1 94.15 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEA-SATVSKTE MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRTPLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVVHFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQGSEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTE 348 354 N/A 3 | P08100 ADB45242.1 94.817 328 17 0 11 338 1 328 7.59e-130 364 gi|283855846|gb|ADB45242.1| 936 311 321 0 97.87 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS 
VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A 4 | P08100 ADB45229.1 94.817 328 17 0 11 338 1 328 1.25e-122 346 gi|283855823|gb|ADB45229.1| 888 311 323 0 98.48 1 1 VPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVS VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTIPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 348 328 N/A 5 | P08100 0811197A 93.103 348 23 1 1 348 1 347 4.22e-143 398 gi|223523|prf||0811197A 1025 324 336 1 96.55 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDY-TPHEETNNESFVIYMFVVHFIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSDFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 348 347 N/A 6 | P08100 BAB21486.1 82.164 342 60 1 1 341 1 342 
9.21e-128 360 gi|12583665|dbj|BAB21486.1| 924 281 314 1 91.81 1 1 MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTTICCGKNPL-GDDEASATVSKTE MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRTPLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVCKPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTCHFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQGSTFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTE 348 354 N/A 7 | -------------------------------------------------------------------------------- /test-data/deltablast_rhodopsin_vs_four_human.tabular: -------------------------------------------------------------------------------- 1 | gi|57163783|ref|NP_001009242.1| sp|P08100|OPSD_HUMAN 96.552 348 12 0 1 348 1 348 1.71e-137 385 2 | gi|3024260|sp|P56514.1|OPSD_BUFBU sp|P08100|OPSD_HUMAN 83.333 354 53 2 1 354 1 348 3.53e-130 367 3 | gi|283855846|gb|ADB45242.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 7.52e-121 341 4 | gi|283855823|gb|ADB45229.1| sp|P08100|OPSD_HUMAN 94.817 328 17 0 1 328 11 338 2.06e-121 343 5 | gi|223523|prf||0811197A sp|P08100|OPSD_HUMAN 92.816 348 24 1 1 347 1 348 1.05e-134 378 6 | gi|12583665|dbj|BAB21486.1| sp|P08100|OPSD_HUMAN 81.089 349 65 1 1 349 1 348 5.41e-127 358 7 | -------------------------------------------------------------------------------- /test-data/duplicates.fasta: -------------------------------------------------------------------------------- 1 | >1 first entry 2 | act 3 | >2 The A-Team 4 | AAaa 5 | >3 not unique... 6 | ACgt 7 | >4 8 | CCCC 9 | >5 a duplicate 10 | acgt 11 | >6 last! 
12 | GGGG 13 | -------------------------------------------------------------------------------- /test-data/duplicates.fasta.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/duplicates.fasta.gz -------------------------------------------------------------------------------- /test-data/duplicates.nr.fasta: -------------------------------------------------------------------------------- 1 | >1 first entry 2 | act 3 | >2 The A-Team 4 | AAaa 5 | >3;5 representing 2 records 6 | ACgt 7 | >4 8 | CCCC 9 | >6 last! 10 | GGGG 11 | -------------------------------------------------------------------------------- /test-data/dustmasker_three_human.maskinfo-asn1: -------------------------------------------------------------------------------- 1 | Blast-db-mask-info ::= { 2 | algo-id 2, 3 | algo-program dust, 4 | algo-options "window=64; level=20; linker=1", 5 | masks { 6 | masks { 7 | packed-int { 8 | { 9 | from 1447, 10 | to 1495, 11 | id local id 1 12 | }, 13 | { 14 | from 1540, 15 | to 1552, 16 | id local id 1 17 | }, 18 | { 19 | from 1886, 20 | to 1892, 21 | id local id 1 22 | }, 23 | { 24 | from 2278, 25 | to 2284, 26 | id local id 1 27 | }, 28 | { 29 | from 4409, 30 | to 4415, 31 | id local id 1 32 | }, 33 | { 34 | from 4635, 35 | to 4653, 36 | id local id 1 37 | }, 38 | { 39 | from 4726, 40 | to 4734, 41 | id local id 1 42 | } 43 | }, 44 | packed-int { 45 | { 46 | from 139, 47 | to 219, 48 | id local id 2 49 | }, 50 | { 51 | from 4569, 52 | to 4584, 53 | id local id 2 54 | }, 55 | { 56 | from 4621, 57 | to 4648, 58 | id local id 2 59 | } 60 | } 61 | }, 62 | more FALSE 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /test-data/dustmasker_three_human.maskinfo-asn1-binary: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/dustmasker_three_human.maskinfo-asn1-binary -------------------------------------------------------------------------------- /test-data/empty.fasta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/empty.fasta -------------------------------------------------------------------------------- /test-data/empty_file.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/empty_file.dat -------------------------------------------------------------------------------- /test-data/est_out.json: -------------------------------------------------------------------------------- 1 | {"data_tables": {"blastdb": [{"path": "est/est_a3aebb9941bff066cfbd40ebab14c3992f7aadabb64999f3e3b53d783c06f08033ba9066e5efd9380c6bbf9dcec808a281b7a6e9138087cc207c93f2e3ae3f67", "nucleotide_alias_name": "est", "name": "Database of GenBank+EMBL+DDBJ sequences from EST Divisions (12/05/2013 07:12:35)", "value": "est_a3aebb9941bff066cfbd40ebab14c3992f7aadabb64999f3e3b53d783c06f08033ba9066e5efd9380c6bbf9dcec808a281b7a6e9138087cc207c93f2e3ae3f67"}]}} -------------------------------------------------------------------------------- /test-data/four_human_proteins.dbinfo.txt: -------------------------------------------------------------------------------- 1 | Database: Just 4 human proteins 2 | 4 sequences; 3,297 total residues 3 | 4 | -------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 2 | 
MHPAVFLSLPDLRCSLLLLVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRFSQMLHPIFEEASDVIKEEFP 3 | NENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMKREYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSK 4 | RNIIGYFEQKDSDNYRVFERVANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK 5 | CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHADCDKFRHPLLHIQKTPADCPV 6 | IAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREFHHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLL 7 | RDRDEL 8 | >sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 9 | MKKFSRMPKSEGGSGGGAAGGGAGGAGAGAGCGSGGSSVGVRVFAVGRHQVTLEESLAEGGFSTVFLVRTHGGIRCALKR 10 | MYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSISDNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFC 11 | DTCEAVARLHQCKTPIIHRDLKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG 12 | KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEPDPEHRPDIFQVSYFAFKFAK 13 | KDCPVSNINNSSIPSALPEPMTASEAAARKSQIKARITDTIGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLA 14 | PGEFGNHRPKGALRPGNGPEILLGQGPPQQPPQQHRVLQQLQQGDWRLQQLHLQHRHPHQQQQQQQQQQQQQQQQQQQQQ 15 | QQQQQQHHHHHHHHLLQDAYMQQYQHATQQQQMLQQQFLMHSVYQPQPSASQYPTMMPQYQQAFFQQQMLAQHQPSQQQA 16 | SPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSVADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTEEEL 17 | LDREFDLLRSNRLEERASSDKNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD 18 | QRTGKKTSVQGQVQKGNDESESDFESDPPSPKSSEEEEQDDEEVLQGEQGDFNDDDTEPENLGHRPLLMDSEDEEEEEKH 19 | SSDSDYEQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSAQLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNL 20 | PQHRFPAAGLEQEEFDVFTKAPFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD 21 | EITGSQQQKVKQRSLQKLSSRQRRTKQDMSKSNGKRHHGTPTSTKKTLKPTYRTPERARRHKKVGRRDSQSSNEFLTISD 22 | SKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLSWHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKM 23 | DDFGAVPFTELVVQSITPHQSQQSQPVELDPFGAAPFPSKQ 24 | >sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 25 | MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHLQILLMFKTRPEDFRDLSFPK 26 | 
LIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDW 27 | SRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL 28 | GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQGCHQYVIHNNKCIPECPSGYT 29 | MNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRS 30 | YALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE 31 | RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDACGSNSWTVVD 32 | IDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIIL 33 | KWKPPSDPNGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL 34 | KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEK 35 | VVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGL 36 | IVLYEVSYRRYGDEELHLCVSRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG 37 | PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSREKITLLRELGQGSFGMVYEG 38 | NARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRP 39 | EAENNPGRPPPTLQEMIQMAAEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV 40 | RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDNCPERVTDLMRMCWQFNPKMR 41 | PTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPESEELEMEFEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIP 42 | YTHMNGGKKNGRILTLPRSNPS 43 | >sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 44 | MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA 45 | VADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFT 46 | WVMALACAAPPLAGWSRYIPEGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVKEAAAQQQES 47 | ATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIYIMMNKQFRNCMLTT 48 | ICCGKNPLGDDEASATVSKTETSQVAPA 49 | -------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta.log.txt: 
-------------------------------------------------------------------------------- 1 | New DB title: Just 4 human proteins 2 | Sequence type: Protein 3 | -------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta.phd: -------------------------------------------------------------------------------- 1 | 11117184492 2 | 29249033410 3 | 36665887501 4 | 5392473183 5 | -------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta.phi: -------------------------------------------------------------------------------- 1 | 3@34A11117184492 -------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta.phr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins.fasta.phr -------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta.pin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins.fasta.pin -------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta.pog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins.fasta.pog -------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta.psd: -------------------------------------------------------------------------------- 1 | gnl|bl_ord_id|00 2 | gnl|bl_ord_id|11 3 | gnl|bl_ord_id|22 4 | gnl|bl_ord_id|33 5 | 
-------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta.psi: -------------------------------------------------------------------------------- 1 | H@H4Fgnl|bl_ord_id|00 -------------------------------------------------------------------------------- /test-data/four_human_proteins.fasta.psq: -------------------------------------------------------------------------------- 1 |              2 |      3 |  4 |   5 |  6 |    7 |       8 |   9 |     10 |   11 |        12 |        13 |      14 |    15 |    16 |     17 |   18 | 19 |   20 |  21 |   22 | 23 |   24 |      25 |     26 |   27 |   28 |        29 | 30 |     31 |   32 |     33 |   34 |   35 | 36 |    37 |   38 |     39 |           40 |  41 | 42 |      43 |  44 |     45 |     46 |    47 |                     48 |     49 |      50 |      51 |     52 | 53 |     54 |  55 |  56 |  57 | 58 |  59 |   60 |      61 |  62 |  63 |      64 |  65 |  66 |   67 |  68 | 69 |    70 |   71 |      72 |  73 |   74 |  75 |   76 |   77 |  78 | 79 |  80 |  81 | 82 |     83 |    84 |     85 |         86 |     87 |               88 |   89 |    90 |          91 |      92 |        93 |    94 |  95 |    96 |   97 |     98 |      99 |  100 |   101 |      102 |   103 |           104 |    105 |          106 |    107 |   108 |    109 |  110 |    111 |    112 |   113 |    114 |       115 |    116 |   117 |   118 |       119 |  120 |       121 |  122 |  123 |   124 |  125 |   126 |    127 |  128 |     129 |  130 |         131 |      132 |       133 |           134 |        135 |      136 |      137 |  138 |      139 |   140 |     141 |           142 | 143 |     144 |    145 |  146 |    147 |        148 |        149 |    150 |   151 |        152 |    153 | 154 |                155 | 156 |                157 |            158 |        159 |  160 |  161 |           162 |     163 |     164 |   165 |  -------------------------------------------------------------------------------- 
/test-data/four_human_proteins_masked.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 2 | MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF 3 | SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK 4 | REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER 5 | VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK 6 | CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD 7 | CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF 8 | HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL 9 | >sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 10 | MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG 11 | GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS 12 | DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD 13 | LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG 14 | KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP 15 | DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT 16 | IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE 17 | Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq 18 | qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy 19 | qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV 20 | ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD 21 | KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD 22 | QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE 23 | NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA 24 | QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK 25 | APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD 26 | EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR 27 | HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS 28 | 
WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ 29 | SQQSQPVELDPFGAAPFPSKQ 30 | >sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 31 | MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL 32 | QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL 33 | VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE 34 | ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL 35 | GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG 36 | CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC 37 | TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL 38 | EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE 39 | RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ 40 | NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS 41 | DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE 42 | RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL 43 | KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf 44 | pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV 45 | SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV 46 | SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG 47 | PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR 48 | EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG 49 | FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA 50 | AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV 51 | RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN 52 | CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme 53 | fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN 54 | PS 55 | >sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 56 | MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY 57 | VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG 58 | GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP 59 | 
EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes 60 | attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI 61 | YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 62 | -------------------------------------------------------------------------------- /test-data/four_human_proteins_taxid.fasta.log.txt: -------------------------------------------------------------------------------- 1 | New DB title: Just 4 human proteins 2 | Sequence type: Protein 3 | -------------------------------------------------------------------------------- /test-data/four_human_proteins_taxid.fasta.phd: -------------------------------------------------------------------------------- 1 | 11117184492 2 | 29249033410 3 | 36665887501 4 | 5392473183 5 | -------------------------------------------------------------------------------- /test-data/four_human_proteins_taxid.fasta.phi: -------------------------------------------------------------------------------- 1 | 3@34A11117184492 -------------------------------------------------------------------------------- /test-data/four_human_proteins_taxid.fasta.phr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins_taxid.fasta.phr -------------------------------------------------------------------------------- /test-data/four_human_proteins_taxid.fasta.pin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins_taxid.fasta.pin -------------------------------------------------------------------------------- /test-data/four_human_proteins_taxid.fasta.pog: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/four_human_proteins_taxid.fasta.pog -------------------------------------------------------------------------------- /test-data/four_human_proteins_taxid.fasta.psd: -------------------------------------------------------------------------------- 1 | gnl|bl_ord_id|00 2 | gnl|bl_ord_id|11 3 | gnl|bl_ord_id|22 4 | gnl|bl_ord_id|33 5 | -------------------------------------------------------------------------------- /test-data/four_human_proteins_taxid.fasta.psi: -------------------------------------------------------------------------------- 1 | H@H4Fgnl|bl_ord_id|00 -------------------------------------------------------------------------------- /test-data/four_human_proteins_taxid.fasta.psq: -------------------------------------------------------------------------------- 1 |              2 |      3 |  4 |   5 |  6 |    7 |       8 |   9 |     10 |   11 |        12 |        13 |      14 |    15 |    16 |     17 |   18 | 19 |   20 |  21 |   22 | 23 |   24 |      25 |     26 |   27 |   28 |        29 | 30 |     31 |   32 |     33 |   34 |   35 | 36 |    37 |   38 |     39 |           40 |  41 | 42 |      43 |  44 |     45 |     46 |    47 |                     48 |     49 |      50 |      51 |     52 | 53 |     54 |  55 |  56 |  57 | 58 |  59 |   60 |      61 |  62 |  63 |      64 |  65 |  66 |   67 |  68 | 69 |    70 |   71 |      72 |  73 |   74 |  75 |   76 |   77 |  78 | 79 |  80 |  81 | 82 |     83 |    84 |     85 |         86 |     87 |               88 |   89 |    90 |          91 |      92 |        93 |    94 |  95 |    96 |   97 |     98 |      99 |  100 |   101 |      102 |   103 |           104 |    105 |          106 |    107 |   108 |    109 |  110 |    111 |    112 |   113 |    114 |       115 |    116 |   117 |   118 |       119 |  120 |       121 |  122 |  123 |   124 |  125 |   126 |    127 |  128 |     129 |  130 |         131 |      132 |       
133 |           134 |        135 |      136 |      137 |  138 |      139 |   140 |     141 |           142 | 143 |     144 |    145 |  146 |    147 |        148 |        149 |    150 |   151 |        152 |    153 | 154 |                155 | 156 |                157 |            158 |        159 |  160 |  161 |           162 |     163 |     164 |   165 |  -------------------------------------------------------------------------------- /test-data/k12_ten_proteins.fasta: -------------------------------------------------------------------------------- 1 | >gi|16127995|ref|NP_414542.1| thr operon leader peptide [Escherichia coli str. K-12 substr. MG1655] 2 | MKRISTTITTTITITTGNGAG 3 | >gi|16127996|ref|NP_414543.1| fused aspartokinase I and homoserine dehydrogenase I [Escherichia coli str. K-12 substr. MG1655] 4 | MRVLKFGGTSVANAERFLRVADILESNARQGQVATVLSAPAKITNHLVAMIEKTISGQDALPNISDAERI 5 | FAELLTGLAAAQPGFPLAQLKTFVDQEFAQIKHVLHGISLLGQCPDSINAALICRGEKMSIAIMAGVLEA 6 | RGHNVTVIDPVEKLLAVGHYLESTVDIAESTRRIAASRIPADHMVLMAGFTAGNEKGELVVLGRNGSDYS 7 | AAVLAACLRADCCEIWTDVDGVYTCDPRQVPDARLLKSMSYQEAMELSYFGAKVLHPRTITPIAQFQIPC 8 | LIKNTGNPQAPGTLIGASRDEDELPVKGISNLNNMAMFSVSGPGMKGMVGMAARVFAAMSRARISVVLIT 9 | QSSSEYSISFCVPQSDCVRAERAMQEEFYLELKEGLLEPLAVTERLAIISVVGDGMRTLRGISAKFFAAL 10 | ARANINIVAIAQGSSERSISVVVNNDDATTGVRVTHQMLFNTDQVIEVFVIGVGGVGGALLEQLKRQQSW 11 | LKNKHIDLRVCGVANSKALLTNVHGLNLENWQEELAQAKEPFNLGRLIRLVKEYHLLNPVIVDCTSSQAV 12 | ADQYADFLREGFHVVTPNKKANTSSMDYYHQLRYAAEKSRRKFLYDTNVGAGLPVIENLQNLLNAGDELM 13 | KFSGILSGSLSYIFGKLDEGMSFSEATTLAREMGYTEPDPRDDLSGMDVARKLLILARETGRELELADIE 14 | IEPVLPAEFNAEGDVAAFMANLSQLDDLFAARVAKARDEGKVLRYVGNIDEDGVCRVKIAEVDGNDPLFK 15 | VKNGENALAFYSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 16 | >gi|16127997|ref|NP_414544.1| homoserine kinase [Escherichia coli str. K-12 substr. 
MG1655] 17 | MVKVYAPASSANMSVGFDVLGAAVTPVDGALLGDVVTVEAAETFSLNNLGRFADKLPSEPRENIVYQCWE 18 | RFCQELGKQIPVAMTLEKNMPIGSGLGSSACSVVAALMAMNEHCGKPLNDTRLLALMGELEGRISGSIHY 19 | DNVAPCFLGGMQLMIEENDIISQQVPGFDEWLWVLAYPGIKVSTAEARAILPAQYRRQDCIAHGRHLAGF 20 | IHACYSRQPELAAKLMKDVIAEPYRERLLPGFRQARQAVAEIGAVASGISGSGPTLFALCDKPETAQRVA 21 | DWLGKNYLQNQEGFVHICRLDTAGARVLEN 22 | >gi|16127998|ref|NP_414545.1| threonine synthase [Escherichia coli str. K-12 substr. MG1655] 23 | MKLYNLKDHNEQVSFAQAVTQGLGKNQGLFFPHDLPEFSLTEIDEMLKLDFVTRSAKILSAFIGDEIPQE 24 | ILEERVRAAFAFPAPVANVESDVGCLELFHGPTLAFKDFGGRFMAQMLTHIAGDKPVTILTATSGDTGAA 25 | VAHAFYGLPNVKVVILYPRGKISPLQEKLFCTLGGNIETVAIDGDFDACQALVKQAFDDEELKVALGLNS 26 | ANSINISRLLAQICYYFEAVAQLPQETRNQLVVSVPSGNFGDLTAGLLAKSLGLPVKRFIAATNVNDTVP 27 | RFLHDGQWSPKATQATLSNAMDVSQPNNWPRVEELFRRKIWQLKELGYAAVDDETTQQTMRELKELGYTS 28 | EPHAAVAYRALRDQLNPGEYGLFLGTAHPAKFKESVEAILGETLDLPKELAERADLPLLSHNLPADFAAL 29 | RKLMMNHQ 30 | >gi|16127999|ref|NP_414546.1| hypothetical protein b0005 [Escherichia coli str. K-12 substr. MG1655] 31 | MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHL 32 | HGPPPPPRHHKKAPHDHHGGHGPGKHHR 33 | >gi|16128000|ref|NP_414547.1| peroxide resistance protein, lowers intracellular iron [Escherichia coli str. K-12 substr. MG1655] 34 | MLILISPAKTLDYQSPLTTTRYTLPELLDNSQQLIHEARKLTPPQISTLMRISDKLAGINAARFHDWQPD 35 | FTPANARQAILAFKGDVYTGLQAETFSEDDFDFAQQHLRMLSGLYGVLRPLDLMQPYRLEMGIRLENARG 36 | KDLYQFWGDIITNKLNEALAAQGDNVVINLASDEYFKSVKPKKLNAEIIKPVFLDEKNGKFKIISFYAKK 37 | ARGLMSRFIIENRLTKPEQLTGFNSEGYFFDEDSSSNGELVFKRYEQR 38 | >gi|16128001|ref|NP_414548.1| putative transporter [Escherichia coli str. K-12 substr. 
MG1655] 39 | MPDFFSFINSVLWGSVMIYLLFGAGCWFTFRTGFVQFRYIRQFGKSLKNSIHPQPGGLTSFQSLCTSLAA 40 | RVGSGNLAGVALAITAGGPGAVFWMWVAAFIGMATSFAECSLAQLYKERDVNGQFRGGPAWYMARGLGMR 41 | WMGVLFAVFLLIAYGIIFSGVQANAVARALSFSFDFPPLVTGIILAVFTLLAITRGLHGVARLMQGFVPL 42 | MAIIWVLTSLVICVMNIGQLPHVIWSIFESAFGWQEAAGGAAGYTLSQAITNGFQRSMFSNEAGMGSTPN 43 | AAAAAASWPPHPAAQGIVQMIGIFIDTLVICTASAMLILLAGNGTTYMPLEGIQLIQKAMRVLMGSWGAE 44 | FVTLVVILFAFSSIVANYIYAENNLFFLRLNNPKAIWCLRICTFATVIGGTLLSLPLMWQLADIIMACMA 45 | ITNLTAILLLSPVVHTIASDYLRQRKLGVRPVFDPLRYPDIGRQLSPDAWDDVSQE 46 | >gi|16128002|ref|NP_414549.1| transaldolase B [Escherichia coli str. K-12 substr. MG1655] 47 | MTDKLTSLRQYTTVVADTGDIAAMKLYQPQDATTNPSLILNAAQIPEYRKLIDDAVAWAKQQSNDRAQQI 48 | VDATDKLAVNIGLEILKLVPGRISTEVDARLSYDTEASIAKAKRLIKLYNDAGISNDRILIKLASTWQGI 49 | RAAEQLEKEGINCNLTLLFSFAQARACAEAGVFLISPFVGRILDWYKANTDKKEYAPAEDPGVVSVSEIY 50 | QYYKEHGYETVVMGASFRNIGEILELAGCDRLTIAPALLKELAESEGAIERKLSYTGEVKARPARITESE 51 | FLWQHNQDPMAVDKLAEGIRKFAIDQEKLEKMIGDLL 52 | >gi|16128003|ref|NP_414550.1| molybdochelatase incorporating molybdenum into molybdopterin [Escherichia coli str. K-12 substr. MG1655] 53 | MNTLRIGLVSISDRASSGVYQDKGIPALEEWLTSALTTPFELETRLIPDEQAIIEQTLCELVDEMSCHLV 54 | LTTGGTGPARRDVTPDATLAVADREMPGFGEQMRQISLHFVPTAILSRQVGVIRKQALILNLPGQPKSIK 55 | ETLEGVKDAEGNVVVHGIFASVPYCIQLLEGPYVETAPEVVAAFRPKSARRDVSE 56 | >gi|16128004|ref|NP_414551.1| inner membrane protein, Grp1_Fun34_YaaH family [Escherichia coli str. K-12 substr. 
MG1655] 57 | MGNTKLANPAPLGLMGFGMTTILLNLHNVGYFALDGIILAMGIFYGGIAQIFAGLLEYKKGNTFGLTAFT 58 | SYGSFWLTLVAILLMPKLGLTDAPNAQFLGVYLGLWGVFTLFMFFGTLKGARVLQFVFFSLTVLFALLAI 59 | GNIAGNAAIIHFAGWIGLICGASAIYLAMGEVLNEQFGRTVLPIGESH 60 | 61 | -------------------------------------------------------------------------------- /test-data/more_duplicates.fasta: -------------------------------------------------------------------------------- 1 | >Quick 2 | acgt 3 | >Brown 4 | ACGT 5 | >Fox 6 | ACGT 7 | -------------------------------------------------------------------------------- /test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular: -------------------------------------------------------------------------------- 1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore 2 | ENA|BC112106|BC112106.1 gi|57163782|ref|NM_001009242.1| 1213 1047 86 100 1047 92.073 1515 3 | -------------------------------------------------------------------------------- /test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular: -------------------------------------------------------------------------------- 1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore 2 | sp|P08100|OPSD_HUMAN gi|57163783|ref|NP_001009242.1| 348 348 100 100 348 96.552 701 3 | -------------------------------------------------------------------------------- /test-data/rbh_blastp_k12.tabular: -------------------------------------------------------------------------------- 1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore 2 | gi|16127995|ref|NP_414542.1| gi|16127995|ref|NP_414542.1| 21 21 100 100 21 100.000 38.1 3 | gi|16127996|ref|NP_414543.1| gi|16127996|ref|NP_414543.1| 820 820 100 100 820 100.000 1687 4 | gi|16127997|ref|NP_414544.1| gi|16127997|ref|NP_414544.1| 310 310 100 100 310 100.000 642 5 | gi|16127998|ref|NP_414545.1| gi|16127998|ref|NP_414545.1| 428 428 100 100 428 100.000 882 6 | gi|16128000|ref|NP_414547.1| gi|16128000|ref|NP_414547.1| 258 258 100 
100 258 100.000 531 7 | gi|16128001|ref|NP_414548.1| gi|16128001|ref|NP_414548.1| 476 476 100 100 476 100.000 959 8 | gi|16128002|ref|NP_414549.1| gi|16128002|ref|NP_414549.1| 317 317 100 100 317 100.000 648 9 | gi|16128003|ref|NP_414550.1| gi|16128003|ref|NP_414550.1| 195 195 100 100 195 100.000 397 10 | gi|16128004|ref|NP_414551.1| gi|16128004|ref|NP_414551.1| 188 188 100 100 188 100.000 365 11 | -------------------------------------------------------------------------------- /test-data/rbh_blastp_k12_self.tabular: -------------------------------------------------------------------------------- 1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore 2 | gi|16127997|ref|NP_414544.1| NP_414544_near_copy 310 309 99 100 309 99.676 638 3 | NP_414544_near_copy gi|16127997|ref|NP_414544.1| 309 310 100 99 309 99.676 638 4 | NP_414546_near_copy_1 NP_414546_near_copy_2 99 100 99 98 98 100.000 197 5 | NP_414546_near_copy_2 NP_414546_near_copy_1 100 99 98 99 98 100.000 197 6 | -------------------------------------------------------------------------------- /test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular: -------------------------------------------------------------------------------- 1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore 2 | gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 100 86 1047 92.073 1474 3 | -------------------------------------------------------------------------------- /test-data/rbh_none.tabular: -------------------------------------------------------------------------------- 1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore 2 | -------------------------------------------------------------------------------- /test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular: -------------------------------------------------------------------------------- 1 | #A_id B_id A_length B_length A_qcovhsp B_qcovhsp length pident bitscore 2 | 
gi|57163782|ref|NM_001009242.1| ENA|BC112106|BC112106.1 1047 1213 22 19 230 97.391 559 3 | -------------------------------------------------------------------------------- /test-data/rhodopsin_bufo.fasta: -------------------------------------------------------------------------------- 1 | >U59921.1 Bufo bufo rhodopsin mRNA, complete cds 2 | TCTTTCTAGTTTGGGGGGGGGGACTTTAAAGAGCCGCCAATATGAACGGAACAGAAGGCCCAAACTTTTACATACCCATG 3 | TCCAACAAGACTGGGGTGGTGCGAAGCCCCTTTGAATACCCTCAGTATTACCTGGCAGAGCCATGGCAATATTCCATTCT 4 | GTGCGCGTACATGTTCCTGCTCATTCTACTTGGGTTCCCAATCAACTTCATGACCTTGTACGTCACCATCCAGCACAAGA 5 | AGCTCCGGACACCCTTAAACTATATCCTGCTGAATTTGGCCTTTGCCAACCACTTCATGGTCCTGTGTGGATTCACGGTG 6 | ACAATGTACTCCTCAATGAACGGATACTTCATCCTCGGAGCCACCGGTTGCTATGTTGAAGGCTTCTTCGCTACCCTTGG 7 | TGGTGAAATCGCCCTTTGGTCCCTGGTGGTCTTGGCCATTGAACGATACGTGGTCGTCTGTAAGCCCATGAGCAACTTCC 8 | GATTTAGTGAGAACCATGCCGTCATGGGCGTAGCGTTCACCTGGATAATGGCTTTGTCCTGTGCTGTTCCTCCACTCCTT 9 | GGATGGTCCAGGTACATCCCCGAGGGCATGCAGTGCTCCTGCGGAGTCGACTACTACACCCTGAAGCCCGAGGTCAACAA 10 | CGAGTCCTTCGTCATCTACATGTTCGTCGTCCACTTCACCATCCCCCTGATTATCATTTTCTTCTGCTATGGCCGCCTGG 11 | TGTGCACTGTGAAAGAGGCTGCAGCTCAACAGCAAGAGTCCGCCACCACCCAGAAGGCCGAGAAAGAGGTGACCAGGATG 12 | GTGATCATCATGGTGGTCTTCTTCCTTATCTGTTGGGTCCCCTACGCCTCTGTCGCTTTCTTCATCTTCAGCAATCAGGG 13 | CTCTGAGTTCGGCCCCATCTTCATGACCGTCCCAGCTTTCTTTGCCAAGAGTTCTTCCATCTACAACCCCGTCATCTACA 14 | TCATGCTCAACAAGCAGTTCCGTAACTGCATGATCACCACCCTGTGCTGCGGCAAGAATCCCTTTGGAGAAGACGATGCC 15 | TCCTCTGCCGCCACCTCCAAGACAGAGGCTTCTTCTGTTTCTTCCAGCCAGGTGTCTCCTGCATAAGACCTTCCACCAGG 16 | CCTGTCTCAGGGTCCGCTGCCTCACACAGCTCCCACCGCCCCAACTCCGTCTCCTGCTCGCTAAGGCGGCGAAGTTCCCC 17 | TTCCATTACATAAAACGTATCTGTTCAAGAAAGGCGACGACGAAGGAGAAGAAGAGGAGCCCCCCCGAACCCCTTCGCTG 18 | CTGCTGAAAACGACTTGATTGCTTCTGCAACGCAACGGGGCCTTACGGCAGCGAAGGGGTTGTCATCCGGACGCGCCAAG 19 | AATTCCTTCGAGACTGTAAATATCTTAAAGGAACCGTCCTGCTAGTTACCGACGCCGCTCCTGTAGCCGCCGTTCCCCCG 20 | CACTCCGGCCGGTTCATACCTCTTATTTTTTTGCAATGCAACAGAAAATAATATTTTTGTTCCCACGGCTTTTCCCGGTC 21 | 
AGGTCTGGTAGTGGCGGAGATTGGCCGACCCCTCGCACCTGTAATAAAGCGCAG 22 | -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.blastdbcmd.txt: -------------------------------------------------------------------------------- 1 | gi|57163782|ref|NM_001009242.1| 2 | gi|2734705|gb|U59921.1|BBU59921 "1 -" + 3 | gi|283855845|gb|GQ290303.1| 1-4301 + 4 | gi|283855822|gb|GQ290312.1| "1-983" 5 | gi|18148870|dbj|AB062417.1| "1 -" + 6 | gi|12583664|dbj|AB043817.1| "1--" 7 | 8 | -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.dbinfo.txt: -------------------------------------------------------------------------------- 1 | Database: Rhodopsin nucleotides 2 | 6 sequences; 10,296 total bases 3 | 4 | -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.gz -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.log.txt: -------------------------------------------------------------------------------- 1 | New DB title: Rhodopsin nucleotides 2 | Sequence type: Nucleotide 3 | -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.nhd: -------------------------------------------------------------------------------- 1 | 12397459091 2 | 20759409394 3 | 22689758313 4 | 28815213262 5 | 36620822910 6 | 40074407105 7 | -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.nhi: -------------------------------------------------------------------------------- 1 | N@N4A12397459091 -------------------------------------------------------------------------------- 
/test-data/rhodopsin_nucs.fasta.nhr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nhr -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.nin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nin -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.nnd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nnd -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.nni: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nni -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.nog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nog -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.nsd: -------------------------------------------------------------------------------- 1 | ab0438175 2 | ab043817.15 3 | ab0624174 4 | ab062417.14 5 | bbu599211 6 | gq2903032 7 | gq290303.12 8 | gq2903123 9 | gq290312.13 10 | nm_0010092420 11 | nm_001009242.10 12 | u599211 13 | u59921.11 14 | 
-------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.nsi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nsi -------------------------------------------------------------------------------- /test-data/rhodopsin_nucs.fasta.nsq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/rhodopsin_nucs.fasta.nsq -------------------------------------------------------------------------------- /test-data/rhodopsin_peptides.fasta: -------------------------------------------------------------------------------- 1 | >gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] 2 | LYTSLHGYFVFGPTGC 3 | 4 | >gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin 5 | TTLCCGKNPFGEDDASSAATSKTEASSVSSSQ 6 | 7 | >gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] 8 | VPFSNKTGVVRSPFEH 9 | 10 | >gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus] 11 | VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPIN 12 | 13 | >gi|223523|prf||0811197A rhodopsin [Bos taurus] 14 | MNGTEGPNFYVPFSNKTGVVRSPFEAPQYY 15 | 16 | >gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster] 17 | MNGTEGPN 18 | -------------------------------------------------------------------------------- /test-data/rhodopsin_proteins.fasta: -------------------------------------------------------------------------------- 1 | >gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] 2 | MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT 3 | PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC 4 | KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVV 5 | 
HFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG 6 | SNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA 7 | 8 | >gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin 9 | MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRT 10 | PLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVC 11 | KPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVV 12 | HFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQG 13 | SEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQ 14 | VSPA 15 | 16 | >gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] 17 | VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA 18 | VADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE 19 | NHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF 20 | FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTL 21 | PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 22 | 23 | >gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus] 24 | VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA 25 | VANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE 26 | NHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF 27 | FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTI 28 | PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS 29 | 30 | >gi|223523|prf||0811197A rhodopsin [Bos taurus] 31 | MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRT 32 | PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC 33 | KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVH 34 | FIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGS 35 | DFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA 36 | 37 | >gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster] 38 | 
MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRT 39 | PLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVC 40 | KPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTC 41 | HFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQG 42 | STFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSS 43 | VSPA 44 | -------------------------------------------------------------------------------- /test-data/segmasker_four_human.fasta: -------------------------------------------------------------------------------- 1 | >sp|Q9BS26|ERP44_HUMAN Endoplasmic reticulum resident protein 44 OS=Homo sapiens GN=ERP44 PE=1 SV=1 2 | MHPAVFlslpdlrcsllllVTWVFTPVTTEITSLDTENIDEILNNADVALVNFYADWCRF 3 | SQMLHPIFEEASDVIKEEFPNENQVVFARVDCDQHSDIAQRYRISKYPTLKLFRNGMMMK 4 | REYRGQRSVKALADYIRQQKSDPIQEIRDLAEITTLDRSKRNIIGYFEQKDSDNYRVFER 5 | VANILHDDCAFLSAFGDVSKPERYSGDNIIYKPPGHSAPDMVYLGAMTNFDVTYNWIQDK 6 | CVPLVREITFENGEELTEEGLPFLILFHMKEDTESLEIFQNEVARQLISEKGTINFLHAD 7 | CDKFRHPLLHIQKTPADCPVIAIDSFRHMYVFGDFKDVLIPGKLKQFVFDLHSGKLHREF 8 | HHGPDPTDTAPGEQAQDVASSPPESSFQKLAPSEYRYTLLRDRDEL 9 | >sp|Q9NSY1|BMP2K_HUMAN BMP-2-inducible protein kinase OS=Homo sapiens GN=BMP2K PE=1 SV=2 10 | MKKFSRMPKSEggsgggaagggaggagagagcgsggssvgvrvfavgRHQVTLEESLAEG 11 | GFSTVFLVRTHGGIRCALKRMYVNNMPDLNVCKREITIMKELSGHKNIVGYLDCAVNSIS 12 | DNVWEVLILMEYCRAGQVVNQMNKKLQTGFTEPEVLQIFCDTCEAVARLHQCKTPIIHRD 13 | LKVENILLNDGGNYVLCDFGSATNKFLNPQKDGVNVVEEEIKKYTTLSYRAPEMINLYGG 14 | KPITTKADIWALGCLLYKLCFFTLPFGESQVAICDGNFTIPDNSRYSRNIHCLIRFMLEP 15 | DPEHRPDIFQVSYFAFKFAKKDCPVsninnssiPSALPEPMTASEAAARKSQIKARITDT 16 | IGPTETSIAPRQRPKANSATTATPSVLTIQSSATPVKVLAPGEFGNHRPKGALRPGNGPE 17 | Illgqgppqqppqqhrvlqqlqqgdwrlqqlhlqhrhphqqqqqqqqqqqqqqqqqqqqq 18 | qqqqqqhhhhhhhhllqDAYMqqyqhatqqqqmlqqqFLMHSVYQPQPSASQYPTMmpqy 19 | qqaffqqqmlaqhqpsqqqASPEYLTSPQEFSPALVSYTSSLPAQVGTIMDSSYSANRSV 20 | ADKEAIANFTNQKNISNPPDMSGWNPFGEDNFSKLTeeelldrefdllrSNRLEERASSD 21 | 
KNVDSLSAPHNHPPEDPFGSVPFISHSGSPEKKAEHSSINQENGTANPIKNGKTSPASKD 22 | QRTGKKTSVQGQVQKGNdesesdfesdppspksseeeeqddeeVLQGEQGDFNDDDTEPE 23 | NLGHRPLLMdsedeeeeekhssdsdyeQAKAKYSDMSSVYRDRSGSGPTQDLNTILLTSA 24 | QLSSDVAVETPKQEFDVFGAVPFFAVRAQQPQQEKNEKNLPQHRFPAAGLEQEEFDVFTK 25 | APFSKKVNVQECHAVGPEAHTIPGYPKSVDVFGSTPFQPFLTSTSKSESNEDLFGLVPFD 26 | EITGSQQQKVkqrslqklssrqrrTKQDMSKSNGKRHHGtptstkktlkptYRTPERARR 27 | HKKVGRRDSQSSNEFLTISDSKENISVALTDGKDRGNVLQPEESLLDPFGAKPFHSPDLS 28 | WHPPHQGLSDIRADHNTVLPGRPRQNSLHGSFHSADVLKMDDFGAVPFTELVVQSITPHQ 29 | SQQSQPVELDPFGAAPFPSKQ 30 | >sp|P06213|INSR_HUMAN Insulin receptor OS=Homo sapiens GN=INSR PE=1 SV=4 31 | MATggrrgaaaapllvavaalllgaagHLYPGEVCPGMDIRNNLTRLHELENCSVIEGHL 32 | QILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYAL 33 | VIFEMVHLKELGLYNLMNITRGSVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNE 34 | ECGDICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECL 35 | GNCSQPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG 36 | CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDSVTSAQELRGC 37 | TVINGSLIINIRggnnlaaeleanlglieeiSGYLKIRRSYALVSLSFFRKLRLIRGETL 38 | EIGNYSFYALDNQNLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQE 39 | RNDIALKTNGDQASCENELLKFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQ 40 | NVTEFDGQDACGSNSWTVVDIDPPLRSNDPKSQNHPGWLMRGLKPWTQYAIFVKTLVTFS 41 | DERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDPNGNITHYLVFWE 42 | RQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHNQSEYEDSAGECCSCPKTDSQIL 43 | KELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPRPSRKRRSLGDvgnvtvavptvaaf 44 | pntsstsvptspEEHRPFEKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYV 45 | SARTMPEAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCV 46 | SRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG 47 | PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSVYVPDEWEVSR 48 | EKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVNESASLRERIEFLNEASVMKG 49 | FTCHHVVRLLGVVSKGQPTLVVMELMAHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMA 50 | AEIADGMAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPV 51 | 
RWMAPESLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN 52 | CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENKAPeseeleme 53 | fedmeNVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIPYTHMNGGKKNGRILTLPRSN 54 | PS 55 | >sp|P08100|OPSD_HUMAN Rhodopsin OS=Homo sapiens GN=RHO PE=1 SV=1 56 | MNGTEGPNFYVPFSNATGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY 57 | VTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEGFFATLG 58 | GEIALWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLAGWSRYIP 59 | EGLQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIIIFFCYGQLVFTVkeaaaqqqes 60 | attqkaeKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAI 61 | YNPVIYIMMNKQFRNCMLTTICCGKNPLGDDEASATVSKTETSQVAPA 62 | -------------------------------------------------------------------------------- /test-data/segmasker_four_human.maskinfo-asn1: -------------------------------------------------------------------------------- 1 | Blast-db-mask-info ::= { 2 | algo-id 1, 3 | algo-program seg, 4 | algo-options "window=12; locut=2.2; hicut=2.5", 5 | masks { 6 | masks { 7 | int { 8 | from 6, 9 | to 18, 10 | id local id 1 11 | }, 12 | packed-int { 13 | { 14 | from 11, 15 | to 46, 16 | id local id 2 17 | }, 18 | { 19 | from 325, 20 | to 332, 21 | id local id 2 22 | }, 23 | { 24 | from 421, 25 | to 443, 26 | id local id 2 27 | }, 28 | { 29 | from 437, 30 | to 450, 31 | id local id 2 32 | }, 33 | { 34 | from 447, 35 | to 496, 36 | id local id 2 37 | }, 38 | { 39 | from 501, 40 | to 516, 41 | id local id 2 42 | }, 43 | { 44 | from 536, 45 | to 554, 46 | id local id 2 47 | }, 48 | { 49 | from 545, 50 | to 558, 51 | id local id 2 52 | }, 53 | { 54 | from 636, 55 | to 648, 56 | id local id 2 57 | }, 58 | { 59 | from 737, 60 | to 762, 61 | id local id 2 62 | }, 63 | { 64 | from 789, 65 | to 806, 66 | id local id 2 67 | }, 68 | { 69 | from 970, 70 | to 983, 71 | id local id 2 72 | }, 73 | { 74 | from 999, 75 | to 1010, 76 | id local id 2 77 | } 78 | }, 79 | packed-int { 80 | { 81 | from 3, 82 | to 26, 83 | id local id 3 84 | }, 85 | { 86 | from 372, 
87 | to 390, 88 | id local id 3 89 | }, 90 | { 91 | from 766, 92 | to 782, 93 | id local id 3 94 | }, 95 | { 96 | from 780, 97 | to 791, 98 | id local id 3 99 | }, 100 | { 101 | from 1312, 102 | to 1324, 103 | id local id 3 104 | } 105 | }, 106 | int { 107 | from 230, 108 | to 246, 109 | id local id 4 110 | } 111 | }, 112 | more FALSE 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /test-data/segmasker_four_human.maskinfo-asn1-binary: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/segmasker_four_human.maskinfo-asn1-binary -------------------------------------------------------------------------------- /test-data/tblastn_four_human_vs_rhodopsin.tabular: -------------------------------------------------------------------------------- 1 | sp|P08100|OPSD_HUMAN gi|57163782|ref|NM_001009242.1| 96.552 348 12 0 1 348 1 1044 0.0 732 2 | sp|P08100|OPSD_HUMAN gi|18148870|dbj|AB062417.1| 93.391 348 23 0 1 348 1 1044 0.0 711 3 | sp|P08100|OPSD_HUMAN gi|283855822|gb|GQ290312.1| 95.092 326 16 0 11 336 1 978 0.0 682 4 | sp|P08100|OPSD_HUMAN gi|2734705|gb|U59921.1|BBU59921 84.795 342 51 1 1 341 42 1067 0.0 646 5 | sp|P08100|OPSD_HUMAN gi|12583664|dbj|AB043817.1| 82.164 342 60 1 1 341 23 1048 0.0 626 6 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.243 74 5 0 239 312 3147 3368 1.34e-71 151 7 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 91.525 59 5 0 177 235 2855 3031 1.34e-71 126 8 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 96.396 111 4 0 11 121 1 333 3.31e-67 229 9 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 93.220 59 4 0 119 177 1404 1580 2.31e-32 122 10 | sp|P08100|OPSD_HUMAN gi|283855845|gb|GQ290303.1| 88.462 26 3 0 312 337 4222 4299 3.61e-12 57.7 11 | -------------------------------------------------------------------------------- 
/test-data/tblastn_four_human_vs_rhodopsin_deflines.tabular: -------------------------------------------------------------------------------- 1 | P08100 NM_001009242.1 96.552 348 12 0 1 348 1 1044 0.0 732 2 | P08100 AB062417.1 93.391 348 23 0 1 348 1 1044 0.0 711 3 | P08100 GQ290312.1 95.092 326 16 0 11 336 1 978 0.0 682 4 | P08100 U59921.1 84.795 342 51 1 1 341 42 1067 0.0 646 5 | P08100 AB043817.1 82.164 342 60 1 1 341 23 1048 0.0 626 6 | P08100 GQ290303.1 93.243 74 5 0 239 312 3147 3368 1.34e-71 151 7 | P08100 GQ290303.1 91.525 59 5 0 177 235 2855 3031 1.34e-71 126 8 | P08100 GQ290303.1 96.396 111 4 0 11 121 1 333 3.31e-67 229 9 | P08100 GQ290303.1 93.220 59 4 0 119 177 1404 1580 2.31e-32 122 10 | P08100 GQ290303.1 88.462 26 3 0 312 337 4222 4299 3.61e-12 57.7 11 | -------------------------------------------------------------------------------- /test-data/three_human_mRNA.dbinfo.txt: -------------------------------------------------------------------------------- 1 | Database: Just 3 human mRNA sequences 2 | 3 sequences; 10,732 total bases 3 | 4 | -------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.gz -------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.log.txt: -------------------------------------------------------------------------------- 1 | New DB title: Just 3 human mRNA sequences 2 | Sequence type: Nucleotide 3 | -------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.nhd: -------------------------------------------------------------------------------- 1 | 12956943350 2 | 13082197871 3 | 19180330422 4 | 
-------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.nhi: -------------------------------------------------------------------------------- 1 | '@'4A12956943350 -------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.nhr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.nhr -------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.nin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.nin -------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.nog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.nog -------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.nsd: -------------------------------------------------------------------------------- 1 | gnl|bl_ord_id|00 2 | gnl|bl_ord_id|11 3 | gnl|bl_ord_id|22 4 | -------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.nsi: -------------------------------------------------------------------------------- 1 | 6@64Fgnl|bl_ord_id|00 -------------------------------------------------------------------------------- /test-data/three_human_mRNA.fasta.nsq: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/test-data/three_human_mRNA.fasta.nsq -------------------------------------------------------------------------------- /test-data/three_human_mRNA_and_rhodopsin_nucs.dbinfo.txt: -------------------------------------------------------------------------------- 1 | Database: Just 3 human mRNA sequences; Rhodopsin nucleotides 2 | 9 sequences; 21,028 total bases 3 | 4 | -------------------------------------------------------------------------------- /test-data/tool_data_table_conf.xml.test: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | value, name, path 5 | 6 |
7 | 8 | value, name, path 9 | 10 |
11 | 12 | value, name, path 13 | 14 |
15 | 16 | value, dbkey, name, path 17 | 18 |
19 |
20 | -------------------------------------------------------------------------------- /tool-data/README.rst: -------------------------------------------------------------------------------- 1 | Galaxy Tool sample data 2 | ======================= 3 | 4 | This folder contains sample files used by Galaxy tools defined elsewhere 5 | in this repository. For example, ``blastdb_p.loc.sample`` is a sample 6 | file used to generate the default ``blastdb_p.loc`` file describing any 7 | system-level protein BLAST databases available within Galaxy. 8 | 9 | For general information, see the `main README file <../README.rst>`_. 10 | -------------------------------------------------------------------------------- /tool-data/all_fasta.loc.sample: -------------------------------------------------------------------------------- 1 | #This file lists the locations and dbkeys of all the fasta files 2 | #under the "genome" directory (a directory that contains a directory 3 | #for each build). The script extract_fasta.py will generate the file 4 | #all_fasta.loc. This file has the format (white space characters are 5 | #TAB characters): 6 | # 7 | # 8 | # 9 | #So, all_fasta.loc could look something like this: 10 | # 11 | #apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa 12 | #hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa 13 | #hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa 14 | # 15 | #Your all_fasta.loc file should contain an entry for each individual 16 | #fasta file. So there will be multiple fasta files for each build, 17 | #such as with hg19 above. 
18 | # 19 | -------------------------------------------------------------------------------- /tool-data/blast2go.loc.sample: -------------------------------------------------------------------------------- 1 | # This is a three column tab separated file to define the properties 2 | # file (settings) to be offered for Blast2GO for Pipelines (b2g4pipe). 3 | # 4 | # Column 1 - ID, string that Galaxy will save in its database 5 | # Column 2 - Human readable name, Galaxy will show this in the UI 6 | # Column 3 - Filename, Galaxy will use this when calling the tool 7 | # 8 | # Probably the most important setting in the properties file is the 9 | # Blast2GO database to use. Currently b2g4pipe v2.5 ships with an 10 | # old configuration so consult http://www.blast2go.com for the latest 11 | # public database they host in Spain (or find this by running the GUI 12 | # version of Blast2GO via Java Web Start under the menu entry "Tools", 13 | # "General Settings", "DataAccess setting"). We also strongly recommend 14 | # configuring a local Blast2GO database. 15 | # 16 | # The property filenames can be fully qualified paths like 17 | # /opt/b2g4pipe/Spain_2012_August.properties or provided they are 18 | # in the same folder as the Blast2GO JAR file, just the filename 19 | # like Spain_2012_August.properties instead. This is intended to 20 | # make migrating between future versions of Blast2GO easier (as the 21 | # property files change between versions), and simpler overall. 
22 | # 23 | #Local_2011_May Local database (May 2011) Local_2011_May.properties 24 | #Spain_2010_May Database in Spain (May 2010) Spain_2010_May.properties 25 | Spain_2012_August Database in Spain (August 2012) Spain_2012_August.properties 26 | Spain_2011_June Database in Spain (June 2011) Spain_2011_June.properties 27 | #default Default settings b2gPipe.properties 28 | -------------------------------------------------------------------------------- /tool-data/blastdb.loc.sample: -------------------------------------------------------------------------------- 1 | # This is a sample file distributed with Galaxy that is used to define a 2 | # list of nucleotide BLAST databases, using three columns tab separated: 3 | # 4 | # <unique_id>{tab}<database_caption>{tab}<base_name_path> 5 | # 6 | # The captions typically contain spaces and might end with the build date. 7 | # It is important that the actual database name does not have a space in 8 | # it, and that there are only two tabs on each line. 9 | # 10 | # You can download the NCBI provided nucleotide databases like NT from here: 11 | # ftp://ftp.ncbi.nlm.nih.gov/blast/db/ 12 | # 13 | # For simplicity, many Galaxy servers are configured to offer just a live 14 | # version of each NCBI BLAST database (updated with the NCBI provided 15 | # Perl scripts or similar). In this case, we recommend using the case 16 | # sensitive base-name of the NCBI BLAST databases as the unique id. 17 | # Consistent naming is important for sharing workflows between Galaxy 18 | # servers. 19 | # 20 | # For example, consider the NCBI partially non-redundant nucleotide 21 | # nt BLAST database, where you have downloaded and decompressed the 22 | # files under /data/blastdb/ meaning at the command line BLAST+ 23 | # would look at the files /data/blastdb/nt.n* when run with: 24 | # 25 | # $ blastn -db /data/blastdb/nt -query ... 
26 | # 27 | # In this case use nt (lower case to match the NCBI file naming) as the 28 | # unique id in the first column of blastdb.loc, giving an entry like 29 | # this: 30 | # 31 | # nt{tab}NCBI partially non-redundant (nt){tab}/data/blastdb/nt 32 | # 33 | # Alternatively, rather than a "live" mirror of the NCBI databases which 34 | # are updated automatically, for full reproducibility the Galaxy Team 35 | # recommend saving date-stamped copies of the databases. In this case 36 | # your blastdb.loc file should include an entry per line for each 37 | # version you have stored. For example: 38 | # 39 | # nt_05Jun2010{tab}NCBI nt (partially non-redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nt 40 | # nt_15Aug2010{tab}NCBI nt (partially non-redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nt 41 | # ...etc... 42 | # 43 | # See also blastdb_p.loc which is for any protein BLAST database, and 44 | # blastdb_d.loc which is for any protein domains databases (like CDD). 45 | -------------------------------------------------------------------------------- /tool-data/blastdb_d.loc.sample: -------------------------------------------------------------------------------- 1 | # This is a sample file distributed with Galaxy that is used to define a 2 | # list of protein domain databases, using three columns tab separated 3 | # (longer whitespace are TAB characters): 4 | # 5 | # <unique_id>{tab}<database_caption>{tab}<base_name_path> 6 | # 7 | # The captions typically contain spaces and might end with the build date. 8 | # It is important that the actual database name does not have a space in 9 | # it, and that there are only two tabs on each line. 10 | # 11 | # You can download the NCBI provided databases as tar-balls from here: 12 | # ftp://ftp.ncbi.nih.gov/pub/mmdb/cdd/little_endian/ 13 | # 14 | # For simplicity, many Galaxy servers are configured to offer just a live 15 | # version of each NCBI BLAST database (updated with the NCBI provided 16 | # Perl scripts or similar). 
In this case, we recommend using the case 17 | # sensitive base-name of the NCBI BLAST databases as the unique id. 18 | # Consistent naming is important for sharing workflows between Galaxy 19 | # servers. 20 | # 21 | # For example, consider the NCBI Conserved Domains Database (CDD), where 22 | # you have downloaded and decompressed the files under the directory 23 | # /data/blastdb/domains/ meaning at the command line BLAST+ would be 24 | # run as follows and would look at the files /data/blastdb/domains/Cdd.*: 25 | # 26 | # $ rpsblast -db /data/blastdb/domains/Cdd -query ... 27 | # 28 | # In this case use Cdd (title case to match the NCBI file naming) as the 29 | # unique id in the first column of blastdb_d.loc, giving an entry like 30 | # this: 31 | # 32 | # Cdd{tab}NCBI Conserved Domains Database (CDD){tab}/data/blastdb/domains/Cdd 33 | # 34 | # Your blastdb_d.loc file should include an entry per line for each "base name" 35 | # you have stored. For example: 36 | # 37 | # Cdd{tab}NCBI CDD{tab}/data/blastdb/domains/Cdd 38 | # Kog{tab}KOG (eukaryotes){tab}/data/blastdb/domains/Kog 39 | # Cog{tab}COG (prokaryotes){tab}/data/blastdb/domains/Cog 40 | # Pfam{tab}Pfam-A{tab}/data/blastdb/domains/Pfam 41 | # Smart{tab}SMART{tab}/data/blastdb/domains/Smart 42 | # Tigr{tab}TIGR{tab}/data/blastdb/domains/Tigr 43 | # Prk{tab}Protein Clusters database{tab}/data/blastdb/domains/Prk 44 | # ...etc... 45 | # 46 | # Alternatively, rather than a "live" mirror of the NCBI databases which 47 | # are updated automatically, for full reproducibility the Galaxy Team 48 | # recommend saving date-stamped copies of the databases. In this case 49 | # your blastdb_d.loc file should include an entry per line for each 50 | # version you have stored. For example: 51 | # 52 | # Cdd_05Jun2010{tab}NCBI CDD 05 Jun 2010{tab}/data/blastdb/domains/05Jun2010/Cdd 53 | # Cdd_15Aug2010{tab}NCBI CDD 15 Aug 2010{tab}/data/blastdb/domains/15Aug2010/Cdd 54 | # ...etc... 
55 | # 56 | # See also blastdb.loc which is for any nucleotide BLAST database, and 57 | # blastdb_p.loc which is for any protein BLAST database. 58 | -------------------------------------------------------------------------------- /tool-data/blastdb_p.loc.sample: -------------------------------------------------------------------------------- 1 | # This is a sample file distributed with Galaxy that is used to define a 2 | # list of protein BLAST databases, using three columns tab separated: 3 | # 4 | # <unique_id>{tab}<database_caption>{tab}<base_name_path> 5 | # 6 | # The captions typically contain spaces and might end with the build date. 7 | # It is important that the actual database name does not have a space in 8 | # it, and that there are only two tabs on each line. 9 | # 10 | # You can download the NCBI provided protein databases like NR from here: 11 | # ftp://ftp.ncbi.nlm.nih.gov/blast/db/ 12 | # 13 | # For simplicity, many Galaxy servers are configured to offer just a live 14 | # version of each NCBI BLAST database (updated with the NCBI provided 15 | # Perl scripts or similar). In this case, we recommend using the case 16 | # sensitive base-name of the NCBI BLAST databases as the unique id. 17 | # Consistent naming is important for sharing workflows between Galaxy 18 | # servers. 19 | # 20 | # For example, consider the NCBI "non-redundant" protein BLAST database 21 | # where you have downloaded and decompressed the files under /data/blastdb/ 22 | # meaning at the command line BLAST+ would be run with something like 23 | # the following, which would look at the files /data/blastdb/nr.p*: 24 | # 25 | # $ blastp -db /data/blastdb/nr -query ... 
26 | # 27 | # In this case use nr (lower case to match the NCBI file naming) as the 28 | # unique id in the first column of blastdb_p.loc, giving an entry like 29 | # this: 30 | # 31 | # nr{tab}NCBI non-redundant (nr){tab}/data/blastdb/nr 32 | # 33 | # Alternatively, rather than a "live" mirror of the NCBI databases which 34 | # are updated automatically, for full reproducibility the Galaxy Team 35 | # recommend saving date-stamped copies of the databases. In this case 36 | # your blastdb_p.loc file should include an entry per line for each 37 | # version you have stored. For example: 38 | # 39 | # nr_05Jun2010{tab}NCBI NR (non redundant) 05 Jun 2010{tab}/data/blastdb/05Jun2010/nr 40 | # nr_15Aug2010{tab}NCBI NR (non redundant) 15 Aug 2010{tab}/data/blastdb/15Aug2010/nr 41 | # ...etc... 42 | # 43 | # See also blastdb.loc which is for any nucleotide BLAST database, and 44 | # blastdb_d.loc which is for any protein domains databases (like CDD). 45 | -------------------------------------------------------------------------------- /tool-data/tool_data_table_conf.xml.sample: -------------------------------------------------------------------------------- 1 | 2 | 3 | value, name, path 4 | 5 |
6 | 7 | value, name, path 8 | 9 |
10 | 11 | value, name, path 12 | 13 |
14 | 15 | value, dbkey, name, path 16 | 17 |
18 |
19 | -------------------------------------------------------------------------------- /tools/README.rst: -------------------------------------------------------------------------------- 1 | Galaxy Tool definitions 2 | ======================= 3 | 4 | Each sub-folder represents a different entry on the Galaxy Tool Shed, 5 | for example ``ncbi_blast_plus`` contains wrappers for the BLAST+ suite. 6 | 7 | For general information, see the `main README file <../README.rst>`_. 8 | -------------------------------------------------------------------------------- /tools/blast2go/.shed.yml: -------------------------------------------------------------------------------- 1 | name: blast2go 2 | owner: peterjc 3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blast2go 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blast2go 5 | description: Maps BLAST results to GO annotation terms 6 | long_description: | 7 | Galaxy wrapper for Java command line tool Blast2GO for pipelines, b2g4pipe v2.5, 8 | available from http://blast2go.org/ 9 | 10 | The tool takes a single BLAST XML file as input, searched against a protein database 11 | such as the NCBI non redundant database (NR). The tool gives a single tabular output 12 | file, the annotation file which can be opened with the Blast2GO GUI. 13 | 14 | The wrapper uses a Galaxy loc file to allow the use of one or more Blast2GO property 15 | files (e.g. different versions of the database, or different servers, or different 16 | evidence weighting settings). We use this to offer both a local Blast2GO database 17 | (fast) and the public database hosted in Valencia, Spain. 
18 | categories: 19 | - Ontology Manipulation 20 | - Sequence Analysis 21 | type: unrestricted 22 | include: 23 | - strip_components: 2 24 | source: 25 | - ../../test-data/blastp_sample.blast2go.tabular 26 | - ../../test-data/blastp_sample.xml 27 | - ../../tool-data/blast2go.loc.sample 28 | - ../../tools/blast2go/README.rst 29 | - ../../tools/blast2go/blast2go.py 30 | - ../../tools/blast2go/blast2go.xml 31 | - ../../tools/blast2go/massage_xml_for_blast2go.py 32 | -------------------------------------------------------------------------------- /tools/blast2go/go_categorize.py: -------------------------------------------------------------------------------- 1 | """Categorize GO terms.""" 2 | 3 | from __future__ import print_function 4 | 5 | import gzip 6 | import sys 7 | 8 | _gzip_magic = "\x1f\x8b" 9 | 10 | 11 | def gzip_open(filename, mode="rb"): 12 | """Open a possibly gzipped file for binary reading; only mode "rb" is accepted. The first two bytes are compared with _gzip_magic and the choice is reported on stderr. NOTE(review): the bytes read in "rb" mode are compared with a str literal, which never matches on Python 3 - confirm the target interpreter.""" 13 | assert mode == "rb", mode 14 | h = open(filename, "rb") 15 | magic = h.read(2) 16 | h.seek(0) 17 | if magic == _gzip_magic: 18 | h.close() 19 | sys.stderr.write("%s is gzipped\n" % filename) 20 | return gzip.open(filename, "rb") 21 | else: 22 | sys.stderr.write("%s isn't compressed\n" % filename) 23 | return h 24 | 25 | 26 | def get_term_class(go, alias, is_a): 27 | """Find the class of the given GO term: "BP", "CC" or "MF". The term is first resolved through alias, then the is_a parents are followed; returns "??" when a term has no recorded parent, and None if the chain reaches a false value.""" 28 | x = alias.get(go, go) 29 | while x: 30 | if x in ["GO:0008150", "obsolete_biological_process"]: 31 | return "BP" 32 | elif x in ["GO:0005575", "obsolete_cellular_component"]: 33 | return "CC" 34 | elif x in ["GO:0003674", "obsolete_molecular_function"]: 35 | return "MF" 36 | try: 37 | x = is_a[x] 38 | except KeyError: 39 | return "??" 40 | 41 | 42 | def load_go_mapping(rdf_xml): 43 | """Quick and dirty GO RDF-XML parser.""" 44 | sys.stderr.write("Loading %s\n" % rdf_xml) 45 | h = gzip_open(rdf_xml, "rb") 46 | 47 | names = dict() 48 | alias = dict() 49 | is_a = dict() 50 | 51 | go = None 52 | for line in h: 53 | # sys.stderr.write("... 
%r\n" % line) 54 | if "" in line: 55 | assert go is None, line 56 | go = line[line.find("") + 14 :] 57 | assert "" in line, line 58 | go = go[: go.find("")] 59 | elif "" in line: 60 | assert go is not None 61 | name = line[line.find("") + 9 :] 62 | assert "" in name, name 63 | name = name[: name.find("")] 64 | names[go] = name 65 | elif "GO:" in line: 66 | assert go is not None 67 | go2 = line[line.find("GO:") + 12 :] 68 | assert "" in line, line 69 | go2 = go2[: go2.find("")] 70 | alias[go2] = go 71 | elif ' 74 | thing = line[ 75 | line.find(' 87 | # or 90 | thing = line[ 91 | line.find('" in line: 97 | go = None 98 | h.close() 99 | sys.stderr.write( 100 | "%i names, %i aliases, %i parents\n" % (len(names), len(alias), len(is_a)) 101 | ) 102 | 103 | if "all" in names: 104 | del names["all"] 105 | 106 | for go in names: 107 | yield go, names[go], get_term_class(go, alias, is_a) 108 | 109 | 110 | for go, name, term_class in load_go_mapping(sys.argv[1]): 111 | print(go, term_class, name) 112 | -------------------------------------------------------------------------------- /tools/blast2go/massage_xml_for_blast2go.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Script for reformatting Blast XML to suit Blast2GO. 3 | 4 | This script takes exactly two command line arguments: 5 | * Input BLAST XML filename 6 | * Output BLAST XML filename 7 | 8 | Sadly b2g4pipe (at least v2.3.5 to v2.5.0) cannot cope with current 9 | style large BLAST XML files (e.g. from BLAST 2.2.25+), so we reformat 10 | these to avoid it crashing with a Java heap space OutOfMemoryError. 11 | 12 | As part of this reformatting, we check for BLASTP or BLASTX output 13 | (otherwise raise an error), and print the query count. 
14 | 15 | This script is called from my Galaxy wrapper for Blast2GO for pipelines, 16 | available from the Galaxy Tool Shed here: 17 | http://toolshed.g2.bx.psu.edu/view/peterjc/blast2go 18 | 19 | This script is under version control here: 20 | https://github.com/peterjc/galaxy_blast/tree/master/tools/blast2go 21 | """ 22 | import os 23 | import sys 24 | 25 | 26 | def prepare_xml(original_xml, mangled_xml): 27 | """Reformat BLAST XML to suit Blast2GO. 28 | 29 | Blast2GO can't cope with 1000s of Iteration tags within a 30 | single enclosing tag, so instead split this into one 31 | full XML record per iteration (i.e. per query). This gives 32 | a concatenated XML file mimicking old versions of BLAST. 33 | 34 | This also checks for BLASTP or BLASTX output, and outputs 35 | the number of queries. Galaxy will show this as "info". 36 | """ 37 | in_handle = open(original_xml) 38 | footer = " \n\n" 39 | header = "" 40 | while True: 41 | line = in_handle.readline() 42 | if not line: 43 | # No hits? 44 | sys.exit("Problem with XML file?") 45 | if line.strip() == "": 46 | break 47 | header += line 48 | 49 | if "blastx" in header: 50 | print("BLASTX output identified") 51 | elif "blastp" in header: 52 | print("BLASTP output identified") 53 | else: 54 | in_handle.close() 55 | sys.exit("Expect BLASTP or BLASTX output") 56 | 57 | out_handle = open(mangled_xml, "w") 58 | out_handle.write(header) 59 | out_handle.write(line) 60 | count = 1 61 | while True: 62 | line = in_handle.readline() 63 | if not line: 64 | break 65 | elif line.strip() == "": 66 | # Insert footer/header 67 | out_handle.write(footer) 68 | out_handle.write(header) 69 | count += 1 70 | out_handle.write(line) 71 | 72 | out_handle.close() 73 | in_handle.close() 74 | print("Input has %i queries" % count) 75 | 76 | 77 | if __name__ == "__main__": 78 | # Run the conversion... 
79 | if len(sys.argv) != 3: 80 | sys.exit("Require two arguments: XML input filename, XML output filename") 81 | 82 | xml_file, out_xml_file = sys.argv[1:] 83 | 84 | if not os.path.isfile(xml_file): 85 | sys.exit("Input BLAST XML file not found: %s" % xml_file) 86 | 87 | prepare_xml(xml_file, out_xml_file) 88 | -------------------------------------------------------------------------------- /tools/blast2go/test-data: -------------------------------------------------------------------------------- 1 | ../../test-data -------------------------------------------------------------------------------- /tools/blast_rbh/.shed.yml: -------------------------------------------------------------------------------- 1 | name: blast_rbh 2 | owner: peterjc 3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh 5 | description: BLAST Reciprocal Best Hits (RBH) from two FASTA files 6 | long_description: | 7 | Builds BLAST databases and runs reciprocal searches, filters them, 8 | and then identifies and reports any reciprocal best hits (RBH). 
9 | categories: 10 | - Fasta Manipulation 11 | - Sequence Analysis 12 | type: unrestricted 13 | include: 14 | - strip_components: 2 15 | source: 16 | - ../../test-data/four_human_proteins.fasta 17 | - ../../test-data/k12_edited_proteins.fasta 18 | - ../../test-data/k12_ten_proteins.fasta 19 | - ../../test-data/rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular 20 | - ../../test-data/rbh_blastp_four_human_vs_rhodopsin_proteins.tabular 21 | - ../../test-data/rbh_blastp_k12.tabular 22 | - ../../test-data/rbh_blastp_k12_self.tabular 23 | - ../../test-data/rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular 24 | - ../../test-data/rbh_none.tabular 25 | - ../../test-data/rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular 26 | - ../../test-data/rhodopsin_nucs.fasta 27 | - ../../test-data/rhodopsin_proteins.fasta 28 | - ../../test-data/three_human_mRNA.fasta 29 | - ../../tools/blast_rbh/README.rst 30 | - ../../tools/blast_rbh/best_hits.py 31 | - ../../tools/blast_rbh/blast_rbh.py 32 | - ../../tools/blast_rbh/blast_rbh.xml 33 | -------------------------------------------------------------------------------- /tools/blast_rbh/test-data: -------------------------------------------------------------------------------- 1 | ../../test-data -------------------------------------------------------------------------------- /tools/blast_rbh/update_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | echo "This will update test files using the current version of BLAST+" 4 | 5 | if [ -f "tools/blast_rbh/update_tests.sh" ] 6 | then 7 | echo "Good, in the expected directory" 8 | else 9 | echo "ERROR. Run this from the GitHub repository root directory." 
10 | exit 1 11 | fi 12 | 13 | cd test-data 14 | 15 | echo rbh_none.tabular 16 | ../tools/blast_rbh/blast_rbh.py rhodopsin_nucs.fasta three_human_mRNA.fasta -a nucl -t megablast -i 100 -c 100 -o rbh_none.tabular 17 | 18 | echo rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular 19 | ../tools/blast_rbh/blast_rbh.py three_human_mRNA.fasta rhodopsin_nucs.fasta -a nucl -t blastn -i 0 -c 0 -o rbh_blastn_three_human_mRNA_vs_rhodopsin_nucs.tabular 20 | 21 | echo rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular 22 | ../tools/blast_rbh/blast_rbh.py rhodopsin_nucs.fasta three_human_mRNA.fasta -a nucl -t megablast -i 0 -c 0 -o rbh_megablast_rhodopsin_nucs_vs_three_human_mRNA.tabular 23 | 24 | echo rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular 25 | ../tools/blast_rbh/blast_rbh.py rhodopsin_nucs.fasta three_human_mRNA.fasta -a nucl -t tblastx -i 0 -c 0 -o rbh_tblastx_rhodopsin_nucs_vs_three_human_mRNA.tabular 26 | 27 | echo rbh_blastp_four_human_vs_rhodopsin_proteins.tabular 28 | ../tools/blast_rbh/blast_rbh.py four_human_proteins.fasta rhodopsin_proteins.fasta -a prot -t blastp -i 0 -c 0 -o rbh_blastp_four_human_vs_rhodopsin_proteins.tabular 29 | 30 | echo rbh_blastp_k12.tabular 31 | ../tools/blast_rbh/blast_rbh.py k12_edited_proteins.fasta k12_ten_proteins.fasta -a prot -t blastp -i 0 -c 0 -o rbh_blastp_k12.tabular 32 | 33 | echo rbh_blastp_k12_self.tabular 34 | ../tools/blast_rbh/blast_rbh.py k12_edited_proteins.fasta k12_edited_proteins.fasta -a prot -t blastp-fast -i 80 -c 80 -o rbh_blastp_k12_self.tabular 35 | -------------------------------------------------------------------------------- /tools/blastxml_to_top_descr/.shed.yml: -------------------------------------------------------------------------------- 1 | name: blastxml_to_top_descr 2 | owner: peterjc 3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/blastxml_to_top_descr 4 | remote_repository_url: 
https://github.com/peterjc/galaxy_blast/tree/master/tools/blastxml_to_top_descr 5 | description: Make table of top BLAST match descriptions 6 | long_description: | 7 | NCBI BLAST+ (and the older NCBI "legacy" BLAST) can output in a range of formats 8 | including text, tabular and a more detailed XML format. You can do a lot of things 9 | with tabular files in Galaxy (sorting, filtering, joins, etc), however until BLAST+ 10 | 2.2.28 the tabular output never included the hit descriptions (titles) found in 11 | the other output formats. 12 | 13 | This tool turns a BLAST XML file into a simple tabular file containing one row per 14 | query sequence, containing the query identifier and then the three (by default) 15 | top hit descriptions (i.e. the first three). If a query doesn''t have that many 16 | hits, then these entries are left blank. 17 | 18 | This tool can also be used with the tabular output from BLAST+ instead, provided 19 | the relevant columns are provided. The default settings will work with the default 20 | 25 column extended output from the BLAST+ tools wrapped in Galaxy. Note if a query 21 | has no hits, it does not appear in the BLAST tabular output. 
22 | categories: 23 | - Convert Formats 24 | - Sequence Analysis 25 | - Text Manipulation 26 | type: unrestricted 27 | include: 28 | - strip_components: 2 29 | source: 30 | - ../../test-data/blastp_four_human_vs_rhodopsin.xml 31 | - ../../test-data/blastp_four_human_vs_rhodopsin_converted_ext.tabular 32 | - ../../test-data/blastp_four_human_vs_rhodopsin_top3.tabular 33 | - ../../test-data/blastp_four_human_vs_rhodopsin_top3_positive.tabular 34 | - ../../tools/blastxml_to_top_descr/README.rst 35 | - ../../tools/blastxml_to_top_descr/blastxml_to_top_descr.py 36 | - ../../tools/blastxml_to_top_descr/blastxml_to_top_descr.xml 37 | -------------------------------------------------------------------------------- /tools/blastxml_to_top_descr/test-data: -------------------------------------------------------------------------------- 1 | ../../test-data -------------------------------------------------------------------------------- /tools/make_nr/.shed.yml: -------------------------------------------------------------------------------- 1 | name: make_nr 2 | owner: peterjc 3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/make_nr 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/tools/make_nr 5 | description: Make a FASTA file non-redundant 6 | long_description: | 7 | Python script intended to be run prior to calling the NCBI BLAST+ 8 | command line tool ``makeblastdb`` or in other settings where you 9 | want to collapse duplicated sequences in a FASTA file to a single 10 | representative. 
11 | categories: 12 | - Fasta Manipulation 13 | - Sequence Analysis 14 | type: unrestricted 15 | include: 16 | - strip_components: 2 17 | source: 18 | - ../../tools/make_nr/README.rst 19 | - ../../tools/make_nr/make_nr.py 20 | - ../../tools/make_nr/make_nr.xml 21 | - ../../test-data/duplicates.fasta 22 | - ../../test-data/duplicates.fasta.gz 23 | - ../../test-data/duplicates.nr.fasta 24 | - ../../test-data/more_duplicates.fasta 25 | - ../../test-data/deduplicate.nosortids.fasta 26 | - ../../test-data/deduplicate.sortids.fasta 27 | - ../../test-data/empty.fasta 28 | -------------------------------------------------------------------------------- /tools/make_nr/test-data: -------------------------------------------------------------------------------- 1 | ../../test-data -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/.lint_skip: -------------------------------------------------------------------------------- 1 | # delta and psiblast miss tests 2 | TestsMissing 3 | -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/check_no_duplicates.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Check for duplicate sequence identifiers in FASTA files. 3 | 4 | This is run as a pre-check before makeblastdb, in order to avoid 5 | a regression bug in BLAST+ 2.2.28 which fails to catch this. See: 6 | http://blastedbio.blogspot.co.uk/2012/10/my-ids-not-good-enough-for-ncbi-blast.html 7 | 8 | This script takes one or more FASTA filenames as input, and 9 | will return a non-zero error if any duplicate identifiers 10 | are found. 
11 | """ 12 | import gzip 13 | import os 14 | import sys 15 | 16 | 17 | if "-v" in sys.argv or "--version" in sys.argv: 18 | print("v0.0.23") 19 | sys.exit(0) 20 | 21 | identifiers = set() 22 | files = 0 23 | for filename in sys.argv[1:]: 24 | if not os.path.isfile(filename): 25 | sys.stderr.write("Missing FASTA file %r\n" % filename) 26 | sys.exit(2) 27 | files += 1 28 | 29 | with open(filename, "rb") as binary_handle: 30 | magic = binary_handle.read(2) 31 | if not magic: 32 | # Empty file, special case 33 | continue 34 | elif magic == b"\x1f\x8b": 35 | # Gzipped 36 | handle = gzip.open(filename, "rt") 37 | elif magic[0:1] == b">": 38 | # Not gzipped, should be plain FASTA; NOTE(review): there is no fallback branch, so any other leading bytes appear to leave handle unset or stale - confirm 39 | handle = open(filename, "r") 40 | 41 | for line in handle: 42 | if line.startswith(">"): 43 | # The split will also take care of the new line character, 44 | # e.g. ">test\n" and ">test description here\n" both give "test" 45 | seq_id = line[1:].split(None, 1)[0] 46 | if seq_id in identifiers: 47 | handle.close() 48 | sys.exit("Repeated identifiers, e.g. 
%r" % seq_id) 49 | identifiers.add(seq_id) 50 | handle.close() 51 | if not files: 52 | sys.stderr.write("No FASTA files given to check for duplicates\n") 53 | sys.exit(3) 54 | elif files == 1: 55 | print("%i sequences" % len(identifiers)) 56 | else: 57 | print("%i sequences in %i FASTA files" % (len(identifiers), files)) 58 | -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/get_species_taxids.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ncbi_macros.xml 5 | 6 | 7 | echo "@TOOL_VERSION@" 8 | &2 echo "could not find taxid for $name" && exit 1; 14 | else 15 | echo " $name -> \$taxid"; 16 | fi && 17 | get_species_taxids.sh -t "\$taxid" >> species_ids.txt && 18 | #end for 19 | #else 20 | #for taxid in $type_cond.ids.split(',') 21 | get_species_taxids.sh -t "$taxid" >> species_ids.txt && 22 | #end for 23 | #end if 24 | sort -n -u species_ids.txt > '$output' 25 | ]]> 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | [a-zA-Z ,]+$ 35 | 36 | 37 | 38 | 39 | [0-9,]+$ 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | **What it does** 94 | 95 | Returns a list of species taxids for a taxon. 
It relies on the get_species_taxids.sh script of the BLAST+ package https://www.ncbi.nlm.nih.gov/books/NBK546209/ 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/ncbi_blastdbcmd_info.xml: -------------------------------------------------------------------------------- 1 | 2 | Show BLAST database information from blastdbcmd 3 | 4 | blastdbcmd 5 | ncbi_macros.xml 6 | 7 | 8 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | **What it does** 44 | 45 | Calls the NCBI BLAST+ blastdbcmd command line tool with the -info 46 | switch to give summary information about a BLAST database, such as 47 | the size (number of sequences and total length) and date. 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/ncbi_convert2blastmask_wrapper.xml: -------------------------------------------------------------------------------- 1 | 2 | Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb 3 | 4 | convert2blastmask 5 | ncbi_macros.xml 6 | 7 | 8 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | **What it does** 72 | 73 | Convert masking information in lower-case masked FASTA input to file formats suitable for makeblastdb. 74 | 75 | More information about convert2blastmask can be found in the `BLAST Command Line Applications User Manual`_. 76 | 77 | .. 
_BLAST Command Line Applications User Manual: https://www.ncbi.nlm.nih.gov/books/NBK279690/ 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/ncbi_rpsblast_wrapper.xml: -------------------------------------------------------------------------------- 1 | 2 | Search protein domain database (PSSMs) with protein query sequence(s) 3 | 4 | deltablast 5 | ncbi_macros.xml 6 | 7 | 8 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | @SEARCH_TIME_WARNING@ 69 | 70 | **What it does** 71 | 72 | Search a *protein domain database* using a *protein query*, 73 | using the NCBI BLAST+ rpsblast command line tool. 74 | 75 | The protein domain databases use position-specific scoring matrices 76 | (PSSMs) and are available for a number of domain collections including: 77 | 78 | *CDD* - NCBI curated meta-collection of domains, see 79 | https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains 80 | 81 | *Kog* - PSSMs from automatically aligned sequences and sequence 82 | fragments classified in the KOGs resource, the eukaryotic 83 | counterpart to COGs, see https://www.ncbi.nlm.nih.gov/COG/ 84 | 85 | *Cog* - PSSMs from automatically aligned sequences and sequence 86 | fragments classified in the COGs resource, which focuses primarily 87 | on prokaryotes, see https://www.ncbi.nlm.nih.gov/COG/ 88 | 89 | *Pfam* - PSSMs from Pfam-A seed alignment database, see 90 | http://xfam.org/ 91 | 92 | *Smart* - PSSMs from SMART domain alignment database, see 93 | http://smart.embl-heidelberg.de/ 94 | 95 | *Tigr* - PSSMs from TIGRFAM database of protein families, see 96 | ftp://ftp.jcvi.org/data/TIGRFAMs/ 97 | 98 | *Prk* - PSSMs from automatically aligned stable clusters in the 99 | Protein Clusters database, see 100 | 
https://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&db=proteinclusters 101 | 102 | The exact list of domain databases offered will depend on how your 103 | local Galaxy has been configured. 104 | 105 | ----- 106 | 107 | @OUTPUT_FORMAT@ 108 | 109 | ------- 110 | 111 | @CLI_OPTIONS@ 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/ncbi_rpstblastn_wrapper.xml: -------------------------------------------------------------------------------- 1 | 2 | Search protein domain database (PSSMs) with translated nucleotide query sequence(s) 3 | 4 | rpstblastn 5 | ncbi_macros.xml 6 | 7 | 8 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | @SEARCH_TIME_WARNING@ 69 | 70 | **What it does** 71 | 72 | Search a *protein domain database* using a *nucleotide query*, 73 | using the NCBI BLAST+ rpstblastn command line tool. 
74 | 75 | The protein domain databases use position-specific scoring matrices 76 | (PSSMs) and are available for a number of domain collections including: 77 | 78 | *CDD* - NCBI curated meta-collection of domains, see 79 | https://www.ncbi.nlm.nih.gov/Structure/cdd/cdd_help.shtml#NCBI_curated_domains 80 | 81 | *Kog* - PSSMs from automatically aligned sequences and sequence 82 | fragments classified in the KOGs resource, the eukaryotic 83 | counterpart to COGs, see https://www.ncbi.nlm.nih.gov/COG/ 84 | 85 | *Cog* - PSSMs from automatically aligned sequences and sequence 86 | fragments classified in the COGs resource, which focuses primarily 87 | on prokaryotes, see https://www.ncbi.nlm.nih.gov/COG/ 88 | 89 | *Pfam* - PSSMs from Pfam-A seed alignment database, see 90 | http://xfam.org/ 91 | 92 | *Smart* - PSSMs from SMART domain alignment database, see 93 | http://smart.embl-heidelberg.de/ 94 | 95 | *Tigr* - PSSMs from TIGRFAM database of protein families, see 96 | ftp://ftp.jcvi.org/data/TIGRFAMs/ 97 | 98 | *Prk* - PSSMs from automatically aligned stable clusters in the 99 | Protein Clusters database, see 100 | https://www.ncbi.nlm.nih.gov/proteinclusters?cmd=search&db=proteinclusters 101 | 102 | The exact list of domain databases offered will depend on how your 103 | local Galaxy has been configured. 
104 | 105 | ----- 106 | 107 | @OUTPUT_FORMAT@ 108 | 109 | ------- 110 | 111 | @CLI_OPTIONS@ 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/ncbi_tblastx_wrapper.xml: -------------------------------------------------------------------------------- 1 | 2 | Search translated nucleotide database with translated nucleotide query sequence(s) 3 | 4 | tblastx 5 | ncbi_macros.xml 6 | 7 | 8 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | @SEARCH_TIME_WARNING@ 81 | 82 | **What it does** 83 | 84 | Search a *translated nucleotide database* using a *translated nucleotide query*, 85 | using the NCBI BLAST+ tblastx command line tool. 86 | 87 | @FASTA_WARNING@ 88 | 89 | ----- 90 | 91 | @OUTPUT_FORMAT@ 92 | 93 | ------- 94 | 95 | @CLI_OPTIONS@ 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/test-data: -------------------------------------------------------------------------------- 1 | ../../test-data -------------------------------------------------------------------------------- /tools/ncbi_blast_plus/tool-data: -------------------------------------------------------------------------------- 1 | ../../tool-data -------------------------------------------------------------------------------- /tools/reciprocal_best_hits/README.rst: -------------------------------------------------------------------------------- 1 | Galaxy tool to find Reciprocal Best Hits (RBH) from BLAST etc 2 | ============================================================= 3 | 4 | This tool is copyright 2011-2015 by Peter Cock, The James Hutton Institute 5 | (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. 
6 | See the licence text below. 7 | 8 | This tool is a short Python script to parse a pair of BLAST tabular files 9 | (or similar), and extract the reciprocal best hits. 10 | 11 | This was an experiment. I was also considering supporting BLAST XML as input, 12 | which could require extensions to Galaxy ideally so that the current column 13 | selection parameters can be conditional on tabular input. This would make 14 | it possible to integrate BLAST filtering into this tool - although that 15 | might be better done as a separate tool instead. 16 | 17 | This tool has been superseded by an integrated BLAST RBH tool taking two 18 | FASTA files as input instead, see: 19 | 20 | * https://toolshed.g2.bx.psu.edu/view/peterjc/blast_rbh 21 | * https://testtoolshed.g2.bx.psu.edu/view/peterjc/blast_rbh 22 | * https://github.com/peterjc/galaxy_blast/tree/master/tools/blast_rbh 23 | 24 | 25 | Licence (MIT) 26 | ============= 27 | 28 | Permission is hereby granted, free of charge, to any person obtaining a copy 29 | of this software and associated documentation files (the "Software"), to deal 30 | in the Software without restriction, including without limitation the rights 31 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 32 | copies of the Software, and to permit persons to whom the Software is 33 | furnished to do so, subject to the following conditions: 34 | 35 | The above copyright notice and this permission notice shall be included in 36 | all copies or substantial portions of the Software. 37 | 38 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 39 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 40 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 41 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 42 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 43 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 44 | THE SOFTWARE. 45 | -------------------------------------------------------------------------------- /tools/reciprocal_best_hits/reciprocal_best_hits.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Reciprocal Best Hit (RBH) using BLAST style tabular input. 3 | 4 | Takes seven command line options, 5 | 1. Tabular filename of A against B 6 | 2. Tabular filename of B against A 7 | 3. Query ID column number (assumed to be same for both input files), e.g. c1 8 | 4. Match ID column number (assumed to be same for both input files), e.g. c2 9 | 5. Score column number (assumed to be same for both input files), e.g. c12 10 | 6. Want highest or lowest score? (use string high or low) 11 | 7. 
Output filename 12 | 13 | """ 14 | from __future__ import print_function 15 | 16 | import sys 17 | 18 | if "--version" in sys.argv[1:]: 19 | print("RBH v0.0.4") 20 | sys.exit(0) 21 | 22 | # Parse Command Line 23 | try: 24 | a_vs_b, b_vs_a, c_query, c_match, c_score, sort_order, out_file = sys.argv[1:] 25 | except ValueError: 26 | sys.exit("Expect 7 arguments: two input files, column settings, output file") 27 | 28 | 29 | want_highest = want_lowest = False 30 | if sort_order == "high": 31 | want_highest = True 32 | elif sort_order == "low": 33 | want_lowest = True 34 | else: 35 | sys.exit("Sort order argument should be high or low") 36 | 37 | if out_file in [a_vs_b, b_vs_a]: 38 | sys.exit("Output file would overwrite an input file") 39 | 40 | if "None" in [c_query, c_match, c_score]: 41 | sys.exit("Three distinct column numbers must be chosen") 42 | 43 | 44 | def get_col_index(col_str): 45 | """Return the zero-based integer index for a one-based column string such as "c12" or "12".""" 46 | if col_str[0] == "c": 47 | col_str = col_str[1:] 48 | return int(col_str) - 1 49 | 50 | 51 | c_query = get_col_index(c_query) 52 | c_match = get_col_index(c_match) 53 | c_score = get_col_index(c_score) 54 | if len(set([c_query, c_match, c_score])) < 3: 55 | sys.exit("Need three different column numbers!") 56 | 57 | best_a_vs_b = dict() 58 | for line in open(a_vs_b): 59 | if line.startswith("#"): 60 | continue 61 | parts = line.rstrip("\n").split("\t") 62 | a = parts[c_query] 63 | b = parts[c_match] 64 | score = float(parts[c_score]) 65 | if ( 66 | (a not in best_a_vs_b) 67 | or (want_highest and score > best_a_vs_b[a][1]) 68 | or (want_lowest and score < best_a_vs_b[a][1]) 69 | ): 70 | best_a_vs_b[a] = (b, score, parts[c_score]) 71 | b_short_list = set(b for (b, score, score_str) in best_a_vs_b.values()) 72 | 73 | best_b_vs_a = dict() 74 | for line in open(b_vs_a): 75 | if line.startswith("#"): 76 | continue 77 | parts = line.rstrip("\n").split("\t") 78 | b = parts[c_query] 79 | a = parts[c_match] 80 | if a not in 
best_a_vs_b: 81 | continue 82 | # sys.exit("The A-vs-B file does not have A-ID %r found in B-vs-A file" % a) 83 | if b not in b_short_list: 84 | continue 85 | score = float(parts[c_score]) 86 | if ( 87 | (b not in best_b_vs_a) 88 | or (want_highest and score > best_b_vs_a[b][1]) 89 | or (want_lowest and score < best_b_vs_a[b][1]) 90 | ): 91 | best_b_vs_a[b] = (a, score, parts[c_score]) 92 | # TODO - Preserve order from A vs B? 93 | a_short_list = sorted(set(a for (a, score, score_str) in best_b_vs_a.values())) 94 | 95 | count = 0 96 | outfile = open(out_file, "w") 97 | outfile.write("#A_id\tB_id\tA_vs_B\tB_vs_A\n") 98 | for a in a_short_list: 99 | b = best_a_vs_b[a][0] 100 | if b in best_b_vs_a and a == best_b_vs_a[b][0]: 101 | outfile.write("%s\t%s\t%s\t%s\n" % (a, b, best_a_vs_b[a][2], best_b_vs_a[b][2])) 102 | count += 1 103 | outfile.close() 104 | print("Done, %i RBH found" % count) 105 | -------------------------------------------------------------------------------- /tools/reciprocal_best_hits/test-data: -------------------------------------------------------------------------------- 1 | ../../test-data -------------------------------------------------------------------------------- /workflows/README.rst: -------------------------------------------------------------------------------- 1 | Galaxy Workflows 2 | ================ 3 | 4 | Each sub-folder represents a different entry on the Galaxy Tool Shed, 5 | for example ``blast_top_hit_species`` contains a workflow which runs 6 | BLASTX and counts the species of each top hit. 7 | 8 | For general information, see the `main README file <../README.rst>`_. 
9 | -------------------------------------------------------------------------------- /workflows/blast_top_hit_species/.shed.yml: -------------------------------------------------------------------------------- 1 | name: blast_top_hit_species 2 | owner: peterjc 3 | homepage_url: https://github.com/peterjc/galaxy_blast/tree/master/workflows/blast_top_hit_species 4 | remote_repository_url: https://github.com/peterjc/galaxy_blast/tree/master/workflows/blast_top_hit_species 5 | description: Workflow to count species of top nr BLASTX hits of a transcriptome 6 | long_description: | 7 | This is a non-trivial example workflow using the NCBI BLAST+ wrappers, intended only 8 | for crude contamination assessment of a transcriptome assembly. 9 | 10 | This would ideally include a visualisation of the final tally table as a Pie Chart, 11 | currently not possible with the Galaxy Visualization Framework. 12 | categories: 13 | - Sequence Analysis 14 | type: unrestricted 15 | -------------------------------------------------------------------------------- /workflows/blast_top_hit_species/N_abberans_piechart_mouseover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/workflows/blast_top_hit_species/N_abberans_piechart_mouseover.png -------------------------------------------------------------------------------- /workflows/blast_top_hit_species/blast_top_hit_species.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peterjc/galaxy_blast/75b5762aa9699a0b3f97a67158111599a2cb6f59/workflows/blast_top_hit_species/blast_top_hit_species.png -------------------------------------------------------------------------------- /workflows/blast_top_hit_species/repository_dependencies.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 
9 | 10 | --------------------------------------------------------------------------------