├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── data ├── Blast_db │ ├── isfinder_is.fasta │ ├── isfinder_is.nhr │ ├── isfinder_is.nin │ ├── isfinder_is.nog │ ├── isfinder_is.nsd │ ├── isfinder_is.nsi │ ├── isfinder_is.nsq │ ├── isfinder_orf.fasta │ ├── isfinder_orf.phr │ ├── isfinder_orf.pin │ ├── isfinder_orf.pog │ ├── isfinder_orf.psd │ ├── isfinder_orf.psi │ ├── isfinder_orf.psq │ ├── isfinder_orf_all.phr │ ├── isfinder_orf_all.pin │ ├── isfinder_orf_all.pog │ ├── isfinder_orf_all.psd │ ├── isfinder_orf_all.psi │ └── isfinder_orf_all.psq ├── models │ ├── fasta │ │ └── outliers.fasta │ ├── hmm │ │ ├── hmm_all_fams.hmm │ │ ├── hmm_all_subfams.hmm │ │ └── thresholds.txt │ └── msa │ │ ├── IS1 │ │ ├── IS1.sto │ │ └── IS1_cut.sto │ │ ├── IS110 │ │ ├── IS110.sto │ │ └── IS110_cut.sto │ │ ├── IS1182 │ │ ├── IS1182.sto │ │ └── IS1182_cut.sto │ │ ├── IS1380 │ │ ├── IS1380.sto │ │ └── IS1380_cut.sto │ │ ├── IS1595 │ │ ├── IS1595.sto │ │ └── IS1595_cut.sto │ │ ├── IS1634 │ │ ├── IS1634.sto │ │ └── IS1634_cut.sto │ │ ├── IS200_IS605 │ │ ├── IS200_IS605.sto │ │ └── IS200_IS605_cut.sto │ │ ├── IS21 │ │ ├── IS21.sto │ │ └── IS21_cut.sto │ │ ├── IS256 │ │ ├── IS256.sto │ │ └── IS256_cut.sto │ │ ├── IS3 │ │ ├── IS3.sto │ │ └── IS3_cut.sto │ │ ├── IS30 │ │ ├── IS30.sto │ │ └── IS30_cut.sto │ │ ├── IS3_IS150 │ │ ├── IS3_IS150.sto │ │ └── IS3_IS150_cut.sto │ │ ├── IS3_IS2 │ │ ├── IS3_IS2.sto │ │ └── IS3_IS2_cut.sto │ │ ├── IS3_IS3 │ │ ├── IS3_IS3.sto │ │ └── IS3_IS3_cut.sto │ │ ├── IS3_IS407 │ │ ├── IS3_IS407.sto │ │ └── IS3_IS407_cut.sto │ │ ├── IS3_IS51 │ │ ├── IS3_IS51.sto │ │ └── IS3_IS51_cut.sto │ │ ├── IS4 │ │ ├── IS4.sto │ │ └── IS4_cut.sto │ │ ├── IS481 │ │ ├── IS481.sto │ │ └── IS481_cut.sto │ │ ├── IS4_IS10 │ │ ├── IS4_IS10.sto │ │ └── IS4_IS10_cut.sto │ │ ├── IS4_IS231 │ │ ├── IS4_IS231.sto │ │ └── IS4_IS231_cut.sto │ │ ├── IS4_IS4 │ │ ├── IS4_IS4.sto │ │ └── IS4_IS4_cut.sto │ │ ├── IS4_IS4Sa │ │ ├── IS4_IS4Sa.sto │ │ └── IS4_IS4Sa_cut.sto │ │ ├── IS4_IS50 │ │ 
├── IS4_IS50.sto │ │ └── IS4_IS50_cut.sto │ │ ├── IS4_ISH8 │ │ ├── IS4_ISH8.sto │ │ └── IS4_ISH8_cut.sto │ │ ├── IS4_ISPepr1 │ │ ├── IS4_ISPepr1.sto │ │ └── IS4_ISPepr1_cut.sto │ │ ├── IS5_IS1031 │ │ ├── IS5_IS1031.sto │ │ └── IS5_IS1031_cut.sto │ │ ├── IS5_IS427 │ │ ├── IS5_IS427.sto │ │ └── IS5_IS427_cut.sto │ │ ├── IS5_IS5 │ │ ├── IS5_IS5.sto │ │ ├── IS5_IS5_cut.sto │ │ ├── IS5_IS5_delete.sto │ │ ├── IS5_IS5_delete_cut.sto │ │ ├── IS5_IS5_full.sto │ │ ├── IS5_IS5_full_cut.sto │ │ ├── IS5_IS5_mode3.sto │ │ ├── IS5_IS5_model1.sto │ │ ├── IS5_IS5_model1_cut.sto │ │ ├── IS5_IS5_model2.sto │ │ ├── IS5_IS5_model2_cut.sto │ │ └── IS5_IS5_model3_cut.sto │ │ ├── IS5_IS903 │ │ ├── IS5_IS903.sto │ │ └── IS5_IS903_cut.sto │ │ ├── IS5_ISH1 │ │ ├── IS5_ISH1.sto │ │ └── IS5_ISH1_cut.sto │ │ ├── IS5_ISL2 │ │ ├── IS5_ISL2.sto │ │ └── IS5_ISL2_cut.sto │ │ ├── IS5_None │ │ ├── IS5_None.sto │ │ ├── IS5_None_m1_cut.sto │ │ ├── IS5_None_m2_cut.sto │ │ ├── IS5_None_m3_cut.sto │ │ ├── IS5_None_model1.sto │ │ ├── IS5_None_model1_cut.sto │ │ ├── IS5_None_model1_final.sto │ │ ├── IS5_None_model2.sto │ │ ├── IS5_None_model2_cut.sto │ │ ├── IS5_None_model2_final.sto │ │ ├── IS5_None_model3.sto │ │ ├── IS5_None_model3_cut.sto │ │ ├── IS5_None_zvysok.sto │ │ └── IS5_None_zvysok_zo_zvysku.sto │ │ ├── IS6 │ │ ├── IS6.sto │ │ └── IS6_cut.sto │ │ ├── IS607 │ │ ├── IS607.sto │ │ └── IS607_cut.sto │ │ ├── IS630 │ │ ├── IS630.sto │ │ └── IS630_cut.sto │ │ ├── IS66 │ │ ├── IS66.sto │ │ └── IS66_cut.sto │ │ ├── IS701 │ │ ├── IS701.sto │ │ └── IS701_cut.sto │ │ ├── IS91 │ │ ├── IS91.sto │ │ └── IS91_cut.sto │ │ ├── IS982 │ │ ├── IS982.sto │ │ └── IS982_cut.sto │ │ ├── ISAs1 │ │ ├── ISAs1.sto │ │ └── ISAs1_cut.sto │ │ ├── ISAzo13 │ │ ├── ISAzo13.sto │ │ └── ISAzo13_cut.sto │ │ ├── ISH3 │ │ ├── ISH3.sto │ │ └── ISH3_cut.sto │ │ ├── ISH6 │ │ ├── ISH6.sto │ │ └── ISH6_cut.sto │ │ ├── ISKra4 │ │ ├── ISKra4.sto │ │ └── ISKra4_cut.sto │ │ ├── ISL3 │ │ ├── ISL3.sto │ │ └── ISL3_cut.sto │ │ ├── ISLre2 │ │ ├── 
ISLre2.sto │ │ └── ISLre2_cut.sto │ │ ├── ISNCY_IS1202 │ │ ├── ISNCY_IS1202.sto │ │ └── ISNCY_IS1202_cut.sto │ │ ├── ISNCY_IS892 │ │ └── ISNCY_IS892.sto │ │ ├── ISNCY_ISA1214 │ │ ├── ISNCY_ISA1214.sto │ │ └── ISNCY_ISA1214_cut.sto │ │ ├── ISNCY_ISC1217 │ │ ├── ISNCY_ISC1217.sto │ │ └── ISNCY_ISC1217_cut.sto │ │ ├── ISNCY_ISDol1 │ │ ├── ISNCY_ISDol1.sto │ │ └── ISNCY_ISDol1_cut.sto │ │ ├── ISNCY_ISLbi1 │ │ ├── ISNCY_ISLbi1.sto │ │ └── ISNCY_ISLbi1_cut.sto │ │ ├── ISNCY_ISM1 │ │ ├── ISNCY_ISM1.sto │ │ └── ISNCY_ISM1_cut.sto │ │ ├── ISNCY_ISMae2 │ │ ├── ISNCY_ISMae2.sto │ │ └── ISNCY_ISMae2_cut.sto │ │ ├── ISNCY_ISPlu15 │ │ └── ISNCY_ISPlu15.sto │ │ ├── ISNCY_None │ │ └── ISNCY_None.sto │ │ └── Tn3 │ │ ├── Tn3.sto │ │ └── Tn3_cut.sto └── test_data │ ├── NC_002608.fasta │ └── NC_002608.gb ├── definitions.py ├── digIS_search.py ├── digis_docker_wrapper.sh ├── evaluation ├── archaea_complete_genomic_files.txt └── bacteria_complete_genomic_files.txt ├── requirements.txt └── src ├── blast ├── Blast.py ├── BlastDB.py ├── BlastHit.py ├── BlastHsp.py ├── BlastHspFlat.py ├── BlastN.py ├── BlastP.py ├── BlastQuery.py └── BlastX.py ├── common ├── Classifier.py ├── RangesHits.py ├── classification.py ├── csv_utils.py ├── genbank.py ├── genome.py ├── gff_utils.py ├── grange.py ├── misc.py ├── ranges.py └── sequence.py ├── genbank └── RecordGenbank.py ├── hmmer ├── Hmmer.py ├── HmmerHit.py ├── HmmerHsp.py └── HmmerHspFlat.py └── search_tool ├── RecordDigIS.py ├── RecordDigISAttrib.py ├── digIS.py ├── digISConfiguration.py └── digISMultifasta.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | 3 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y hmmer \ 5 | ncbi-blast+ \ 6 | python3.7 \ 7 | python3-pip 8 | 9 | RUN pip3 
install biopython==1.73 10 | 11 | ENV INSTALL_PATH /digis_tool 12 | RUN mkdir -p ${INSTALL_PATH} 13 | 14 | WORKDIR ${INSTALL_PATH} 15 | 16 | COPY . ${INSTALL_PATH} 17 | 18 | # Run digIS_search.py when the container launches 19 | CMD ["python3", "digIS_search.py"] 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Janka Puterova & Tomas Martinek 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /data/Blast_db/isfinder_is.nhr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_is.nhr -------------------------------------------------------------------------------- /data/Blast_db/isfinder_is.nin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_is.nin -------------------------------------------------------------------------------- /data/Blast_db/isfinder_is.nog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_is.nog -------------------------------------------------------------------------------- /data/Blast_db/isfinder_is.nsi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_is.nsi -------------------------------------------------------------------------------- /data/Blast_db/isfinder_is.nsq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_is.nsq -------------------------------------------------------------------------------- /data/Blast_db/isfinder_orf.phr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_orf.phr -------------------------------------------------------------------------------- 
/data/Blast_db/isfinder_orf.pin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_orf.pin -------------------------------------------------------------------------------- /data/Blast_db/isfinder_orf.pog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_orf.pog -------------------------------------------------------------------------------- /data/Blast_db/isfinder_orf.psi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_orf.psi -------------------------------------------------------------------------------- /data/Blast_db/isfinder_orf_all.phr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_orf_all.phr -------------------------------------------------------------------------------- /data/Blast_db/isfinder_orf_all.pin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_orf_all.pin -------------------------------------------------------------------------------- /data/Blast_db/isfinder_orf_all.pog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_orf_all.pog -------------------------------------------------------------------------------- /data/Blast_db/isfinder_orf_all.psi: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/janka2012/digIS/0386c36e49880af25390d208a37129318516e502/data/Blast_db/isfinder_orf_all.psi -------------------------------------------------------------------------------- /data/models/hmm/thresholds.txt: -------------------------------------------------------------------------------- 1 | NAME NC_seq NC_dom 2 | IS1 14.70 13.80 3 | IS110 17.60 16.30 4 | IS1182 17.70 17.70 5 | IS1380 15.20 14.50 6 | IS1595 18.60 18.60 7 | IS1634 15.10 13.40 8 | IS200_IS605 15.10 14.10 9 | IS21 14.80 12.00 10 | IS256 14.50 5.70 11 | IS3_IS150 15.10 12.60 12 | IS3_IS2 14.90 14.60 13 | IS3_IS3 14.00 13.50 14 | IS3_IS407 19.30 12.20 15 | IS3_IS51 14.40 14.00 16 | IS30 15.70 14.30 17 | IS4_IS10 14.90 14.60 18 | IS4_IS231 13.30 13.10 19 | IS4_IS4 17.20 15.30 20 | IS4_IS4Sa 19.60 15.10 21 | IS4_IS50 13.30 13.00 22 | IS4_ISH8 13.70 13.60 23 | IS4_ISPepr1 11.90 11.10 24 | IS481 17.40 15.20 25 | IS5_IS1031 16.10 15.30 26 | IS5_IS427 16.60 15.80 27 | IS5_IS5_model1 13.40 13.20 28 | IS5_IS5_model2 20.00 16.40 29 | IS5_IS5_model3 18.10 17.10 30 | IS5_IS903 14.70 14.00 31 | IS5_ISH1 15.30 14.40 32 | IS5_ISL2 14.00 13.40 33 | IS5_None_model1 15.90 15.70 34 | IS5_None_model2 12.50 11.30 35 | IS6 12.00 9.20 36 | IS66 13.90 13.50 37 | IS607 15.90 13.80 38 | IS630 14.20 13.10 39 | IS701 14.20 13.60 40 | IS91 16.70 15.60 41 | IS982 15.20 14.10 42 | ISAs1 13.40 11.90 43 | ISAzo13 12.80 11.20 44 | ISH3 17.90 17.10 45 | ISH6 13.70 13.30 46 | ISKra4 14.10 13.20 47 | ISL3 29.70 29.70 48 | ISLre2 15.80 14.60 49 | Tn3 12.80 12.10 50 | ISNCY_IS1202 13.10 11.80 51 | ISNCY_ISDol1 11.50 13.60 52 | -------------------------------------------------------------------------------- /data/models/msa/IS3_IS2/IS3_IS2_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | IS1087.1|IS3|IS2|Transposase|Ralstonia/248-360 
WCSDGFEFRCDDGSPLRVTFALDCHDREAISWAATTG-GHSGDIVRDVMLAAVEQRFGAV-Q--TEQTIEWLSDNGSAYIDHRTRTFARELGLEPLTTPVRSPQSNGMAESFVKTMK 3 | IS1087B.3|IS3|IS2|Transposase|Ralstonia/248-360 WCSDGFEFRCDDGSPLRVTFALDCHDREAISWAATTG-GHSGDIVRDVMLAAVEQRFGAV-Q--TEQTIEWLSDNGSAYIDHRTRSFARELGLEPLTTPVRSPQSNGMAESFVKTMK 4 | IS1312.3|IS3|IS2|Transposase|Agrobacterium/243-356 WCSDGFEIGCDNKEKVRVAFALDCCDREAIAHVATTE-GIKSEDVQDLVITAVENRFGRINI--LPKPIEWLTDNGSCFIASDTKSLLLDIGMEPRTTPVRSPQSNGMAEAFVKTFK 5 | IS1417.3|IS3|IS2|Transposase|Burkholderia/238-351 WCSDGFEFRCDNGEPLRVTFALDCCDREAMSWAATTA-GHSGDIVRDVMLAAVESRFGDVLH--TESEIEWLSDNGSGYTAEETRQFAALLGLKPLTTPVCSPQSNGMAESFVKTMK 6 | IS2.3|IS3|IS2|Transposase|Escherichia/237-349 WCSDGFEFCCDNGERLRVTFALDCCDREALHWAVTTG-GFNSETVQDVMLGAVERRFGNDL---PSSPVEWLTDNGSCYRANETRQFARMLGLEPKNTAVRSPESNGIAESFVKTIK 7 | IS426.3|IS3|IS2|Transposase|Agrobacterium/242-355 WCSDGLEFACWNGEVIRLAFIIDAFDREIIAWTAVANAGISGSDVRDMMLEAVEKRFHATR---APHAIEHLSDNGSAYTARDTRLFAQALNLTPCFTPVASPQSNGMSEAFVKTLK 8 | ISAs17.3|IS3|IS2|Transposase|Aeromonas/238-350 WCSDGFEFRCDNGEKLRVTFAMDCCDREALDWAASTG-GYDSDTVQHVMLRSVERRFGDSL---PASPVEWLTDNGSAYRAHETRAFAREIGLEPRTTAVRSPQSNGIAESFVKTMK 9 | ISAve3.3|IS3|IS2|Transposase|Aeromonas/238-350 WCSDGFEFRCDNGEKLRVTFAMDCSDRETLDWAASTG-SYDRATVQDVMLRSVERRFGDAL---PASPVEWLTDNGSAYRAHETRAFARELWLEPRTTAVRSPQSNGIAESFVKTMK 10 | ISBcen6.3|IS3|IS2|Transposase|Burkholderia/237-350 WCSDGFEFRCDNGEPLRVTFALDCCDREAMSWAATTA-GHSGDIVRDVMLAAVENRFGIELH--TPSEIEWLSDNGSGYTADDTRRFAVAIGLKPLTTPVCSPQSNGMAESFVKTMK 11 | ISBugl1.2|IS3|IS2|Transposase|Burkholderia/248-361 WCSDGFEFRCDNGEPLRVTFALDCCDREAMSWAATTA-GHSGDIVRDVMLAAVESRFGDVLH--TESEIEWLSDNGSGYTAEETRQFAALLGLKPLTTPVCSPQSNGMAESFVKTMK 12 | ISButh1.3|IS3|IS2|Transposase|Burkholderia/248-360 WCSDGFEFRCDDGTPLRVTFALDCCDREAMSWAATTG-GHSGDVVRDVMLAAVEQRFGTT-Q--AAQPIEWLTDNGSAYIDHHTRSFARELGLEPLTTPVRSPQSNGMAESFVKTMK 13 | ISButh2.2|IS3|IS2|Transposase|Burkholderia/242-355 
WCSDGFEFRCDNGEPLRVTFALDCCDREAMSWAATTA-GHSGDIVRDVMLAAVENRFGNALH--TPSEIEWLSDNGSGYTADDTRRFAMDIGLKPLTTPVCSPQSNGMAESFVKTMK 14 | ISCpr2.3|IS3|IS2|Transposase|Candidatus/238-350 WCSDSFSIQCLNGDRVHVAFSLDTCDREVMRYIAS-TIGIDGQMIRDLMLETIEYRFEQPK---ARVPLEWLSDNGSCYTAKETVNFGRMLGLTIRTTPPYSPESNGMAEAFVKTFK 15 | ISDvu2.3|IS3|IS2|Transposase|Desulfovibrio/246-358 WCSDGFEIPCDNREVVRVAFVLDSCDREVISYVAT-TRGISGSMVRDLMLESVERRFGNAH---TSHTVEWLSDNGSCYTAKETVEFASWLGLRSCFTPVRSPESNGMAEAFVKTFK 16 | ISEae2.3|IS3|IS2|Transposase|Enterobacter/239-351 WCSDGFEFGCDDGEKLRVTFALDCCDREAIDCAASTG-GYDSMTVQDVMLGAVEKRFGAML---PDKPVQWLTDNGSAYTAHDTRKFAADLNLEVCTTAVSSPQSNGMAERFVKTMK 17 | ISEc27.3|IS3|IS2|Transposase|Escherichia/252-364 WCSDGFEFSCDNGEKLRVTFALDCCDREALYWAASNG-GYDSETVQDVMLGAVERRFGNSL---PTSPVEWLTDNGSAYRSYQTRQFARMVGLEPKHTAVRSPESNGMAESFVKTMK 18 | ISEc36.3|IS3|IS2|Transposase|Escherichia/252-364 WCSDGFEFSCDNGEKLRVTFALDCCDREALHWAASTG-GYDSETVQDVMLGAVERRFGNSL---PTSPVEWLTDNGSAYRSYQTRQFARMVGLEPKHTAVRSPESNGMAESFVKTMK 19 | ISEc48.3|IS3|IS2|Transposase|Escherichia/239-351 WCSDGFEFGCDDGEKLRVTFVLDCCDREAIDWAASTG-GYDKATVQDVMLGAIEKRFGDKV---PEQSIQWLTDNGSAYRAHETRQFARELNLKPCTTAISSPQSNGMAERFVKTMK 20 | ISEcl1.3|IS3|IS2|Transposase|Enterobacter/239-351 WCSDGFEFGCDNGEKLRVTFALDCCDREAIDWAASTG-GYDSSTVQDVMLRSVEKRFGDRL---PDTAVQWLTDNGSAYTAYETRRFARELNLEPCTTAVSSPQSNGMAERFVKTMK 21 | ISGbe2.3|IS3|IS2|Transposase|Granulibacter/247-360 WCSDGFEFTCWNGEVVRGAFIIDAHDREIIAWRAVANAGISGSDVRDIMLEAVETRFGGMR---APVPVEMLSDNGSAYTARETRTFARQLGLKPCFTPVRSPQSNGISEAFVHTLK 22 | ISMaq2.3|IS3|IS2|Transposase|Marinobacter/238-351 WCSDGFEIRCWNKEVVRVAFSLDCCDRELMRYVAT-TGGITGEMVQDLLLESLEYRFGQSER--VPHPLEWLTDNGSCYIAKETRAFASSLGFVVCTTPVRSPQSNGMAEAFVKTFK 23 | ISMtsp18.3|IS3|IS2|Transposase|Methylobacterium/252-364 WCSDHLDLRCRDGAVVRVLFAIDACDREIMAWSAT-TAGVSGEMVSNLMIACVEHRFGATK---APHPIEWLSDNGSAYIARQTSETATALGLRLLFTPVRSPQSNGIAEAFVKTLK 24 | ISMtsp5.3|IS3|IS2|Transposase|Methylobacterium/247-359 
WGTDLTTTWTGEG-QVAVFVAVDHCSAECVGVHAARR--ATRFEALEPIRQGVRQRFGSFAA-KSASGLSVRHDHGSQYMSDAFQAELAFLGIASSPAFVRAPEGNGCAERFIRTLK 25 | ISNmu3.3|IS3|IS2|Transposase|Nitrosospira/249-362 WCSDGFEIPCWNGQVVRVAFALDCCDREVISHVAT-TGGITGEMVRDLMTESVERRFGTVDL--LPHRVEWLSDNGSCYTASETTAFAKDMGFISCFTPVRSPESNGMAEAFVKTFK 26 | ISOgr2.3|IS3|IS2|Transposase|Oceanicola/244-357 WCSDGFEFTCWNGDVIRGAFIIDAHDREIISWRAVVNAGISGSDIRDMMLEAVERRFGDYR---TPHPVEMLSDNGSCYVARETRIFARQLGLRSCYTPVKSPESNGIAEAFVKTLK 27 | ISPa56.3|IS3|IS2|Transposase|Pseudomonas/243-359 WCSDGLEIKCDSGQTVTATFAKDCCDREVMAWRAWEGKGLPGEPVREMLIEAVERRFGSVEAVPQGQELEFLSDNGGAYIAAETRALARALGLKPINTPVCSPQSNGMAESFVNTFK 28 | ISRhsp2.3|IS3|IS2|Transposase|Rhodobacter/244-357 WCSDGFEFTCWNGDIVRGAFIIDAHDREIIAWRAVVNAGISGSDIRDIMLEAVERRFGAYR---APSVIEMLSDNGPPYIAKDTQIFARQLGLKPCFTPVQSPQSNGISEAFVKTLK 29 | ISRm1.3|IS3|IS2|Transposase|Rhizobium/243-356 WCSDGFEIGCDNKEKVRVAFALDCCDREAIAHVATTE-GIKSQDVQDLVITAVENRFGRINM--LSEPIEWLTDNGSCFIAKDTASLLRDIGMEPCTTPVRSPQSNGMAEAFVKTFK 30 | ISRso10.3|IS3|IS2|Transposase|Ralstonia/238-351 WCSDGFEFRCDNGEPLRVTFALDCCDREAMSWVATTG-GYSGDVVRDVMLAAVEQRFGNVPK--APAEIEWLTDNGSGYIAGKTREFATDIGLKPLTTPVCSPQSNGMAESFVKTMK 31 | // 32 | -------------------------------------------------------------------------------- /data/models/msa/IS4_ISH8/IS4_ISH8_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISAeme3.1|IS4|ISH8|Transposase|Aeromonas/116-314 FKQVLLHDGTSFAVHRRLAADFPGRFKTISPAAIECHMTMSLSEQSPLCMSVS-----ADTAGERQFLPEAHKLANCLLLADAGYIDRAWFEQVNDAGGFYLVRGTQSLNPKIIQAWRGDGRE--------VPKLAGLSLKEAGRRRCRAEVLDMVV------------KSGK-VEYRL-IRRWFA----------EEKRFCLWMTNL-PRAAWSAEQVMSLYRCRWQVELLFKEWK 3 | ISAeme4.1|IS4|ISH8|Transposase|Aeromonas/116-314 FKQVLLQDGTSFAVHKSLAEVFPGRFKTISPAAIECHMLMSLLEQSPVCMQVS-----ADTASERQFLPEPERLNNSLLLADAGYIDMAYFAKLNEAGGFYLVRGSKSLNPKIIRAWRGDERE--------VPKLAGLSLKDAGRRHCRAEVLDMVV------------KSGK-FEYRL-IRRWFA----------EEKRFCIWMTNL-PREAWPAERVMSLYRCRWQVELLFKEWK 4 | 
ISApu1.1|IS4|ISH8|Transposase|Aeromonas/116-314 FKQVLLHDGTSFAVHRRLAADFPGRFKTISPAAIECHMTMSLSEQSPLCMSVS-----ADTAGERQFLPEAHTLTNCLLLADAGYIDRAWFEQVNDAGGFYLVRGTKSLNPKIIQAWRGDGRE--------VPKLAGLSLREAGRRRCRAEVLDMVV------------KSGQ-VEYRL-IRRWFA----------EEKRFCLWMTNL-PRAAWSAEQVMSLYRCRWQVELLFKEWK 5 | ISApu2.1|IS4|ISH8|Transposase|Aeromonas/117-315 FEQVLLQDGSSFALHPQLAEHFPGRFNKHSPAAVECHMTMSLLDQSPISMSIT-----ADTESERKHLPAANSLNNKLLLADAGYISREYFADVTKAKGSYLVRGSQNLNPKVQAAYRQDGRE--------MPKLLGKKLKDIDRRSCRAEVLDLDV------------SSGK-YKYRL-IRRWFA----------EEKRFCIWMTNL-PRERWPAEKVMALYRCRWQIELLFKELK 6 | ISArch11.1|IS4|ISH8|Transposase|uncultured/120-330 FKDLVIQDSTIIRLHESLAKIWPAARTKKIAAGVKVSCIVSAVADSPKSVRIY-----PERTSEAKILRLGPWLRDRILLIDLGYFKYLFFDRIDGYGGYFVSRLKGNANPLIVRVNRKC-RGNS-------VDVVGKKLRDVLPRLKREI-LDVEVEVEFKRRKYKGKQSTVKRRFRM-VCAFNS----------DSGKYHSYLTNI-RVDILSAEEIALLYGARWEIELIFKELK 7 | ISAva1.1|IS4|ISH8|Transposase|Anabaena/127-311 FEKIWIVDCSILEALFQKLDSLKDAPQG--QLAGKIGTVINLVNLLPVEIWFCENPRTADTKFEADILNLV--TPHTLLLLDRGFYHFNFWLQLIAQNVNFITRLKKGAAIHVQQVFTDSFA-------------------------LRDRLVRL----------GSGTKKTTFITLRL-VEIRSD------------KTWHSYLTSVLDPEVLPPYVVADLYRRRWRIEDAFNTVK 8 | ISH26-1.1|IS4|ISH8|Transposase|Halobacterium/105-148 FRDVMIADGTVLRLHEFLSDQFEARHEE--QAGAKSPASQCHRADD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 9 | ISH26.1|IS4|ISH8|Transposase|Halobacterium/105-148 FRDVMIADGTVLRLHEFLSDQFEARHEE--QAGAKSPASQCHRADD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 10 | ISH30.1|IS4|ISH8|Transposase|Halobacterium/121-327 
FRDVLIVDASIVSLYQDATDVYAATGED--QAGVKLHLTESLSTGLPTRFQTT-----DAKTQERSQLPTGEWVAGALVLLDLGFYDFWLFDRIDQNDGWFVSRVKDDANFEIVEELRTW-RGNS-------IPLEGESLQDVLDDLQRQE-IDARITLSFDRKRGS--GVSTTRTFRL-VGVLNE----------DSGEYHLYVTNL-ARDNYHAPDIAQLYRARWEIELLFKELK 11 | ISH32.1|IS4|ISH8|Transposase|Halobacterium/121-327 FRDVLIADGSIVSLYQDAADVYAATGDD--QAGLKLHLTESLSTGLPTRYRTT-----DAKTQERSQLPTGEWVAGALVLLDLGFYDFWLFDRIDQNNGWFVSRVKDDANFEIVEELRTW-RGNS-------IPLEGESLQDVLDDLQRQE-IDVRITLSFERKRGS--CTSTTRTFRL-VGVWNE----------DTEEYHLYLTNL-SKDDYSAPDIAQLYRARWEIELLFKELK 12 | ISH5.1|IS4|ISH8|Transposase|Halobacterium/120-326 FRDVLIADATIVSLYQDAADVYAATGED--QAELKLHLIESLSTGLPTRFRTT-----DGTTHERSQLPTGEWVADALILLDLGFYDFWLFDRIDQNGGWFVSRVKDNANFEIVEELRTW-RGNS-------IPLEGESLQAVLDDLQRQE-IDVRITLSFERKRGS--GASATRTFRL-VGLRNE----------ETEEYHLYLTNL-GNDDYSAPDIAQLYRARWEVELLFKELK 13 | ISH8.1|IS4|ISH8|Transposase|Halobacterium/106-314 FRDVMIADGTVLRLHEFLSDQFEARHEE--QAGAKLHLLHNATEQTIERLDTA-----NEKTHDSTLFKTGPWLENRLVLFDLAYFKYRRFALIDENDGYFVSRLKQNANPLITRELREW-RGRA-------IPLEGKQLRAVLDDLDRKY-IDVEVEVEFKRGPYNGTRSLDTKRFRV-VGVRDE----------DADDYHLYMTNL-VRKEFFPADLAQIYRCRWEVELLFRELK 14 | ISH8A.1|IS4|ISH8|Transposase|Halobacterium/106-314 FRDVMIADGTVLRLHEFLSDQFEARHEE--QAGAKLHLLHNATEQTIEQLDTA-----NEKTHDSTLFKTGPWLENRLMLFDLAYFKYRRFALIDENDGYFVSRLKQNANPLITGELREW-RGRA-------IPLEGKQLRAVLDDLDRKY-IDVEVEVEFKRGPYNGTQSLDTKRFRV-VGVRDE----------DADDYHLYMTNL-ARKEFFPADLAEIYRCRWEVELLFRELK 15 | ISH8B.1|IS4|ISH8|Transposase|Halobacterium/106-314 FSDVMIADGTVLRLHEFLADQFEARHEE--QAGAKLHLLHNATEQTIERIDVT-----DEKAHDSTLFKTGSWLENRLVLFDLAYFKYRRFARIDENGGSFVSRLKQNANPVITDELREW-RGRA-------IPLEGKQLRDVLDDLDRTY-IDVEVEVEFKRGPYNGTRSLDTKRFRV-VGVRDE----------DADDYHLYVTNL-SREEFFPADLAQIYRCRWEVELLFRELK 16 | ISH8C.1|IS4|ISH8|Transposase|Halobacterium/106-314 FRDVMIADGTVLRLHEFLSDEFQARHEE--QAGAKLHLLHNATDETIERIDVT-----DEKTHDSTLFKTGSWLQERLVLFDRAYFKYRRFALIDENDGYFVSRLKENANPLITEELREW-RGRA-------IPLEGKQIHDVVDDISRKY-IDVEVEAEFKRGQYEGTRSLDTKRFRV-VGVRDS----------DADDYHLYITNL-PRDEFFPEDLATLYRCRWEVETLFRELK 
17 | ISH8D.1|IS4|ISH8|Transposase|Halobacterium/106-314 FRDVMIADGTVLRLHEFLSDQFEARHEE--QAGAKLHLLHNATEQTIERLDTA-----NEKTHDSTLFKTGPWLENRLVLFDLAYFKYRRFALIDENDGYFVSRLKQNANPLITRELREW-RGRA-------IPLEGKQLRAVLDDLDRKY-IDVEVEVEFKRGPYNGTRSLDTKRFRV-VGVRDE----------DADDYHLYMTNL-VRKEFFPADLAQIYRCRWEVELLFRELK 18 | ISH8E.1|IS4|ISH8|Transposase|Halobacterium/106-314 FRDVMIADGTVLRLHEFLSEVYEGRHEE--QAGARLHLLHNATEQMIERIDVT-----DETAHDSTLFKTGSWLHGRLILLDLAYFKYRRFALIDENDGFFVSRLKQNANPVITAELREW-RGRA-------IPLEGKQIQDVVDDLSRQY-IDVEVEAEFKRGQYEGTRSLDTKRFRV-VGVRNE----------DADDYHLYITNL-PRDEFLPSDLATLYRCRWEVETLFRELK 19 | ISHma1.1|IS4|ISH8|Transposase|Haloarcula/106-314 FRDVMIADGTVLRLHEFLSDEFQARHEE--QAGAKLHLLHNATDQTIERIDVT-----DEKTHDSALFKTGSWLQGRLVLFDRAYFKYRRFALIDENDGYFVSRLKQNANPVITAELREW-RGRA-------IPLEGKQIHDVVNDLSRKY-IDVEVEAEFKRGQYEGTRSLDTKRFRV-VGVRNE----------DADDYHLYITNL-PREEFLPADLATLYRCRWEVETLFRELK 20 | ISHme2.1|IS4|ISH8|Transposase|Haloferax/106-314 FRDVMIADGTVLRLHEFLSDEFQARHEE--QAGAKLHLLHNATDETIERMDVT-----DEKTHDSTLFKTGSWLQGRLVLLDLAYFKYRRFALIDENDGYFVSRLKQNANPVITAELREW-RGRA-------IPLEGKQIHDVVDDLSRKY-IDVAVEAEFKRGPYNGTRSLDTKRFRV-VGVRDE----------DADDYHLYITNL-PREEFLPADLSTLYRCRWEVETLFRELK 21 | ISHti13.1|IS4|ISH8|Transposase|Halorhabdus/116-324 FRDVMIADGTVLRLHQFLSDEFEGRNEE--QAGARLHLLHNPSDQMLERFSIT-----DEKAHDSTEFNTGSWLEQRLVLFDQAYFKYRRFALIDENDGYFVSRLKPDANPVVTDELREW-RGDA-------IPLEGEKIHDVVEDLYRKY-IDVEVEAEFKRGPYNGTRSLDSKRFRV-VGVRDE----------DADDYHLYITNL-PREEFLPEDLATIYRCRWAVERLFRELK 22 | ISHvo11.1|IS4|ISH8|Transposase|Haloferax/121-327 FRDVLIADATIVSLYQDAADIYTATGDH--QAELKLHLTESLSTGLPTRFRTT-----DGTTHERSQLPTGEWVADALILLDLGFYDFWLFDRIDQNGGWFVSRVKDNANFEIVEELRTW-RGNS-------IPLEGESLQAVLEDLQRQE-IDVRITLSFERKRGS--GASATRSFRL-VGLRNK----------ESEEYHLYLTNL-ARESYSAPDIAQLYRARWEVELLFKELK 23 | ISHvo12.1|IS4|ISH8|Transposase|Haloferax/117-323 
FRDVLIADATVFRLHRLL-DSFPATHSG--QSGAKLHLVQNATKQTIEQFQLT-----DERTHESSQLRTGSWLRGRLLLFDLGFYSFRRFALIEENGGFFLSRLKSNANPLIVGERRKW-RGRA-------ISLPERRLRDVLEDLSREI-IDVTVEIEFKRRAYAGKESTDSMEFRV-VGVRNE----------DTDDYHLYVTNL-PD-EFTPRQVGALYGLRWEVELVFRELK 24 | ISHvo13.1|IS4|ISH8|Transposase|Haloferax/117-323 FRDVMIADATVFRLHRLL-SEFPATHPD--QSGAKLHLVHNLTKQTIERFELT-----DERSHESSQFRTGNWLRGRLLLFDLGFYSYRRFALIEENGGFFLSRLKTNANPRITEARRKW-RGRA-------ISLPDRRLQDVLGGLTREI-IDVTAEVSFKRRPYGEKHSSATIEFRV-VGVRNE----------DTDDYHLYITNL-PD-EFTPEQVAALYGVRWEVEVLFRELK 25 | ISHwa5.2|IS4|ISH8|Transposase|Haloquadratum/114-328 FRDVFITDATDCTLSAASFEDFPGYSDD--HAGAQLHMIESLASRAPFFASIT-----DVRTDELSQLQIGDWVADSLLLDDLGYHDYGKIARIDELGGWFVNRLKVNSNGPITEELERW-PTGA-------ISLEGTHIQDVLPDLYRSE-IDALAQFDTPS----SDSELLPCEFRL-VVTESDEAADEEDAPDADHEYHLYVTNL-PKEWFSPREVAALYSARWSIETLIQEAK 26 | ISMba6.1|IS4|ISH8|Transposase|Methanosarcina/117-328 FQDVVIQDITIVRLHSSLADRFPAARSRTVAAGVKVGVMVSAIANGPRTIALY-----SEKTAEIKTLKIGTWIKDHILLVDLGFYKTQMFARVEENGGYFVSRIRKNMDPILVSIEEGLSKTKS-------KEFAGKPVSECIKQLSGKD-IDAVVKIEFKRREYKGKQKQDEMIVRL-VAVYND----------EDEKYHIYITNI-HKDILNAKDIANLYGARWDIELLFKELK 27 | ISMhu6.1|IS4|ISH8|Transposase|Methanospirillum/125-341 FKDILVQDSSIIRISKKLYELHPAARSRDESAGLKIHAIYSVVSNSVKNAIIT-----TERVHDSKMLKIGPEIENILLINDLGYYSLKIFSKIQQYGGFFVSRIKSNAVFKVVSINSGPSEITSIVDHIRFKSINGYDFLDLMPKK--GV-YDLICSFHVGDKRINKIKTPIFQEFRV-ICTWNP----------LTEKWHLYITNI-SKELFSADDIYDLYRFRWVIELIFKELK 28 | ISMps1.1|IS4|ISH8|Transposase|marine/115-312 FKGIVLQDGSSFAVHDSLKDIFTGRFTKISPAAIEVHVSWDVLKGYPEQVSIS-----PDSQAEYDFLPDADALEGRLLLADRGYFKLSYLDEIDQAGGAYVVRAKTTVNPMVVAGFNKAGKP--------LKRFQKIKQKAVKKHIRRSGIVDMDV-------------EGK-TNYRL-IASWPE----------GKDEPTYWATNL-DREQFSAEKVMKLYQLRWQIELLFKEWK 29 | ISNpu8.1|IS4|ISH8|Transposase|Nostoc/148-331 FTAIWIADGSTLEALRRKLKVLQEQ-EK--TLAGKIMMVVEAFSHHPVTTWYTQNSKANDKTWCEQLLERL--PIGGLLIFDLGFFKFPWFDAFTEADKFFLTRLREKTSYKVIRCLTNASF-------------------------YRNEIISM----------GEYRSNPCQHQVRL-VSVLWG------------STWYYYLTNVLDPQMLSAHLVCELGSRRWRVEDAFLLTK 30 | 
ISPa12.1|IS4|ISH8|Transposase|Pseudomonas/118-310 FKQVLLQDGTSFAVHDGLSLHFPGRFSTHSPAAVELHVTYDLEKAQPVRVSLS-----EDTASERDYLPVAQSLRGCLLMADAGYFSKAYIESLQNEAASFVLRMPASVNPMATCNQTGLCQP--------LRSWLA--------VLPKHGELDLDV------------QWPDGPVYRCVLFASTD----------HKDKPVCLCTNL-DRHTFPAATVGEWYRLRWQIELLFKEWK 31 | ISPa13.1|IS4|ISH8|Transposase|Pseudomonas/118-310 FKRIILQDGTSFAVHDDLMFYFPGRFNENSPAAVELHVTYDVFKGQPDGVSLT-----EDTAPERDYLPSPASLSGCLFMADAGYFSKAYIQQLQEACAYFIMRMCNRVNPMAVCTKTQQMKP--------LKTWLK--------ELPNAGVLDLDV------------QWPNGPVYRCVAFAALD----------HKKRHVLVCTNL-DREQFPPAVVGDFYRVRWQIELLFKEWK 32 | ISVa14.1|IS4|ISH8|Transposase|Vibrio/118-315 FDDVLLQDGSSFHIHRDLADVYPSRFKR-NPAAVECHMTMSLKSFSPVAMSIS-----ADTASERDFLPEPKTMNNKLLLADAGYPDFHFFRELEQHGGFYIVRGAKSLNPMIIEARNGRGRL--------LPKLEGRRLKDITRGTNRSQVLDLKV------------RRGK-QEFRV-VRRWFA----------EEKRFCIWITNL-PSDTYTADDIMAIYRCRWQVELLFKELK 33 | // 34 | -------------------------------------------------------------------------------- /data/models/msa/IS4_ISPepr1/IS4_ISPepr1_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | IS2621.2|IS4|ISPepr1|Transposase|Deinococcus/1-127 -------------------------------------------------------------------------------------------------------------------------------------------------MVETVTGHGL---PFISRFPRN--ANLKYLYT------------GEHPRRRG--RPKKFDGKVDFSDLQRFDLVSE----TSTERVWTQVV-------WSVQWAREVRAVVIQQVGKKGQVTGYAVLFSTAVTMPAHEVIALYRSRFEIELIFRDAK 3 | IS943.1|IS4|ISPepr1|Transposase|Bacteroides/129-371 LIIDDTDLPK-TGFKTELIGRIYSHVLHRS----ILGFKGLFLCHTDGKTQTMLDFSLHGEEGKNPEKIQGLTSKQRNARFCKVRDEKSVVNTRIRE--YKQSKIERSIEMVR----------HAMKKGIRFDYLLVDSWFTCADLIRFITSRHL-ECHLIGMLKMG---KTRYRTEAGNLNAPVIIDRLKKEKSVR--YSRKLNCY--------------YAHMDAE---YANR---------------KIRIFFCK----RGRKGAWNAFLSTDTRLDFFEAYRIYSMRWAIEVCFSEMK 4 | ISAcma11.1|IS4|ISPepr1|Transposase|Acaryochloris/1-230 
--------MEKSGHYTHGVDWFFNGKTQRA----ERGIEWSVIAIIDLAQHTGYTLSAQQTEA-------GLQAK---PPEPSRLAKDSPSP---------TSRVDFYVGHLVECDS---------YLPERVCYIVADGFYSKRKWINAVVQINR---HAIGKLRCD--ANLKFLYR------------GPQK-RQG--RRRRYAAKVELADPSNLTFVCE---LESGDQLYMAVV-------WSVSLKRAIRMAY--VCGHKDGKHSYAVLFSTDLDIDPYDIYRYYSARFQIEFIFRDAR 5 | ISAsp3.1|IS4|ISPepr1|Transposase|Anabaena/85-348 LLAGDEVVISKAGKKTYGLDRFFSSLTSKP----ISGLSFFTLSLVSVQQRHSFPIQIEQVIK-------SDVEKSIVSPIPEVKPQEKPGRGRPKGSKNKNKQEVIFTSELLRIQKMINELFKLIANFIPLTYLVVDGHFGNNNALQMARQVKL---HIISKLRHD--SALYIPYQ------------HPDPNHRS--R-RKYGDKLDWRNIAGEYLRQSSIDEDIKTDIYQMTL-------LHKEFAQSLNVVILVKTNIKTNAVSHVILFSSDLDLSYEKIIDYYRLRFQIEFNFRDAK 6 | ISBf8.1|IS4|ISPepr1|Transposase|Bacteroides/128-368 FIIDDTVLEK-SGVRMEGISRVFDHMKGRC----VLGYKLLLCAFFDGKTTIPFDFSLHQEKGKQGN--CGLTRQQLKKAYHTKRNTGNPDYKRFQE--CKMSKMEVAMDMLR----------RGWKMGLHAKYVITDSWFTCEQLMTCVRSIGKGAMHFVGLAKMG---KTKYTISGKKKNAAELIATY-ERERGK--NCRKYKCR--------------YIQLNGN---LGDI---------------PIRIFLIK----YGRNSAWNVLLTTDTTMYFVKAFEVYQIRWNIEVMNKETK 7 | ISCku4.1|IS4|ISPepr1|Transposase|Candidatus/97-334 LLCGDGLKVPKEGKKMPGVKSLHQESDSNNKAEYIMGHSCQVVSLLAEAGKSCFAIPL-----------------------VSRIHEGVV--------FSNRDQRTLLDKMVLLINS---------LELKELFYFIADAYYASHAIINGVVARGS---HLISRVRSN--AVAYFPVEP-----------TPEKKGRG--RPKKYGMKVKLKTLLNDRASMK----EAESPVYGEQGIKINYRTLDLLWKPV-G-ILIRFVLVDHPQRGKIILMSTDLTISAMEIICLYGLRFKIEVSFKQAL 8 | ISDha5.1|IS4|ISPepr1|Transposase|Desulfitobacterium/114-340 FIVDDSVMERERSKKVELLARVFDHVSGRF----VRGYTLLTLGWSDGFSFAPLDFTLMSSAKA----------KNRLCEMREDLDKRSVGYKRRLE--AMSPKPDTVVQMLE----------RALKAGFSADYVLMDSWFTHAPLLQKLRDKEL---HVIGMVKE--LKQ-RYLFEGKSLSLRELYARV--PKNPK---AEILGS---------------VRVHTPS------GL--------------ALKVVFVQ---NRNNRREWLAILTTDLSLETTEVVRIYGMRWSIETFFKMAK 9 | ISDpr2.1|IS4|ISPepr1|Transposase|delta/118-344 
LVVDDSDYDRARSKKVELLAKIFDHNSGKY----LKGFKLLTLGWGDGVTFLPLDFVLRSSANA----------VNRIQGITKDLDKRTCGYKRRIE--AMTKSTDALEVMVK----------RVMALGVRADYLLMDSWFCFASLIRKLST-HL---PVICMAKD--LKSNFYQYRGESLRLGRLYQQL--KKRPG--RAQILAS---------------IVLSMVN------GP--------------TVKLVFVR---NRN-GRGWLALLSTDTTLPDEEIIRIYGKRWDIEVFFKMAK 10 | ISDra7.1|IS4|ISPepr1|Transposase|Deinococcus/90-318 VLAIDASFHRKAGQHTAHLGSFWNGCAART----ERGIEQSCCALIDVQHRQALTVDVRQTLT-------GSEA-------PTRLEQXA----------------DQLDDVLLDLRT---------VQQLDLAAVVADGNYAKEPIVETVTGHGL---PFISRLPRN--ANLNDLYT------------GEHPRRRG--RKKKFDGKVDFSDLQRFDLVSA----RPTERVWTQVV-------WSVQWAREVRAVVIQQIGKKGQVTGYAVLFSTAVTMPAHEVIALYRSRFEIELIFRDAK 11 | ISGur1.1|IS4|ISPepr1|Transposase|Geobacter/115-347 LIADDTLYRRDRSKRVELLARVHDHNTGRY----VRGFRMLTLGWSDGNSFVPMMLSMLSSAKD----------KNRLAPMREGIDKRTNGYQRRRE--SMRKSTDVLVDMVA----------LAMKAGTTARHLLFDSWFAFPATIRRIRALGM---HTICMLKD--TGKVTYEVQGWPFTLKELFKEV--RKRPG--RAKVLAE---------------VLVTIGQDI-HGKPV--------------AAKIVFVR---DRS-SKKWLALLSTDTTLTAEEIITLYGRRWDIEVFFKMAK 12 | ISL5.1|IS4|ISPepr1|Transposase|Lactobacillus/1-169 --------------------------------------------------------------------------------MTKKIDKRTIAAKRRIM--AQSKGTDVVIQLLD----------QALKAGLTAKYVMFDTWFSNPHQIVQISQRGL---NVIAMVKK--SSKITYEFEEKRMNVKQIFNAC--KKRRG--RSRYLLS---------------VPVKVGDPAKDGAQI--------------DARIVCVR---NRSNRKDWIALICTDMTIDENEIIRIYGKRWDIEVFFKTCK 13 | ISLho3.1|IS4|ISPepr1|Transposase|Lactobacillus/110-342 FILDDSLFKREFSKKTELLSKVFDHDKQKF----YRGFRGLTLGWSDGNTFLPVNFALMSSNNS----------KNRFNQL-KHFDCRSLAAKRRFQ--AQRKMNDVALELID----------EALKAGLKTKYVLFDSWFSSPRMFFELLQRGQ---FGIGMLKR--SKKVYFRYRGRQMDVKSLYTMLSRSKRPT--HTTYLYS---------------SIVN---FIIDGHKM--------------KIKLVYVA---NRNKTNQYLVLGTTNTALQPNQIIQMYGRRWQIEGYFKVAK 14 | ISLre1.1|IS4|ISPepr1|Transposase|Lactobacillus/112-345 
FIVDDTLISRPYSTKTELLAKVYDHNQDKY----ITGYRNLTIGWSDGNTFLPVNFALMSTKNR----------ANLVGTKACVTDQRTIAGQRRNQ--AQRKMNDVVIELIH----------QALKFGISAKYVLFDSWYSSPRMFWRLKELGL---DSVAMLKR--SSKVYYRYRGRAYSIKALYLRLLHSKRHQ--AENYLYS---------------SNVE---ANFQGYSF--------------PLKVVFVA---KKGTKNQYLVLASTNTKLTPQKIIQLYNRRWSIETYFKTAK 15 | ISMasp2.1|IS4|ISPepr1|Transposase|Magnetococcus/131-369 LVADTTVKEK-RGDRIEGVCWHHDHNTGRS----VAGFEAAHLVWVNKQGTLPLDAALRFSKRPLISNLLHILS--------YRFDCRSHLGRRYRE-AAKMSKLDQTVDMVA----------RAIQAGIPAQYFLADAWYSSVKFVKKILDLGV-----VPLIRWK-RNNTKFLFQGERLTSAELYTRFAKGKIRKAKGSKRFKGT--------------FL--DAE---HPEIG--------------LIRLFFVRLIDPKTGSKEWAVFLTTDRSMGLSNMIEHYANRWGIEVFYKESK 16 | ISPepr1.1|IS4|ISPepr1|Transposase|Pelobacter/115-347 LIADDTLYSRDRSKCVELLARVHDHNTGRF----MRGFRMLTLGWSDGNSFVPMALSMLSSAKE----------KNRLAPMHDGIDKRTNGYKRRQE--SMRKSTDVLVDMVS----------LVMTAGTKAQPLLLDSWFAFPATIRRIKALGM---HTVCMLKD--TGKVTYEMQGWPLSLKELYKSV--RKRCG--RAKVLAE---------------VLVTIGSAD-QGIPV--------------PAKIVFVR---DRN-SKRWLALLSTDTTLTAEEIIKLYRRRWDIEVFFKMAK 17 | ISPepr4.1|IS4|ISPepr1|Transposase|Pelobacter/118-349 LIADDTLYLRDRSKCVELLARVHDHNTKRY----HRGFRMLTLGWSDGNSFVPMLFSMLSSSKE----------KNRLAAMRTDLDKRTNGYKRRQE--SVNKSPDVLVELVK----------VAMAAGSKARYLLFDSWFAFPVTIRRIHALGM---HTICMLKD--S-KTRYTFQGDGMPLKELFKWV--RKRPG--RAKVLAS---------------VMVTIGEDD-KGNPV--------------AAKIVFVR---DRS-KKSWLALLSTDTTLADEEIIKLYGRRWDIEVFFKMSK 18 | ISPto3.1|IS4|ISPepr1|Transposase|Psychroflexus/129-366 LIFDDTDLSK-TGKTIEGVSKIYNHVSKTY----YLGFKLLVAGYWNGSVFIPIDFSLHRESK-TSRLKYGLTAKQRKAQKKTPRCSKTVAAKRYRE--LNKKKTDLVVQMFS----------RVVKRKIPVDYILIDTWFTSVGLLKKLRSICS-STHIIGMYKYNSKIEVRSKV-------KTLAQLKKQKAKPK--RCRKFNYY--------------YHHYIAE---IDGL---------------KVALFISK----RGKNGKWHTLITTDTSLKFVKAIEVYSIRWSIEVFFKEAK 19 | ISUnCu6.1|IS4|ISPepr1|Transposase|uncultured/132-375 
LIVDDTDYPK-TGRRFEYIGRVHSHVQHRS----ILGFKALFLVITDGTSQMILDFALLGEKGRKGN--FGMSAKELKDRFTKPRDEQDALQERINE--YTASKISLMIDMIK----------RAIGKGVKFRYVLADSWFACKDIIRFVRSRHM-KCDYLGMIKIGESGRTKYHFERKDFTAPALIKLLSKRKRRK--YNRKLRCY--------------YMVADVV---FADT---------------KVRLFFVK----RSKNAAWNGLITTDTTLDFLCAYKIYAQRWALEVIFKEAK 20 | // 21 | -------------------------------------------------------------------------------- /data/models/msa/IS5_IS5/IS5_IS5_model3_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | #=GF Alignment Program ClustalOWS 3 | IS1106.1|IS5|IS5|Transposase|Neisseria/95-243 VVDATIIQTA-GSKQRQAIEVDE--EGQIS----------------GQTTPSKDKDARWIKKNGL-------------------YKLGYKQHTRTDA-EGYTEKLHITPANAHEC--KHLPPLLEG--LPKGTTVYADKGYDSAENRQHLKEH--QLQDGIMRKACRN-RPLTETQTKRNRY--LSKTRYVVEQSFG 4 | IS1168.1|IS5|IS5|Transposase|Bacteroides/152-306 IIDASFVVAPRQRNTREENAKIK--EGKAD---------ELWND-NPHKKFHKDVDARWTKKRGD-------------------TFYGYKQHVKVDKGNKVILSYATTPANVHDS--KGFEQLLDE--SDKDKDLYLDAGYAGQE--STVKEH--GMNPIICEKGRRN-HPLTEKQKAENRR--KSKTRCLVEHVFG 5 | IS1169.1|IS5|IS5|Transposase|Bacteroides/152-306 IIDASFVIAPRQRNTRDENEQIK--QGAGD---------KLWND-NPHKKLHKDVDARWTKKRDE-------------------TFYGYKQHTKAEKRNKIILSYDTTSAEVHDS--KGFEGLLDE--KDEGKDLYLDAGYVGQE--EIVKQH--KMNPIICEKGYRN-RPLTKEQKSDNRK--KSKTRCLVEHVFG 6 | IS1186.1|IS5|IS5|Transposase|Bacteroides/152-306 IIDASFVVAPRQRNTREENAKIK--EGKGD---------ELWND-NPHKKFHKDVDARWTEKRGD-------------------TFYGYKQHVKVDKGNKVILSYATTPANVHDS--KGFEQLLDE--SDKDKDLYLDAGYAGQE--STVKEH--GMNPIICEKGRRN-HPLTEKQKAENRR--KSKTRCLVEHVFG 7 | IS1194.1|IS5|IS5|Transposase|Streptococcus/139-285 IVDATFIEAPKQRNPKDENELIK--ANRVP---------VNWT---KNKRAQKDTAARWTIKGNE-------------------RHYGYKNHIAIDTKSKFVKNYQTTPANVHDS--QVIGVLVDP-----DEITLADSAYQNKATP-----K--GAELFTCLKNTHS-KSLKADDKMFNKI--ISKIRVRIEHVFG 8 | IS1246.1|IS5|IS5|Transposase|Pseudomonas/83-243 
IVDATLVPAPKQRNSREENKLVREFTGAMP---------ANWK---PAKRRQKDTDATWTQKHGK-------------------NHFGYKLSVNVDKKYKIIRKFETDTASVHDSKFTHFDALIDR--SNTSRDVYADRGHTSADREGWLKDH--GYRNQIQRKGYRN-KPLSECQQRRNHRFTIAKTRARVEHVFA 9 | ISAba23.1|IS5|IS5|Transposase|Acinetobacter/142-285 IIDASIVEAKNKRPKKGKHTDNT-----------------------------QDNEAAYVSKKDS--------------TGKVKTTYGFKIHLNCDED-SFVKKVETTPANVHDS--QCFTTLLT----DDESAVYADSAYKSQAHDDYLAEHEPPIKNHIHDRAWRN-TPLTEQQKRNNTQ--KSQTRNTVERVFG 10 | ISArch6.1|IS5|IS5|Transposase|Uncultured/139-270 VQDATFITSDPGHAKADTPRG------------------------SEAK-TRRSKDGTWAKKGSK-------------------SFFGYKLHSKPDMDYGLIRDLETTTAAIHDS--Q-----VDL--SMEGEVVYRDKGYHGSTPKGYSA---------TMKRRARD-HPLSIMDKLRNIR--ISKKRAPGERQYA 11 | ISAtu2.1|IS5|IS5|Transposase|Agrobacterium/146-303 MLDATIIETAAARPPSGRNRDLD--EAAAL---------ADPHE-QEEHGPAHDADARFVRHAGK-----------------TGSAYGYKAHVGVDEGSGLIRRIITTPANVNDTVVA--DMLVV----GDEAYVLADGAYHTHGREKELKAR--GIKPRLMRRPNRHHRMLSPRLARFNLL--ISRRRSAVETTFA 12 | ISAzo11.1|IS5|IS5|Transposase|Azoarcus/146-300 IIDATLVPAPKQHNRRGEKELIE--QGAMP---------ASWR---PAKRRQKDLDATWTKKHGK-------------------SHFGYKLSINADKKYKIIRRLETDTASTHDS--QHFDNVFDT--RNTSRDVYADRGYPSEQREAWLKEN--GFRNRIQRKGKRN-KPLSECQQRRNKR--IAKTRARVEHVFG 13 | ISAzo39.1|IS5|IS5|Transposase|Azoarcus/107-261 IIDATLVPAPKQRNSREENKLVK--EGAMP---------ADWK---PAKRRQKDTDATWTKKHGK-------------------SHFGYKLSINVDKKYKVIRKIETGTASIHDS--QHFESVFYT--LNTSRDVYADRGYTSKGREDWLKDK--GYRNQIQRKGTRN-KPLSECQQGRNHR--IAKTRARVEHVFA 14 | ISAzo41.1|IS5|IS5|Transposase|Azoarcus/146-300 IIDATLVPAPKQHNSRGEKALID--QGAMP---------ADWK---PAKRRQKDTDATWTKKHGK-------------------SHFGYKLSINVDKKYKFIRTLETDTASTHDS--QHFDNVFDT--SNTSRDVYADRGYPSEERAAWLKAN--GFRNQIQRKGQRN-KPLSECQQRRNTR--IARTRARVEHPFA 15 | ISCARN8.1|IS5|IS5|Transposase|Metagenomic/147-301 IIDATLVPAPKQHFSRDDKEQIK--KNAMP---------ADWS---PAKRRQKDLDATWTKKHGK-------------------STYGFKLSIGVDRKHKLIRKLVTDTASVHDS--RHFEVVLDD--WNTSAEVYADRGYPSQEREEQLKAQ--GYRSRIQRKGSRN-HPLSECQQRRNHK--IAKVRARVEHVFG 16 | 
ISCca3.1|IS5|IS5|Transposase|Candidatus/144-298 VIDASITPTSRGPKGKKSYNLHE--DGTIS--------------VSHSYQKGVDQEASWTKKGHD-------------------LYYGYKRHVLVESKEGLVLAVSTTKASNHDG--AHLPILLDKVSLKAGSRLYGDKGYSGLPNETLLKKK--SLKSAIQKKATKN-HPLSPTAKRFNKL--VSKTRYKVERVFG 17 | ISCca7.1|IS5|IS5|Transposase|Cardinium/124-278 VIDASITPTPRRPKGKKSYDLRE--DGTIS--------------VSHSYQKGVDPEASWTKKGHN-------------------LYYGYKRHVLIESKEGLVLAVTTTKASSHDG--AHLPILLDKVSLKAGSRLYGDKGYSGLPNETLLKNK--KLKSAIQKKGNRN-HPLSPAAKRFNKF--VSKTRYKVERVFG 18 | ISMac22.1|IS5|IS5|Transposase|Methanosarcina/139-270 IQDATFIHSDPEHAKADKPRG------------------------NEAK-TRRSRDGTWTKKGSK-------------------LYFGYKLHSIIDRDYELIRRFKTTTALVHDS--Q-----VDL--SEENEVVYRDTGYFGAEAKGFAA---------TMKRAVRG-HPLGIRDIIRNKR--ISLKRVPGERVYA 19 | ISMbu1.1|IS5|IS5|Transposase|Methanococcoides/138-269 IQDATFIHADPGHANLDTPRG------------------------NEAK-TRRCKDGTWTKKASK-------------------SHFGYKLHTIEDTEYDLIRRYRTTTASVHDS--Q-----VDL--SEEGEVVYRDRGYFGAISKGYDA---------TMQRGVRG-HPIGIRDKMRNKR--ISRKRAKGERPYA 20 | ISMex35.1|IS5|IS5|Transposase|Methylobacterium/148-319 IVDATVVAAPKQRNTEAEKAELK--GGRVP---------DAWKA-KPAKLAQKDRDARWTIKWSKAKPAED----GGKRIDLAVPAFGYKNHVGIDRRHGLIRTWIATDAARHDG--AQLPGLLSK--ANTAGDVWADTAYRSKANEAHLAAH--GFRSRIHRKKPPG-KPMPRNVARANGG--KSKVRAAVEHVFA 21 | ISPg8.1|IS5|IS5|Transposase|Porphyromonas/149-316 LVDASLVETPHKPNGTITIEVAD--DREDNRSEAEKEAEEDYQKQVVRRRKGTDEEARWVYKQKR-------------------YHYGYKKHCLTNV-QGIVQKVITTAANRSDT--KEFIPLLQGANIPQGTAVLADKGYACGENRSYLQTH--HLQDGIMHKAQRN-RALTEEEKQGNKA--ISPIRSTIERTFG 22 | ISPmar3.1|IS5|IS5|Transposase|Paracoccus/145-301 ILDASIVPVPRNRNTRDENKAIK--NGEMP---------EDWAD-KPAKRSQKDVDARWTKKHGK-------------------SHYGYKNHVNMDKKHKLVRRYHVSDAALHDS--QAVDHLLTR--GNTGAGVWADPAYRSEEMETKLRSM--KLKSHIHRKGKRG-KPLTEQAKGSNRT--KSSVRARVEHIFG 23 | ISPso3.1|IS5|IS5|Transposase|Paracoccus/145-301 
ILDASIVPVPRNHNTRDENKAIK--NGEVP---------EDWAD-KPAKRSQKDVDARWTKKHGK-------------------SYYGYKNHVNVDRKHKLVRCYHVSDAALHDS--QAVDHLLTR--GNTGAGVWADPAYRSEEMEARLRSM--KLKSHIHRKGKRG-KPLTEQAKSSNRT--KSSVRARVEHVFG 24 | ISSsp126.1|IS5|IS5|Transposase|Sphingomonas/143-316 IIDATVVPAPKQRNTQEEKAAIK--EGRIP---------QDWN---PAKTRQKDRDARWSIKYTKAKVREGDDPKATKPVDLAIPMFGYKNHIGIDRVHGLIRTWDASAANAHDG--ARLPDLISK--ENTASGVWADTAYRSKKNEAFLARG--MFTSNIHQKRLPK-RPLPERIARANAR--RSKVRAAVEHVFA 25 | ISSth7.1|IS5|IS5|Transposase|Streptococcus/141-287 IVDASFVECPKQRNSREENEKIK--AHETV---------EGWN---KAKRCQKDIDGSWTQKGGV-------------------RYFGYKNHVCVDRKSKLIKDYGVTTASIHDS--NVLASLCDA-----NEPVFDDSAYVGKSVP-----D--NCQHHTVRRAFRN-KPLTDTDKNINRH--IAKVRCRVEHVFG 26 | // 27 | -------------------------------------------------------------------------------- /data/models/msa/IS5_ISH1/IS5_ISH1_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISH1.1|IS5|ISH1|Transposase|Halobacterium/110-237 AAIDASGFQRDQTSYHYRDRANYSFQSMKTTILIDVNSLAIKDVHYTTQK-PGTATLGCRSSAGNAEDLRVLSADANYSWSDLREECRSESTRPLIKHREQTPLQKAHHARMNED-YNQRWMSETGFSQL 3 | ISH19.1|IS5|ISH1|Transposase|Haloarcula/112-240 AAIDATFFDRENASKHYCRRTNYRVQTLKATALVDTESQAILDVHCTTEK-RHDTQLGWQVARATRGDLASLAADKGYDWMDLREKLREEGVRPLIKHREFRPIDHAHNARIDGPRYRQRAMCETVFSTI 4 | ISH1A.1|IS5|ISH1|Transposase|Halobacterium/110-237 AAIDASGFQRDQTSYHYRDRANYSFQSMKTTILIDVNSLAIKDVHYTTQK-AWDGHIGMQVFRRNAEDLRVLSADANYSWSDLREECRSESTRPLIKHREQTPLQKAHNARMNED-YNQRWMSETGFSQL 5 | ISH28.1|IS5|ISH1|Transposase|Halobacterium/112-240 AAIDATFFDRETASKHYCRRTNYRVQTLKTTALVDTETQAILDVHCSTGK-PHDTQLGWQVARRNAGDLASLAADKGYDWMELRKKLREDGVRPLIKHRIFRPIDHAHNARVDGPRYRQRAMCETVFSTI 6 | ISH9.1|IS5|ISH1|Transposase|Halobacterium/112-240 AAIDATFFDRETASKHYCRRTNYRVQTLKTTALVDTETQAILDVHCSTGK-PHDTQLGWQVARRNAGDLASLAADKGYDWMELRKKLREDGVRPLIKHRIFRPIDHAHNARVDGPRYRQRAMCETVFSTI 7 | ISHalh2.1|IS5|ISH1|Transposase|Halorubrum/110-238 
AALDSTFFDRRRASSYFRQRAGRTIQTLKVTTLTDVESLAVLDVHIAARW-KHDTKTGPQVVRRNADDLQSVAADNGFQDWHTEYEIAAHDVEYLVHYRGSSAKAAANNALNRANGYSQRWMAETSYSTT 8 | ISHla2.1|IS5|ISH1|Transposase|Halorubrum/112-240 VGVDASGFDRSHASKHYTKRAELTIQQLKVTLLVDTKVNAILDLHVTTTR-KHDSQIAPSLVKRNPETIDILLGDKGYDDQKIRRLARHHEVRPLIKHREFTSLHKAWNARLDADLYGQRSQSETVNSTL 9 | ISHla3.1|IS5|ISH1|Transposase|Halorubrum/110-238 AALDSTFFDRRRASSYFRQRAGRTIQTLKVTTLTDVESLAVLDVHIAARW-KHDTKTGPQVVRRNADDLQSVAADNGFQDWHTEYEIAAHDIDYLVHYRGSSANAAAKNALNRAKGYSQRWMAETSYSTT 10 | ISHla4.1|IS5|ISH1|Transposase|Halorubrum/112-240 VGVDASGFDRSHASKHYTKRAELTIQQLKVTLLVDAKVNAILDLHVTTTR-KHDSQIAPSLIKRNPDDIDVLLGDKGYDDQKIRRLARQHEVRPLIKHREFTSLHKAWNVRLDTDLYGQRSQSETVNSTL 11 | ISHla7.1|IS5|ISH1|Transposase|Halorubrum/112-240 VGIDASGFDRSHASKHYTKRTKLTIQQLKVTLLVDTRVNAIIDLHVTTTR-KHDSKIAPSLIRRNTDDVTILLGDKGYDDQKIRTLAREDGVRPVIKHRGFSSLHKAWNVRLDADIHGQRSQNETVNSRI 12 | ISHli1.1|IS5|ISH1|Transposase|Halohasta/109-237 ASIDATGFDRDQPSRHYANRTSYRVRALKVTALVDVETLYITDIHCTTTK-KHDAKIGPQVARRNAADLRSLAADRGYDSKAFRDELRGYGVRPLIKHRIYSSLDHAHNARMDSDRYHQRSMSETVFSSI 13 | ISHli2.1|IS5|ISH1|Transposase|Halohasta/112-240 AAMDATFFDRETASKHYCRRTNYRVQTLKTTALVDTQTQAVLNVHCTTEK-RHDTQIGWQLARRNAGEIASLAADKGYDWMQLREKLREEGVRPLIKHREFRPVDCAHNARIDESLYGQRALSETVFSTI 14 | ISHma10.1|IS5|ISH1|Transposase|Haloarcula/120-248 AAIDATYFDRQQASSHYLRRIDRSVKTIQTTFLVDTAEGAILDLHCSTKW-PDETKIGPKVALRNAGDLRSLAADKGYDDMSFREELRNAGICPLIKHRVFAPYDHAHNARIDDELYGQRSQTESVNSSI 15 | ISHma11.1|IS5|ISH1|Transposase|Haloarcula/110-238 AALDSTFFDRRSASSYYRQRSGSNVQTLKVTTLTDRESLAVLDVHISARW-KHDTKTGTQVVRRNADDLLSVAADKAFHSWITKYEFYALGVEPLILQRGSRPLTLGHNALIRTKGYSQRWMAETSYSTT 16 | ISHma13.1|IS5|ISH1|Transposase|Haloarcula/112-240 VGVDASGFDRSHASKHYTKRAELTIQQLKVTLLADAKVNAILDLHVTTTR-KHDSQIAPSLIKRNPDDIDILLGDKGYDDQKIRRLARYHEVRPLIKHRESTSLHKAWNARLDANLYGQRSQSETVNSTL 17 | ISHma8.1|IS5|ISH1|Transposase|Haloarcula/114-242 AAIDATFFDRENASKHYCRRTNYRVQTLKATALVDTESRAILDVHCTTEK-RHDTQLGWQVARRNAGDLASLAADKGYDWMDLREKLREEGVRPLIKHREFRPIDHAHNARIDGPRYRQRAMCETVFSTI 18 | 
ISHma9.1|IS5|ISH1|Transposase|Haloarcula/110-238 VALDSTFFDRRISSSYYRQRSGNSVQTLKVTTLTDVESLAVLDVHISARW-QHDTKTGPQVGRRNADDLLSVAADNGFQDWNTEYEIAALDIDYLVHYRGSSLNATANNTLIRSKVYSQRWMAETSYSTI 19 | ISHmu1.1|IS5|ISH1|Transposase|Halomicrobium/120-248 QAIDATGMDRIAASQHYAKRTNYTFEAVKTTLLSDCKTGAILDIHCSMKQ-PHDSKIGWQMVKRNLDKLNILTADKGYDWWLLRQRLRAEGVKPVIKHREFGWHGIANNFLQDDTIYHQRSNAESTFFAL 20 | ISHtu1.1|IS5|ISH1|Transposase|Haloterrigena/113-241 QAIDATGFKRHQASRHYVLRVGYNFDDIKTTALVDCDTSVILDIHCSMKQ-PHDTHVGRQVLMRNLAQLTTITADKSYDWDVLRHELRDIGIRPVIKHREFYALDKAHNARHDENVYHRRSIVEAIFFAL 21 | ISHut1.1|IS5|ISH1|Transposase|Halorhabdus/109-237 AAIDSTGFDRDQPSRHYANRTNYRVRALKVTALVDVETLYITDIHSTTSK-KHDAKIGPQVARRNAGDLRSLAADRGYDAKTFRDELRENGIRPLIKHRIMNPLDHAHNARMDGDRYHQRSMSETVFSSI 22 | ISHvo1.1|IS5|ISH1|Transposase|Haloferax/112-240 AAIDATFFDRENASKHYCRRTNYHVQTLKATALVDTQTQAVLDVHCTTEK-THDTQLGWQVACRNAGDLHSLAADKGYDWMQLREKLREEGVRPLIKHRIFRPIDHAHNARVDGPRYRQRSMCETVFSSI 23 | ISHvo2.1|IS5|ISH1|Transposase|Haloferax/112-240 VGIDASGFDRSHASKHYTKRTKLTIQQLKVTLLVDTRANAIIDLHVTTTR-KHDSQIAPSLIKRNTGEVAILLGDKGYDDQKIRALARDEGVRPLIKHRKFSSLHKAWNARLDADIYGQRSQSETVNSNL 24 | ISHvo3.1|IS5|ISH1|Transposase|Haloferax/112-241 GAIDATFYERSAASRHYCQRTSYRVQKLKVTKLVDTDSQAVLDVHCSTNRKGSDADLAEQIARRNAGDLRSLAADKGYDKKSLRESLRNLGIRPLIKHRIFAPYDHAHNARIDEQRYNQRSMSETVNSAV 25 | ISHvo4.1|IS5|ISH1|Transposase|Haloferax/110-238 AALDSTFFDRRRASSYFRQRSGSTVQTLKVTTLTDVESLAVLDVHITARW-KHDTKTGPQVVRRNADDLQSVAADNGFQDWHTEYEIAAYDVEYLVHYRGSSPKAALNNALNRANGYSQRWMAETSYSTA 26 | ISHwa1.1|IS5|ISH1|Transposase|Haloquadratum/112-240 AVIDATFFDRENASKHYCRRTNYRVQTLKTTTLVDTESQAILDVHCTTEK-RHDTQLGWQVARRNAGDLTSLAADKGYDWMELREKLREDGVRPLIKHREFRPIDHAHNARIDGPQYRQRAMCETVFSTI 27 | ISHwa3.1|IS5|ISH1|Transposase|Haloquadratum/112-241 GAIDATFYERSAASRHYCQRISYRVQKLKVTKLVDTASQAVLDVHCSTNRKGSDADLAEQIARRNAGDLRSLAADKGYDKQSLRESLRDLGIRPLIKHRIFAPYDHAHNARIDEQRYNQRSMTETVNSAV 28 | ISHxa2.1|IS5|ISH1|Transposase|Halopiger/112-240 
VGVDASGFDRSHASKHYTKRAELTIQQFKVTLLVDAKVNTILDLHVTTTR-KHDSQIAPSLIKRNPEDIDILLGDKGYDDQKIRRLARQHEVRPLIKHREFTSLHRAWNARLNADLYGQRSQSETVNSTL 29 | ISNagr1.1|IS5|ISH1|Transposase|Natronobacterium/112-240 AAMDATFFDRENASKHYCRRTNYRVQTLKTTALVDTESQAILDVHCTTKK-RHDTQIGWQLARRNAGELHSLAADKGYDWQRFRDKLREEDVRPLIKHREFRPIDHAHNARIDGTLYGQRALSETVFSVI 30 | ISNagr12.1|IS5|ISH1|Transposase|Natronobacterium/114-243 AAIDATFYERDRASRHHCQRTNYRVQTLKVTKLVDTATQAVLDLHCSTTLEGSDADLCEQIARRNAGDLRSLAADKGYDKQQLRDRLRELDIRPLIKYRIFVPYDHAHNARIDEDRYAQRSMTETVNSAV 31 | ISNamo1.1|IS5|ISH1|Transposase|Natronomonas/110-238 ASIDASGFQRDQASSHYRNRVGYSFNAMKTTLLVDTDSLAIMDAHFTTKK-AYDGHIGLQVFRRNAEDLQELLADKMYSWSDLREACRDASTRPVIKHCEQNGLKKAHNARIDDDVYNQRSMSETVFAMV 32 | ISNamo2.1|IS5|ISH1|Transposase|Natronomonas/113-241 AAIDATYFDRHQASTHYLNRCDRKVQTVQATFLVDTAHGAVIDVHCSTKW-PNGTNVGPQVALRNAGDLRSLAADKGYDDMSFRDQLRSEGVRPLIKHRVFAPYDHAHNARIEDDLYNQRSVCETVNSVI 33 | ISNma1.1|IS5|ISH1|Transposase|Natrialba/113-241 QAIDATGFKRHQASRHYVLRVGYNFDDIKTTALVDCDTSVILDIHCSMKQ-PHDTQVGRQVLTRNRTRLSTIAADKSYDWDALRHELRDAGIRPVIKHREFYALNKAHNARHNETVYHRRSIAEVIFFAL 34 | ISNma2.1|IS5|ISH1|Transposase|Natrialba/112-240 AGIDASGFDRSHASKHYTKRAKLTIQQLKVTLLVDSKVNAVLDLHVTTTR-KHDSQIAPSLIKRNPETIDILLGDKGYDDQKIRRLARQHEIRPLIKHREFTPLHKAWNARLDADLYGQRSQSETVNSTL 35 | ISNpe14.1|IS5|ISH1|Transposase|Natrinema/110-238 AAIDASGFQRDQTSYHYRNRAGFSFHKLKTTILVDTESLAIKDVHFTTKR-KWDGHIGLQVYRRNAEDLQEFLADANYSWSDLREECRAGATRPLIKHREHNALKKAHNARMDEDLYHQRTLSETAFSLL 36 | ISNph4.1|IS5|ISH1|Transposase|Natronomonas/110-238 AALDSTFFDRRSASSYYRQRSGSNVQTLKVTTLTDRESLAVLDVHISARW-KHDTKTGPQVVRRNADDLLSVAADKAFHNWITKYEFYALGVEPLILQRGSRPLTVGYNALIRTKGYAQRWMAETSYSTT 37 | // 38 | -------------------------------------------------------------------------------- /data/models/msa/IS5_None/IS5_None_m1_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | #=GF Alignment Program ClustalOWS 3 | ISNpe2.1|IS5|None|Transposase|Natrinema/128-281 
LLDSTYSIDSTHIEAIQYNDAASWNYDPTAEEYYYG--FGCTIVSTGAKIPIAAEFTQAKQADQETAMRVTGDALAVDTPIWMLGDSAYDILDWHDYLLTAGVVPIAPYNPRNTDDPKDIEYRVEDRIEEHSEDVQLKRSILDETFNNRTGIERTN 4 | ISNpe19.1|IS5|None|Transposase|Natrinema/128-281 LLDSTYSIDSTHIEAIQYNDAASWNYDPTAEEYYYG--FGCTIVSTGAKIPIAAEFTQAKQADQETAMRVTRDALAVDTPIWMLGDSAYDILDWHDYLLAAGVVPIAPYNPRNTDDPKDIEYRVEDRIEKHSEDVQLKQSILDETYNSRTGVERTN 5 | ISNpe18.1|IS5|None|Transposase|Natrinema/128-281 LLDSTYRIDSTHVTAIQYNDEATWNYDSTAEEHYYG--FGCVIVSAGPKIPIAAEFTQRKQIDAETAMRVTKDALAVDTPIWMLGDSAYDVLEWHDFLLSQGVVPIAPYNPRNTDDPLDIEYRVEDRIDEHAEDISLKQSVLAETYDHRTQVERTN 6 | ISNpe17.1|IS5|None|Transposase|Natrinema/126-279 LLDLTYSIDSTDVRTMPADPDASKCYDPTAEEYYYG--YGCTIVSTGSKIPIAAEFTESKQAPEETAMRVTRDALAVAKPIWMLGDSAYDTLDWHDHLLAAGVVPVAPYNPRNTDDPKDIEYRVEDRIEQHSEDVQLKQSTLNETYNRRTGVERTN 7 | ISNpe16.1|IS5|None|Transposase|Natrinema/126-279 LLDLTYSIDSTDVRTMPADQDASKCYDPTAEEYYYG--YGCTIVSTGSKIPIAAEFTESKQAPEETAMRVTRDALAVDTPIWMLGDSAYDTLDWHDYLLTAGVVPIAPYNARNTDDPLDIEYRVEDRIKEHSKDVKLKQSTLDETYNRRTGVERTN 8 | ISNpe15.1|IS5|None|Transposase|Natrinema/128-281 LLDSTYSIDSTHIEAIRYNDAASWNYDPTAEEYYYG--FGCTIVSSGAKIPIAAEFTQAKQASQETAMRVTSDALAVDTPTWMLGDSAYDILDWHDHLLAAGVVPVAPYNPRNTDDPKDIEYRIEDRIEEHSEDVQLKQSVLDETYNRRTGVERTN 9 | ISNamo4.1|IS5|None|Transposase|Natronomonas/126-279 LLDLTYCIDSTDVRAMPADQDASKCYDPTDDEYYYG--YGCTIASTGQKIPIAAEFTESKQAPEETAMRVTRDALAVAKPIWMVGDSAYDTLDWHDHLLAAGVVPVAPYNARNADDPKDIQYRVEGRIDEHSEDVQLKQSTLDETYNRRTGVERTN 10 | ISHwa20.1|IS5|None|Transposase|Haloquadratum/126-281 LLELTYSIDSTDVRAMPADPDASKCSDPTDDEYYYGYGYGCTIVSTGQKIPIAAEFTESKQAPEETAMRVTRDALAVEKPIWMLGDSAYDTLEWHDHLLTAGVVPVAPYNPRNTDDPKNIEYGVEDRIEKHSNDVQLKQSTLDETYNRRSGVERTN 11 | ISHwa14.1|IS5|None|Transposase|Haloquadratum/128-281 LLDSTYSIDSTHVEAIQHNDAASWNYDPTAEEYYYG--FGCTLVSTGAKIPIAAEFTQAKQADQETAMRVTRDALAVDTPIWMLGDSAYDILDWHDHLLTAGVVPVAPYNPRNTTDPKDIEYRVEDRIEEHSEGVQLKQSILDETYNNRTGVERTN 12 | ISHvo7.1|IS5|None|Transposase|Haloferax/128-281 
LLDSTYRIDSTHVEAIQWNDEASWNYDSTAEEHYYG--FGCTIVSTGAKIPIAAEFTQAKQASEETAMRVTRDALAVDTPIWIIGDSAYDTLQWHDFLLDAGVVPVAPYNPRNTDEPLDIEYRVEDQIEEHSEDLQLKQSVLEETYNRRTQVERTN 13 | ISHti8.1|IS5|None|Transposase|Halorhabdus/128-281 LLDSTYSIDSTHVEAIQYNDAASWNYDPTAEEYYYG--FGCTIVSTGAKIPIAAEFTQAKQANKETAMRVTRDALAVDTPIWMLGDSAYDILDWHDLLLAAGVVPIAPYNPRNTDDPKDIKYRVEDRIKKHSEDIRLKQSILDETYNDRTGVERTN 14 | ISHti7.1|IS5|None|Transposase|Halorhabdus/128-281 LLDSTYSIDSTHIEAIQYNDAASWNYDPTAEEYYYG--FGCTIVSTGAKIPIAAEFTQAKQADQETAMRVIRDALTVDTPIWMLGDSAYDILDWHDHLLAAGVVPIAPYNPRNTDDPKDIEYRVEDRITEHSEDVQLKQSILDETYNKRTGVERTN 15 | ISHma6.1|IS5|None|Transposase|Haloarcula/126-279 LLDLTYCIDSTDVRAMPADQDASKCYDPTDEEYYYG--YGCTIVSTGQKIPIAAEFTESKQAPEETAMRVTCDALAVAKPIWMVGDSAYDTLDWHDHLLAAGVVPVAPYNARNADDPKDIEYRVEDRIEKHSEDVQLKQSILNQTYNRRTGVERTN 16 | ISHla6.1|IS5|None|Transposase|Halorubrum/128-281 LLDLTYSIDSTDVRTMPADQDASKGYDPTAEEYYHG--YGCTIVSTGQKIPIAAEFTESKQAPEETAMRVTCDALAVEKPIWMLGDSAYDTLGWHDHLLAAGVVPVAPYNARNTDDPKDIEYRVEARIDEHSEDVQLKQSTLDETYNRRSGVERTN 17 | ISH11.1|IS5|None|Transposase|Halobacterium/126-279 LLDLTYCIDSTDVRAMPADQDASKCYDPTDDEYYHG--YGCTIVSTGQKIPIAAEFTESKQAPEETAMRVTRDALAVAKPIWMVGDSAYDTLDWHDHLLAAGVVPVAPYNARNTDDPKDIEYRVEDRIEQHSEDVQLKQSTLDETYNRRTGVERTN 18 | // 19 | -------------------------------------------------------------------------------- /data/models/msa/IS5_None/IS5_None_m3_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISSulsp1.1|IS5|None|Transposase|Sulfurihydrogenibium/112-245 IADATGFAYGDI-YSLKWRRGIEVKQ--------------IKSHIRLEVIMAVDNEDKVVILGCETGKAYASEIKMLNQILDKVDFIKGL-PF---IADKGYD-SISVIQKILDIGLIPAIKIKETFRIK--IKH---------------------PL----RQL--SKENWLKYGKKRYRIE 3 | ISPac1.1|IS5|None|Transposase|Candidatus/125-260 AIDSTGYSKRNVSYHYLWRIKGRIR---------------HREHVKHSI--AVDVDRQAIIAAIDRSNPYAADTKDFIPLVKKVASVVTP---CKILADRGYDSEKNNEFC-YRIGAKPLIPIRNANVPVWKTHG---------------------MHRKEAKR-----RFDWKSYDKRSLVE 4 | 
ISMru1.1|IS5|None|Transposase|Methanobrevibacter/124-298 IVDATPVDVDI-----NFHRNKKTKEHLEKINLKWSYSSSKGYYIGFKATVVLDYDSMNPVCIL-VHSGAPNDAKLFEEILENLQKRRIIRKGDTLIFDKGYYSYKNYQIGISKYKIIPFIFPKEKFSRT-RLDDILTYPLAVFNKTKRI-MKEKRLYNSLKMELMKKIDSWEKFKPIRGKIE 5 | ISMmi1.1|IS5|None|Transposase|Methanobrevibacter/124-298 IVDATPVDLDF-----NFNRNKKTKEHLKTLNLKWSYSSSKGFYIGFKATVIIDYDSMNPVSIL-IHSGAPNDAKLFDEIMENLQKRRIIRKGDTIILDKGYYSYKNYQLGISKYKIVPFIFPKDNFNKT-KLNDQLSYPLQVFNKTKKI-LAQKQFYNNLKIELFKKLDDWKKFKPIRGKIE 6 | ISMhu10.1|IS5|None|Transposase|Methanospirillum/136-305 IIDGTSLSVDI-----NWFRKRIQKSKLVNKPYNWQYSPYKGFYIGLKLTLAIDQNNLLPLAFL-VHQNPVADSKIFPLILQELKRRRILNKGDRVLLDRGYYSYKNYVLSLIEYSVIPLILPKKGFSFS-KLENYLISPLTWFENKKSYQK--VIELSHMKKELKDALENSDELAAQR---- 7 | ISMeva1.1|IS5|None|Transposase|Methanosarcina/124-298 IIDSTDINLDL-----NWFKRKISKKMLEDREFKWGHSKHRGYFIGMKLSLAIEYPSLKPLTFI-INEANVSEYTVYPQILEELKKRKKIRQGDVLYFDKGYFSYENYVIGIAKYKIVPIIFPRINCNYN-KLFNMLSYPLNIFDSKRNT-EEDKKIYKRLVVKFKTLMENWGGLKPIRSLIE 8 | ISMev1.1|IS5|None|Transposase|Methanohalobium/125-256 AVDSTGFTSGHCSYYYSLRTDKK-----------------RHSFLKTSI--AVDTSNLVIIGVKVSGR-PVHDTKHAVTLLKQCHRIRKS---EFYVMDKAYDSEDIHKLVRDELGSIAIIPVKDRKRKR--IKG---------------------RYRRLMIE-----DFDVALHRKRSMSE 9 | ISMets1.1|IS5|None|Transposase|Methanosarcina/124-298 IIDSTDINLNL-----NWHAKKITKKSLENKEYKWGHSTHRGFFIGMKLTLALEYQTLKPLAFL-INEANVPETKIYPEILKELKRRRILKAGDIVYADRGYYSYENYVISVREFKVVPLIFPRKNCNFK-KLFNMLRYPLKIFDSRRDT-EKEIKIYKEIIAKFKELISKWKEFRPVRSIIE 10 | ISMepa1.1|IS5|None|Transposase|Candidatus/115-246 AIDSTGFTSPYASHYYSWRTGKT-----------------RKNFLKTSI--AVDTCKQVILFSKISLK-PVHDTKHAEPLLRQCQRTRKT---GCYVMDKGYDSEKLHRQIREEMGADSVIPVRTWKGKI--YSG---------------------KYRQEMYN-----NFDSKRYYERNKVE 11 | ISMeme1.1|IS5|None|Transposase|Methanococcoides/114-245 GIDLTGFTSGYCSNYYSWRIKKW-----------------RRSYVKTSI--SVDVHKFVITGYKISGK-PVHDAKHAKTLLSQCHRNRKS---RYYVMDKAYDSEGNHKLTREKLRSIAIVPLRQRERKR--IKG---------------------HYRKKMLR-----EFDDEIYSLRNLSE 12 | 
ISMem1.1|IS5|None|Transposase|Methanohalophilus/115-246 AIDSSGFTGGHCSYYYSVRTGKK-----------------RRSYLKTSI--AVDVEKFIVTGFKISGK-PVHDAKHALTLLRQCHKRRKA---DCYLMDKGYDSEKIHALINEELKAEAIIPVRCRKRKK--IKG---------------------KYRRKMRD-----EFDEDIYHYRNLVE 13 | ISMefo1.1|IS5|None|Transposase|Methanobacterium/124-298 IVDATPVDLDY-----NTKRKHRSKKYLKKQNLKWSYASSYGFYIGFKATIVIEHESAMPVAIL-IHSGAPHDTKIFTEIMENLRKRRIIRKGDTIIFDRGYYKYENYQIGISKYKIVPLIFPKEKFKLQ-KLKDKLTYPLRVFKDKKTE-NKSKRLYKILKRILIQKIQNWKRYKPIRGKIE 14 | ISMbu10.1|IS5|None|Transposase|Methanococcoides/23-154 AIDSTGFTSGYCSNYYSWRIKKW-----------------RRSYVKTSI--SVDVHKFVITGYKISGK-PVHDAKHAKALLSQCHHNRRS---RYYVVDKAYDSENIHELTREKLGSIAIVPLRQRERKR--IKG---------------------RYRKKMIH-----EFDSKIYSMRNLSE 15 | ISDith1.1|IS5|None|Transposase|Dictyoglomus/117-259 LVDGAGFGYNFK-IKQRLYFGREIRE--------------KRDHVKCELLVLVDERGKSYIVGVFVDDGYKDERKILKSKFEEVKKLKEEVDFRKVYGDRLYNRDIELLREFEKEGVEMILPVEDGIHNK--VKS---------------------EERKRVKES--YNRKRHAYNRNRYKIE 16 | ISArch21.1|IS5|None|Transposase|uncultured/127-302 IGDTTNLTVDL-----NWLRKKYKKEDLKDTDYKWAYSKSKGYYIGMKLVLAIEYPSLKPLAFL-VFPGGPSDSKIFDKIVAELMRRRILRKGDMFVLDKGFYAYKHYTDGLLKYDIIPLIFPRKNFKIE-KVLNSIQLILDFFCDKADRIKMKIRNLKAILSEFKQNILNWKEFKPKRSLIE 17 | ISA1218.1|IS5|None|Transposase|Archaeoglobus/125-290 ILDWTDISLDL-----NPFRRR----ELSNKPYKWGYST-KGFFLGMKMMILIDYNTLTPLFFH-IYPANTHESRIYPVILEMLKRRKLIRFGDSIIMDRGFYAYKNYLIGL-RYGIIPLIIPRKNFREE-RLKGMVSYPLSIFASKNVEK--EKKRYRKLVRKLFEGL--KQNLKTLRSIIE 18 | // 19 | -------------------------------------------------------------------------------- /data/models/msa/IS5_None/IS5_None_model1_final.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISHla6.1|IS5|None|Transposase|Halorubrum/133-263 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTDVRTMP--ADQDASKGY-DPTAEEYYHG--YGCTIVSTGQKIP----IA--AEFTE----------SKQAPEETA-----MRVTCDALAVEKPIWMLGDSAYDTL-GWHDHLLAAGVV--P-------VA---PYNARN---------TDDPKDIEYRVEARID---EHSEDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 3 | ISH11.1|IS5|None|Transposase|Halobacterium/131-261 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YCIDSTDVRAMP--ADQDASKCY-DPTDDEYYHG--YGCTIVSTGQKIP----IA--AEFTE----------SKQAPEETA-----MRVTRDALAVAKPIWMVGDSAYDTL-DWHDHLLAAGVV--P-------VA---PYNARN---------TDDPKDIEYRVEDRIE---QHSEDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 4 | ISNamo4.1|IS5|None|Transposase|Natronomonas/131-261 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YCIDSTDVRAMP--ADQDASKCY-DPTDDEYYYG--YGCTIASTGQKIP----IA--AEFTE----------SKQAPEETA-----MRVTRDALAVAKPIWMVGDSAYDTL-DWHDHLLAAGVV--P-------VA---PYNARN---------ADDPKDIQYRVEGRID---EHSEDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 5 | ISHma6.1|IS5|None|Transposase|Haloarcula/131-261 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YCIDSTDVRAMP--ADQDASKCY-DPTDEEYYYG--YGCTIVSTGQKIP----IA--AEFTE----------SKQAPEETA-----MRVTCDALAVAKPIWMVGDSAYDTL-DWHDHLLAAGVV--P-------VA---PYNARN---------ADDPKDIEYRVEDRIE---KHSEDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 6 | ISNpe17.1|IS5|None|Transposase|Natrinema/131-261 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTDVRTMP--ADPDASKCY-DPTAEEYYYG--YGCTIVSTGSKIP----IA--AEFTE----------SKQAPEETA-----MRVTRDALAVAKPIWMLGDSAYDTL-DWHDHLLAAGVV--P-------VA---PYNPRN---------TDDPKDIEYRVEDRIE---QHSEDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 7 | ISHwa20.1|IS5|None|Transposase|Haloquadratum/131-263 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTDVRAMP--ADPDASKCS-DPTDDEYYYGYGYGCTIVSTGQKIP----IA--AEFTE----------SKQAPEETA-----MRVTRDALAVEKPIWMLGDSAYDTL-EWHDHLLTAGVV--P-------VA---PYNPRN---------TDDPKNIEYGVEDRIE---KHSNDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 8 | ISNpe16.1|IS5|None|Transposase|Natrinema/131-261 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTDVRTMP--ADQDASKCY-DPTAEEYYYG--YGCTIVSTGSKIP----IA--AEFTE----------SKQAPEETA-----MRVTRDALAVDTPIWMLGDSAYDTL-DWHDYLLTAGVV--P-------IA---PYNARN---------TDDPLDIEYRVEDRIK---EHSKDV--KLK----------------------------------------------------------------------------------------------------------------------------------------- 9 | ISHvo7.1|IS5|None|Transposase|Haloferax/133-263 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YRIDSTHVEAIQ--WNDEASWNY-DSTAEEHYYG--FGCTIVSTGAKIP----IA--AEFTQ----------AKQASEETA-----MRVTRDALAVDTPIWIIGDSAYDTL-QWHDFLLDAGVV--P-------VA---PYNPRN---------TDEPLDIEYRVEDQIE---EHSEDL--QLK----------------------------------------------------------------------------------------------------------------------------------------- 10 | ISNpe18.1|IS5|None|Transposase|Natrinema/133-263 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YRIDSTHVTAIQ--YNDEATWNY-DSTAEEHYYG--FGCVIVSAGPKIP----IA--AEFTQ----------RKQIDAETA-----MRVTKDALAVDTPIWMLGDSAYDVL-EWHDFLLSQGVV--P-------IA---PYNPRN---------TDDPLDIEYRVEDRID---EHAEDI--SLK----------------------------------------------------------------------------------------------------------------------------------------- 11 | ISHti8.1|IS5|None|Transposase|Halorhabdus/133-263 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTHVEAIQ--YNDAASWNY-DPTAEEYYYG--FGCTIVSTGAKIP----IA--AEFTQ----------AKQANKETA-----MRVTRDALAVDTPIWMLGDSAYDIL-DWHDLLLAAGVV--P-------IA---PYNPRN---------TDDPKDIKYRVEDRIK---KHSEDI--RLK----------------------------------------------------------------------------------------------------------------------------------------- 12 | ISNpe19.1|IS5|None|Transposase|Natrinema/133-263 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTHIEAIQ--YNDAASWNY-DPTAEEYYYG--FGCTIVSTGAKIP----IA--AEFTQ----------AKQADQETA-----MRVTRDALAVDTPIWMLGDSAYDIL-DWHDYLLAAGVV--P-------IA---PYNPRN---------TDDPKDIEYRVEDRIE---KHSEDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 13 | ISNpe2.1|IS5|None|Transposase|Natrinema/133-263 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTHIEAIQ--YNDAASWNY-DPTAEEYYYG--FGCTIVSTGAKIP----IA--AEFTQ----------AKQADQETA-----MRVTGDALAVDTPIWMLGDSAYDIL-DWHDYLLTAGVV--P-------IA---PYNPRN---------TDDPKDIEYRVEDRIE---EHSEDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 14 | ISHti7.1|IS5|None|Transposase|Halorhabdus/133-263 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTHIEAIQ--YNDAASWNY-DPTAEEYYYG--FGCTIVSTGAKIP----IA--AEFTQ----------AKQADQETA-----MRVIRDALTVDTPIWMLGDSAYDIL-DWHDHLLAAGVV--P-------IA---PYNPRN---------TDDPKDIEYRVEDRIT---EHSEDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 15 | ISNpe15.1|IS5|None|Transposase|Natrinema/133-263 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTHIEAIR--YNDAASWNY-DPTAEEYYYG--FGCTIVSSGAKIP----IA--AEFTQ----------AKQASQETA-----MRVTSDALAVDTPTWMLGDSAYDIL-DWHDHLLAAGVV--P-------VA---PYNPRN---------TDDPKDIEYRIEDRIE---EHSEDV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 16 | ISHwa14.1|IS5|None|Transposase|Haloquadratum/133-263 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------YSIDSTHVEAIQ--HNDAASWNY-DPTAEEYYYG--FGCTLVSTGAKIP----IA--AEFTQ----------AKQADQETA-----MRVTRDALAVDTPIWMLGDSAYDIL-DWHDHLLTAGVV--P-------VA---PYNPRN---------TTDPKDIEYRVEDRIE---EHSEGV--QLK----------------------------------------------------------------------------------------------------------------------------------------- 17 | // 18 | -------------------------------------------------------------------------------- /data/models/msa/IS5_None/IS5_None_model2_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | #=GF Alignment Program ClustalOWS 3 | ISNpe2.1|IS5|None|Transposase|Natrinema/134-282 
SIDSTHIEAIQYNDAASWNYDPTAEEYYYG--FGCTIVSTGAKIPIAAEFTQAKQADQETAMRVTGDALAVDTPIWMLGDSAYDILDWHDYLLTAGVVPIAPYNPRNTDDPKDIEYRVEDRIEEHSEDVQLKRSILDETFNNRTGIERTND 4 | ISNpe19.1|IS5|None|Transposase|Natrinema/134-282 SIDSTHIEAIQYNDAASWNYDPTAEEYYYG--FGCTIVSTGAKIPIAAEFTQAKQADQETAMRVTRDALAVDTPIWMLGDSAYDILDWHDYLLAAGVVPIAPYNPRNTDDPKDIEYRVEDRIEKHSEDVQLKQSILDETYNSRTGVERTND 5 | ISNpe18.1|IS5|None|Transposase|Natrinema/134-282 RIDSTHVTAIQYNDEATWNYDSTAEEHYYG--FGCVIVSAGPKIPIAAEFTQRKQIDAETAMRVTKDALAVDTPIWMLGDSAYDVLEWHDFLLSQGVVPIAPYNPRNTDDPLDIEYRVEDRIDEHAEDISLKQSVLAETYDHRTQVERTNE 6 | ISNpe17.1|IS5|None|Transposase|Natrinema/132-280 SIDSTDVRTMPADPDASKCYDPTAEEYYYG--YGCTIVSTGSKIPIAAEFTESKQAPEETAMRVTRDALAVAKPIWMLGDSAYDTLDWHDHLLAAGVVPVAPYNPRNTDDPKDIEYRVEDRIEQHSEDVQLKQSTLNETYNRRTGVERTNE 7 | ISNpe16.1|IS5|None|Transposase|Natrinema/132-280 SIDSTDVRTMPADQDASKCYDPTAEEYYYG--YGCTIVSTGSKIPIAAEFTESKQAPEETAMRVTRDALAVDTPIWMLGDSAYDTLDWHDYLLTAGVVPIAPYNARNTDDPLDIEYRVEDRIKEHSKDVKLKQSTLDETYNRRTGVERTND 8 | ISNpe15.1|IS5|None|Transposase|Natrinema/134-282 SIDSTHIEAIRYNDAASWNYDPTAEEYYYG--FGCTIVSSGAKIPIAAEFTQAKQASQETAMRVTSDALAVDTPTWMLGDSAYDILDWHDHLLAAGVVPVAPYNPRNTDDPKDIEYRIEDRIEEHSEDVQLKQSVLDETYNRRTGVERTND 9 | ISNamo4.1|IS5|None|Transposase|Natronomonas/132-280 CIDSTDVRAMPADQDASKCYDPTDDEYYYG--YGCTIASTGQKIPIAAEFTESKQAPEETAMRVTRDALAVAKPIWMVGDSAYDTLDWHDHLLAAGVVPVAPYNARNADDPKDIQYRVEGRIDEHSEDVQLKQSTLDETYNRRTGVERTNE 10 | ISHwa20.1|IS5|None|Transposase|Haloquadratum/132-282 SIDSTDVRAMPADPDASKCSDPTDDEYYYGYGYGCTIVSTGQKIPIAAEFTESKQAPEETAMRVTRDALAVEKPIWMLGDSAYDTLEWHDHLLTAGVVPVAPYNPRNTDDPKNIEYGVEDRIEKHSNDVQLKQSTLDETYNRRSGVERTNE 11 | ISHwa14.1|IS5|None|Transposase|Haloquadratum/134-282 SIDSTHVEAIQHNDAASWNYDPTAEEYYYG--FGCTLVSTGAKIPIAAEFTQAKQADQETAMRVTRDALAVDTPIWMLGDSAYDILDWHDHLLTAGVVPVAPYNPRNTTDPKDIEYRVEDRIEEHSEGVQLKQSILDETYNNRTGVERTND 12 | ISHvo7.1|IS5|None|Transposase|Haloferax/134-282 
RIDSTHVEAIQWNDEASWNYDSTAEEHYYG--FGCTIVSTGAKIPIAAEFTQAKQASEETAMRVTRDALAVDTPIWIIGDSAYDTLQWHDFLLDAGVVPVAPYNPRNTDEPLDIEYRVEDQIEEHSEDLQLKQSVLEETYNRRTQVERTND 13 | ISHti8.1|IS5|None|Transposase|Halorhabdus/134-282 SIDSTHVEAIQYNDAASWNYDPTAEEYYYG--FGCTIVSTGAKIPIAAEFTQAKQANKETAMRVTRDALAVDTPIWMLGDSAYDILDWHDLLLAAGVVPIAPYNPRNTDDPKDIKYRVEDRIKKHSEDIRLKQSILDETYNDRTGVERTND 14 | ISHti7.1|IS5|None|Transposase|Halorhabdus/134-282 SIDSTHIEAIQYNDAASWNYDPTAEEYYYG--FGCTIVSTGAKIPIAAEFTQAKQADQETAMRVIRDALTVDTPIWMLGDSAYDILDWHDHLLAAGVVPIAPYNPRNTDDPKDIEYRVEDRITEHSEDVQLKQSILDETYNKRTGVERTND 15 | ISHma6.1|IS5|None|Transposase|Haloarcula/132-280 CIDSTDVRAMPADQDASKCYDPTDEEYYYG--YGCTIVSTGQKIPIAAEFTESKQAPEETAMRVTCDALAVAKPIWMVGDSAYDTLDWHDHLLAAGVVPVAPYNARNADDPKDIEYRVEDRIEKHSEDVQLKQSILNQTYNRRTGVERTNE 16 | ISHla6.1|IS5|None|Transposase|Halorubrum/134-282 SIDSTDVRTMPADQDASKGYDPTAEEYYHG--YGCTIVSTGQKIPIAAEFTESKQAPEETAMRVTCDALAVEKPIWMLGDSAYDTLGWHDHLLAAGVVPVAPYNARNTDDPKDIEYRVEARIDEHSEDVQLKQSTLDETYNRRSGVERTND 17 | ISH11.1|IS5|None|Transposase|Halobacterium/132-280 CIDSTDVRAMPADQDASKCYDPTDDEYYHG--YGCTIVSTGQKIPIAAEFTESKQAPEETAMRVTRDALAVAKPIWMVGDSAYDTLDWHDHLLAAGVVPVAPYNARNTDDPKDIEYRVEDRIEQHSEDVQLKQSTLDETYNRRTGVERTNE 18 | // 19 | -------------------------------------------------------------------------------- /data/models/msa/IS5_None/IS5_None_model2_final.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | #=GF Alignment Program ClustalOWS 3 | ISMbu10.1|IS5|None|Transposase|Methanococcoides/22-162 TAIDSTGFTSGYCS------------NYYSWRIKKW--R-RSYVKTSISVDVHK--F-VITGYKISGK-PVHDAKHAKALLSQCHHNRRS---RYYVVDKAYDSENIHELTREKLGSIAIVPLRQRERKR---I-------------------KGRYRK------KMIHEFDSKIYSMRNLSETMFSVLKR 4 | ISPac1.1|IS5|None|Transposase|Candidatus/124-268 IAIDSTGYSKRNVS------------YHYLWRIKGRIRH-REHVKHSIAVDVDR--Q-AIIAAIDRSNPYAADTKDFIPLVKKVASVVTP---CKILADRGYDSEKNNEFC-YRIGAKPLIPIRNANVPV-WKT-------------------HGMHRK------EAKRRFDWKSYDKRSLVETVNSVEKR 5 | 
ISMev1.1|IS5|None|Transposase|Methanohalobium/124-264 TAVDSTGFTSGHCS------------YYYSLRTDKK--R-HSFLKTSIAVDTSN--L-VIIGVKVSGR-PVHDTKHAVTLLKQCHRIRKS---EFYVMDKAYDSEDIHKLVRDELGSIAIIPVKDRKRKR---I-------------------KGRYRR------LMIEDFDVALHRKRSMSETSNSVLKR 6 | ISMeme1.1|IS5|None|Transposase|Methanococcoides/113-253 TGIDLTGFTSGYCS------------NYYSWRIKKW--R-RSYVKTSISVDVHK--F-VITGYKISGK-PVHDAKHAKTLLSQCHRNRKS---RYYVMDKAYDSEGNHKLTREKLRSIAIVPLRQRERKR---I-------------------KGHYRK------KMLREFDDEIYSLRNLSETMFSLLKR 7 | ISMem1.1|IS5|None|Transposase|Methanohalophilus/114-254 IAIDSSGFTGGHCS------------YYYSVRTGKK--R-RSYLKTSIAVDVEK--F-IVTGFKISGK-PVHDAKHALTLLRQCHKRRKA---DCYLMDKGYDSEKIHALINEELKAEAIIPVRCRKRKK---I-------------------KGKYRR------KMRDEFDEDIYHYRNLVETMFSVLKR 8 | ISMepa1.1|IS5|None|Transposase|Candidatus/114-254 TAIDSTGFTSPYAS------------HYYSWRTGKT--R-KNFLKTSIAVDTCK--Q-VILFSKISLK-PVHDTKHAEPLLRQCQRTRKT---GCYVMDKGYDSEKLHRQIREEMGADSVIPVRTWKGKI---Y-------------------SGKYRQ------EMYNNFDSKRYYERNKVETAFSVIKR 9 | ISMmi1.1|IS5|None|Transposase|Methanobrevibacter/123-306 FIVDATPVDLDFNFNRNKKTKEHLKTLNLKWSYSSS-KGFYIGFKATVIIDYDS----MNPVSILIHSGAPNDAKLFDEIMENLQKRRIIRKGDTIILDKGYYSYKNYQLGISKYKIVPFIFPKDNFNKTKLNDQLSYPLQVFNKTKKI-LAQKQFYNNLKIELFKKLDDWKK-FKPIRGKIEDFFKLLKQ 10 | ISArch21.1|IS5|None|Transposase|uncultured/126-310 IIGDTTNLTVDLNWLRKKYKKEDLKDTDYKWAYSKS-KGYYIGMKLVLAIEYPS----LKPLAFLVFPGGPSDSKIFDKIVAELMRRRILRKGDMFVLDKGFYAYKHYTDGLLKYDIIPLIFPRKNFKIEKVLNSIQLILDFFCDKADRIKMKIRNLKAILSEFKQNILNWKE-FKPKRSLIEDVIKVMKK 11 | ISMeva1.1|IS5|None|Transposase|Methanosarcina/123-306 IIIDSTDINLDLNWFKRKISKKMLEDREFKWGHSKH-RGYFIGMKLSLAIEYPS----LKPLTFIINEANVSEYTVYPQILEELKKRKKIRQGDVLYFDKGYFSYENYVIGIAKYKIVPIIFPRINCNYNKLFNMLSYPLNIFDSKRNT-EEDKKIYKRLVVKFKTLMENWGG-LKPIRSLIEDIFKLAKK 12 | ISMefo1.1|IS5|None|Transposase|Methanobacterium/123-306 
FIVDATPVDLDYNTKRKHRSKKYLKKQNLKWSYASS-YGFYIGFKATIVIEHES----AMPVAILIHSGAPHDTKIFTEIMENLRKRRIIRKGDTIIFDRGYYKYENYQIGISKYKIVPLIFPKEKFKLQKLKDKLTYPLRVFKDKKTE-NKSKRLYKILKRILIQKIQNWKR-YKPIRGKIEDFFKLCKS 13 | ISMets1.1|IS5|None|Transposase|Methanosarcina/123-306 IIIDSTDINLNLNWHAKKITKKSLENKEYKWGHSTH-RGFFIGMKLTLALEYQT----LKPLAFLINEANVPETKIYPEILKELKRRRILKAGDIVYADRGYYSYENYVISVREFKVVPLIFPRKNCNFKKLFNMLRYPLKIFDSRRDT-EKEIKIYKEIIAKFKELISKWKE-FRPVRSIIEDIFKLAKK 14 | ISMru1.1|IS5|None|Transposase|Methanobrevibacter/123-306 FIVDATPVDVDINFHRNKKTKEHLEKINLKWSYSSS-KGYYIGFKATVVLDYDS----MNPVCILVHSGAPNDAKLFEEILENLQKRRIIRKGDTLIFDKGYYSYKNYQIGISKYKIIPFIFPKEKFSRTRLDDILTYPLAVFNKTKRI-MKEKRLYNSLKMELMKKIDSWEK-FKPIRGKIEDFFKLLKQ 15 | ISA1218.1|IS5|None|Transposase|Archaeoglobus/124-298 LILDWTDISLDLNPFRR----RELSNKPYKWGYST--KGFFLGMKMMILIDYNT----LTPLFFHIYPANTHESRIYPVILEMLKRRKLIRFGDSIIMDRGFYAYKNYLIGL-RYGIIPLIIPRKNFREERLKGMVSYPLSIFASKNVE--KEKKRYRKLVRKLFEGL--KQN-LKTLRSIIEDVIKLGKE 16 | ISDith1.1|IS5|None|Transposase|Dictyoglomus/116-267 LLVDGAGFGYNFKI-------------KQRLYFGREIREKRDHVKCELLVLVDERGK-SYIVGVFVDDGYKDERKILKSKFEEVKKLKEEVDFRKVYGDRLYNRDIELLREFEKEGVEMILPVEDGIHNK-VKS-----------------EERKRVKE------SYNRKRHA-YNRNRYKIEQKIGNIKR 17 | ISSulsp1.1|IS5|None|Transposase|Sulfurihydrogenibium/94-236 YIADATGFAYGDIY-------------SLKWRRGIEVKQIKSHIRLEVIMAVDNEDK-VVILGCETGKAYASEIKMLNQILDKVDFIKGL-PF---IADKGYD-SISVIQKILDIGLIPAIKIKETFRIK-IKH-----------------PL----RQ------LSKENWLK-YGKKRYRIESLFGNIKN 18 | ISTth7.1|IS5|None|Transposase|Thermus/42-184 YLMDTTGLAYRSKD------------RLLRFRRGKEVRRVRGHARLLALMRWDRERRLLWPWGGVVGEGYAPDPRLGGEVLRRFPPSRGW-----LLADAGFD-GKEVWGVLGEAGVRPVIRLRGGGEAR-EEA-----------------------RV------RAREGWDPEVYRFRGVVEGVFGGMKT 19 | // 20 | -------------------------------------------------------------------------------- /data/models/msa/IS5_None/IS5_None_zvysok_zo_zvysku.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | #=GF Alignment Program ClustalOWS 3 | 
ISDce1.1|IS5|None|Transposase|Desulfotomaculum/140-498 ANKDAVDSFMIHGAAARQSTFTMIRKATARVLRQADVEGFIEDIRQKLNRDDYLNNKKPTIDWDNIEARNKLLTEMVLDARTIALWAKEN---------KAKIS----------------EELNQCIELLQIV---------------------AEQDIEEKDGN---------IA-----------IRQG-VA---KDRIISVEDPQMRHGRKTTSSKTDGYKGHIM--SGGIENKII-TAAEITAANVPDSEPVPDLIKQRQENTGS--K-------------PDSLSGDTAYGGAETRKHIKKE-KIKLIAKVPPST----NVNG---CFNKDKFIIDLDNKFIECPAGVRLEI----DKELGEKEICCKFPKEQCQNCDLRNQCTKSKDGR-TVRIHPH--EALLQKARKQQQTAEFKEEYRFRSR-IERIIYCVTKNGARKGKYNGLEKNRFKLQLHT 4 | ISPlu14.3|IS5|None|Transposase|Photorhabdus/143-409 ------------------------------------------------------------------------------------LYTDST---------HLKAS----------------ANPHKSENVMRSVPPGAYVDAL---------DKAVTQDRA-AAGK---------KS-----------LKPA-LKERQRKTKVSTVDPESGFMHRTNKPRGFFYLDHRT--VDG-QVGII-TDTYATPGNVHDSQPFIKRLTRQLERFAL--N-------------PLAVGLDAGYFTAPVCYLTEQLGIIPIIGYRRP-N----KGPN---TFQKKHFTYDQLQDCYVCPQGEKLIY----TTTDRQGYRHYQAPAAICQCCPQRDACTHAKGGK-TITRHIW--EASKEQARENRLTEWGKKVYKRRKETIERS-FADAKQ--------------------- 5 | ISAva5.1|IS5|None|Transposase|Anabaena/211-500 ----------------------------------------REKLRRKIGE-----------------DGHHLLSALYADSTCNWLWQI------------------------------------PSVETLRIVWVQQYYIQL---------QQVYWREQD---------------N-----------LP------PNRLQIESPYDVDARNSSKR-EINWTGYNLHLTEICHPILPNLI-INVETSVATSADVEMTP-VIHSRLNQNNL--L-------------PQEHVVDTGYVNAQNLVDSQSHFHVDLVGKVPPGTSWQATAQS---GFEQNCFTIHWDLMRVDCPMGKQSKSWRTTVDSHDNPVVKIQFDKSDCSLCSSRSKCTRSKKLPRLLTLKPQELHLALHDARIRQKTESFQQIYHQRAGVEGLISQATGRYQLR------------------ 6 | ISAcma45.1|IS5|None|Transposase|Acaryochloris/245-474 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------P------PSDSSIHSPYDVEARYSNKR-SMDWVGYKVHLTEVCDPESPRLI-THVHTTSATVPDDQVVE-PIHLALAKKQL--L-------------PKEHLLDGGYLSAEHLVNSKAEYDMDVIGPVRQNHSWQAKAGN---GFDSSFFLISWDQQKVVCPQGHFSTKWLPGKDIHGKDVIQARFQGTTCRSCPVRSQCTKSKTQPRELTFRPRELHQALNERRQVQRTTEFKEKYSLRAGVESTHSQGIRRFGMR------------------ 7 | ISC1290.1|IS5|None|Transposase|Sulfolobus/131-307 --------------------------------------------------------------WIVDSF-------------------------------LIEVPF-------------------------------------GKRNKETL-KKKFELDLRQRKYREAA------NTLFF-----------------YIKCKARRRF-KGEFTKKR-NRSYFGFKVF--NLMS--PTMIV-HEIQVELANFPDNKVGFSRSGYKVVDRGFVGKSSTWLIGFSSFR--RYVEFFGIFLRRYWRPYATEKG--MVEFFVY----------------------VIALIYNSYIYTSVLS-RVPESQLAH-------------------------------------------------------------------------------------------------- 8 | ISC1234.1|IS5|None|Transposase|Sulfolobus/141-318 --------------------------------------------------------------WLVDSF-------------------------------LLDLPP-------------------------------------GKRSVETF-REKAELERREGNLERAR------KL--------------------LSLGRTKRRF-EGRWTKKR-GVSHYGLKAV--AVIS--VSLFV-RSITVKPANFSDKRFKSPLKGIKIADRGFSPSPTQLIAREKPFTTLAHVEFFGTYLNAFWRPYGTTTW--RNDVFLH----------------------VLGVIYNIKMFLAIQR-RTPPGRRAVQL------------------------------------------------------------------------------------------------ 9 | ISC1212.2|IS5|None|Transposase|Sulfolobus/1-289 
---------MVKAIST-----------EKDLLLKVDKSFPWETFRSKLKS---LYSKK--PKWNVISLLKVLLIKLIFDISWNNLEGEIRDSKRFMDFLGGKIPPKSTVFSF---YKKLQQTVIQEGETMRTTLMDELNKALDKVISEYR-EKGFELEVGREKTIGSRTTTMGFNDVFLLM---------GYTTSKLKNFTAFRKY-KGSWGRKH-GKSYFGFKVC--NLVER-RTNFV-RGFKVGLANLSDLAFSFDNVKL-MADRAWISRKDVLVKGVGSAR--LPVEGSGVKIREGKASSTTLRGVVMEVFFL-----------------------------NLYRDLEILS-TRIRTKVLVN------------------------------------------------------------------------------------------------- 10 | IS1114.1|IS5|None|Transposase|Xanthomonas/26-282 --------VIVDTTVQ-----------EKAIAHPSD-SRLLEVARNKLVL---LAKRH------------G----IVLRHTYVRQ--------------GPGLSRKAGRYAHARQFKRMRK-VLRRQRTILGRVLRDLQRKLAQQEPSVRERIGVWLERAQRLLT-HRP--KDKQKLYALHSPEVECISKGKASSPLRIWRQGRHCGECAQGLDR-GRAQFSWAIPTTAILGR-ATGT---GARAAAGCECDPAGGDRGPGVSRAGRGGCADPASGQSQD-ADTTAMALDQTTASGRAGDR--TSETGLPLESL--------------------------------------PSQ-RRPRR------------------------------------------------------------------------------------------------------ 11 | ISNme1.1|IS5|None|Transposase|Neisseria/141-299 ------------------------------------------------------------------------------------------------------------------------------------AVID---ATIIQTAGSKQ-RQA--IEVDE----------------------------EGQV----SGQTTLSKDKNARWTKKN-GLYKLGYKQHT--RTDA-EGY-I-EKLHITPANAHECKHLSPLLEG------LPK--------------GTTVYADKGYDSAENRQHLEEHQLLDGIMRKACRNR------PLSEVQTKRNRYLSKTRYVVEQSFGTLH-RKFRYA----------------------------------------------------------------------------------------------------- 12 | ISRso1.1|IS5|None|Transposase|Ralstonia/117-243 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GVI----DRQTVKAPS-ADKRGYDA-AKKIVGRKRHI--AVDT-DGR-L-LMVNLTPADIADSTGALAVLEA--VKKRWPG--------------IKHLFADGAYDRTALMDKASTLDFVVEVVRRH---------------EQQTGFAVLPRRWVVERTFGWMV-RWRR------------------------------------------------------------------------------------------------------- 13 | ISMhu10.1|IS5|None|Transposase|Methanospirillum/135-305 -----------------------------------------------------------------------------------------------------------------------------------IIIDGT-SLSV---------DINWF-----------------------------------------RKRIQKSKLVNKPYNWQYSPYKGFYIGLKLT--LAIDQNNLLPLAFLVHQNPVADSKIFP-LILQELKRRRILNK-------------GDRVLLDRGYYSYKNYVLSLIEYSVI--PLILPKK----------------GFSFSKLENYLISPLT-----WFENKKSYQK-VIELSHMK--------------------------KELKDALENSDELA---------AQR----------------------------------- 14 | IS1406.1|IS5|None|Transposase|Pseudomonas/82-233 ------------------------------------------------------------------------------------------------------------------------------------TIVD---ATLIAAPPSVK---------NR----------------------------EGKR------------DPEMHQAKKG-NQWHFGMKAHI--GVDA-TSGLV-HSVVGTAANVADVTQVDQLLHG--AET--------------------YVSGDAGYTGTAKRPEHAERDVIWSIAARPSSYKQHGEGSVLYRVKRKIEYAKAQLRAKVEHPFQVIK-VRFNHRKV--------------------------------------------------------------------------------------------------- 15 | // 16 | -------------------------------------------------------------------------------- /data/models/msa/IS607/IS607_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | IS1535.1|IS607|None|Transposase|Mycobacterium/55-145 YARVSS--HDRRS--DLDRQVARLTAWATER-DLGVGQVVCEVGSGLN-GKRPKLRRILSDPDA---RVIVVEHR--DRLARFGVEHLEAALSAQGRRIVVA 3 | IS1536.1|IS607|None|Transposase|Mycobacterium/55-145 
YARVSS--ADQKS--DLDRQVARVTSWATAE-QIPVDKVVTEVGSVLN-GHRRKFPAVLRDLSV---TRIVVEHR--DRFCRFGSEYVHAALAAQGRELVVV 4 | IS1537.1|IS607|None|Transposase|Mycobacterium/55-145 YARVSS--ADQRS--DLDRRVARVTAWATSQ-HLSVDKVVAEGGWALN-GHRRKFFALLGDPVV---TRIVVEHR--DRFCWFGSEYVEAALVAQGRELVVV 5 | IS1538.1|IS607|None|Transposase|Mycobacterium/55-149 YARVSSDAADQKA--DLDRQVARVTAWATAQ-QMPVDKVVTEVGSAFN-EHRRKFLSLLRDPSV---HRIVVEHRDADRFCRLGSKYVQAAFAAQGRELVVV 6 | IS1539.1|IS607|None|Transposase|Mycobacterium/55-145 CARLSS--ADQKV--DLDRQVVGVTAWATAE-QIPVGKVVTEVGSALY-GRRRTFLTLLGDPTV---RRIVMKRR--DRLGRFGFECVQAVLAADGRELVVV 7 | IS1602.1|IS607|None|Transposase|Mycobacterium/55-145 YARVSS--ADQKP--DLDRQVARVTAWATTE-QIAVDKVVTEVGSALN-GHRRKFLALLRDPSV---KRIVVEHR--DRFCRFGSEYVEAALAAQGRELVVV 8 | IS1921.1|IS607|None|Transposase|Acidianus/59-148 YARVSS--NAQKD--DLVNQLKYLQEQVKDY-----GQVITDVGSGLN-MKRKGFLKLLRMILNNEVSKVVIAYP--DRLVRFGFEILEEVCKAHNCELVVL 9 | IS607.1|IS607|None|Transposase|Helicobacter/68-160 YARVSS--HDQQD--DLIRQVQVLELYCARC-GFN-YEVIQDLGSGMN-YYKKGLTKLLNLILDNQVKRLVLTHK--DRLLRFGAELVFSICEAKGVEVVII 10 | ISAfe10.1|IS607|None|Transposase|Acidithiobacillus/67-158 YARVSS--AENRP--NLESQAERLGQYATAR-GWQVIEVVKETGSGVN-DHRKKLEKLLQKSTN--WDILIVEHK--DRLTRFGFHYIDTLLSQLGKRIEVV 11 | ISAfe11.1|IS607|None|Transposase|Acidithiobacillus/63-156 YCRVSS--AGQKD--DLASQVDAMETYCRGA-GIAVDEWVQEIGGGMN-FKRKRFLGILDRMQRGEIGKLLVAHK--DRLVRFGYDLIMHLATENGCEIVVV 12 | ISArma1.1|IS607|None|Transposase|Arthrospira/66-154 YARVSS--RAQQS--DLNRQVAALSNLYPE------AEVVSEIGGGLN-FKGKKMLALLGHHLSGDVRMVVVAHK--DRLAIWGFDLFRWLCEQNRCSLMVL 13 | ISBce17.1|IS607|None|Transposase|Bacillus/15-108 YTRVSN--PGQKD--DLENQVEFFKTFANAR-GMIVDEVIKDIGSGLN-YKRKQWNNLIDSCMERNISTIIIAHK--DRFVRFGYEWFEKFLRKMGVEIMIV 14 | ISBlo12.1|IS607|None|Transposase|Bifidobacterium/61-151 YARVSS--SDQKD--DLTRQADRLRAFAVNM-GVEKPEVVTETGSGMN-DKRRKLNRLLADPTV---GTLIVEHR--DRLARMNAGLVESALKAQGRRVVVV 15 | ISC1904.1|IS607|None|Transposase|Sulfolobus/59-148 
YARVSS--STQKD--DLANQVKYLEEQVKEY-----DQVITDIGSGLN-MKRKGFLKLLRMILNNEVSRVVIAYP--DRLVRFGFEILEEVCKAHGCEIVVI 16 | ISC1913.1|IS607|None|Transposase|Sulfolobus/62-155 YARVSS--TDQRE--DLERQINYLTNYATAK-GYKVVEVLKDIASGLN-TQRKGLLKLFKLVEGRSVDIVLITYK--DRLTRFGFEYIEEFFSTMGVKIEVV 17 | ISC1926.1|IS607|None|Transposase|Sulfolobus/70-163 YARVSS--SDQKE--DLERQINYLTNYATAK-GYKVVEVLKDIASGLN-TQRKGLLKLFKLVEGRSVDVVLITYK--DRLTRFGFEYIEELFSTMGVKIEVV 18 | ISCARN1.1|IS607|None|Transposase|Metagenomic/51-142 YARVSS--AENRP--NLDSQAERLGQYATAR-GWQVIDVVKETGSGVN-DHRKKLEKLLRKSND--WDILVVEHK--DRLTRFGFHYIDTLLGQLGKRVEVV 19 | ISCARN56.1|IS607|None|Transposase|Metagenomic/54-144 YARVSS--HDQRN--DLDRQLARLSQYAAEH-DLHVVESVAEVGSGLN-GKRRKVMRLLLDANV---HAIVVEHR--DRFARFGSEYLEAALAASGRRLIVV 20 | ISCbe1.1|IS607|None|Transposase|Caldicellulosiruptor/63-155 YARVST--KKQEE--YLKNQIRRLEEYAKSQ-GWQY-EVISEIASKVD-ENRRGLLKLLNKIKRGEVTKVVIEYP--DRLARFGFEYLKFFMESFGVELVVL 21 | ISCbo10.1|IS607|None|Transposase|Clostridium/58-152 YARVST--PKQKK--DLENQVQLLKQFCFSN-GWMINNVFQDVASGISFDKRKDFFKMLDDIIQNKVERVVITYK--DRLSRVGFELFYYLFKKYHCEIVVM 22 | ISCbo6.1|IS607|None|Transposase|Clostridium/66-158 YCRVSS--HKQKD--DLERQIENVKTYMFAK-GYQ-FEIITDIGSGIN-YNKKGLNQLIDMITNSEVEKVVILYK--DRLIRFGYELIENLCNKYGTTIEII 23 | ISCbt3.1|IS607|None|Transposase|Clostridium/65-158 YARVSS--NSQKD--DLKNQVEFLKQYANAK-GMIVDEIFEDVGSGLN-YNRKKWNKLLEDCMLGAIKTIIVSHK--DRFIRFGFDWFERFVKSNGVELIVV 24 | ISCbt4.1|IS607|None|Transposase|Clostridium/63-155 YCRVNT--PSQKD--DLENQVNNVKSYMIAK-GYQ-FEIIKDIGSGIN-YKKKGLKELIDKINNQEVSRVVILYK--DRLIRFGFELIEYLCQINNVELEII 25 | ISCfe1.1|IS607|None|Transposase|Campylobacter/67-159 YARVSS--NDQKD--DLIRQVQVLELYCSKQ-GFN-YEIIQDLGSGMN-YYKKGLTRLLNLILDGEVKRLVLTHK--DRLLRFGAELVFAICEAKEVEVIII 26 | ISChh1.1|IS607|None|Transposase|Campylobacter/67-159 YARVSS--HDQKD--DLIRQVQVLELYCSKQ-GFN-YEVIQDLGSGMN-YYKKGLTKLLNQILDGKVKRLVLTHK--DRLLRFGAELVFAICEAKEVEVIII 27 | ISDka1.1|IS607|None|Transposase|Desulfurococcus/66-159 
YARVSS--SDQRS--DLERQVQYLTQYCSAR-GYRVVDVLTDIASGLK-ADRRGLLKLFEYVVSKQVDVVVITYR--DRLTRFGFEYLEYFFRQYGVRIEVI 28 | ISLhe9.1|IS607|None|Transposase|Lactobacillus/75-167 YCRVSS--AGQKN--DLKRQIAVVTNFCEMQ-GKP-FKIISDIGSGLN-YHKKGLKELIHLICTQQCSQVVVNYQ--DRLVRFGFELIEDICQENDVEITVI 29 | ISNma20.1|IS607|None|Transposase|Natrialba/63-158 YARVSS--HGQKDNGDLDHQLERLTDYAHDH-GWSVENTYTDVGSGLN-EDRRGLNSLLDDLQEADYGRILVTYE--DRLTRFGFSYLKRYFDCYGVTVTVI 30 | ISPfu4.1|IS607|None|Transposase|Pyrococcus/72-164 YARVSS--HTQKD--YLERQVKAIEQYAKER-GWQV-QILTDIGSGLN-ENRKNYRKLLELVAKREVSKVIITHP--DRLTRFGFKTLEFFFKENGAEIIII 31 | ISSis5.1|IS607|None|Transposase|Sulfolobus/59-148 YARVSS--NTQKD--DLINQVKYLEENVKDY-----DQVITDIGSGLN-MKRKGFLKLLRMILNNEVSKVVIAYP--DRLVRFGFEIIEEACKAHNCELVVL 32 | ISSis7.1|IS607|None|Transposase|Sulfolobus/59-148 YARVSS--NTQKD--DLINQVKYLEENVKEY-----DQVITDVGSGLN-MKRKGFLKLLRMILNNEVSKVVIAYP--DRLVRFGFEIIEEVCKAHNCELVVL 33 | ISSoc2.1|IS607|None|Transposase|Synechococcus/69-157 YARVSS--RGQKP--DLERQIARLVDLYPG------AEVVGEVGGGLD-FKRPKFLALLERVRAGDIGTIVVAHR--DRLCRFGFEFVEWYCRQYGCEILVL 34 | ISSto11.1|IS607|None|Transposase|Sulfolobus/38-127 YARVSS--NTQKD--HLINQVKYLEEQVKDY-----DQVITDMGSGLN-MKRKGFLKLLRMILNNEVSKVLIAHP--DRLVKFGFEILEEVCKAHNCELVVL 35 | ISSto12.1|IS607|None|Transposase|Sulfolobus/62-156 YARVSS--NTQKD--DLERQINALREWVKKNYGNVSVIEIKDVGSGLK-EDRRGLKKLIELSRRRQIDVVVVAYK--DRLTRFGFDYLVELFKAYGVGVVVA 36 | ISSto13.1|IS607|None|Transposase|Sulfolobus/62-155 YARVSS--ANQKE--DLERQINYLTNYATAK-GYKVVEVLKDIASGLN-TQRKGLLKLLKLVESRNVDVVLITYK--DRLTRFGFEYLEEFFSAMGVRIEVV 37 | ISTko1.1|IS607|None|Transposase|Thermococcus/66-158 YARVSS--HTQKD--DLERQVEAIKQYAKER-GWQV-QILKDIGSGLN-EKRKNYRKLLELVAKGEVSKVIITHP--DRLTRFGFKTLEFFFKENGSEIIVI 38 | ISTko2.1|IS607|None|Transposase|Thermococcus/60-153 YARVSS--RDQKE--DLERQVEYLKNYCSAK-GYQVARILTDISSGLN-ENRKGLKQLFKLVESGEVTKVVITYK--DRLTRFGFKYLEQYFNSHGVEIEVI 39 | ISTsi1.1|IS607|None|Transposase|Thermococcus/61-154 
YARVSS--QNQKE--DLERQVEYLKNYCSAK-GYQVAKIITDISSGLN-ENRKGLKQLFKLVENGEITKVVITYK--DRLTRFGFKYLEQYFNSHGVEIEVI 40 | ISTvo1.1|IS607|None|Transposase|Thermoplasma/58-149 YERVST--AKQKN--DLQNQ---MKQXCFMN-GYTISAIYAHIASGISLEKRKGFFEMLDEIINNKVEKVIITYK--DRLSRVGFDLFSYLFRKYRTEIAVI 41 | ISvAR158_1.1|IS607|None|Transposase|Virus/55-142 YCRVSS--AKQYD--DLERQVE----SMRTE-FPG-YRIITDIGSGIN-WKRKGLKTILESAMHGTCGRVVVAHR--DRIARFGFELVEWILRAN-GADLVV 42 | ISvMimi_1.1|IS607|None|Transposase|Mimivirus/65-153 YVRVSS--NSQKD--DLERQIK----FMKKK-YPN-HTIIKDISSGIN-MNRKGLNKIIDLAIEGRIKEVVVAYK--DRLAIFGFSLIERLIETYSDGKIVV 43 | ISvMimi_2.1|IS607|None|Transposase|Mimivirus/63-151 YCRVSS--KKQIK--DLNRQVE----YMEKN-YPE-YEIIKDIGSGIN-MERKGLLQLIQMAIDGEISEVVVTYK--DRLARFGFELIEWIIKTYSNGQIKI 44 | ISvNY2A_1.1|IS607|None|Transposase|Virus/77-163 YARVSS--SKQRD--DLQRQID----FLKEK-HPD-FEVVSDIASGIN-WQRKGLRKLLDLSSAGGVERIVVAER--DRLCRFAFELVEYVFS--INGTIVE 45 | // 46 | -------------------------------------------------------------------------------- /data/models/msa/IS91/IS91_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | IS1294.1|IS91|None|Transposase|Escherichia/133-255 GIFCAIHTYGRRLNWHPHVHVSVTCGGLNK-HGQWKK-----L-----SFLKDAMRSRWMW-----NMRQLLLKAWSE--GMA-MPESLSHITTESQWRSLVLKSGGKYWHVYM---SKKTAGGRNTARYLGRYLKKPPIAASRL 3 | IS801.1|IS91|None|Transposase|Pseudomonas/155-277 GIFGALHTYGRRLNWHPHVHLSVTAGGLDE-QGVWKN-----L-----SFHKEALRRRWMW-----LVRDYLLGQPLS--QLT-MPPPLAHILCESDWRRLILAAGGQHWHIHL---SKKTKNGRKTVNYLGRYLKKPPISGSRL 4 | IS91.1|IS91|None|Transposase|Escherichia/142-264 GIFTVIHTWGRDQQWHPHIHLSTTAGGVTS-GHTWKN-----L-----HFYARKVMSMWRY-----RITRLLSRKYPE--LVI-PDELAVEGNSKRDWNCFLDTHYRRGWNVNI---SRVMDNATHVAVYFGSYLKKPPVPMSRL 5 | ISAzo26.2|IS91|None|Transposase|Azoarcus/144-272 GFIAILHTWGQNMLHHPHLHCVVPGGGVSADGERWIA-----CR-RGFFLPVRVLSRLFRH-----LFLKRLQQAFDAGELHF-FNALAAL-HAPAVFARHLARARRSEWVVFA---KPPFGGPQQVLEYLGRYTHRVAISNNRL 6 | ISCARN110.2|IS91|None|Transposase|Metagenomic/140-246 
GAVTVLHTHSRRLDYHPHAHLVMPAAAFDNKQRRWRN-----KD-GGYLFDHKALAKVFRA-----KMLAGI-KQAG----------------------LKLPDAYPTEWVVDC---KD-VGSGHQALVYLGRYLYRGVIQEKDI 7 | ISEc37.1|IS91|None|Transposase|Escherichia/142-264 GIFMVIHTWGRDQQWHPHIHLSTTAGGVTS-GHTWKN-----L-----HFYARKVMSMWRY-----RITRLLSRKYPD--LVM-PDALAAEGSSKREWNRFLDTHYRRGWNVNV---SRVMDNATHVAVYFGSYLKKPPVPMSRL 8 | ISFn1.1|IS91|None|Transposase|Fusobacterium/38-155 GLITVIHTFGRDLKWNPHVHALISLGGFNK-RFVWKK-----LD----YFHVDVIANQWKF-----IVLQLIQSGNYQ--DPI-WKEKAKQVA---------NKLYKENARLFFSVGRQEVNSAEGLLKYLGRYLARAPIADYKI 9 | ISMbu9.1|IS91|None|Transposase|Methanococcoides/152-267 GAIVVLHPFSKDMGFNPHLHILLTEGRFDM-YGKFIH-----QK----FIHFKTMRKTWQY-----QVLTRFKDALSK--VQA-----------FSHLVNQLFKEYPDGFYVHMPKNSR-ITNKQRMAQYVGRYIRHPAIANSRL 10 | ISMno23.2|IS91|None|Transposase|Methylobacterium/144-272 GLTAVLHTWGSAMTHHPHVPVIVPGGGLSPDGARWIA-----CR-PGFFLPVRVLSRLFRG-----LFLDRLSQAHRAGRLTF-GAELSRL-AEPMAFAALLAPLRRAEWVVYA---KPPLAGPEAVLTYLSGYTHRVAIANSRL 11 | ISMno24.1|IS91|None|Transposase|Methylobacterium/142-270 GLTAVLHTWGSAMTHHPHVHIVVPGGGLSSDGSRWIA-----CR-PGFFLPVRVLSRLFRR-----LFLTRLAEAHRVGRLGF-HGELAPL-AEAGAFAAHLAPLRRAEWIVYA---KRPFAGPEAVLAYLSRYTHRVAIANSRL 12 | ISPda2.1|IS91|None|Transposase|Photobacterium/143-266 GLFCVVHTFGRQLNWNVHFHLSVTRGGVNLKTKRWGN-----I-----YFNAAMVEQHWKQ-----QVVSFLRDHYNA--LNT-KHDNYQHIRDFHGWSQFLSSQYSRKWRIHL---AKKTEDVGPTVRYLGRYFKRPPIAASRL 13 | ISPps1.1|IS91|None|Transposase|Pseudomonas/174-304 GAIAFIHRFGSSLNEHVHFHVCVVDGVFEEVEGEGDADATPRISSPGVIFHAATGIDAATVAPVQTTLQKRILRAFVARGLLENCDAKDML------------GYKHSGFSVDAG--VCIEAHDRAALERLLRYCARPPFSMERL 14 | ISPsy3.1|IS91|None|Transposase|Pseudomonas/154-275 GVFCAIHTYGRRLNWHPHIHVSVTLGGIDD-AGVWKD-----L-----SVHPSALRRRWMW-----NVRQYLLSQW-E--HTT-VPPENAHLQSENDWRHLVLNAGGQHWHIHL---SKKTKNGKKTVNYLGRYLKKPPISGSRL 15 | ISSbo1.1|IS91|None|Transposase|Shigella/147-269 GIFCAIHTYGRRLNWHPHVHVSVTCGGLNK-HGQWKK-----L-----SFLKDSMRSRWMW-----NMRQLLLKAWSE--GLA-MPESLSHITTESQWRSLVLKAGGKYWHVYM---SKKTAGGRNTARYLGRYLKKPPIAASRL 16 | 
ISSde12.2|IS91|None|Transposase|Shewanella/139-246 GFTGVLHTHNRRRDLHPHIHFIIPAGSFDKDRKQWHK-----SK-GKYLFNNVNLASVWRA-----KLLEQLTKKFN----------------------IKLPSATPKKWVVDC---QH-VGKGLPALKYLSRYLYRGVLPDKSI 17 | ISShvi3.2|IS91|None|Transposase|Shewanella/139-246 GFTGVLHTHNRRRDFHPHIHFIIPAGSFNKDKKQWRK-----SK-GKYLFNAFNLATVWRA-----RLLELLTNKLG----------------------IKLPEHLPNKWVVDC---QH-VGKGLPALKYLSRYLYRGVLPDKCI 18 | ISSod25.2|IS91|None|Transposase|Shewanella/141-250 GCTMVLHTWGQTLSQHIHLHCLIPGGVVTSAGDWHGV-----KT--DYLFPVKALSTVYRA-----KLLRALRQHEL------------------AI--PAAETLMAKPWCVYS---KACLSRAETVVEYLGRYTRKGMLHESRI 19 | ISTha3.1|IS91|None|Transposase|Thauera/140-265 GFSLVLHTWSQDLRVHLHVHALITCGGLDAEGRWRIP-----ARGTHFLFPIQAASKVFRG-----KFLARLDAAHRSGELPD-D----PQ-GAPGAWQDRRRALLVHDWVVYA---KPPPGGPAQVLDYLARYTHRVALSNDRL 20 | ISVa3.1|IS91|None|Transposase|Vibrio/158-280 GIFGALHTYGRALNWHPHIHLSVTRGGLDK-HNTWKP-----I-----YFKKKHVEYHWRR-----TLIRLLRQKYDD--LNL-STSTTHHIQYYRQWNFFLERHYQRYWNIHF---AKKTKVLKQTVNYLGRYLKRPPISASRL 21 | ISVsa10.1|IS91|None|Transposase|Aliivibrio/140-262 GCVLALHTWARNLTFHPHLHCLITHGGLSDDG-WVEP-----K--KSILFPARVMMKLFRG-----KFIAALRAAMNKGELNY-PDTLSKQ-DVLNLFN----KWGVLDWVVHC---AKPYSHGAGVVKYLARYVRGGAIKNSQI 22 | ISVsa21.1|IS91|None|Transposase|Aliivibrio/142-264 GIFAALHTYGRKLNFNCHIHLALAELGLDK-HENLKK-----F-----SFKFASLMKQWRY-----GIIKLLRDNYDR--LIF-PSELSDEAKNTQSWNGFLNTQYNRHWNVNI---AKKTSHKKHTAKYLGSYVKKPPIAASRL 23 | ISVsa3.1|IS91|None|Transposase|Vibrio/32-147 LTHTIAHRVGRYLERQGLLERD-VENSYLASDAVDDDPMTPL-LGHSITYRIAVGSQAGR-------------KVFTLQTLPTSGDPFGDG------------IGKVAGSSLHAG--VAARADERKKLERLCRYISRPAVSEKRL 24 | ISVsa9.1|IS91|None|Transposase|Aliivibrio/142-265 GIFCALHTYGRKLNWNTHLHLSVTRGGICERTGLWKP-----I-----YFQMKTTEPCWRA-----AIVSLLGKAYYE--LDL-SSEECPYIRNKTDWSRFLSSQYNRRWKLHF---AKKTNNVKPTMNYLGRYLKRPPISASRL 25 | ISWz1.2|IS91|None|Transposase|Weeksella/152-265 GMIAVLHTWGQNLSLHPHLHCIVPGGGVDESGRWKNL-----RSDGKFLFPVKALSKVFRA-----KFCEKLKDKNL------------------KEYTKIRQNLWEKPWVVYA---KKPFGSPKSVVEYLGRYTHKIAISNQRI 26 | 
// 27 | -------------------------------------------------------------------------------- /data/models/msa/ISAzo13/ISAzo13_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISArch16.1|ISAzo13|None|Transposase|Uncultured/180-329 PVISVDTKKREIVGDFKNPGRTWRKKGDAKEVNVYDFPSLG-IGVAIPYGAYDINRGEGFVNVGISRDI---SEFAVESIRQWWKVSGKKHYPDARELLICADGGGSNGSRRRGWKFFLQELTDQIGIPISVCHFPPGTSKWNKIEHCMFSFIS 3 | ISArch20.1|ISAzo13|None|Transposase|Uncultured/187-336 PVISVDTKKKELVGNFKNDGQEWEPKGMNEEVNTYDFLSMA-IGKGIPYGVYDPGQNKGWISVGIDHDT---STFAVETIRRWWYSMGRKLYPNAQQLLITADGGGSNGYRVRLWKMELQNLANELGFPISVCHFPPGTSKWNKIEHHMFSFIT 4 | ISAzo13.1|ISAzo13|None|Transposase|Azoarcus/186-335 PVISVDTKKKELVGAYKNAGREWEKAGRPVEVKVHDFIDPD-LGRANPYGVYDVGADQGWVSVGTDHDT---AAFAVESIRRWWFAMGQALYPGARELTITADGGGSNGSRVRLWKQELQTLANDLGFPVRVCHFPPGTSKWNKIEHRLFSFIS 5 | ISCap1.1|ISAzo13|None|Transposase|Candidatus/184-333 PVISVDTKKKELIGDFKNGGKEWQPKGKPEEVRVHDFIDRE-LGKVAPYGVYDLTTNTGWVSVGIDHDT---AEFAVESIRRWWREMGQVTYPNARRLLITADGGGSNGYRVRLWRRELQKLADELRLTVQVCHLPPGTSKWNKIEHRMFCHIT 6 | ISCARN37.1|ISAzo13|None|Transposase|Metagenomic/188-337 PVISVDTKKKELVGQFRNGGKEWSPVGEPVQVKVHDFVDPE-LGRASPYGVYDIGADQGWVSVGTDHDT---ATFAVQTIRRWWYAMGKPRYPKARELTITADGGGSNGHRVRLWKLELGRFAQEAGLNIRVCHFPPGTSKWNKIEHRMFSFIT 7 | ISCARN84.1|ISAzo13|None|Transposase|Metagenomic/181-330 PVISVDTKKKELVGNYKNAGQEWRPQGEPEAVQVHDFVDTE-LGRANPYGVYDLAQNAGWVSVGTDHDT---ASFAVATIRRWWLGMGQPLYPAAKELMIMADGGGSNGSRVRLWKLELQGLADELNLPIRVCHFPPGTSKWNKIEHRLFSYIS 8 | ISCasp1.1|ISAzo13|None|Transposase|Calothrix/185-334 PVISVDTKKKELIGNYKNSGVEWEPSSSPTQVKVHDFVDPE-LGKAIPYGVYDLQHNQAWVSVGIDHDT---ASFAVETIRNWWFNMGKPLYPDSQHLLITADCGGSNSYRSRLWKLKLQEFAEQTGLTVHVCHFPPGTSKWNRIEHRLFCHIS 9 | ISCfu1.1|ISAzo13|None|Transposase|Cystobacter/177-326 PVISVDTKKKELVGDFKNAGQEWQPKKTPEKVQVHDFPDDA-IGKAIPHGVYDMARNEAWVSIGLDHDT---PAFAVASIRQWWRTMGLQSYPHATELLITADAGGSNGYRPRAWKLELQRFADETGLHISVCHFPPGTSKWNKIEHRLFCHIT 10 | ISCph11.1|ISAzo13|None|Transposase|Chlorobium/187-336 
PVISVDTKKKELVGAFKNNGTNWRPQGEPEEVKVHDFIDKD-LGRANPYGVYDIGSNTGWVSVGTDHDT---ASFAVETIRRWWYTMGKPRYADATKIMITADGGGSNGSKVRLWKLELQKLADELRMSIHVSHLPPGTSKWNKIEHRLFSYIS 11 | ISCph5.1|ISAzo13|None|Transposase|Chlorobium/188-337 PVISVDTKKKELVGAYKNNGTNWRPQGEPESVKVHDFIDKE-LGRANPYGVYDVASNTGWVSVGTDHDT---ASFAVETIRRWWNTMGKPRYPKASKLMITADGGGSNGNRVRLWKLELQKLADEIHIPVHVAHLPPGTSKWNKIEHRLFSYIS 12 | ISDpr7.1|ISAzo13|None|Transposase|Delta/184-333 PVISVDTKKKELVGDFKNAGREWQPKGQPDSVRVHDFADKN-LGKVNPYGVYDQTANVGWVSVGIDHDT---SSFAVESIRRWWKRMGQQTYPEAKQLLITADGGGSNGYRIRLWKIELQRFADDQGLEISVCHLPPGTSKWNKIEHRMFSHIS 13 | ISDpr8.1|ISAzo13|None|Transposase|Delta/186-335 PVISVDAKKKENIGQYKNNGMEWEPAGQPTNVNTYDFPDKE-KGKACPYGIYDLTRNEGWVSLGISRDT---AQFAVESIRRWWSEMGCFRYPVATRLLITADGGGSNGWRVRLWKKEIQSLANELGISISICHFPPGTSKWNKIEHQMFSFIS 14 | ISDpr9.1|ISAzo13|None|Transposase|Delta/184-336 PVISVDTKKKELIGRFTNGGYEYQPKGKPEEVETYDFPSMA-DGKGIPYGVYDMGKNMGWVSVGTDHDTADTAQFAVHTIRQWWFQMGLEAYPRSKELLITADGGGSNGSRNRLWKYELQRFADETQLAVNVCHFPPGTSKWNKIEHQMFSHIT 15 | ISDssp1.1|ISAzo13|None|Transposase|Desulfobacterium/184-333 PVISVDTKKKELVGDFRNNGSELHPKGNPEKVRVHDFMIPS-LGKVAPYGVYDQTQNSGWVNIGTDHDT---AAFAVESIRRWWNIMGNEVHPKAHQLLITADGGGSNGSRIRLWKTELQKFSNETNLEVSVCHFPPGTSKWNKIEHRLFSYIS 16 | ISDssp2.1|ISAzo13|None|Transposase|Desulfobacterium/185-334 PVISVDAKKKEKIGEFKNGGQEWEKKGSPAEVNVYDFVDPV-LGKVTPYGVYDITTNKGWVNVGIDHDT---SEFAVESIRRWWREMGSPLYPNANQLLITADGGGSNGSRIRLWKLELQKLADELGMTINVCHFPPGTSKWNKIEHKMFSFIS 17 | ISDssp3.1|ISAzo13|None|Transposase|Desulfobacterium/181-330 AVISVDTKKKELVGQYKNNGKELHPMKSPEKVNVHDFENKK-LGKVAPFGIYDLTQNTGFINLGTDADT---SAFAVESIRKWWFTMGKPLYPNSNKLLITADSGGSNGYRRKLWKTELQKLSSEIGCSISVCHLPPGTSKWNKIEHRLFSFIS 18 | ISFsp12.1|ISAzo13|None|Transposase|Frankia/174-323 PVVSVDTKKKEVLGDYAVAGREWHRAGQPVRVRAHDFPEKN-AQKAVPYGIYDLTADTGWVSVGCDGDT---AAFAVATLRRWWDGEGRHRYPTATRLLITADAGGANGYRVRAWKKELADLARTANLQITVCHFPPGTSKWNKIEHRLFSRIS 19 | ISFsp13.1|ISAzo13|None|Transposase|Frankia/181-330 
PVVSVDTKKKEVLGDYAVAGREWHRKGQPVRVRAHDFPEKG-AQKAVPYGVYDLAADTGWVSVGCDGDT---AAFAVATLRRWWDGEGRHRYPTATRLLITADVGGANGYRVRAWKKELADLARTTGLQITVCHFPPGTSKWNKIEHRLFSRIS 20 | ISKra3.1|ISAzo13|None|Transposase|Ktedonobacter/180-329 PVVSVDAKKKELVGEFKNPGREWGPQGQPEEVRVYDFPIPG-LGRATPYGVYDLGQNAGWVNVGMDHDT---AAFAVESIRRWWKTVGQHQYPDAKRLLISADGGGSNGSRVRLWKWELQQLANETGLAITVCHLPPGTSKWNKIEHRLFAWIS 21 | ISLsp4.1|ISAzo13|None|Transposase|Leptospirillum/190-339 PVISVDTKKKELIGPFKNNGRTWRPQGEPEEVKVHDFIDKE-LGRANPYGVYDLAQDEGWVSVGTDHDT---SAFAVQTIRRWWQSVGSESYPHATELLITADGGGSNGSRVRLWKVEIQKLADEIGIPITICHFPPGTSKWNKIEHRLFSFIS 22 | ISMae28.3|ISAzo13|None|Transposase|Microcystis/185-326 LRLSLDAKARVNIGLFDRGGKNRI----TVETNDHDFNPKT---TLTPYGIFIPEFDELFLYFTASTVT---SDFIVDILEDFWESE-KSRFEKIKTLIINQDNGPENNSRRTQFMKRIVEFSQKYQVNIRLAYYPPYHSKYNPIER-TWAVLE 23 | ISMco6.1|ISAzo13|None|Transposase|Methanosaeta/180-329 PVISVDAKKKELVGNYKNSGRTWRQKGQPELVNVYDFPSKA-IGAAIPYGVYDPKRNEGVVNIGKSHNT---AEFAVESIRQWWNLVGKYRYAGCKNLLICADGGGSNGSRNRGWKFFLQQLADEIGITITVSHFPPGTSKWNKIEHCMFSFIS 24 | ISMfu1.1|ISAzo13|None|Transposase|Myxococcus/181-330 PVVSVDTKKKELVGEFKNGGREWQPAGAPVLSLTHDFPDTA-VGKAIPYGVYDIGDNSAWVSVGVDHDT---PVFAVNSLGAWWRKMGQERYPEAKELLVTADSGGSNSARSRVWKAELQRLADATGLCISVCHFPPGTSKWNKVEHRLFSHIS 25 | ISMme1.1|ISAzo13|None|Transposase|Mesorhizobium/182-331 PAISVDTKKKELVGDFKNNGREYRPKGDPEKVRVHDFKIPE-LGRAAPYGVYDIADNAGWVSVGIDHDT---ASFAVNSIRRWWQTMGQQRYPNARQLLISADGGGSNGSRVRLWKIELQSLANELDIDITVCHLPPGASKWNKIEHRLFSFIT 26 | ISNpu10.3|ISAzo13|None|Transposase|Nostoc/188-337 PVISVDTKKKELIGDFKNSGTEWCEKEQPIEVKMHDFVDPK-LGKAIPYGIYDLTSNQGWVNVGIDHDT---AEFAVESIRHWWYSMGKQVYPSSEHIMITADCGGSNSYRSRLWKLKLQELATDTGKTIHVCHFPPGTSKWNKIEHRLFCHIT 27 | ISNsp4.1|ISAzo13|None|Transposase|Nostoc/188-337 PVISVDTKKKELIGDFKNSGTEWCEKARPIEVKMHDFVDPK-LGKAIPYGIYDLTSNQGWVNVGIDHDT---AEFAVESIRHWWYSMGKEVYPKSQHIMITADCGGSNSYRSRLWKLKLQELATETGKTIHVCHFPPGTSKWNKIEHRLFCHIT 28 | ISRisp1.1|ISAzo13|None|Transposase|Rivularia/187-336 
PVISVDTKKKELIGNFKNPGTEWCDSEQPVEVRMHDFVDPN-AGKAIPYGIYDLTLNKGWVNVGIDHDT---AEFAVESIRHWWYSMGNLLYPKSEHIMITADSGGSNSYRSRLWKLKLQEFATEIGKSIHVCHFPPGTSKWNKIEHRLFCHIT 29 | ISRop2.1|ISAzo13|None|Transposase|Rhodococcus/182-332 PVISVDSKKKEQLGQLPTPGREWRPQGDPVRVVDHSFFTGPNADRAIPYGVYDLTTDAGWVNVGVDHDT---AAFAVASIRRWWQARGAADYPHARRLLITADAGGSNSYRYRLWKAELAALATETGLAITVCHFPPGTSKWNKIEHRLFSQIT 30 | ISSaci1.1|ISAzo13|None|Transposase|Streptomyces/172-321 PVISVDAKKKETLGSYAVIGREWHRAGQPVQVRAHDFPEKG-AQKAVPYGIYDIGADTGWVSVGCDGDT---SAFAVATLRRWWNGEGRRRYPHASRLLVTADAGGSNGYRVRAWKKELADFAYETGLEVTVCHFPPGTSKWNKIEHRLFSQIS 31 | ISScl2.1|ISAzo13|None|Transposase|Streptomyces/183-332 PVISVDTKKKEVVGPLKNGGREWRPAGDPERVSTHDFPDRE-LGKAVPYGIYDLAANTGWVSVGTDHDT---AAFAVESIRRWWKARGTQDYPQARRLLITADAGGSNGYRTRAWKAELASLALETGLHITVCHFPPGTSKWNRIEHRLFSHIT 32 | ISSiac3.1|ISAzo13|None|Transposase|Singulisphaera/186-335 PAISVDTKKKELVGDFKNGGREWHPQGEPEEVRVHDFLDKT-LGKAIPYGVYDMVNDQGWVSVGIDHDT---AQFATHSIRRWWQEMGRERFPRATELLITADGGGSNGHRTRLWKVSLQALADDLGLMLSVSHFPPGTSKWNKIEHRLFSFIT 33 | ISStau11.1|ISAzo13|None|Transposase|Stigmatella/183-332 PVISVDAKKKELVGDFKNAGREWNPKGQPPRVRVHDFVDEE-LGKALPYGVYDMGANEGWVSVGVTHDT---PAFAIATIRTWWLEMGSVRYPKAKELLIIADSGGSNSARARLWKVELQHLADEMGLRMAVSHLPPGTSKWNKIEHRMFCHIT 34 | ISStau6.1|ISAzo13|None|Transposase|Stigmatella/184-333 PVISVDTKKKEWVGEFKNGGREWQPKGAPVLALTHDFPDTA-EGKVIPYGVYDVGTNSAWVSVGVDHDT---PVFAVNSMAAWWSKLGKACYPEAKELLVMADSGGSNSAKSRVWKAQLQKLADATGLSISVCHFPPGTSKWNKVEHRLFSHLS 35 | ISStsp1.1|ISAzo13|None|Transposase|Streptomyces/183-332 PVISVDTKKKELVGDFKNSGRQWRPAGEPVPVSVHDFADPQ-LGKAVPYGIYDLTANTGWVNVGTDHDT---AAFAVESIRRWWHGQGQAAYPRATRLLITADAGGSNGYRTRAWKLELAQLAAETGLTITVCHLPPGTSKWNKIEHRLFSHIT 36 | ISStsp2.1|ISAzo13|None|Transposase|Streptomyces/153-303 PVISVDTKKKELVGPYKNGGREWEPKGAPAQVKTHDFLDRQGPGKAIAYGIYDVAANTGWVSVGTDHDT---AAFAVASIRRWWQARGSHDYPQAARLLITADAGGSNGYRTRAWKTELATLAAETGLEITVCHMPPGTSKWNKIEHRLFSHIS 37 | ISStu2.1|ISAzo13|None|Transposase|Streptomyces/185-334 
PVVSVDTKKKEVVGEFKNAGRQWRPAGEPVRVDVHDFPGDA-LGKALPYGIYDLAANTGWVNVGTDHDT---AAFAVESINSWWNGQGRLDYPQARRLLITADAGGSNGYRTRAFKTELAAFAARTGLAVTVCHMPPGTSKWNKVEHRLFSGIT 38 | ISStvi1.1|ISAzo13|None|Transposase|Streptomyces/186-335 PVIRVDTKAKEWLGNRDRPGRTWRPGKSAIKVDCHTFTTND-QPMAIPYGIYDIANNSGRVNLGTDHDT---AQFAVESIRRWWQHRGRADHPNATRLLITADSGGSNDPRRWTWKSNLATFARESGLEITVCHLPPGTSKWNKIEHRMFCHIT 39 | // 40 | -------------------------------------------------------------------------------- /data/models/msa/ISH3/ISH3_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISC1200.1|ISH3|None|Transposase|Sulfolobus/75-249 LSIDWTTKTWYGKPVKG-----------SSEKGNSWNYAT--TKYKGK-VLL-LAFIP----QVNGMTKD--EIVKVLVEQVMAMGFKIRLITLDAGFYTVDVLNFISQFK--YIIAVLVGDV-KVYEEFD--------GDYTTN---SKRHRRDE------QVKFRF---LVYSKEEVRR--KSLVYFARATNLDLSKR--EVLDLYNKVRGPIETSYRNIK 3 | ISC1225.1|ISH3|None|Transposase|Sulfolobus/120-300 ISIDWTTKTWYGKPVEG-----L----GSSAKGNSWNYATATTKYQNM-VLL-LAFVP----QVNGMSKD--EIVKLLMEQIVGMGFKVGLVTLDAGFYTVEVLKFISQFK--FVIGVPVGDV-KIYEEFD--------GEYTTN---SKRHKKEE------QVKFRL---LVYGKEIVKKRKKTVVYFARATNLDLPKR--EVLKLYNKVRSPIETSYRNIK 4 | ISC1359.1|ISH3|None|Transposase|Sulfolobus/91-280 VAVDFHAIPQYHADK-S-----FLSR-IKPTKGTSWGLVQAAIFLLGR-TRSFLDVIP---VTVKNVAEGFKAVMEVIVKELEEDKLRLVMVFADREFAVNEVIRFLLELGLDFVISAKAQMY-KKYKGMLQDVDVSFGGVRYTG---FLCVRHGS------GA---Y--LIIL-----RKEDGKIIAFLVRKEMDLY--DAIVLAEMYRERWGIENAFRSLE 5 | ISC1439A.1|ISH3|None|Transposase|Sulfolobus/84-268 VAVDETGLTVGEKEQEKAEG-------FLLYNWKRKGVKMRSLDLVYP---LRLPLLVEVADLRSDSPSQ--FLLRSVRE--VSQYMEIDYVVADAGFLNLGVIKEM---PVKTIVRGKSNLKGFK--E-LSNV--------PLVEKRYEVKDKVYVAYRVLKFEGLYYYDVVYVKGKP-------RHFMFVT---NFEGDPYELAELYRLRWQVEEGFKVRK 6 | ISC1439B.1|ISH3|None|Transposase|Sulfolobus/84-268 VAVDETAITVGEKESRKVEG-------FQLYNGKRKGVKLRSVDLVFP---LRLSLLEEIADLRSDTPSQ--FLMRAVSE--VAQHLKIDYVVADAGFLNLKVIKEM---PVKTVVGGKTNLKGFK--E-LSNV--------PLTEKKCEVNDRVYVAYRVLKYNDLYYYEVIYVKEKP-------RHFIFVT---NFNGDPYKLAELYRLRWQSEEGFKVRK 7 | 
ISFac1.3|ISH3|None|Transposase|Ferroplasma/113-311 VAIDEHDEPYTGKDNPY-----LIDAPFHRFRGTDMAYRFATLDCVDN-NRFTLAAMVKHPLDVINNAKE----VRMVIEHALSLGIKVNTVLMDRGYLDSSVMDAVDSMKLKYIIPAKDNHKVIKFKKM----DLKYCKDVYGNEFPFMVLRDNIGSSKRVNANFVH--IVYY------SHKKH--DFSFYTNINVNENNVIELAELYRKRWGIENGYQERK 8 | ISH20A.1|ISH3|None|Transposase|Halobacterium/108-297 VCADLHLDPYYGDEDET-EA--LYF--SQAKRGTTTFHAYATLYARVPNKRYTLAVRQ----LVAGDTTS--DVLTEFLELLNGLDLGVKAVYLDRGFYNSTCLGLLYAHNYAYVMPIVKWGETIQ--DELSR-GWSREIEHDLA---GE-------------VAFPVFIDCVYQQGRYDEHGVARHGYAADA---PFIDTPRDAREHYSKRFGIESSYRLAK 9 | ISH27-1.1|ISH3|None|Transposase|Halobacterium/108-304 VVSDLHLRPYYGDEDGT-DG--LYH--SQAKRGTTAFHAYATLYARVKNKRYTLAVRR----LEDGDTAS--SVLAEFLGILRRLDLDVKAVFLDREFYDSKCLTLLQAHNHAYVMPIVRWGQTIK--QELSE-GWSRVIQHDMT---AKLDGHSW------TVEFLVYIDCTYQNGRCDEHGVARHGYAADA---PFIESPRDARYHYAKRFGIEGSYRLSE 10 | ISH27-2.1|ISH3|None|Transposase|Halobacterium/108-304 VCADLHLRPYYGDEDDT-EN--LYH--SEAKRGTTAFHAYATLYARVKNKRYTLAVRR----LEDGDTAS--SVLAEFLGVLDGLDTDVKAVYLDRGFYDSKCLTLLQTHNYAYVVPIIRWGEAIQ--QELSE-GWSRVIQHDLT---GKLDGHSW------TVEFPVYIDCTYLNGRYDEHGVARHGYAADA---PFIENPRDARYHYSKRFGIESSYRLSE 11 | ISH27-3.1|ISH3|None|Transposase|Halobacterium/108-304 VCADLHLRPYYGDEDDT-EN--LYH--SEAKRGTTAFHAYATLYARVKNKRYRLAVRR----LEDGDTAS--SVLAEFLGVLDGLDTEVKAVYLDRGFYDSKCLTLLQAHNYAYVVPIIQWGEAIQ--QELSE-GWSRIIQHNLT---GKLDGHSW------TVEFPVYIDCTYLNGRYDENGVARHGYAADA---PFIETPREARYHYSKRFGIESSYRLSE 12 | ISH3B.1|ISH3|None|Transposase|Halobacterium/108-304 VVSDLHLRPYYGDEDGT-DG--LYH--SQAKRGTTAFHAYATLYARVKNKRYTLAVRR----LEDGDTAS--SVLAEFLGILDGLDLDVKAVFLDREFYDSKCLTLLQAHNHAYVMPIVRWGQTIK--QELSE-GWSRVIQHDMT---AKLDGHSW------TVEFLVYIDCTYQNGRCDEHGVARHGYAADA---PFIESPRDARYHYAKRFGIEASYRLSE 13 | ISH3C.1|ISH3|None|Transposase|Halobacterium/108-304 VCADLHLRPYYGDEDDT-EN--LYH--SEAKRGTTAFHAYATLYARVKNKRYTLAVRR----LEDGDTAS--SVLAEFLGVLDGLDTDVKAVYLDRGFYDSKCLTLLQTHNYAYVVPIIRWGEAIQ--QELSE-GWSRVIQHDLT---GKLDGHSW------TVEFPVYIDCTYLNGRYDEHGVARHGYAADA---PFIENPRDARYHYSKRFGIESSYRLSE 14 | 
ISH51.1|ISH3|None|Transposase|Haloferax/108-304 VCADLHLRPYYGDEDDT-DG--LYH--SQAKRGTTAFHAYATLYARVKNKRYTLAVRR----LEDGDTAS--SVLAEFLGILDGLDLGVKAVYLDREFYDSKCLTLLQAHNHAYVMPIVRWGQTIK--RELSE-GWSRVIQHSLT---AKLDGHSW------TVEFPVYIDCTYQNGRYDEHGVARHGYAADA---PFIDSPRDARYHYAKRFGIEASYRLSE 15 | ISHla1.1|ISH3|None|Transposase|Halorubrum/108-304 VCADLHLRPYYGDEDDT-DG--LYH--SVAKRGTTAFHAYATLYARVKNKRYTLAVRR----LKDGDTAS--SVLAEFFGVLDGLDAGVKAVYLDRGFYDSKCLTLLQAHNYAYVIPIIRWGEAIQ--QELSE-GWSRVIQHDLT---GKLDGHSW------TVDFPVYIDCTYLNGKYDENGVARHGYAADA---PFIDSPRDARYHYSKRFGIESSYRLFE 16 | ISHla11.1|ISH3|None|Transposase|Halorubrum/108-297 VVADLHLDPYYGDEDET-EA--LYS--SQAKRGTTAFHAYATLYARVRNKRYTLAVRQ----LVAGETTS--DVLAEFLELLDGLDLGVKAVYLDRGFYNSTGLKLLYAHNYAYVMPIVKWGETIQ--DELNS-GWSREIEHDLA---GE-------------VTFPVFIDCVYQQGRCDEHGVARHGYAADA---PFIDTPRDARNHYSKRFGIESSYRLAK 17 | ISHla8.1|ISH3|None|Transposase|Halorubrum/108-301 VSIDFIDNPYHGEHHAE-KG-ELCS--MAPKDGTTTCHRYCTAYVVSNGKPVTLAMTY----VRSDEDEA--DAVERVLARVENYPFEIDLLLADSGFYNERVIRRARD-IAPTVVHVPKKGERMK--DKLET-HKSYMTTYRM------YKDSER------ELRFPLAVAVSYQNGDRGKHGEVVRGYVACG---VTDRSAKQVEHRYRKRSGIETTYRLLR 18 | ISHla9.1|ISH3|None|Transposase|Halorubrum/108-301 VSIDFVDNPYHGTYADE-SG-ELCR--MAAKDGTTTCHRYCSAYLVSNGKPVTLAMTY----VRSDESEA--DAVERVLDRVEAYPFDIDLLLADRGFYNERILRRSHD-IAATVVPVQKKGKRMK--KKLDT-HCSYMTTYRM------YKDCKR------ELKFPLAVAVSYQAGDRGKSGEVVRGYVACD---LADRTPKRVEQLYRKRSAIETSYRVFR 19 | ISHli5.1|ISH3|None|Transposase|Halohasta/108-297 VVADLHLDPYYGDEDET-VA--LYS--SLAKRGTTTFHAYATLYARVRNKRYTLAVRQ----LVAGETSS--DVLSEFFELLDGLDLGVKAVYLDRGFYNSTCIGLLYAYNYAYVIPVVKWGDTIK--DELSS-GWSRVIEHELA---GK-------------VTFPVYIDCVYQQGRYDENGVARHGYAADA---PFIETPRDAREYYRKRFGIESSYRLAR 20 | ISHvo20.1|ISH3|None|Transposase|Haloferax/108-304 VCADLHLRPYYGDEDDT-DG--LYH--SQAKRGTTAFHAYATLYARVKNKRYTLAVRR----LEDGDTAS--SVLAEFLGILDGLDLGVKAVYLDREFYDSKCLTLLQAHNHAYVMPIVRWGRTIK--RELSE-GWSRVIQHSLT---AKLDGHSW------TVEFPVYIDCTYQNGRYDEHGVARHGYAADA---PFINSPRDARYHYAKRFGIEASYRLSE 21 | 
ISHvo21.1|ISH3|None|Transposase|Haloferax/109-305 VVSDLHLRPYYGDEDDT-DG--LYH--SQAKRGTTAFHAYATLYARVKNKRYTLAVRR----LEDGDTAS--SVLAEFLGILDGLDLGVKAVYLDREFYDSKCLTLLQAHNHAYVMPIVRWGRTIK--RELSE-GWSRVIQHDLT---AKLDGHSW------TVEFPVYIDCTYLNGRYDEHGVARHGYAADA---PFIDSPRDARYHYAKRFGIEASYRLSE 22 | ISHvo22.1|ISH3|None|Transposase|Haloferax/108-304 VCADLHLRPYYGDEDDT-DG--LYH--SQAKRGTTAFHAYATLYARVKNKRYTLAVRR----LEDGDTAS--SVLAGFLGILDGLDLNVKAVYLDREFYDSKCLTLLQAHNHAYVMPIVRWGQSIK--QELSE-GWSRVIQHSLT---ARLDGHSW------TVEFPVYIDCTYQNGRYDEHGVARHGYAADA---PFIDSPRDARYHYAKRFGIEASYRLSE 23 | ISHwa12.1|ISH3|None|Transposase|Haloquadratum/117-309 ICIDFMNNPFHGCPDDE--D-EFRR--MSARDGTTKCHRYCTAFVLAQGKPLTLAVDP----VDGKDSKA--DAVERVLARVETYPFEIDQILMDRDAFCGELIGVLRE-AAPPVFPVRTGKETLE--EKLTT-GSSYMTEEII------CEGKEH------EQTYPLAVNVTYQNGDRGKSGLKQTGYAAYG---LEDRTPRQVAQVYNHRAQIEKSYETFR 24 | ISHwa13.1|ISH3|None|Transposase|Haloquadratum/108-301 VSIDFIDNPYHGDHYDE-EG-ELCS--MAPKDGTTTCHRYCTAYVVSNGKPVTLAMTY----VRNDETEA--DAVERVLARVENYPFEIELLLADSGFFNERVIRRSRE-IAATVVHVPKKGDRMK--EKLDV-HKSYMTTYRM------YKDSER------ELRVPLAVSVSYQNGDRNKHGEVVRGYVACG---VTDHTAKQVERIYRKRSGIETTYRLIR 25 | ISMba14.1|ISH3|None|Transposase|Methanosarcina/115-305 IAIDFHDISYYGDKNTP-----GIRG-IKLKNGSSWGKSFCTLDIIGT-SHLTLDVID---I--NSLNKNYSLLIESLFKRLQTIGVKTGTVYLDREFFNTDVIPKLDELKVNFVIAVKSTQV-IN--RELKNHQKEYGNT-STI---FEYQFQKG------GPSFNV--VAIY-----NDEKKKYLLFATNKKAESIEKFEKMIPEEYRKRWNIETGYRVKN 26 | ISMbu7.1|ISH3|None|Transposase|Methanococcoides/103-296 LAIDYTNDPYYGEVNYV-NENYVIR--GQAKKSTNSFYSYISLCIINKYTRFTISMLP----VEKGKTKT--DYLKYFIDVVDKINLKINILCLDREFYSRDVFTFLQENDIPHITPVVRKGKRIK--DILDG-NNKRYEKYVMK-------NKKG------DVGLDIAIDVKYLKGKRGKNGCENLGFVVYG---I-DWEPGKISTIYRKRFTIESSYRMRN 27 | ISMbu8.1|ISH3|None|Transposase|Methanococcoides/60-253 FAIDYTNDQYYGSIDDS-NNRYVIR--GQAKKSTNSFYSYISLCIIDKDERVTISVLP----VEKGNSKT--DYLKYFIDQIKQIKLKINVLCLNREFYSRDVFSFLQENEVAHIVPVVKMGKRLK--EIIDG-TKKRCDTYTMN-------STKG------KVELVLAIDVKYRKGKRGKNGCENLGFVVYG---I-DWDPRKVSDTYRKKFAIESSYRMRN 28 | 
ISMma1.1|ISH3|None|Transposase|Methanosarcina/121-310 IAIDFHDVEYYGCRDTL-----CVRG-IKPKNGTSWGYSFCTLDVIGN-SKLTLDVID---I--NGLSKDYSILMESLFERVEKMGVKVGTVYMDREFFNRKVISKMEKYKVDFVIAAKSNKR-IK--EMLERHRKENRDT-STV---FEYKFQGE------EQTFNI--VAVW-----DKE-KEYSIFATNKKVSSIDTFVKQIPEEYRKRWNIETGYRVKK 29 | ISNamo6.1|ISH3|None|Transposase|Natronomonas/108-301 VSIDFVDNPYHGHPDED-DG-ELCS--SSPTDGTTTCHRYCTAYVVSNGKPVTLALTY----VRSDEQEA--DAVERVLDRVGAYPFAIDLLLADRGFYNGRVIRRGRE-LATTVIPVQEKGERMK--EKLDT-HCSYMTTYRM------FKDSER------ELRFPLAVSVSYHNGDRGKHGEVVRGYVACD---LADRTPTQIERRYRKRSAIETSYRLFR 30 | ISSis6.1|ISH3|None|Transposase|Sulfolobus/96-276 VSIDWTTKTWYGRPVGG-----L----GSSEEGNSWNYATATTKFNGK-VLL-LAFVT----QVKGMTKE--EIVKALVEQVVAMGFKIRLITLDAGFYTVDVLNFISQFK--YIVAVPVGDV-KVYEEFD--------GDYATN---SKRHRRDE------QVKFRL---LVYSKEKVRRKKKSVVYFARATNLDLPKG--EVLDLYNKVRGPIETSYRNIK 31 | ISSto14.1|ISH3|None|Transposase|Sulfolobus/96-276 ISIDWTTKTWYGKQVKG-----L----GTSEKGNSWNHATATTKYGGK-ILL-LAFIT----QVNGMTKE--DIVKALVEQVVAMGLKIRLITLDAGFYTVGVLNFISQFK--YIIGVPVGDV-KVYEEFD--------GEYTTN---SKRKRREE------QVKFRL---LVYSKEKVRRKRREVVYFARATNLDLKKG--DVLKLYNKVRGPIETSYRDIK 32 | ISSto8.1|ISH3|None|Transposase|Sulfolobus/91-279 VAIDLHSQPQYHKDK-S-----LLSR-IKPNKGTSWGLAQIAIFLLNR-KSIFLDVLP---ITVKNIAEDFKMVMQVVLEELDKFDLRLVRVYADREFAVNEVIKFLLGLGVDFVITARYQMY-KKYEDKLRDVDITYCGVRYTG---FLCVRHVS------GA---Y--LVIL-----RKGDG-IIAFLVSGEVDVR--TAVVLAEDYRERWGVENAFRSLE 33 | // 34 | -------------------------------------------------------------------------------- /data/models/msa/ISH6/ISH6_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISH31.1|ISH6|None|Transposase|Halobacterium/179-304 ADTVIPDGTKCHSQDDDRSSHSVQATLGEDTAEES-RSLLDLSVNADWDETAAELDDIGAVTDDATVVSDADSGIVTAFTDEARDHQLDLVHVGRTLGYTLWDDGVFSLDRRKEIVSEVIDEVSHLK 3 | ISH6.1|ISH6|None|Transposase|Halobacterium/180-305 ADAVIPDGTKCHSQDDDRTYHSVQATLGEDTAEES-RSLLDLSVNADWDQTAADLDDIDAVTDDATVVSDADDGIVTAFTDEYSDHQLDLVHVGRTLDYNLWDDGVFSLDRRNEIVSEVIDEVFHLK 4 | 
ISHala2.1|ISH6|None|Transposase|Halobiforma/179-305 ADTVVPDGTKCHSQDDHCTYHDVNVTLGQTTEEDTETTLLDVNLNDSWDETAATLEETEAITDDATVVSDAENALVDAFENGDRSHQLDLVHVGRTLGYKLWKDDAFSLSERKAIVSGVTNDLFHLK 5 | ISHla10.1|ISH6|None|Transposase|Halorubrum/179-304 ADAVIPDGTKCHSQDDDRSSHSVQATLGEDTAEES-RSLLDLSVNADWDETAAELDDIGAVTDDATVVSDADSGIVTAFTDENRDHQLDLVHVGRTLGYTLWDDGVFSLDRRKEIVSEVIDEVFHLK 6 | ISHma5.1|ISH6|None|Transposase|Haloarcula/180-305 ADAVTPDGTKCHSQDDDRSYHSVQATLGEDTAEES-RSLLDLSANAEWDKTAADLDDIGAVTDDATVVSDADDGIVAAFTDEHSDHQLDLVHVGRTLDYNLWDDGVFSLDQRNEIVSEVIDEVLHLK 7 | ISHs1.1|ISH6|None|Transposase|Halobacterium/127-252 ADAVIPDGTKVHSQDDDRTYHSVQATLGEDTAEES-RSLLDLSVNADWDQTAADLDDIDAVTDDATVVSDADDGIVTAFTDEYSDHQLDLVHVGRTLDYNLWDDGVFSLDRRNEIVSEVIDEVFHLK 8 | ISHsa1.1|ISH6|None|Transposase|Halococcus/184-309 ADTVVPDGTKCYSQDDDRDYHDVQVTLAEDTAENS-RSLLDVSVNAVWDETAASLEAMDAITDDARVVSDAENRLVTAFTDGTRKHQLDLSHVPRTLSYKLWDDGALSLDDRREVTSEVAGELFHLK 9 | ISHth1.1|ISH6|None|Transposase|Halococcus/179-304 ADAVIADGTKCHSQDEDRSYHSVQATLGEDTAADS-RSLLDLSVNADWNETAAALDDIDAVTDDAAVVSDADGNIVTAFTDESREHQLDLVHVGRTLGYNLWDDGVFPLERRTEIITEVIDEVFHLK 10 | ISHth2.1|ISH6|None|Transposase|Halococcus/179-303 ADTILADGTKCHSQDDATAYNDVHVTLSQDED-DA-TTLLDVSVDDTWEDTATALDEIETVTDDARVVSDAEEGLTEAFTTEERLHQLDLVHVPRTTSYMLWQDGAFSLDERKQIVSEVANDLFHLK 11 | ISMta1.1|ISH6|None|Transposase|Methanolinea/173-296 MVVAMADGTKTHSQEPGKKQNDIHVVLGVSDV---RKVLLGVTVNQSWKFLSDLVDNEKTLAENAVIVSDGEPELRAAFADEDIQFQTDLIHGFRLLGYKLWEDGKLSLKERKGVITELKKLLLSLK 12 | ISNph3.1|ISH6|None|Transposase|Natronomonas/180-305 ADAVIPDGTQCHSQDEDRSYHSVQATLGENTAEES-RSLLDLSVNADWDETAAELEDIGAVTDDATVVSDGDSGIVTAFTDEHSDHQLDLVHVGRTLDYNLWDDGVFSLDRRNQIVSEVIEDVFHLK 13 | // 14 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_IS1202/ISNCY_IS1202_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | IS1202.1|ISNCY|IS1202|Transposase|Streptococcus/170-288 
IQMDASPHAWFGP--ETTNLHLAIDDASGNILGAYFDKQETLNAYYHVLEQILANHGIPLQMKTDKRTVFTYQASN---SKKMEDDSYTQFGYACHQLGILLETTSIPQAKGRVERLNQTLQSR 3 | ISAba32.1|ISNCY|IS1202|Transposase|Acinetobacter/143-260 IQIDGSYHDWFEGRASKCCLLVYIDDATGKLLHLRFCEAETTFDYMLSTRAYIEQYGKPLAFYSDKHSVFRVNQKS------SQDSQITQFGRILNELNIDIIFANSPQAKGRVERANRTLQDR 4 | ISAjo2.1|ISNCY|IS1202|Transposase|Acinetobacter/139-256 IQIDGSHHDWFEGRAAKCCLLVFIDDATGKLQHLRFCESESAFDYMISTRLYVEQHGKPLAFYSDKHSVFRVNQSS------KKDTKITQFGRVLSTLNIDIIFANSPQAKGRVERANRTLQDR 5 | ISCARN112.1|ISNCY|IS1202|Transposase|Metagenomic/139-256 IQIDGSPHDWFEGRGDYCTLLVFIDDATGELTQLRFAPTETTLGYMHVLHDHILMHGVPAALYSDKHSIFRINTKE------VDPEAETQFSRAARELGIECIHAHSPQAKGRVERANQTLQDR 6 | ISCARN53.1|ISNCY|IS1202|Transposase|Metagenomic/119-236 IQADGSPHRWFEERGGACTLLLYVDDATTTILGGLFAEHETSDGYFELFERAFIEHGLPVALYTDKHSVFRINNPG------SMVDDETHVQRALRELDVELICANSPQAKGRVERANRSLQDR 7 | ISCARN62.1|ISNCY|IS1202|Transposase|Metagenomic/143-260 VQADGSPHRWFEDRGDPCSLLLYIDDATSNALGGLFVDHESTDGYFQLFEQAFTQHGLPMAIYVDKHGVFRINHPG------ATADHETHVQRALRELGVELICANSPQAKGRVERANRTLQDR 8 | ISCARN63.1|ISNCY|IS1202|Transposase|Metagenomic/142-259 MQMDGSPHNWFEGRAPECTLLLAVDDATSRICSARFERTETTDGYFRLTRSHIERYGRFLAAYPDKHSIFRYSGQS------VASDVTTQYQRACNELDVELICANSPQAKGRVERANRTLQDR 9 | ISKpn21.1|ISNCY|IS1202|Transposase|Klebsiella/147-265 IQIDGCDHDWFEGRGPACTALVYVDDATSKLMELLFVKSESTFSYFEATRRYIDKHGKPLALYSDKAGVFRVNNKH-----ATGGDGHTQFGRAMHELNIQTICAETSPAKGRVERAHLTLQDR 10 | ISLad2.1|ISNCY|IS1202|Transposase|Leclercia/139-256 IQIDGSHHDWFEGRAPKCCLLVFIDDATGRLMHLRFSESETAFDYMLATREYIEQHGKPVSLYSDKHAIFRVSGPE------KRNTTVTQFGRVLYDLAIELICANSSEAKGRVERANQTLQDR 11 | ISMex6.1|ISNCY|IS1202|Transposase|Methylobacterium/144-262 IQIDGSKHYWFESRGPECTLLAFIDDATSRIMHAAFVHSESAFDYLRETRAYLLKHGRPTAFYSDKHAVFRVSKRD-----AAGGTGMTQFGRALDELNIDIMCANTPSAKGRIERAFGTLQDR 12 | ISRel10.2|ISNCY|IS1202|Transposase|Rhizobium/145-264 VQIDGSHHWWFENRGPKCGLLVYIDDATGKLLHLRFAGSENTFDYLHATKAYLQQWGKPLAFYSDKHGVFRSTHASE----KDRTSGLTQFGRALYELNIDIICANTPQAKGRVERANQTLQDR 13 | 
ISRel26.1|ISNCY|IS1202|Transposase|Rhizobium/143-261 VQIDGSEHRWFEDRGPPCSLLVFVDDATGRLMQLRFVRSESAFSYFEALALYLRRHGAPVAFYSDKHSVFRVAKKD-----AKGGQGMTQFGRALCELNIEILCANSSQAKGRVERMNRTLQDR 14 | ISSeq2.1|ISNCY|IS1202|Transposase|Streptococcus/171-289 IQMDASQFPWFGQ--QETHLHVAIDDTSGDIVGAYFDTQETLNGYYHVLEQILEVHGIPFQFLTDKRTVFTYASSQ---SKKIEEDTFTQFGYACHQLGIAIETSSIPQAKGRVERLNQTLQSR 15 | ISShha1.1|ISNCY|IS1202|Transposase|Shewanella/142-252 LQMDGSPHRWFGD--SRSCLIAIIDDATSDIH-AEFYPSETTEGCMKVMKAYIEKRGLFKTLYVDRAGIFGGPKRC----------HFSQMQRACEELGIEIIFANSPQGKGRVERCFDTFQDR 16 | ISTde1.1|ISNCY|IS1202|Transposase|Treponema/144-266 LQFDGSHHKWFEQRGPKCCLMNIVDDATGMTQ-SFLTEQETTEAAMRLLWGWIDCHGIPQAVCCDKKNAYVITREPTMSEIIKNVRPKTPFQKACEKLGIQIIVAHSAQSKGRVERNHSVYQDR 17 | ISVbsp4.1|ISNCY|IS1202|Transposase|Vibrio/139-249 LQMDGSPHQWFGE--HRSCLIAIIDDATSDIH-AEFFPSETTEGCMKVMKAYIEKRGLFKTLYVDRAGIFGGPKRS----------NFSQMQRACEELGIEIIFANSPQGKGRVERAFDTFQDR 18 | ISVisp7.1|ISNCY|IS1202|Transposase|Vibrio/142-252 LQMDGSPHRWFGD--EKSCLIAMIDDATSDIH-AQFFPSETTEGCMRVMRAYIEKRGIFKTLYVDRAGIFGGPKRS----------NFSQMQRACEELGIEIIFANSPQGKGRIERSFDTFQDR 19 | // 20 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_IS892/ISNCY_IS892.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | 3 | #=GS IS892.1|ISNCY|IS892|Transposase|Anabaena DE sp. 4 | #=GS ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum DE sp. 
5 | 6 | IS892.1|ISNCY|IS892|Transposase|Anabaena MWNEYNNPRHIRTLNGVVELQLKIRRCQNKSCMRYKKAYRPEQEGSLALP 7 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum MRIRYENRRTVLTLSGFERLRLKIQWCENPACARHHRAYRPEAEGQLTLP 8 | 9 | IS892.1|ISNCY|IS892|Transposase|Anabaena QNEFGLDVIAYIGALRYQEHRSVPQIHTHLELKGICISQRTVTHLIDRYD 10 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum HHEFGLDVIALIGSLRHREHRSVPEIHVTLRERGLLISERSVTNLLDRYD 11 | 12 | IS892.1|ISNCY|IS892|Transposase|Anabaena ELLSLWLKDHKRLKTIVANQGRVILAIDGMQPEIGHEVLWVIRDCLSGEI 13 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum ELVATVLDAPNR--AAVAAQGRVILALDGLQPDVGHEVLWVIRDCLSGRV 14 | 15 | IS892.1|ISNCY|IS892|Transposase|Anabaena LLAKTLLSSRNEDLVALLLEVTNTLDVPIDGVVSDGQQSIRKAVRLALPR 16 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum LLARPLLSAAQADLAGLLREVIAALPVPVTGVVSDGQHSIRRAVAEALPG 17 | 18 | IS892.1|ISNCY|IS892|Transposase|Anabaena IAHGLCHYHYLKEAIKPIYEADRHALKGIKEKS----------------- 19 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum VPHQLCQFHYLREAARPLFEADRHAKKELKKTVRGVRPIERWSEGHEDGG 20 | 21 | IS892.1|ISNCY|IS892|Transposase|Anabaena -------------------------------------------------- 22 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum APVVAGYCAAVRSALTDDGRPPLAAPGLKLKGRLEAIAGSLDRVKKKGGL 23 | 24 | IS892.1|ISNCY|IS892|Transposase|Anabaena -------------------------------------------------- 25 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum SKPLRRLRDLLRQGLERTAALWPDVRETHRWLHAAAHILGNAADHPATVV 26 | 27 | IS892.1|ISNCY|IS892|Transposase|Anabaena -------------------------------------------------- 28 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum QARYDRLIATWSTRRGRAGGLTAAVDHFVKVTASYRPGLFHCYAVPDLPR 29 | 30 | IS892.1|ISNCY|IS892|Transposase|Anabaena -------------------------------------------------- 31 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum TNNDLEHLFGCHRHHERRATGRKVASPSLVLRGSVRIVAATATRIAPFTA 32 | 33 | IS892.1|ISNCY|IS892|Transposase|Anabaena -------------------------------------------------- 34 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum 
RDLATDQARWLKLRSTLETRRQARGSRTRFRRDQDTYLAVLEDRACQQRL 35 | 36 | IS892.1|ISNCY|IS892|Transposase|Anabaena -- 37 | ISAzs34.3|ISNCY|IS892|Transposase|Azospirillum PS 38 | // 39 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISA1214/ISNCY_ISA1214.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | 3 | #=GS ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus DE fulgidus 4 | #=GS ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus DE fulgidus 5 | #=GS ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma DE acidarmanus 6 | #=GS ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma DE volcanium 7 | 8 | ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus ----------------------MNLGFRVTGKFVRELLDVLDEIAEEIRQ 9 | ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus ------------------------MNVRITKKMVRELLEVLDEIVEEIRQ 10 | ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma ------------------------MGLVRSEDIQYKLEEINNILGKKYVQ 11 | ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma MLCAVISFASFTLTFFTNVYIQYILYFVIQQSMYEEFRDNINSLIEEYRE 12 | 13 | ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus EEKEKYPYTEWERKR----EVVKERLRKLPEYVREAISVITVQKRVGRPK 14 | ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus EEKEKYPYAEWERKR----EIVKERLRKLPKYVREAVSAIRIEKRVGRPK 15 | ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma EHPE--KGRNWRTYESEFSRRIKTAMKELDPLIEKAVSIMHIAKRPGHPH 16 | ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma KLSI-PHVMD*KEYEKKYRSRLTAASRELRSLLLEASSRIR-EDEFGRPS 17 | 18 | ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus KVDLEKRVMLFLFARLMDKSNRDIEELLELFEPLFGIKVSYKTIERLYSD 19 | ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus KVDLEKRVMLFLFARLINKSNRDVEELLELFEPLFGIRVSYKTIERLYSD 20 | ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma SLLLAQRVKLILIKQLVGESNRMFVNMLDIFSMLSGIDVSYKTIERLYSD 21 | ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma VIHEVDRAQIILSKEMFDLSNRKAAYLMPLMGM--EYEVSYKTVERIYSD 22 | 23 | 
ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus EEVRMALHNLFILLLREEGVS-GDFSGDGTGYSLTITKHYRSNPKR---- 24 | ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus EEVRMALHNLFILLLKDEGVS-GEFSGDGTGYSLAITKHYRSNPKK---- 25 | ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma NEVVMAIFNLHVLLLKNKNIANSDATGDGTGYSLTVKKNYESYAQRLKDL 26 | ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma DVVPMIMHNHFVLSVKKRGIDISNACRDRTGYSLTVSDHHRSVREKLGES 27 | 28 | ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus --------------------KGKDFRYVFRIIDIDTGMYVGFGYSDRSEK 29 | ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus --------------------KGKDFRYVFRIIDLETGMYVGFGYSAKSEK 30 | ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma AKENKEIGKEEHKDKKSKGHRKRLFAYSFSIMDLRTRMYIASGTSMKSER 31 | ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma -------------------VKHGKYRYSFAIMDLKTRMYIGYASFVRSES 32 | 33 | ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus DAFEKALGMLKSMGVKVNSISLDKYYSSRKTLRLFDAETAVYVIPKRNLA 34 | ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus DAFEKAMKMLKSMGVKVNSISLDKYYSTRKTLRMFDAETSVYVIPKRNLS 35 | ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma NAYDNAMRIVNKIGINIDSIRLDRYYSSPSYVDKL-GDTKVYIIPKKNST 36 | ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma DAYQKASRIISDLGIQINSVRLNK*YSGQSILGDFSDNTRIFIIPKKNSR 37 | 38 | ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus RIG-FDWLRVIERIVEAPYRFLKRYFKRNLSEAGFSADKRRFGWLIRQRR 39 | ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus RIG-FDWLRVIERIVEMPYRFLKRYFKRNLSEAGFSADKRRFGWLIRQRR 40 | ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma LHGSHKWKSIIREFLNDTMNYLEKYHKRSNSESGFAADKKMLGWNMAQRR 41 | ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma IHGKKRWRDIIASFMSDPMEFLKEYFKRNNSEAGFSSDKRATGRRIFQRR 42 | 43 | ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus EDRREMALFAVGLWHNVFAVRVVR 44 | ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus EDRREMALFAVGLWHNIFAVRVMR 45 | ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma DDRIGNALLCTNVWHNLFNMGRY- 46 | 
ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma NDRIETSIFIKGLWHNLMFMNG-- 47 | // 48 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISA1214/ISNCY_ISA1214_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISA1214-1.2|ISNCY|ISA1214|Transposase|Archaeoglobus/146-286 GDFSGDGTGYSLTITKHYRSNPKR------------------------KGKDFRYVFRIIDIDTGMYVGFGYSDRSEKDAFEKALGMLKSMGVKVNSISLDKYYSSRKTLRLFDAETAVYVIPKRNLARIG-FDWLRVIERIVEAPYRFLKRYFKRNLSEAGFSAD 3 | ISA1214-6.2|ISNCY|ISA1214|Transposase|Archaeoglobus/144-284 GEFSGDGTGYSLAITKHYRSNPKK------------------------KGKDFRYVFRIIDLETGMYVGFGYSAKSEKDAFEKAMKMLKSMGVKVNSISLDKYYSTRKTLRMFDAETSVYVIPKRNLSRIG-FDWLRVIERIVEMPYRFLKRYFKRNLSEAGFSAD 4 | ISFac3.2|ISNCY|ISA1214|Transposase|Ferroplasma/147-311 SDATGDGTGYSLTVKKNYESYAQRLKDLAKENKEIGKEEHKDKKSKGHRKRLFAYSFSIMDLRTRMYIASGTSMKSERNAYDNAMRIVNKIGINIDSIRLDRYYSSPSYVDKL-GDTKVYIIPKKNSTLHGSHKWKSIIREFLNDTMNYLEKYHKRSNSESGFAAD 5 | ISTvo2.2|ISNCY|ISA1214|Transposase|Thermoplasma/169-315 SNACRDRTGYSLTVSDHHRSVREKLGES-------------------VKHGKYRYSFAIMDLKTRMYIGYASFVRSESDAYQKASRIISDLGIQINSVRLNK*YSGQSILGDFSDNTRIFIIPKKNSRIHGKKRWRDIIASFMSDPMEFLKEYFKRNNSEAGFSSD 6 | // 7 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISC1217/ISNCY_ISC1217.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | 3 | #=GS ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus DE sp. 
4 | #=GS ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus DE islandicus 5 | #=GS ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus DE tokodaii 6 | #=GS ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus DE tokodaii 7 | 8 | ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus MKSEKELDIARTEFIKSFNYLIGT--LRMNGLRRKVAVGLALMTLIGGRA 9 | ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus MNPEKELDIARSEFIKSFNYLIGT--LRMNGLSRKVAVGLALMTLIGGRA 10 | ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus MNTNSLL----QEYYKTLQEALQQIFTALTSVRKDTLTRLVLGGVMGGTA 11 | ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus MNTNSLL----QEYYKALREALQQIFTALTSVRKDTLTRLVLGGVMGGTA 12 | 13 | ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus SIRNASITFKLNYANLLKTLENLENTWRDYLEAL-SKVIIGPVVVIIDDT 14 | ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus SIRNASITFKLNYANLLKTLENLENTWSDYLEAL-SKVIVGPVVVIIDDT 15 | ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus TEI--AQAVDMDYETVLKNLDKLANINL--IKIVKEIVKDHPVQLIIDDT 16 | ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus TEI--AQAVDMDYETVLKNLDKLANINL--IKIVKEIVKDHPVQLIIDDT 17 | 18 | ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus FDHKLYSRVEGIASKYGNYFAWCSTHKRFEPGIQVLTIALYDLAMGKSYL 19 | ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus FDHKLYSRVEGIASKYGNYLAWCSTHKRFEPGIQVLTIALYDLAMGKSYL 20 | ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus HNHKARAL---PVSRNGTQAFYCREHKRYEPAIQLLLITLKDLRTNEAYI 21 | ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus HNHKARAL---PVSRNGTQAFYCREHKRYEPAIQLLIIAIKDLYTNETYI 22 | 23 | ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus IGAFPYATRKM----WESGMVSEFKTKIEMAAEIIEILKERFHVARVVFD 24 | ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus IGAFPYATRKM----WESGMVSEFKTKIEMAAEIIEVLKERFHVARVVFD 25 | ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus VTIIPYIPQKVAEILKERGEKAEYKTKIQLYLETLPTILNEYNVTTISFD 26 | ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus ITIIPYIPQKVAEILKERGEKAEYKTKIQLYLETLPTILNEYNVTTISFD 27 | 28 | ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus SWYWSEKLVKGSVVSELKSNRRLIRVRPLEGGKTLGVEGHPHVGDLPPGS 29 | 
ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus SWYWSEKLVKGSVVSELKSNRRLLRVRPLEGEKTLGVEGHPHVGDLPPGS 30 | ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus SWYVNSKTLLPNTTGELKANSRV-----VEGGR------HVPVAEFPEGE 31 | ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus SWYVNSKTLLPNTIGELKANSRV-----VEGDR------HVPVAEFPEGE 32 | 33 | ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus YLAELTLGDQVITIKLLI-LVYKD-NRLNLYTTDLNLSDEEIEATWKIRW 34 | ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus YLAELTLGDQVITIKLLI-LVYKD-NRLNLYSTDLNLSDEEIEVTWKIRW 35 | ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus YLVEYLGT----PIKLLVIDNYKGMGKRYFFSTNTNDTPEDIITTWENRW 36 | ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus YLVEYLGT----PIKLLVIDNYKDMGKRYFFSTNTKDTPEDIITTWENRW 37 | 38 | ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus EIEKLHKDIKALGMQDSSFLKRKRLQGYLLLFVMVVNAA----RDLVTSL 39 | ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus EIEKLHKDIKALGMQDSSFLKRKRLQGYLLLFVMAVNTV----RDLISSL 40 | ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus DIEVVIRELKALGLEKSSFLTWVRNKGFITLKSLSLLLVLSFKYSLGLSL 41 | ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus DIEVVIRELKALGLEKSSFLTWIRNKGFITLKALSLLLVLSFKYSLGLRL 42 | 43 | ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus NLKSVEELRRFVEIRLGGALGLMKIFKLR--- 44 | ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus NLKSVEELLRFVEIRLGGALGLMKIFKLR--- 45 | ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus GAKRISRMIKSIYQSLG---GIKKLFKRRKKT 46 | ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus GAKRISRMIKSIYQSLG---GIKKLFKRRKKT 47 | // 48 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISC1217/ISNCY_ISC1217_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISC1205.1|ISNCY|ISC1217|Transposase|Sulfolobus/92-302 
VIIDDTFDHKLYSRVEGIASKYGNYFAWCSTHKRFEPGIQVLTIALYDLAMGKSYLIGAFPYATRKM----WESGMVSEFKTKIEMAAEIIEILKERFHVARVVFDSWYWSEKLVKGSVVSELKSNRRLIRVRPLEGGKTLGVEGHPHVGDLPPGSYLAELTLGDQVITIKLLI-LVYKD-NRLNLYTTDLNLSDEEIEATWKIRWEIEKLHKDIKA 3 | ISSis4.1|ISNCY|ISC1217|Transposase|Sulfolobus/92-302 VIIDDTFDHKLYSRVEGIASKYGNYLAWCSTHKRFEPGIQVLTIALYDLAMGKSYLIGAFPYATRKM----WESGMVSEFKTKIEMAAEIIEVLKERFHVARVVFDSWYWSEKLVKGSVVSELKSNRRLLRVRPLEGEKTLGVEGHPHVGDLPPGSYLAELTLGDQVITIKLLI-LVYKD-NRLNLYSTDLNLSDEEIEVTWKIRWEIEKLHKDIKA 4 | ISSto10.1|ISNCY|ISC1217|Transposase|Sulfolobus/87-285 LIIDDTHNHKARAL---PVSRNGTQAFYCREHKRYEPAIQLLLITLKDLRTNEAYIVTIIPYIPQKVAEILKERGEKAEYKTKIQLYLETLPTILNEYNVTTISFDSWYVNSKTLLPNTTGELKANSRV-----VEGGR------HVPVAEFPEGEYLVEYLGT----PIKLLVIDNYKGMGKRYFFSTNTNDTPEDIITTWENRWDIEVVIRELKA 5 | ISSto15.1|ISNCY|ISC1217|Transposase|Sulfolobus/87-285 LIIDDTHNHKARAL---PVSRNGTQAFYCREHKRYEPAIQLLIIAIKDLYTNETYIITIIPYIPQKVAEILKERGEKAEYKTKIQLYLETLPTILNEYNVTTISFDSWYVNSKTLLPNTIGELKANSRV-----VEGDR------HVPVAEFPEGEYLVEYLGT----PIKLLVIDNYKDMGKRYFFSTNTKDTPEDIITTWENRWDIEVVIRELKA 6 | // 7 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISLbi1/ISNCY_ISLbi1.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | 3 | #=GS ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium DE japonicum 4 | #=GS ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium DE linens 5 | #=GS ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium DE linens 6 | #=GS ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga DE olearia 7 | #=GS ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira DE biflexa 8 | 9 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium MGWLSMA----TRKELTAAAGVRYRRSDRAKKARILDEFVDITGFHRKHA 10 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium --MVDKDVSMNTRIEITKKYATAYAKAGRQQKSAILDAVTDITGWNRDHA 11 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium ---------MATRAEITTKYAREYKKASKKNKGTVLNEVMAVTGWTRDNA 12 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga 
------------MKNVVKDYSKRYRKARKKEKSEILDEFTRVTKYNRSYA 13 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira ---MSMKQSKMVRSMLVQVFKEKYFWASKKEKSLILDQFVEATGFNRSYA 14 | 15 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium MRLLRNQEGV------H-----PGRRARRRIYNEAEHTALVLLWEASDRI 16 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium RQQLMRRTRQPKGRANATVAVIDRRKTKARKYSYDAIKILQYVWSVAGGI 17 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium RRRLTQAAKHPPGRGKQVA--SHPRQPRARKYSYDAVKILQRVWAISGGQ 18 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga SFLLRGALKK------RKATSPSQKKGRKKKYDHKVFVKLVKIWEIMDFP 19 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira RTVLRKKKDN------LIKL--RPRKKRLSNYDDDVRFYLEKIWEILDRI 20 | 21 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium CGKRLKALMPALIEAMERYGHLDLA-----PEIRTKLLAMSAATIDRALV 22 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium CGKYLAQAMVDLLNSLEAHNHLVPGQGRYSTNVRDELVSMSPATIDRYLA 23 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium CGKYLHTTMRILLDLLEAHNELTLGQDRYTKKVRKELLAMSPATIDRYLK 24 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga CGKRLEAVMDEVIDNLVRNGHLTLA-----EETKRKLLSISASTIDRLLS 25 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira CGKRLVMALPDVLSKLEQFKVFKID-----KTTKEKLLSISSATVDRLLK 26 | 27 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium RVREKLGRKRRRHA--GHSLRRSIPIRTSADWNDPAPGFVEADLVAHSGP 28 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium PARARDTLRGKSATKPGTLLRNSIQVRKAGDEVEAEPGFFEVDTVAHCGP 29 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium PVRARDAINGISTTKPGSLLRSAITIRRAGDDVEGEPGFFEGDTVAHCGP 30 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga SERKKMELKGRSHTKPGTLLKKHIRIKTHYEWDDTRPGFVEIDLVGHDGG 31 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira PARKKLGRKGTSTTKQPKYLIDRIPIKTFGEWKSSLPGFVQIDLVAHNGG 32 | 33 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium SARGSFIQTLVLTDIATGWTECAPLIVREQTLVSTVLTELRKQLPFALLG 34 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium TLKGEFIRSVNYTDMHTGWVYTRAVKNNAAVHIVAACTHFVEAVPYLVTG 35 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium 
VESGEFARTLNLTDFHTGWTFTRTVRNNAHTNILAGLKTAAREIPFQITG 36 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga SVSGDFCYSLNMVDVASGWSVVAPIRNKAQIWTLKAIIQLRKTLPFTLLG 37 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira NVFGGFYSTLAATDVCTGWTVCILVKDKTQFQMLKALIKLKKILPFPLLG 38 | 39 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium FDTDNDTVFMNETLKAYCEAANIVFTRCRPYRKNDQAFVEQKNGAVVRRM 40 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium LDFDNGSEFINHDLIDWAAQRKIFFTRGRPYTKNDQATIESKNNHLVRRY 41 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium LDFDNGSEFLNQYVIEWAGSKGIYFTRSRPYKKNDQATIESKNNHVVRRY 42 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga IHSDNGSEFINRHLYRYCEDEGLLFTRTRSYNKNDNCHVEQKNWSVVRRA 43 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira IHSDNGAEFINQTILTYAERNDIQFTRGRPYKKNDNPHIEQKNYSVVRRN 44 | 45 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium VGYRRFEGLEAAKLLAELYRSARLFVNFFQPSFKLLAKQRD-GARVRKTY 46 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium GFYYRYDTTTELGLMTTLWALVNDRLNYFTPTKKPTGYSTDSVGRRKRVY 47 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium GFYYRYDTDLERRALNRLWRLVNDRVNYLMPTIKPTGYGATKNGRRKRVY 48 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga VGYYRYDTEEEFQILKELYASLNLYNNHFQPNQKIVEKIRK-GNKVSKKY 49 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira TGYLRIENQSQADIIRSLYQDLNTYNNYFLPVMILKEKHRI-GSKAIRRY 50 | 51 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium SAPATPHQRLSADARTPDAVRHHLQEIYTALDPVTLLRDIRDVQERLAAL 52 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium DTPRTPFVRLLDSGILNRKQVAELRAYKAGLDPVHIAAEIDRIQQRLIKL 53 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium DRPRTPFDRLLDSGVLAPKQVKDMTAYRNSLNPARIAAEIARVQDRLLVL 54 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga DRPTTPYERIMRSPWVDQDKKDRLRRQHEALDIYKLKSIITHLQEQLLSI 55 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira DEAKSPYRRILARKDISKTIKASMKKIYEKLNIFELKNQVNHWQNEIVKI 56 | 57 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium ADIQPSAHPAAASQSIDRFLASLRTAWKDGATRPTDRPIVKAKRGRRRPD 58 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium 
AAGKTARMEREIE------AKQALPD-SSGIRVRPSR-------VG---- 59 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium SGKKTEQMYLASF------P-SALPDVRKGVRVKTG-------------- 60 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga QIDK----------SKGGIL-NYVPLNFK--------------------- 61 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira AAPIRNPIDKVKVRRKKGIV-HTIPKWRREVNSDTKNPFLERQRV----E 62 | 63 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium PLIRATSDLRKWFEAEPWRTGSELLSRLQVEYPGDYPNKLLRTLQRRLKS 64 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium -------------------------------------------------- 65 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium -------------------------------------------------- 66 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga -------------------------------------------------- 67 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira EMRRA--------AEQVWAKRK---------------------------- 68 | 69 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium WRSEQANALLFASEKMPPGHEVTTPQ 70 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium -------------------------- 71 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium -------------------------- 72 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga -------------------------- 73 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira -------------------------- 74 | // 75 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISLbi1/ISNCY_ISLbi1_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISBj12.1|ISNCY|ISLbi1|Transposase|Bradyrhizobium/168-277 VEADLVAHSGPSARGSFIQTLVLTDIATGWTECAPLIVREQTLVSTVLTELRKQLPFALLGFDTDNDTVFMNETLKAYCEAANIVFTRCRPYRKNDQAFVEQKNGAVVRR 3 | ISBli15.1|ISNCY|ISLbi1|Transposase|Brevibacterium/188-297 FEVDTVAHCGPTLKGEFIRSVNYTDMHTGWVYTRAVKNNAAVHIVAACTHFVEAVPYLVTGLDFDNGSEFINHDLIDWAAQRKIFFTRGRPYTKNDQATIESKNNHLVRR 4 | ISBli16.1|ISNCY|ISLbi1|Transposase|Brevibacterium/179-288 
FEGDTVAHCGPVESGEFARTLNLTDFHTGWTFTRTVRNNAHTNILAGLKTAAREIPFQITGLDFDNGSEFLNQYVIEWAGSKGIYFTRSRPYKKNDQATIESKNNHVVRR 5 | ISKol11.1|ISNCY|ISLbi1|Transposase|Kosmotoga/167-276 VEIDLVGHDGGSVSGDFCYSLNMVDVASGWSVVAPIRNKAQIWTLKAIIQLRKTLPFTLLGIHSDNGSEFINRHLYRYCEDEGLLFTRTRSYNKNDNCHVEQKNWSVVRR 6 | ISLbi1.1|ISNCY|ISLbi1|Transposase|Leptospira/174-283 VQIDLVAHNGGNVFGGFYSTLAATDVCTGWTVCILVKDKTQFQMLKALIKLKKILPFPLLGIHSDNGAEFINQTILTYAERNDIQFTRGRPYKKNDNPHIEQKNYSVVRR 7 | // 8 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISM1/ISNCY_ISM1.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | 3 | #=GS ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter DE smithii 4 | #=GS ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina DE acetivorans 5 | #=GS ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides DE burtonii 6 | #=GS ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina DE mazei 7 | #=GS ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera DE stadtmanae 8 | 9 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter --MTKQNKSALNSNIDFIQLKLYDFFDEKIDQEKIISKRLNKTPN-FENT 10 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina --------------MVTINLTLNNFSELHA-----LLDVFGCFGN----- 11 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides MVMTPNITSTFASHIDSIQLKLTDCFSGITTFEKAIISHFSSTGKRNIRN 12 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina --------------MVTINLTLNNFSELPS-----LLDVFGCFGN----- 13 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera --MKTQISNSLKDNMSSIQLKLNDFGLKDP--IKKNTEELFKTKN-TRPI 14 | 15 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter NHKLFLDENNTF-KYDNPICPVCGS----HKIIKKGTIKKNKQNTNGKTT 16 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina --DYEYTTRGIFRRKIPPVCSICNTPM-----VHNGYNP----HTKDGLG 17 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides EKELVLHADNHF-DLLHPQCPVCGS----NKINKQEYYTRKLKLAEFGSQ 18 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina --DYEYTTQGIFRRKIPPACSICSTPM-----VHNGYNP----HTKQGLG 19 | 
ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera NQEYVYLNDNHF-EYDNPTCSHCHKKHEKHKVIKKGFRTRKVRTTNKTKI 20 | 21 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter EFKEQQYQCKKCGKKFGIYNNPLIGENKQFLQEIMDKIPGIMKIGYQSLR 22 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina EINIGRYKCSNCGSTHEEDYSFWEDMKTLLFDTFND-FFQLLRYHNVSYD 23 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides IIHVRRYYCKKCSKRFTTPLDPIVKKGHQYARTYEQYIEDSYETGYCSFR 24 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina EINIGRYKCSNCGRTYEEDYSFWGDLKALLFDSFND-FFKLLRYHKVSYD 25 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera TIFLRRYQCKTCGKKFQTELSWLYDKNKRYTKQFFELIDKIMEFRNIPLS 26 | 27 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter KISKYFEIFLGIRISHQTIKNWSDKNHEES----------------ISNE 28 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina GISDLMDFIYPRSRSTIFRAFYKEMEQETI-------------------P 29 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides HLQKIFSSLYDCSPSHQTIYNWIRKSNKEV----------------TSEA 30 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina GISDIMEFIYPRSRSTILRAFYKEMEQETV-------------------P 31 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera LLQHIINVVLNTNINLQTLEFWIKIKNKFSRNKEYLKIELIKDKNMIINH 32 | 33 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter KFEYSGYYLYDEQFLRLNGTRHYRLTLFDAILNIPVTERIVRR------R 34 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina YSENIHMVHYDEQHPKEGRCQKYRLTLLDAKAQRTIADELFKD------K 35 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides TKEYSGYYCYDEQYLKLNGRRFYRLSLIDSIFNKPVEEMIVTN------L 36 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina FSENIHMVHYDEQHPKEGRCQKYRLTLLDAKTQTTIADDLFDD------K 37 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera NIIGSGIYNYDEQYIKINGKKYYRLTLYDYSKDQPIAEQIIKKEWNKKSL 38 | 39 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter IPKNTKKFILESTENKPFICLTTDLFPMYRNVADEI---EVKHQLCIFHL 40 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina SPETIKKFLKKNLDASEPVFIVTDFDKRYPDILKEIFRDKLVHQYCLMHL 41 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides EYDTVKDFIEKATSNRLIYAISTDHRIKYKSIMDKL---EIKHQLCIFYL 42 | 
ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina SPETIKEFLRKNLDASEPVFIVTDFDKRYPDILKEIFGDKLVHQYCLMHL 43 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera SSKTIEEFIKTATKERPFKALITDGKKQYNEIAKKL---RVQHQTCIFHA 44 | 45 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter FQTINHKLKVYCR-----------------RNKINGKQRD-----HIYE- 46 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina NKLIVNDFPKNTTIEQELLKYRLLSIFYNRENEIKFLQKLQSEELNVINN 47 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides YKLIGNDVFKKLK-----------------SKFVSYREKI-----NMCL- 48 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina NKLIVSDFPKNTTIEQELLKYRLLNIFYNRENEIKFLEELLSEELNVINN 49 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera IKYIKDETKKYLR-----------------SKTLSTLDKM-----TIAN- 50 | 51 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter ----------NAQELKNCFRQN--------------SKKEAIEQFKQYLQ 52 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina KEKHQEWIKKAKKEFCQYRHKLKLEIRRKKENLPRNSLEKAKYNFDKLME 53 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides ----------TFTEIKEIFRTY--------------DVSIAINRLQNLID 54 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina EEKHQEWSKKAKKEFNQFRRKLKLERRRKKENLPLNSLEKARDNFDKLME 55 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera ----------QTSQICQIYREL--------------SLYDTYKTLNELKD 56 | 57 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter KYTAIPVVLKDFIRKHIINHFHRYVEYLDDENIEKTSNKVENYYRQTNPE 58 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina NIRTYDEKVQKRL-WMINKHWLNLTLFHYLPGAPATNNPIESYYSKSLKT 59 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides GIKKIPYALYRSI-DKITRDFNRLTLFMVDGFVSKTTNPIENYYRQTLPN 60 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina NIRTYDQTIQKRL-WMINKHWLNLTLFHYLPGAPATNNPIESYYSKSLKT 61 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera IENQLLKPIKKILNTTVENNINKIITHHLYSEIPRTNNAVEQYYRNSLPK 62 | 63 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter KIKKIYKTKNGILTFLDYQMQNWTEKH------------------- 64 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina DNKKKFRTDKGIENRIKLTQMRRLNLLKKPQKSFMELFRLFSPFKL 65 | 
ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides QLKRIFKTPE------------------------------------ 66 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina DNKKQFRTDKGIGNQIKLTQMRRLNLLKKPQKSFLELFRLFNPFKL 67 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera SKKNKKRTIDGVLTTIATEMMKKFKNTKEK---------------- 68 | // 69 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISM1/ISNCY_ISM1_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISM1.1|ISNCY|ISM1|Transposase|Methanobrevibacter/184-365 YLYDEQFLRLNGTRHYRLTLFDAILNIPVTERIVRR------RIPKNTKKFILESTENKPFICLTTDLFPMYRNVADEI---EVKHQLCIFHLFQTINHKLKVYCR-----------------RNKINGKQRD-----HIYE-----------NAQELKNCFRQN--------------SKKEAIEQFKQYLQKYTAIPVVLKDFIRKHIINHFHRYVEYLDDENIEKTSNKVENYYR 3 | ISMac19.1|ISNCY|ISM1|Transposase|Methanosarcina/153-383 VHYDEQHPKEGRCQKYRLTLLDAKAQRTIADELFKD------KSPETIKKFLKKNLDASEPVFIVTDFDKRYPDILKEIFRDKLVHQYCLMHLNKLIVNDFPKNTTIEQELLKYRLLSIFYNRENEIKFLQKLQSEELNVINNKEKHQEWIKKAKKEFCQYRHKLKLEIRRKKENLPRNSLEKAKYNFDKLMENIRTYDEKVQKRL-WMINKHWLNLTLFHYLPGAPATNNPIESYYS 4 | ISMbu2.1|ISNCY|ISM1|Transposase|Methanococcoides/187-367 YCYDEQYLKLNGRRFYRLSLIDSIFNKPVEEMIVTN------LEYDTVKDFIEKATSNRLIYAISTDHRIKYKSIMDKL---EIKHQLCIFYLYKLIGNDVFKKLK-----------------SKFVSYREKI-----NMCL-----------TFTEIKEIFRTY--------------DVSIAINRLQNLIDGIKKIPYALYRSI-DKITRDFNRLTLFMVDGFVSKTTNPIENYYR 5 | ISMma11.1|ISNCY|ISM1|Transposase|Methanosarcina/153-383 VHYDEQHPKEGRCQKYRLTLLDAKTQTTIADDLFDD------KSPETIKEFLRKNLDASEPVFIVTDFDKRYPDILKEIFGDKLVHQYCLMHLNKLIVSDFPKNTTIEQELLKYRLLNIFYNRENEIKFLEELLSEELNVINNEEKHQEWSKKAKKEFNQFRRKLKLERRRKKENLPLNSLEKARDNFDKLMENIRTYDQTIQKRL-WMINKHWLNLTLFHYLPGAPATNNPIESYYS 6 | ISMst1.1|ISNCY|ISM1|Transposase|Methanosphaera/202-389 
YNYDEQYIKINGKKYYRLTLYDYSKDQPIAEQIIKKEWNKKSLSSKTIEEFIKTATKERPFKALITDGKKQYNEIAKKL---RVQHQTCIFHAIKYIKDETKKYLR-----------------SKTLSTLDKM-----TIAN-----------QTSQICQIYREL--------------SLYDTYKTLNELKDIENQLLKPIKKILNTTVENNINKIITHHLYSEIPRTNNAVEQYYR 7 | // 8 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISMae2/ISNCY_ISMae2.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | 3 | #=GS ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus DE ferrivorans 4 | #=GS ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira DE sp. 5 | #=GS ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis DE aeruginosa 6 | 7 | ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus MACPVVSPQQALRMPEQIAVLRALARDFPDNR-TGKNCTYRMEDALLGA- 8 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira ----------------LVGSFRQCLSSLPDKR-RGKNRRYGMEDAALSVR 9 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis ----MAQKAATLEISELMQFLRQELDDLPDERKPGNNRKYEVEDAVMAA- 10 | 11 | ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus FSVFYTQCPSFLTHQIAMSETRGMSNAQTLFGMRDIPSDNHIRNLLDPVD 12 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira ERVFFTQTPSFLAYQRLMEGSKGKSNAQSLFGVHRIPCDHQIRDLLDAVS 13 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis FSVFFTQSPSFLDHQRLMKSNKGKDNAESLFSIKKIPGDNQIRNLLDPVP 14 | 15 | ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus PQHIYPAFERTFNALNDTGQLDTFRMVNGQLLIALDGTSYHQSHAIHCPQ 16 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira PEHLFPVFEEILQVLEAQGQLEDFRCLGDSLLVALDGTEYFSSDKIHCPH 17 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis ASNVSRTFQKVYEWLKEKGVLKKFLYLDGEMLVALDGTEYFSSKKINCPH 18 | 19 | ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus CTVTEHKKGGTSYTHTVVTPVVVSTAHNRVIPLEPEFVTPQDGHAKQDCE 20 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira CSTRTQKSGKTHYFHSVVTPVIVCPGQNHVIPLVPEFIVPQDGHDKQDCE 21 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis CNCRNHRNGTTTYFHGCVTPMVVSPKQKQVINLEPEFIKKQDGHQKQDCE 22 | 23 | 
ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus SAAAHRWITAYAERYRHLHVTLLGDDLYSRQPMCEEMLRAGFDFILVCKP 24 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira NAAAKRWLSRQEQCLRALNVKVLGDDLYCHQPLCQQFLEQQLNFIVVCRP 25 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis NAAVKRWLNRNHKNKYGYPVTILGDDLYSHEPVCELAVKQGYNFIFVGLE 26 | 27 | ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus ASHTTLYAELERREHTGNVSTFSMTRREGKKTFTDTYRFAESLPLRAGKD 28 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira ESHTTLYEHLEGIEL----PTLTTKKWTGQVEKTYTYRYLNGVPLKDSDE 29 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis TSHKTLYEWLEFLEKSGEVRTVEKKQWDGRKNLIYRYRYASRVPLREGDS 30 | 31 | ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus ALMVNWCELKSTSA-EGKTVYHNAFATTHTLHPSNVAEIVKAGRARWRIE 32 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira ALLVNWCELTVTTA-DGQVTYHNSFATNYPLSDENVAEVVRAGLTRWKVE 33 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis SLEVNWCEVTVINEKTQKTIYQNNWITNHKITENNVEEIVKAGRSRWKVE 34 | 35 | ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus NGNNNTLKTKGYNLSHNFGHGQKYLSATLATLNLLAFLLHTVQELTDFTY 36 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira NENNNTLKTKGYHLEHNFGHGKQHLSSLLATLNILSLLFHTLLELLDNKY 37 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis NEGNNVLKNHGYNLEHNFGHGQNHLCEFLLSLNLLAFLFHTVLDLVNYTY 38 | 39 | ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus RTLRDRLPTRKAFFEHVRVLTQYHCFESFAALLEFMLSGLNRGRRDSG 40 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira QLLRAHLPTRQTFFNDLRALTQTVR----------------------- 41 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis QKIRELLVTRTSSLMIFVP---Y------------------------- 42 | // 43 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISMae2/ISNCY_ISMae2_cut.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | ISAcif1.1|ISNCY|ISMae2|Transposase|Acidithiobacillus/131-348 
IALDGTSYHQSHAIHCPQCTVTEHKKGGTSYTHTVVTPVVVSTAHNRVIPLEPEFVTPQDGHAKQDCESAAAHRWITAYAERYRHLHVTLLGDDLYSRQPMCEEMLRAGFDFILVCKPASHTTLYAELERREHTGNVSTFSMTRREGKKTFTDTYRFAESLPLRAGKDALMVNWCELKSTSA-EGKTVYHNAFATTHTLHPSNVAEIVKAGRARWRIEN 3 | ISAtsp9.1|ISNCY|ISMae2|Transposase|Arthrospira/116-329 VALDGTEYFSSDKIHCPHCSTRTQKSGKTHYFHSVVTPVIVCPGQNHVIPLVPEFIVPQDGHDKQDCENAAAKRWLSRQEQCLRALNVKVLGDDLYCHQPLCQQFLEQQLNFIVVCRPESHTTLYEHLEGIEL----PTLTTKKWTGQVEKTYTYRYLNGVPLKDSDEALLVNWCELTVTTA-DGQVTYHNSFATNYPLSDENVAEVVRAGLTRWKVEN 4 | ISMae2.1|ISNCY|ISMae2|Transposase|Microcystis/128-346 VALDGTEYFSSKKINCPHCNCRNHRNGTTTYFHGCVTPMVVSPKQKQVINLEPEFIKKQDGHQKQDCENAAVKRWLNRNHKNKYGYPVTILGDDLYSHEPVCELAVKQGYNFIFVGLETSHKTLYEWLEFLEKSGEVRTVEKKQWDGRKNLIYRYRYASRVPLREGDSSLEVNWCEVTVINEKTQKTIYQNNWITNHKITENNVEEIVKAGRSRWKVEN 5 | // 6 | -------------------------------------------------------------------------------- /data/models/msa/ISNCY_ISPlu15/ISNCY_ISPlu15.sto: -------------------------------------------------------------------------------- 1 | # STOCKHOLM 1.0 2 | 3 | #=GS ISPlu15.1|ISNCY|ISPlu15|Transposase|Photorhabdus DE luminescens 4 | #=GS ISPlu22.1|ISNCY|ISPlu15|Transposase|Photorhabdus DE luminescens 5 | #=GS ISVsa17.1|ISNCY|ISPlu15|Transposase|Aliivibrio DE salmonicida 6 | 7 | ISPlu15.1|ISNCY|ISPlu15|Transposase|Photorhabdus MKRKNTPTPHDAIFKKFLSHIDTARDFLEIHLPATLRAVCDLDTLRLESG 8 | ISPlu22.1|ISNCY|ISPlu15|Transposase|Photorhabdus MAKKEKRPHHDGLFKYFLTQPETAREFLSLYLPEEIQLLCDLATLKLEPG 9 | ISVsa17.1|ISNCY|ISPlu15|Transposase|Aliivibrio MSKKNTTTPHDGLFKAFLTTPDTAKDMLEIHLPTHLKTLCDLSTLKLESG 10 | 11 | ISPlu15.1|ISNCY|ISPlu15|Transposase|Photorhabdus SFIEDNLRVHYSDILYSLKTTQGESYVYCVIEHQSSPDKMMAFRLMRYSI 12 | ISPlu22.1|ISNCY|ISPlu15|Transposase|Photorhabdus SFVDEHLRQLHSDVLYSVETARGQGYIYCLIEHQSTPDPLMAWRLMYYAM 13 | ISVsa17.1|ISNCY|ISPlu15|Transposase|Aliivibrio SFLEDDLRPYYSDVLYSMKTECGDGYIYALIEHQSSPDEHMAFRMFRYAI 14 | 15 | ISPlu15.1|ISNCY|ISPlu15|Transposase|Photorhabdus SAMQWHLEQGHKKLPLVIPVLFYHGKIRPYPWSTNWFDCFDASALAEEIY 16 | 
import os

# Directory that contains this file; every bundled data path below is built from it.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
HMM_MODELS = os.path.join(ROOT_DIR, "data", "models", "hmm", "hmm_all_subfams.hmm")
OUTLIERS = os.path.join(ROOT_DIR, "data", "models", "fasta", "outliers.fasta")
# BLAST database path prefixes (given without a file extension).
ISFINDER_ORF_DB = os.path.join(ROOT_DIR, "data", "Blast_db", "isfinder_orf")
ISFINDER_IS_DB = os.path.join(ROOT_DIR, "data", "Blast_db", "isfinder_is")

# Search / merge tuning values.
# NOTE(review): sizes and distances are presumably in base pairs - confirm against callers.
CONTEXT_SIZE_ORF = 1600
CONTEXT_SIZE_IS = 14000
MIN_GB_OVERLAP = 100
MAX_MERGE_DISTANCE = 700
MIN_HIT_LENGTH = 150
# NOTE(review): name is misspelled ("CURRATED"); kept as-is because other modules may import it.
CURRATED_CUTOFF = False
OUTLIERS_EVALUE = 0.001

# BLASTN parameters.
BLASTN_GAPOPEN = 5
BLASTN_GAPEXTEND = 2
BLASTN_WORDSIZE = 11
BLASTN_EVALUE = 0.001

# BLASTX parameters.
BLASTX_GAPOPEN = 11
BLASTX_GAPEXTEND = 1
BLASTX_WORDSIZE = 3
BLASTX_EVALUE = 0.001

# Similarity thresholds (fractions in the 0-1 range).
INTRAFAMILY_ORF_SIM_THRESHOLD = 0.45
INTERFAMILY_ORF_SIM_THRESHOLD = 0.25

INTRAFAMILY_DNA_SIM_THRESHOLD = 0.70
INTERFAMILY_DNA_SIM_THRESHOLD = 0.50

NUM_THREADS = 4
MIN_EVAL_OVERLAP = 100

# File extensions recognised for each input format.
FASTA_EXTENSIONS = [".fasta", ".fna", ".ffn"]
GENBANK_EXTENSIONS = [".gb", ".genbank", ".gbff"]

IS_GB_KEYWORDS = ['transposase', 'insertion element', 'mobile element', 'transposon', 'transposable element',
                  'DDE', 'resolvase', 'recombinase', 'recombination/resolution']

IS_FAMILIES_NAMES = ['IS1', 'IS110', 'IS1182', 'IS1380', 'IS1595', 'IS1634', 'IS21', 'IS256',
                     'IS3', 'IS30', 'IS4', 'IS481', 'IS5', 'IS6', 'IS66', 'IS607', 'IS630', 'IS701', 'IS91',
                     'IS982', 'ISAs1', 'ISAzo13', 'ISH3', 'ISH6', 'ISKra4', 'ISL3', 'ISLre2', 'Tn3', 'ISNCY']

# Every sub-family name is listed exactly once ('IS4Sa' used to appear twice).
IS_SUBFAMILIES_NAMES = ['ISMhu11', 'IS2', 'IS51', 'IS150', 'IS407', 'IS4', 'IS4Sa', 'IS10', 'IS231',
                        'IS50', 'ISH8', 'ISPepr1', 'IS5', 'IS427', 'IS903', 'IS1031', 'ISH1', 'ISL2',
                        'ISBst12', 'IS1111', 'IS200', 'IS605', 'IS1249', 'ISC1250', 'ISAba11', 'IS942', 'IS1016',
                        'ISH4', 'ISNha5', 'ISNwi1', 'ISPna2', 'ISSod11', 'ISAzba1', 'ISMich2']

HYPOTHETICAL_GB_KEYWORDS = ['hypothetical protein', 'predicted protein', 'unknown']

NEUTRAL_GB_KEYWORDS = ['dispersed repetitive unit', 'Tn-like element']
import argparse

from src.search_tool.digISConfiguration import digISConfiguration
from src.search_tool.digISMultifasta import digISMultifasta


def _build_arg_parser():
    """Return the command-line parser for the digIS search script."""
    cli = argparse.ArgumentParser(description="digIS search")

    # The input FASTA is the only mandatory argument.
    cli.add_argument('-i', "--input", dest='input_fasta', action='store', required=True,
                     help='Input fasta file, nucleotide')

    cli.add_argument('-g', "--genbank", dest='genbank_file', action='store', required=False, default=None,
                     help='Genbank annotations for genome in the input fasta file.')

    cli.add_argument('-o', "--output", dest='output_dir', action='store', required=False,
                     default="digIS_output", type=str,
                     help='Output directory name, default=digIS_output.')
    return cli


def _main():
    """Parse the command line, build the configuration and run the multi-FASTA search."""
    options = _build_arg_parser().parse_args()

    configuration = digISConfiguration(genome_file=options.input_fasta,
                                       genbank_file=options.genbank_file,
                                       output_dir=options.output_dir)

    digISMultifasta(configuration).run()


if __name__ == "__main__":
    _main()
echo "Invalid option: -"$OPTARG"" >&2 24 | exit 1;; 25 | : ) echo "Option -"$OPTARG" requires an argument." >&2 26 | exit 1;; 27 | esac 28 | done 29 | 30 | CONTAINER_ID=$(cat /dev/urandom | tr -dc 'a-z0-9A-Z' | fold -w 8 | head -n 1) 31 | DOCKER_DATA="/digis_data" 32 | DOCKER_CONTAINER_NAME="digis_${CONTAINER_ID}" 33 | echo $DOCKER_CONTAINER_NAME 34 | DOCKER_RUN="sudo docker run --name ${DOCKER_CONTAINER_NAME}" 35 | DOCKER_IMAGE_NAME="janka2012/digis" 36 | DOCKER_OUTPUTDIR="/digis_output" 37 | DOCKER_MOUNT="" 38 | DOCKER_PARAMS="" 39 | 40 | [ -z ${INPUT_FASTA} ] && { echo "INPUT_FASTA is unset. This argument is mandatory."; exit 2; } || echo "INPUT_FASTA is set to '$INPUT_FASTA'" 41 | [ -z ${GENBANK} ] && echo "GENBANK is unset" || echo "GENBANK is set to '$GENBANK'" 42 | [ -z ${OUTPUTDIR} ] && echo "OUTPUTDIR is unset" || echo "OUTPUTDIR is set to '$OUTPUTDIR'" 43 | 44 | # mandatory 45 | if [ ! -z ${INPUT_FASTA} ] 46 | then 47 | input_fasta_fullpath=$(echo $(cd $(dirname "$INPUT_FASTA") && pwd -P)/$(basename "$INPUT_FASTA")) 48 | DOCKER_MOUNT="${DOCKER_MOUNT} -v ${input_fasta_fullpath}:${DOCKER_DATA}/$(basename ${INPUT_FASTA})" 49 | DOCKER_PARAMS="${DOCKER_PARANS} -i ${DOCKER_DATA}/$(basename $INPUT_FASTA)" 50 | else 51 | echo "INPUT_FASTA is unset. This argument is mandatory." 52 | exit 2 53 | fi 54 | 55 | # optional 56 | if [ ! 
class Blast(ABC):
    """Abstract base class for one BLAST run and the hits parsed from its XML output.

    Subclasses implement ``search_database`` and ``search_subject``. This class
    keeps the query/target paths, the XML output path (a temporary file unless
    ``output`` is given) and the hits that ``parse`` reads back.
    """

    def __init__(self, query, database="", subject="", output=None, remove_query=False):
        """Store the search inputs and choose the XML output file.

        :param query: path of the query file.
        :param database: BLAST database to search.
        :param subject: subject to search.
        :param output: XML output path; when falsy a temporary ``digIS_*.xml``
            file is created and removed again when the object is finalized.
        :param remove_query: when true, the query file is removed on finalization.
        """
        self.query = query
        self.subject = subject
        self.database = database
        self.best_hit = BlastHspFlat()
        self.best_hits = {}
        self.query_hits = []
        self.remove_query = remove_query
        # Always defined, so __del__ never has to guess whether a descriptor exists.
        self.fd = None

        if output:
            self.output = output
            self.remove_output = False
        else:
            self.fd, self.output = tempfile.mkstemp(prefix="digIS_", suffix=".xml")
            self.remove_output = True

    @abstractmethod
    def search_database(self):
        raise NotImplementedError("Should have implemented this")

    @abstractmethod
    def search_subject(self):
        raise NotImplementedError("Should have implemented this")

    @classmethod
    def from_seqrec(cls, qrec, database, output=None):
        """Build an instance whose query is ``qrec`` written to a temporary FASTA file.

        The temporary query file is flagged for removal when the instance is finalized.
        """
        fd, query = tempfile.mkstemp(prefix="digIS_", suffix=".fasta")
        SeqIO.write(qrec, query, "fasta")
        os.close(fd)
        return cls(query, database=database, output=output, remove_query=True)

    def parse(self):
        """Read the XML output file and rebuild ``self.query_hits``, one entry per BLAST record.

        A ``ValueError`` raised while reading the XML is reported on stdout
        instead of being propagated.
        """
        with open(self.output) as result_handle:
            try:
                blast_records = NCBIXML.parse(result_handle)
                self.query_hits = []
                for record in blast_records:
                    self.query_hits.append(BlastQuery.from_rec(record))
            except ValueError:
                print("XML file with blast results is empty")

    def get_best_hit(self):
        """Return the highest-scoring hit over all parsed queries.

        As a side effect, a copy of each query's best hit with a score above
        0.0 is stored in ``self.best_hits`` under the query id.
        """
        for rec in self.query_hits:
            hspflat = rec.get_best_hit()
            if hspflat.score > 0.0:
                self.best_hits[rec.query_id] = copy(hspflat)
            if self.best_hit < hspflat:
                self.best_hit = hspflat
        return self.best_hit

    @staticmethod
    def _search_flanked(recs, search_engine, flank, database):
        """Prepare flanked sequences for ``recs``, run ``search_engine`` on them and parse the result.

        Returns ``(blast_object, seq_ranges, seq_original_ranges)``; the two
        range lists are whatever ``prepare_flank_sequences`` produced.
        """
        seq_recs, seq_ranges, seq_original_ranges = prepare_flank_sequences(recs, flank)

        bl = search_engine.from_seqrec(seq_recs, database)
        bl.search_database()
        bl.parse()
        return bl, seq_ranges, seq_original_ranges

    @staticmethod
    def _remap_hit(bl_bhit, original_range):
        """Overwrite the hit's query coordinates with those ``original_range`` yields after remapping."""
        original_range.remap_offsets(bl_bhit.query_start, bl_bhit.query_end)
        bl_bhit.query_start = original_range.start
        bl_bhit.query_end = original_range.end
        return bl_bhit

    @staticmethod
    def get_best_blast_hits_in_range(recs, search_engine, flank, database, min_overlap, positive_subject_strand_only):
        """Return, per query, the best hit overlapping that query's ``seq_ranges`` entry.

        Query coordinates of every returned hit are remapped through the
        matching ``seq_original_ranges`` entry.
        """
        bl, seq_ranges, seq_original_ranges = Blast._search_flanked(recs, search_engine, flank, database)

        bl_bhits = []
        for i, query in enumerate(bl.query_hits):
            bl_bhit = query.get_best_hit(seq_ranges[i], min_overlap, positive_subject_strand_only)
            bl_bhits.append(Blast._remap_hit(bl_bhit, seq_original_ranges[i]))

        return bl_bhits

    @staticmethod
    def get_best_blast_hits_in_full_range(recs, search_engine, flank, database, min_overlap, positive_subject_strand_only):
        """Like ``get_best_blast_hits_in_range`` but the searched range starts at 0.

        Only the upper bound of each ``seq_ranges`` entry is used.
        """
        bl, seq_ranges, seq_original_ranges = Blast._search_flanked(recs, search_engine, flank, database)

        bl_bhits = []
        for i, query in enumerate(bl.query_hits):
            new_seq_ranges = (0, seq_ranges[i][1])
            bl_bhit = query.get_best_hit(new_seq_ranges, min_overlap, positive_subject_strand_only)
            bl_bhits.append(Blast._remap_hit(bl_bhit, seq_original_ranges[i]))

        return bl_bhits

    @staticmethod
    def get_max_blast_hits_in_range(recs, search_engine, flank, database, min_overlap, positive_subject_strand_only):
        """Return, per query, the ``(query_start, query_end)`` tuple reported by ``get_max_hit``.

        A ``(0, 0)`` result is passed through unchanged; any other result is
        remapped through the matching ``seq_original_ranges`` entry.
        """
        bl, seq_ranges, seq_original_ranges = Blast._search_flanked(recs, search_engine, flank, database)

        bl_mhits = []
        for i, query in enumerate(bl.query_hits):
            query_start, query_end = query.get_max_hit(seq_ranges[i], min_overlap, positive_subject_strand_only)
            if not (query_start == 0 and query_end == 0):
                seq_original_ranges[i].remap_offsets(query_start, query_end)
                query_start = seq_original_ranges[i].start
                query_end = seq_original_ranges[i].end
            bl_mhits.append((query_start, query_end))

        return bl_mhits

    def print_best_hits(self):
        """Print every stored best hit, one per line."""
        print('\n'.join(str(i) for i in self.best_hits.values()))

    def __del__(self):
        """Release the temporary output descriptor/file and, if requested, the query file.

        Attributes are read with ``getattr`` defaults because a finalizer also
        runs on instances whose ``__init__`` did not finish. The descriptor is
        forgotten before it is closed so a second call never closes an
        unrelated, reused descriptor number.
        """
        if getattr(self, "remove_output", False):
            fd = getattr(self, "fd", None)
            if fd is not None:
                self.fd = None
                try:
                    os.close(fd)
                except OSError:
                    # Descriptor was already closed elsewhere; nothing left to release.
                    pass
            output = getattr(self, "output", None)
            if output and os.path.exists(output):
                os.remove(output)
        if getattr(self, "remove_query", False):
            query = getattr(self, "query", None)
            if query and os.path.exists(query):
                os.remove(query)

    def __str__(self):
        return '\n'.join(str(i) for i in self.query_hits)
class BlastHit:
    """One subject sequence hit by a query, together with its HSPs."""

    def __init__(self, subject_id, subject_length, hsps):
        """
        :param subject_id: identifier of the subject sequence.
        :param subject_length: length of the subject sequence.
        :param hsps: list of HSP objects belonging to this subject.
        """
        self.subject_id = subject_id
        self.subject_length = subject_length
        self.hsps = hsps

    @classmethod
    def from_rec(cls, rec):
        """Build a hit from a parsed record exposing ``hit_id``, ``length`` and ``hsps``."""
        hsps = [BlastHsp.from_rec(hsp) for hsp in rec.hsps]
        return cls(rec.hit_id, rec.length, hsps)

    def merge_hsps(self, max_space):
        """Return the HSPs plus every merged combination of compatible HSP pairs.

        Two HSPs ``a`` and ``b`` are merged (``a + b``) when ``a`` ends no later
        than ``b`` starts on both query and subject, they share the subject
        strand, and ``start - end + 1`` is at most ``max_space`` on both axes.
        Merged HSPs join the pool and may be merged again; the loop stops once
        a pass adds nothing. ``self.hsps`` itself is not modified.
        """
        pool = copy(self.hsps)
        # Index pairs already merged; a set keeps the membership test O(1)
        # inside the nested loop. Indices stay valid because the pool only grows.
        seen_pairs = set()
        changed = True
        while changed:
            changed = False
            merged = []
            for i, hi in enumerate(pool):
                for j, hj in enumerate(pool):
                    if i == j or (i, j) in seen_pairs:
                        continue
                    if hi.query_end <= hj.query_start \
                            and hi.subject_end <= hj.subject_start \
                            and hi.subject_strand == hj.subject_strand \
                            and hj.query_start - hi.query_end + 1 <= max_space \
                            and hj.subject_start - hi.subject_end + 1 <= max_space:
                        merged.append(hi + hj)
                        seen_pairs.add((i, j))
                        changed = True
            pool = pool + merged
        return pool

    def get_best_hsp(self, query_range=(0, 0), min_overlap=1, positive_subject_strand_only=False):
        """Return the highest-scoring HSP, optionally restricted to a query range.

        With the default ``query_range`` of ``(0, 0)`` no range filter is
        applied. Otherwise an HSP only qualifies when it overlaps the range by
        at least ``min_overlap`` positions. A default ``BlastHsp()`` is returned
        when nothing beats it.
        """
        bhsp = BlastHsp()

        for hsp in self.hsps:
            if positive_subject_strand_only and hsp.subject_strand == "-":
                continue
            if query_range == (0, 0):
                if bhsp < hsp:
                    bhsp = hsp
            else:
                max_start = max(hsp.query_start, query_range[0])
                min_end = min(hsp.query_end, query_range[1])
                overlap_len = min_end - max_start + 1

                if bhsp < hsp and overlap_len >= min_overlap:
                    bhsp = hsp
        return bhsp

    def get_max_hsp(self, query_range=(0, 0), min_overlap=1, positive_subject_strand_only=False):
        """Return the smallest query start and largest query end over the qualifying HSPs.

        An HSP qualifies when ``query_range`` is ``(0, 0)`` or when it overlaps
        the range by at least ``min_overlap`` positions. ``(0, 0)`` is returned
        when no HSP qualifies.
        """
        query_start, query_end = 0, 0

        for hsp in self.hsps:
            if positive_subject_strand_only and hsp.subject_strand == "-":
                continue
            max_start = max(hsp.query_start, query_range[0])
            min_end = min(hsp.query_end, query_range[1])
            overlap_len = min_end - max_start + 1

            if query_range == (0, 0) or overlap_len >= min_overlap:
                if query_start == 0 and query_end == 0:
                    # First qualifying HSP initialises the span.
                    query_start = hsp.query_start
                    query_end = hsp.query_end
                else:
                    query_start = min(query_start, hsp.query_start)
                    query_end = max(query_end, hsp.query_end)

        return query_start, query_end

    def __str__(self):
        hsps = '\n'.join(str(i) for i in self.hsps)
        return "{}, {}\n".format(self.subject_id, self.subject_length) + hsps
hsp.sbjct_start < hsp.sbjct_end: 16 | subject_strand = '+' 17 | subject_start, subject_end = hsp.sbjct_start, hsp.sbjct_end 18 | else: 19 | subject_strand = '-' 20 | subject_end, subject_start = hsp.sbjct_start, hsp.sbjct_end 21 | return cls(hsp.query_start, hsp.query_end, subject_start, subject_end, 22 | subject_strand, hsp.score, hsp.identities, hsp.positives) 23 | 24 | def __lt__(self, other): 25 | return self.score < other.score 26 | 27 | def __eq__(self, other): 28 | return self.score == other.score 29 | 30 | def __add__(self, other): 31 | return BlastHsp(self.query_start, other.query_end, self.subject_start, other.subject_end, self.subject_strand, 32 | self.score + other.score, self.identities + other.identities, self.positives + other.positives) 33 | 34 | def __str__(self): 35 | return "Sc: {:1.1f}, Id: {}, Pos: {}, QStart: {}, QEnd: {}, SStart: {}, SEnd: {}, SStrand: {}"\ 36 | .format(self.score, self.identities, self.positives, self.query_start, 37 | self.query_end, self.subject_start, self.subject_end, self.subject_strand) 38 | -------------------------------------------------------------------------------- /src/blast/BlastHspFlat.py: -------------------------------------------------------------------------------- 1 | class BlastHspFlat: 2 | 3 | def __init__(self): 4 | self.score = 0.0 5 | self.identities = 0 6 | self.positives = 0 7 | self.query_id = "" 8 | self.subject_id = "" 9 | self.query_start = 0 10 | self.query_end = 0 11 | self.subject_start = 0 12 | self.subject_end = 0 13 | self.subject_strand = '+' 14 | self.query_len = 0 15 | self.subject_len = 0 16 | self.query_identity = 0.0 17 | self.subject_identity = 0.0 18 | self.query_coverage = 0.0 19 | self.subject_coverage = 0.0 20 | self.shorter_identity = 0.0 21 | self.application = '' 22 | 23 | def set_from_hsp(self, hsp, query_id, query_len, subject_id, subject_len, application): 24 | self.score = hsp.score 25 | self.identities = hsp.identities 26 | self.positives = hsp.positives 27 | 
self.query_start = hsp.query_start 28 | self.query_end = hsp.query_end 29 | self.subject_start = hsp.subject_start 30 | self.subject_end = hsp.subject_end 31 | self.subject_strand = hsp.subject_strand 32 | self.query_id = query_id 33 | self.query_len = query_len 34 | self.subject_id = subject_id 35 | self.subject_len = subject_len 36 | self.application = application 37 | 38 | self.query_coverage = (self.query_end - self.query_start + 1) / self.query_len 39 | self.subject_coverage = (self.subject_end - self.subject_start + 1) / self.subject_len 40 | self.subject_identity = self.identities / self.subject_len 41 | 42 | if self.application == "BLASTX": 43 | self.query_identity = self.identities / int(self.query_len/3) 44 | self.shorter_identity = self.identities / min(int(self.query_len/3), self.subject_len) 45 | else: 46 | self.query_identity = self.identities / self.query_len 47 | self.shorter_identity = self.identities / min(self.query_len, self.subject_len) 48 | 49 | def __lt__(self, other): 50 | return self.score < other.score 51 | 52 | def __eq__(self, other): 53 | return self.score == other.score 54 | 55 | def __str__(self): 56 | return 'Qlen: {:4}, DBlen: {:4}, Score: {:6.1f}, Pos: {:4}, Id: {:4}, Qid: {:1.2f}, ' \ 57 | 'DBid: {:1.2f}, Qcov: {:1.2f}, DBcov: {:1.2f}, Qid: {}, DBid: {}'.format( 58 | self.query_len, self.subject_len, self.score, self.positives, self.identities, self.query_identity, 59 | self.subject_identity, self.query_coverage, self.subject_coverage, self.query_id, self.subject_id) 60 | -------------------------------------------------------------------------------- /src/blast/BlastN.py: -------------------------------------------------------------------------------- 1 | from Bio.Blast.Applications import NcbiblastnCommandline 2 | 3 | from definitions import BLASTN_GAPOPEN, BLASTN_GAPEXTEND, BLASTN_WORDSIZE, BLASTN_EVALUE, NUM_THREADS 4 | from ..blast.Blast import Blast 5 | from ..common.misc import check_evalue 6 | 7 | 8 | class BlastN(Blast): 
class BlastP(Blast):
    """Runs ``blastp`` through Biopython's ``NcbiblastpCommandline`` wrapper.

    ``self.query``, ``self.database``, ``self.subject`` and ``self.output``
    are read here but assigned elsewhere (presumably by the ``Blast`` base
    class, which is not part of this file).
    """

    def search_database(self, evalue=0.001):
        """Search ``self.query`` against ``self.database``.

        A ``ValueError`` raised while building or running the command is
        reported on stdout as an empty query file and otherwise ignored.

        :param evalue: e-value cut-off, validated by ``check_evalue``
        """
        check_evalue(evalue)
        try:
            options = {'query': self.query, 'db': self.database, 'evalue': evalue,
                       'outfmt': 5, 'out': self.output}
            run_blastp = NcbiblastpCommandline(**options)
            run_blastp()
        except ValueError:
            print("query file is empty.")

    def search_subject(self, evalue=0.001):
        """Search ``self.query`` against the sequence file ``self.subject``.

        :param evalue: e-value cut-off, validated by ``check_evalue``
        """
        check_evalue(evalue)
        options = {'query': self.query, 'subject': self.subject, 'evalue': evalue,
                   'outfmt': 5, 'out': self.output}
        run_blastp = NcbiblastpCommandline(**options)
        run_blastp()
class BlastQuery:
    """All hits reported for one query sequence of a BLAST run."""

    def __init__(self, query_id, query_length, hits, application=''):
        self.query_id = query_id
        self.query_length = query_length
        self.hits = hits
        self.application = application

    @classmethod
    def from_rec(cls, rec):
        """Build an instance from a parsed BLAST record.

        :param rec: object exposing ``alignments``, ``query``,
            ``query_length`` and ``application``
        """
        parsed_hits = [BlastHit.from_rec(alignment) for alignment in rec.alignments]
        return cls(rec.query, rec.query_length, parsed_hits, rec.application)

    def get_best_hit(self, query_range=(0, 0), min_overlap=1, positive_subject_strand_only=False):
        """Return the top-scoring HSP over all hits as a ``BlastHspFlat``.

        Each hit contributes its own best HSP (``get_best_hsp`` with the same
        filters).  The returned object stays in its default state when no hit
        beats the zero-score placeholder.
        """
        leader = BlastHsp()
        summary = BlastHspFlat()
        for hit in self.hits:
            candidate = hit.get_best_hsp(query_range, min_overlap, positive_subject_strand_only)
            if not leader < candidate:
                continue
            leader = candidate
            summary.set_from_hsp(candidate, self.query_id, self.query_length,
                                 hit.subject_id, hit.subject_length, self.application)
        return summary

    def get_max_hit(self, query_range=(0, 0), min_overlap=1, positive_subject_strand_only=False):
        """Return the smallest query interval covering every hit's extent.

        Each hit reports its own extent through ``get_max_hsp``; an extent of
        ``(0, 0)`` means "nothing found" and is skipped.  ``(0, 0)`` is also
        returned when no hit reports anything.
        """
        lowest, highest = 0, 0

        for hit in self.hits:
            start, end = hit.get_max_hsp(query_range, min_overlap, positive_subject_strand_only)
            if (start, end) == (0, 0):
                continue
            if (lowest, highest) == (0, 0):
                # First real extent seeds the running interval.
                lowest, highest = start, end
            else:
                lowest, highest = min(lowest, start), max(highest, end)

        return lowest, highest

    def __str__(self):
        header = "{}, {}\n".format(self.query_id, self.query_length)
        return header + '\n'.join([str(hit) for hit in self.hits])
class BlastX(Blast):
    """Runs ``blastx`` through Biopython's ``NcbiblastxCommandline`` wrapper.

    ``self.query``, ``self.output``, ``self.database`` and ``self.subject``
    are read here but assigned elsewhere (presumably by the ``Blast`` base
    class, which is not part of this file).
    """

    def __get_default_params(self):
        """Return the keyword arguments shared by every blastx invocation."""
        shared = {'query': self.query, 'outfmt': 5, 'out': self.output, 'gapopen': BLASTX_GAPOPEN,
                  'gapextend': BLASTX_GAPEXTEND, 'word_size': BLASTX_WORDSIZE}

        # A non-positive NUM_THREADS leaves the option off the command line.
        if NUM_THREADS > 0:
            shared['num_threads'] = NUM_THREADS

        return shared

    def search_database(self, evalue=BLASTX_EVALUE):
        """Search ``self.query`` against the database ``self.database``.

        :param evalue: e-value cut-off, validated by ``check_evalue``
        """
        check_evalue(evalue)

        options = dict(self.__get_default_params(), db=self.database, evalue=evalue)
        run_blastx = NcbiblastxCommandline(**options)
        run_blastx()

    def search_subject(self, evalue=BLASTX_EVALUE):
        """Search ``self.query`` against the sequence file ``self.subject``.

        :param evalue: e-value cut-off, validated by ``check_evalue``
        """
        check_evalue(evalue)

        options = dict(self.__get_default_params(), subject=self.subject, evalue=evalue)
        run_blastx = NcbiblastxCommandline(**options)
        run_blastx()
def __clean_duplicit_gene_records(self):
    """
    Drop every 'gene' record that has a 'CDS' record at the same start and end.

    Before a gene record is dropped, its qualifiers are merged into each
    matching CDS record (gene values win on key clashes).  The order of the
    remaining records is unchanged.
    :return:
    """

    survivors = []
    for candidate in self.genbank_recs:
        twins = []
        if candidate.type == 'gene':
            twins = [other for other in self.genbank_recs
                     if other.type == 'CDS'
                     and other.start == candidate.start
                     and other.end == candidate.end]
        for cds in twins:
            cds.qualifiers.update(candidate.qualifiers)
        # Anything that is not a gene, or a gene without a CDS twin, is kept.
        if not twins:
            survivors.append(candidate)

    self.genbank_recs = survivors
def __get_genbank_annotations(self, gb_qualifiers):
    """Extract the lower-cased annotation texts of one GenBank feature.

    :param gb_qualifiers: mapping of qualifier name to a list of strings
    :return: tuple ``(product, note, pseudogene, is_pseudo)`` where the first
        three items are lists of lower-cased values of the ``product``,
        ``note`` and ``pseudogene`` qualifiers (empty when the qualifier is
        absent) and ``is_pseudo`` is True when a ``pseudo`` qualifier exists
    """
    gb_annots_product = []
    gb_annots_note = []
    gb_annots_pseudogene = []
    is_pseudo = False
    if 'product' in gb_qualifiers:
        gb_annots_product = list(map(str.lower, gb_qualifiers['product']))
    if 'note' in gb_qualifiers:
        gb_annots_note = list(map(str.lower, gb_qualifiers['note']))
    if 'pseudo' in gb_qualifiers:
        is_pseudo = True
    if 'pseudogene' in gb_qualifiers:
        # Fix: these values used to be written into gb_annots_note, which
        # discarded the real notes and left the pseudogene list always empty.
        gb_annots_pseudogene = list(map(str.lower, gb_qualifiers['pseudogene']))

    return gb_annots_product, gb_annots_note, gb_annots_pseudogene, is_pseudo
class RangesHits:
    """Result of an overlap search between a query and a subject range list.

    Only indices are stored: ``match`` holds ``(query_index, subject_index)``
    pairs, ``queries`` and ``subjects`` are the sizes of the two lists.
    """

    def __init__(self, queries, subjects, match, unbinded_query, unbinded_subject, query_no_overlap, query_low_overlap):
        self.queries = queries                        # number of query ranges
        self.subjects = subjects                      # number of subject ranges
        self.match = match                            # (query_index, subject_index) pairs
        self.unbinded_query = unbinded_query          # query indices absent from ``match``
        self.unbinded_subject = unbinded_subject      # subject indices absent from ``match``
        self.query_no_overlap = query_no_overlap      # per-query flag supplied by the caller
        self.query_low_overlap = query_low_overlap    # per-query flag supplied by the caller

    def get_subject_map(self):
        """Group the matched query indices by subject.

        :return: list of length ``self.subjects``; element ``j`` lists the
            query indices paired with subject ``j`` in the order they occur
            in ``self.match`` (empty when subject ``j`` has no match)
        """
        # One independent list per subject.  ``[[]] * n`` would make every
        # unmatched subject share a single list object, so a caller appending
        # to one entry would silently change all of them.
        subject_map = [[] for _ in range(self.subjects)]
        for query_idx, subject_idx in self.match:
            subject_map[subject_idx].append(query_idx)
        return subject_map
def extension(recs, context_size_orf, context_size_is, min_gb_overlap, isfinder_orf_db, isfinder_is_db):
    """Widen each record to the extent of its best ISfinder ORF and DNA hits.

    Every record is searched with BlastX against ``isfinder_orf_db`` and with
    BlastN against ``isfinder_is_db``.  A copy of the record is then grown,
    first by the ORF hit and then by the IS hit, whenever the hit's query
    interval is non-degenerate and reaches beyond the current boundaries.
    The input records are not modified.

    :param recs: records to extend; each must support ``copy`` and expose
        ``start``, ``end``, ``set_start`` and ``set_end``
    :param context_size_orf: context size handed to the BlastX search
    :param context_size_is: context size handed to the BlastN search
    :param min_gb_overlap: passed to both searches as ``min_overlap``
    :param isfinder_orf_db: database used for the BlastX search
    :param isfinder_is_db: database used for the BlastN search
    :return: tuple ``(extended_recs, orf_blast_hits, is_dna_blast_hits)``;
        three empty lists when ``recs`` is empty
    """
    if not recs:
        # Without records no search is run.  Returning here avoids touching
        # the hit lists, which would otherwise never have been assigned.
        return [], [], []

    orf_blast_hits = Blast.get_best_blast_hits_in_range(recs, BlastX, context_size_orf, isfinder_orf_db, min_overlap=min_gb_overlap, positive_subject_strand_only=False)
    is_dna_blast_hits = Blast.get_best_blast_hits_in_range(recs, BlastN, context_size_is, isfinder_is_db, min_overlap=min_gb_overlap, positive_subject_strand_only=False)

    # Extend records to IS or ORF level
    extended_recs = []
    for rec, orf_blast_hit, is_blast_hit in zip(recs, orf_blast_hits, is_dna_blast_hits):
        rec_copy = copy(rec)
        # ORF level first, IS level second; the second step sees the
        # boundaries already widened by the first.
        for blast_hit in (orf_blast_hit, is_blast_hit):
            hit_start, hit_end = blast_hit.query_start, blast_hit.query_end
            if hit_start == hit_end:
                # Equal coordinates (e.g. the 0/0 defaults) mean nothing to extend to.
                continue
            if hit_start < rec_copy.start or hit_end > rec_copy.end:
                rec_copy.set_start(min(rec_copy.start, hit_start))
                rec_copy.set_end(max(rec_copy.end, hit_end))
        extended_recs.append(rec_copy)

    return extended_recs, orf_blast_hits, is_dna_blast_hits
def read_gb(filename):
    """Read a GenBank file into an ordered ``{record id: features}`` mapping.

    :param filename: path of the GenBank file
    :return: ``OrderedDict`` keyed by record id, in file order, whose values
        are the records' ``features``
    :raises FileNotFoundError: when ``filename`` does not exist (the problem
        is logged as an error first)
    """
    if not os.path.exists(filename):
        logging.error("Filename {} does not exist.".format(filename))
        raise FileNotFoundError("{} file does not exist.".format(filename))

    return OrderedDict((genome.id, genome.features) for genome in SeqIO.parse(filename, "genbank"))
def write_gff(rows, filename, header):
    """Write result rows as a GFF3 file.

    The columns named ``sid``, ``sstart``, ``send``, ``acc`` and ``strand``
    fill the seqid, start, end, score and strand fields; all remaining
    columns are emitted as ``name=value`` pairs in the attributes field.
    Source and type are fixed to ``digIS`` / ``transposable_element``.

    :param rows: sequence of rows, each indexable in the order of ``header``
    :param filename: output path (overwritten)
    :param header: column names of ``rows``
    :raises ValueError: when the first row and ``header`` differ in length,
        or a required column name is missing from ``header``
    """
    if len(rows) > 0 and len(header) != len(rows[0]):
        raise ValueError("Number of elements in header and in row to write is not same.")

    # Positions of the columns that map onto fixed GFF fields.
    fixed = {name: header.index(name) for name in ('sid', 'sstart', 'send', 'strand', 'acc')}
    taken = set(fixed.values())
    extra_columns = [(pos, name) for pos, name in enumerate(header) if pos not in taken]

    with open(filename, 'w+', newline='') as gff:
        gff.write("##gff-version 3\n")
        for row in rows:
            sid, start, end = row[fixed['sid']], row[fixed['sstart']], row[fixed['send']]
            strand, score = row[fixed['strand']], row[fixed['acc']]
            attributes = ";".join(name + "=" + str(row[pos]) for pos, name in extra_columns)

            gff.write("{}\tdigIS\ttransposable_element\t{}\t{}\t{}\t{}\t.\t{}\n".format(sid, start, end, score, strand, attributes))
def shift_left(self, size):
    """Move the range ``size`` positions towards the genome start, in place.

    A coordinate that would fall to zero or below wraps around by
    ``genome_len`` on a circular genome and is pinned to 1 otherwise.
    ``width`` is recomputed afterwards.

    :param size: number of positions to shift by
    """
    def relocate(position):
        moved = position - size
        if moved > 0:
            return moved
        return moved + self.genome_len if self.circular else 1

    self.start = relocate(self.start)
    self.end = relocate(self.end)
    self.width = self.__len__()
def get_overlap_length(self, other, ignore_strand=False, flank=0):
    """Return the length of the overlap between this range and ``other``.

    This range is first widened by ``flank`` via ``get_flank_range``.  Both
    ranges are then shifted left by the same amount so that the smaller of
    the two start positions becomes 1; the overlap is computed on these
    shifted coordinates.  ``other`` is shallow-copied before shifting, so
    the caller's object is not modified.

    :param other: range to compare with; must expose ``start``, ``end``,
        ``width``, ``strand`` and ``shift_left``
    :param ignore_strand: when False, ranges on different strands are
        reported as not overlapping (result 0)
    :param flank: widening handed to ``get_flank_range`` for this range only
    :return: overlap length; the value is not clamped, so it can be zero or
        negative when the shifted ranges do not touch (``has_overlap`` only
        tests for ``> 0``)
    """
    other_range = copy.copy(other)
    new_range = self.get_flank_range(flank)
    # Common shift that brings the leftmost start to position 1.
    size = min(new_range.start, other_range.start)-1
    new_range.shift_left(size)
    other_range.shift_left(size)
    if new_range.start <= other_range.start:
        # This range starts first: the overlap runs from other's start to this
        # range's end, capped at other's full width (other fully inside).
        overlap = min(new_range.end-other_range.start+1, other.width)
    else:
        # Mirror case: other starts first, capped at this (widened) range's width.
        overlap = min(other_range.end-new_range.start+1, new_range.width)

    # Strand mismatch overrides whatever was computed above.
    if self.strand != other.strand and not ignore_strand:
        overlap = 0

    return overlap
'-': 116 | seq = seq.reverse_complement() 117 | if protein: 118 | seq = seq.translate(table=11) 119 | 120 | return SeqRecord(seq, id=self.genome_name, description='') 121 | 122 | def __str__(self): 123 | return "{} {} {} {} {}".format(self.genome_name, self.chr, self.start, self.end, self.strand) 124 | 125 | def __len__(self): 126 | out_len = self.end - self.start + 1 127 | if out_len <= 0: # element crossing the genome boundary 128 | out_len += self.genome_len 129 | return out_len 130 | -------------------------------------------------------------------------------- /src/common/misc.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | import pickle 4 | import re 5 | 6 | from os import walk 7 | from definitions import ROOT_DIR 8 | 9 | 10 | def get_family_names(mypath): 11 | records = walk(mypath) 12 | (dirpath, dirnames, filenames) = next(records) 13 | return dirnames 14 | 15 | 16 | def get_family_filenames(fam_path, include_subdir=True): 17 | filename_list = [] 18 | for (dirpath, dirnames, filenames) in walk(fam_path): 19 | for filename in filenames: 20 | filename_list.append(os.path.join(dirpath, filename)) 21 | if(not include_subdir): 22 | break 23 | return filename_list 24 | 25 | 26 | def get_filenames_by_extension(dirpath, extensions): 27 | outfiles = [] 28 | for (dirpath, dirnames, filenames) in os.walk(dirpath): 29 | for filename in filenames: 30 | if os.path.splitext(filename)[-1] in extensions: 31 | outfiles.append(os.path.join(dirpath, filename)) 32 | break 33 | 34 | return outfiles 35 | 36 | 37 | def get_filenames_by_substring(dirpath, substring, include_subdir=False): 38 | outfiles = [] 39 | for (dirpath, dirnames, filenames) in os.walk(dirpath): 40 | for filename in filenames: 41 | if substring in filename: 42 | outfiles.append(os.path.join(dirpath, filename)) 43 | if(not include_subdir): 44 | break 45 | 46 | return outfiles 47 | 48 | 49 | def save_object(obj_name, obj): 50 | dump_file 
def init_output_dir(output_dir):
    """Create the output directory and its fixed set of sub-directories.

    Directories that already exist are left as they are, so the function can
    be called repeatedly.

    :param output_dir: root of the output tree; missing parents are created
    :raises OSError: when a path cannot be created, e.g. because a regular
        file is in the way
    """
    subdirs = ["hmmer", "logs", "pep", "results"]

    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first: a directory appearing in between no longer raises FileExistsError.
    os.makedirs(output_dir, exist_ok=True)

    for subdir in subdirs:
        os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)
" 121 | "Current value: {}, type: {}".format(evalue, type(evalue))) 122 | 123 | 124 | def is_empty(file_path): 125 | 126 | try: 127 | if os.path.getsize(file_path) > 0: 128 | return False 129 | else: 130 | return True 131 | except OSError as e: 132 | print(e) 133 | 134 | 135 | def change_path_to_linux(line): 136 | matchObj = re.match(r'(.):.*', line, re.M | re.I) 137 | if matchObj: 138 | line = line.replace(matchObj.group(1) + ":", '/mnt/' + matchObj.group(1).lower()) 139 | line = line.replace('\\', '/') 140 | return line 141 | -------------------------------------------------------------------------------- /src/common/ranges.py: -------------------------------------------------------------------------------- 1 | from ..common.RangesHits import RangesHits 2 | 3 | 4 | def find_overlaps(query, subject, ignore_strand=False, min_overlap=None, min_query_overlap_percentage=None, min_subject_overlap_percentage=None, allow_query_fragments=False): 5 | 6 | unbinded_query = set(list(range(len(query)))) 7 | unbinded_subject = set(list(range(len(subject)))) 8 | match = [] 9 | query_no_overlap = [] 10 | query_low_overlap = [] 11 | 12 | # For each query record 13 | for qi, q in enumerate(query): 14 | # For each subject record 15 | q_no_overlap = True 16 | q_low_overlap = True 17 | for si, s in enumerate(subject): 18 | # Test for overlap 19 | overlap = q.get_overlap_length(s, ignore_strand) 20 | filter = False 21 | if min_overlap is not None: 22 | filter = filter or overlap < min_overlap 23 | if min_query_overlap_percentage is not None: 24 | filter = filter or (overlap/q.width)*100 < min_query_overlap_percentage 25 | if min_subject_overlap_percentage is not None: 26 | filter = filter or (overlap/s.width)*100 < min_subject_overlap_percentage 27 | if overlap > 0: 28 | q_no_overlap = False 29 | if allow_query_fragments and overlap == q.width: 30 | filter = False 31 | if not filter: 32 | q_low_overlap = False 33 | match.append((qi, si)) 34 | unbinded_query.discard(qi) 35 | 
def transform_range(start, end, frame, seqlen):
    """Convert a range given in codons of a translation frame to nucleotides.

    ``frame`` selects a per-frame nucleotide shift (frames 1-6). Frames 1-3
    are counted from the beginning of the sequence, frames 4-6 are counted
    back from ``seqlen``. ``start`` is treated as 1-based (``start - 1`` is
    the number of codons skipped).

    Returns the tuple ``(start_pos, end_pos)``.
    """
    # Shift in nucleotides, looked up by frame - 1.
    shift = (0, 1, 2, 2, 1, 0)[frame - 1]
    skipped_nt = 3 * (start - 1) + shift   # nucleotides before the range
    covered_nt = 3 * end + shift           # nucleotides up to the range end

    if frame > 3:
        # Mirror the interval around the sequence end.
        return seqlen - covered_nt + 1, seqlen - skipped_nt
    return skipped_nt + 1, covered_nt
def get_ids_from_fasta(filename):
    """Return the set of shortened record ids found in a FASTA file.

    Each record id is cut at the first "|" and the remainder at the first
    ".", so only the leading token of the id is kept.
    """
    return {
        rec.id.split("|")[0].split(".")[0]
        for rec in SeqIO.parse(filename, "fasta")
    }
def get_sixframe_record(filename, start, end):
    """Translate a region of the FASTA record in ``filename`` in six frames.

    The region ``start``..``end`` is lengthened at its end so that its length
    is a multiple of three, then shifted by 0, 1 and 2 nucleotides. Every
    window is translated with table 11 both as is and as its reverse
    complement.

    Returns a list of six ``SeqRecord`` objects carrying the source record id,
    described in order as "Frame: 0", "Frame: 3", "Frame: 1", "Frame: 4",
    "Frame: 2", "Frame: 5".
    """
    record = SeqIO.read(filename, "fasta")

    # NOTE(review): when the region length is already a multiple of three the
    # padding computed here is a whole codon (3), not 0 -- confirm intended.
    region_len = end - start + 1
    padded_end = end + 3 - region_len % 3

    def _translated(seq, frame_no):
        return SeqRecord(seq.translate(table=11), id=record.id,
                         description='Frame: ' + str(frame_no))

    frames = []
    for shift in range(3):
        window = record.seq[start - 1 + shift:padded_end + shift]
        frames.append(_translated(window, shift))
        frames.append(_translated(window.reverse_complement(), shift + 3))
    return frames
def trim_sequence(seq, alphabet):
    """Return the leading part of ``seq`` made only of ``alphabet`` symbols.

    The sequence is cut just before the first symbol that is not contained in
    ``alphabet``; it is returned in full when every symbol is allowed.
    """
    cut = next(
        (pos for pos, symbol in enumerate(seq) if symbol not in alphabet),
        len(seq),
    )
    return seq[:cut]
class Hmmer:
    """Wrapper that runs an HMMER search tool and reads its domain table.

    Attributes:
        hmm: initialised to "" and not modified by this class.
        seqfile: initialised to "" and not modified by this class.
        flat_hsps: list of ``HmmerHspFlat`` objects filled by ``parse``.
    """

    def __init__(self):
        """Create an instance with no parsed hits."""
        self.hmm = ""
        self.seqfile = ""
        self.flat_hsps = []

    def run(self, tool, hmmfile, seqdb, outfile, cevalue=None):
        """Run ``tool`` with ``hmmfile`` against ``seqdb``.

        Args:
            tool: executable placed first on the command line.
            hmmfile: HMM profile file given to the tool.
            seqdb: sequence file searched; when it is not an existing file a
                hint is printed and the process exits with status 1.
            outfile: path handed to ``--domtblout``.
            cevalue: value for ``--domE``; when falsy ``--domT 0.0`` is used.

        Raises:
            AttributeError: if ``outfile`` is empty.
        """
        if not check_if_file_exists(seqdb):
            print("File {} does not exist or is not a file.".format(seqdb))
            print("Try to run digIS with translate option turned on.")
            # sys.exit is always available; the bare exit() helper is only
            # installed by the site module.
            sys.exit(1)

        if not outfile:
            raise AttributeError("Output file argument is required.")

        cmd = self.__build_command(tool=tool, hmmfile=hmmfile, seqdb=seqdb, outfile=outfile, cevalue=cevalue)
        if sys.platform == 'win32':
            # On Windows the whole command is handed to bash as one string.
            cmd = ['bash.exe', '-c', ' '.join(cmd)]
        self.__run_tool(cmd)

    def parse(self, outfile):
        """Load the hits stored in a domain-table file into ``flat_hsps``.

        Previously parsed hits are discarded first.

        Returns:
            True when the file contained at least one query result.

        Raises:
            FileNotFoundError: if ``outfile`` is not an existing file.
        """
        self.flat_hsps = []

        # check_if_file_exists() reports a missing file by returning None, it
        # never raises, so the result has to be tested explicitly.
        if not check_if_file_exists(outfile):
            raise FileNotFoundError("No hmmer output file set. Missing file: {}".format(outfile))

        hmmer_res = list(SearchIO.parse(outfile, 'hmmsearch3-domtab'))
        for query in hmmer_res:
            for hit in query.hits:
                for hsp in hit.hsps:
                    self.flat_hsps.append(HmmerHspFlat(query, hit, hsp))

        return len(hmmer_res) > 0

    def to_csv(self, output_csv=None):
        """Return ``(header, rows)`` for the parsed hits.

        ``header`` is an empty list when there are no hits. When
        ``output_csv`` is given the rows are also written with ``write_csv``.
        """
        # Each hit's to_csv() yields a single row; the column names come from
        # its get_csv_header() classmethod.
        header = self.flat_hsps[0].get_csv_header() if self.flat_hsps else []
        rows = [hsp.to_csv() for hsp in self.flat_hsps]

        if output_csv:
            write_csv(rows, output_csv, header)

        return header, rows

    def __build_command(self, tool, hmmfile, seqdb, outfile, cevalue=None):
        """Build the argument list ``tool [options] hmmfile seqdb``."""
        cmd = [tool, "--noali"]

        if NUM_THREADS != 0:
            cmd.extend(["--cpu", str(NUM_THREADS)])

        if cevalue:
            cmd.extend(["--domE", str(cevalue)])
        else:
            cmd.extend(["--domT", "0.0"])

        if sys.platform == 'win32':
            # The command runs inside bash there, so paths are rewritten.
            outfile = change_path_to_linux(outfile)
            hmmfile = change_path_to_linux(hmmfile)
            seqdb = change_path_to_linux(seqdb)

        cmd.extend(["--domtblout", outfile, hmmfile, seqdb])
        return cmd

    @staticmethod
    def __run_tool(cmd):
        """Execute ``cmd``; a non-zero exit status is printed, not raised."""
        try:
            subprocess.check_call(cmd, stdout=sys.stdout, stderr=sys.stderr)
        except subprocess.CalledProcessError as e:
            print("An error occurred when calling {}.".format(cmd))
            print(e)

    def __len__(self):
        """Number of hits collected by the last ``parse``."""
        return len(self.flat_hsps)

    def __str__(self):
        """One line per collected hit."""
        return '\n'.join(str(hsp) for hsp in self.flat_hsps)
class HmmerHsp:
    """Coordinates and scores copied from one HMMER hsp object.

    The ``*_start`` values of the wrapped object are incremented by one and
    the ``*_end`` values are taken unchanged. Instances compare with ``<`` by
    bit score.
    """

    def __init__(self, hsp, query_len):
        """Copy the fields of ``hsp``.

        ``query_len`` is the divisor used for ``query_coverage``, the share of
        the query spanned by ``qstart``..``qend``.
        """
        self.qid, self.sid = hsp.query_id, hsp.hit_id
        self.qstart, self.qend = hsp.query_start + 1, hsp.query_end
        self.sstart, self.send = hsp.hit_start + 1, hsp.hit_end
        self.sstart_env, self.send_env = hsp.env_start + 1, hsp.env_end
        self.acc = float(hsp.acc_avg)
        self.evalue = float(hsp.evalue)
        self.bitscore = float(hsp.bitscore)
        aligned_len = self.qend - self.qstart + 1
        self.query_coverage = aligned_len / query_len

    def __str__(self):
        fields = (self.qid, self.sid, self.qstart, self.qend, self.sstart,
                  self.send, self.acc, self.evalue, self.bitscore,
                  self.query_coverage)
        return ", ".join(format(field) for field in fields)

    def __lt__(self, other):
        # Ordering is by bit score only.
        return self.bitscore < other.bitscore
class RecordDigIS(Grange):
    """Genomic range of one digIS hit together with its query-side data.

    On top of the range handled by ``Grange`` the record stores the query id
    and query coordinates (``qid``, ``qstart``, ``qend``), the subject id
    (``sid``) and the hit quality values ``acc``, ``score`` and ``evalue``.
    """

    def __init__(self, genome_name, chrom, genome_seq, genome_len, qid, sid, qstart, qend, start, end, strand, acc, score, evalue):
        self.qid = qid
        self.sid = sid
        self.qstart = qstart
        self.qend = qend
        self.acc = acc
        self.score = score
        self.evalue = evalue
        super().__init__(genome_name, chrom, start, end, strand, genome_seq, genome_len)

    @classmethod
    def from_csv(cls, csv, genome_name, chrom, genome_seq, genome_len):
        """Build a record from a mapping keyed by the CSV column names.

        Coordinates are converted to ``int`` and ``acc``/``score``/``evalue``
        to ``float`` so that records read back from text behave like the ones
        created by ``from_hmmer`` (``to_csv`` rounds them, ``merge`` compares
        them numerically).
        """
        qid = csv['qid']
        sid = csv['sid']
        qstart = int(csv['qstart'])
        qend = int(csv['qend'])
        start = int(csv['sstart'])
        end = int(csv['send'])
        strand = csv['strand']
        acc = float(csv['acc'])
        score = float(csv['score'])
        evalue = float(csv['evalue'])
        return cls(genome_name, chrom, genome_seq, genome_len, qid, sid, qstart, qend, start, end, strand, acc, score, evalue)

    @classmethod
    def from_hmmer(cls, hsp, sid, start, end, strand, genome_name, chrom, genome_seq, seq_len):
        """Build a record from a flat hmmer hsp and its genomic placement."""
        return cls(genome_name, chrom, genome_seq, seq_len, hsp.qid, sid, hsp.qstart, hsp.qend, start, end, strand, float(hsp.acc), float(hsp.dom_bitscore), float(hsp.dom_evalue))

    # Requirements for merge in distance
    #  - the same strand
    #  - the same query_id (hmm model/outlier)
    #  - continuous fragments with respect to model
    def should_be_merged_distance(self, other, merge_distance):
        """Tell whether ``other`` is a nearby continuation of this hit.

        True when both records share ``qid``, do not overlap directly but do
        overlap once extended by ``merge_distance``, and their query ranges
        follow each other in the order implied by this record's strand.
        """
        # NOTE(review): only self.strand is inspected below; presumably
        # has_overlap() or the caller guarantees equal strands -- confirm.
        if self.strand == '+':
            continuous_fragments = (self.start < other.start and self.qend <= other.qstart) or \
                                   (other.start < self.start and other.qend <= self.qstart)
        elif self.strand == '-':
            continuous_fragments = (other.start < self.start and self.qend <= other.qstart) or \
                                   (self.start < other.start and other.qend <= self.qstart)
        else:
            continuous_fragments = False

        return bool(self.qid == other.qid and not self.has_overlap(other)
                    and self.has_overlap(other, flank=merge_distance) and continuous_fragments)

    def merge(self, other, merge_type):
        """Extend this record in place so that it also covers ``other``.

        Args:
            other: record on the same strand with the same ``sid``.
            merge_type: "distance" adds both scores, "overlap" keeps the
                higher one.

        Raises:
            ValueError: if strands or subject ids differ, or ``merge_type`` is
                neither "distance" nor "overlap".
        """
        if self.strand != other.strand or self.sid != other.sid:
            raise ValueError('RecordDigIS.merge(): Records can not be merged')

        # Checked before anything is modified, so a bad argument cannot leave
        # the record half merged.
        if merge_type == "distance":
            new_score = self.score + other.score
        elif merge_type == "overlap":
            new_score = max(self.score, other.score)
        else:
            raise ValueError('RecordDigIS.merge(): Unknown merge type: {}'.format(merge_type))

        new_start = min(self.start, other.start)
        new_end = max(self.end, other.end)
        new_len = new_end - new_start + 1

        # Length-weighted accuracy; the shared part is counted once, with the
        # accuracy of the more accurate record.
        intersection_length = self.get_overlap_length(other)
        if self.acc > other.acc:
            new_acc = (len(self)*self.acc + (len(other)-intersection_length)*other.acc) / new_len
        else:
            new_acc = ((len(self)-intersection_length)*self.acc + len(other)*other.acc) / new_len

        # Union of the query ids in first-seen order, so the joined id does
        # not depend on set iteration order.
        merged_qids = []
        for part in self.qid.split('-') + other.qid.split('-'):
            if part not in merged_qids:
                merged_qids.append(part)

        self.set_start(new_start)
        self.set_end(new_end)
        self.qstart = min(self.qstart, other.qstart)
        self.qend = max(self.qend, other.qend)
        self.qid = '-'.join(merged_qids)
        self.acc = new_acc
        self.score = new_score
        self.evalue = min(self.evalue, other.evalue)

    @classmethod
    def get_csv_header(cls):
        """Column names matching the values returned by ``to_csv``."""
        return ["qid", "qstart", "qend", "sid", "sstart", "send", "strand", "acc", "score", "evalue"]

    def to_csv(self):
        """Return the record as a row; ``acc`` and ``score`` are rounded to 2 places."""
        return [self.qid, self.qstart, self.qend, self.sid, self.start, self.end, self.strand, round(self.acc, 2), round(self.score, 2), self.evalue]

    def __str__(self):
        return "{}, {}, {}, {}, {}, {}, {}, {}, {}, {}".format(self.qid, self.qstart, self.qend,
                                                               self.sid, self.start, self.end,
                                                               self.strand, self.acc, self.score, self.evalue)
class digISConfiguration:
    """Settings for one digIS run.

    Database paths and tuning constants are read from the ``definitions``
    module; the two thresholds may be overridden per run. Creating an
    instance also calls ``init_output_dir`` on ``output_dir``.
    """

    def __init__(self, genome_file, genbank_file, output_dir, currated_cutoff=None, outliers_evalue=None):
        """Store the inputs, fill in defaults and check the context sizes.

        Raises:
            ValueError: if the ORF context size exceeds the IS context size.
        """
        # Inputs given by the caller.
        self.genome_file = genome_file
        self.genbank_file = genbank_file
        self.output_dir = output_dir

        # Values taken from the definitions module.
        self.models = definitions.HMM_MODELS
        self.outliers_fasta = definitions.OUTLIERS
        self.isfinder_orf_db = definitions.ISFINDER_ORF_DB
        self.isfinder_is_db = definitions.ISFINDER_IS_DB
        self.context_size_orf = definitions.CONTEXT_SIZE_ORF
        self.context_size_is = definitions.CONTEXT_SIZE_IS
        self.max_merge_distance = definitions.MAX_MERGE_DISTANCE
        self.min_hit_length = definitions.MIN_HIT_LENGTH
        self.min_gb_overlap = definitions.MIN_GB_OVERLAP

        # NOTE(review): any falsy override (0, 0.0, False, "") falls back to
        # the definitions value, not only None -- confirm that is wanted.
        self.currated_cutoff = currated_cutoff or definitions.CURRATED_CUTOFF
        self.outliers_evalue = outliers_evalue or definitions.OUTLIERS_EVALUE

        init_output_dir(self.output_dir)

        if self.context_size_orf > self.context_size_is:
            raise ValueError(
                "Context size ORF is greater than context_size_is. Should be smaller or equal.\n"
                "Context size ORF value: {}\n"
                "Context size IS value: {}\n".format(self.context_size_orf, self.context_size_is))

    def __str__(self):
        template = ("genome file: {}, models: {}, outliers: {}, isfinder orf db: {}, isfinder is db: {}, context orf size: {}, "
                    "context size is: {}, max merge distance: {}, genbank file: {}, min gb overlap {}, outdir: {}")
        values = (self.genome_file, self.models, self.outliers_fasta, self.isfinder_orf_db, self.isfinder_is_db,
                  self.context_size_orf, self.context_size_is, self.max_merge_distance, self.genbank_file,
                  self.min_gb_overlap, self.output_dir)
        return template.format(*values)
os.path.splitext(os.path.basename(self.config.genome_file))[0] 30 | csv_header = [] 31 | csv_rows = [] 32 | for genome_id, digIS_rec in self.digIS_recs.items(): 33 | csv_header, rows = digIS_rec.export_records() 34 | csv_rows.extend(rows) 35 | 36 | print("Exporting records...") 37 | output_recs_csv = os.path.join(self.config.output_dir, "results", fasta_basename + ".csv") 38 | output_recs_gff = os.path.join(self.config.output_dir, "results", fasta_basename + ".gff") 39 | write_csv(csv_rows, output_recs_csv, csv_header) 40 | write_gff(csv_rows, output_recs_gff, csv_header) 41 | 42 | print("Exporting summary statistics...") 43 | sum_recs = [] 44 | for genome_id, digIS_rec in self.digIS_recs.items(): 45 | sum_recs.extend(digIS_rec.export_summary_stats()) 46 | 47 | output_sum = os.path.join(self.config.output_dir, "results", fasta_basename + ".sum") 48 | with open(output_sum, 'w+', newline='') as f: 49 | f.write("{}\t{}\t{}\t{}\t{}\t{}\n".format('#seqid', 'family', 'nIS', 'bps', 'dnaLen', '%dna')) 50 | for rec in sum_recs: 51 | f.write("{}\t{}\t{}\t{}\t{}\t{:.2f}\n".format(*rec)) 52 | 53 | return len(csv_rows) 54 | --------------------------------------------------------------------------------