├── .DS_Store ├── .gitignore ├── .travis.yml ├── KEGGDecoder ├── .DS_Store ├── .gitignore ├── Amphibactin │ ├── ABO2092homolog_ref.faa │ ├── ABO2093homolog_ref.faa │ ├── amphibactin.sfam.hmm │ ├── amphibactin2092_sfam_match.tblout │ └── amphibactin2093_sfam_match.tblout ├── DMSPLyase │ ├── .DS_Store │ ├── 14591.hmm │ ├── 25993.hmm │ ├── 274874.hmm │ ├── 94923.hmm │ ├── Pelagibacter_ref_dddK.faa │ └── dmdA_37326.hmm ├── DMSPSynthase │ ├── 4254.hmm │ ├── curson_dmsp_synthase.faa │ ├── img_dmsp_synthase_dsyB.faa │ └── ncbi_dmsp_synthase_dsyB.faa ├── Decode_and_Expand.py ├── DesferrioxamineBiosynthesis │ ├── Streptomycetes_ref_pathway.faa │ ├── ferrioxamine.sfam.hmm │ └── ferrioxamine_sfam_match.tblout ├── HMM_Models │ ├── .DS_Store │ ├── 14591.hmm │ ├── 25993.hmm │ ├── 274874.hmm │ ├── 4254.hmm │ ├── 94923.hmm │ ├── Ammonium_transp.hmm │ ├── AnfG_VnfG.hmm │ ├── Bac_rhodopsin.hmm │ ├── CbtB.hmm │ ├── Copper_HMA.hmm │ ├── DMSP_lyase.hmm │ ├── DsrD.hmm │ ├── Oxidored_nitro.hmm │ ├── PreviousExpanderDB_versions │ │ ├── Aminotran_5.hmm │ │ ├── PPC.hmm │ │ ├── PepSY.hmm │ │ ├── Peptidase_A8.hmm │ │ ├── Peptidase_C11.hmm │ │ ├── Peptidase_C25.hmm │ │ ├── Peptidase_M1.hmm │ │ ├── Peptidase_M14.hmm │ │ ├── Peptidase_M20.hmm │ │ ├── Peptidase_M24.hmm │ │ ├── Peptidase_M28.hmm │ │ ├── Peptidase_M29.hmm │ │ ├── Peptidase_M3.hmm │ │ ├── Peptidase_M50.hmm │ │ ├── Peptidase_M55.hmm │ │ ├── Peptidase_M61.hmm │ │ ├── Peptidase_S24.hmm │ │ ├── Peptidase_S26.hmm │ │ ├── expander_dbv0.1.hmm │ │ ├── expander_dbv0.2.hmm │ │ ├── expander_dbv0.3.hmm │ │ ├── expander_dbv0.4.hmm │ │ ├── expander_dbv0.5.hmm │ │ └── expander_dbv0.6.hmm │ ├── ZupT.hmm │ ├── amphibactin.sfam.hmm │ ├── anfD_nitrogenase.hmm │ ├── anfG_nitrogenase.hmm │ ├── anfK_nitrogenase.hmm │ ├── blh_betacarotenemonox.hmm │ ├── expander_dbv0.7.hmm │ ├── ferrioxamine.sfam.hmm │ ├── methylphosphonate-synthase.sfam.hmm │ ├── vnfD_nitrogenase.hmm │ ├── vnfG_nitrogenase.hmm │ └── vnfK_nitrogenase.hmm ├── KEGG_clustering.py ├── 
KEGG_clustering.pyc ├── KEGG_decoder.py ├── KEGG_expander.py ├── KOALA_definitions.txt ├── MakeTanglegram.py ├── MethylphosphonateSynthase │ ├── methylphosphonate-synthase.sfam.hmm │ ├── methylphosphonate-synthase_ref.faa │ └── methylphosphonate-synthase_sfam_match.tblout ├── Pfam_definitions.txt ├── Plotly_viz.py ├── PreviousKEGGDecoderVersions │ ├── Decode_and_Expand-V0.0.py │ ├── KEGG-decoderV0.3.py │ ├── KEGG-decoderV0.4.py │ ├── KEGG-decoderV0.5.py │ ├── KEGG-expanderV0.3.py │ ├── KEGG-expanderV0.4.1.py │ └── KOALA_definitionsV0.5.txt ├── README.md ├── __init__.py └── images │ ├── interactive.png │ ├── static.png │ └── tanglegram.png ├── LICENSE ├── README.md ├── pyproject.toml ├── tests ├── NORP_subset.txt ├── PET-function-test-ghostkoalaoutput.txt └── test_decoder.py └── tox.ini /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjtully/BioData/cb1d45a957eda783412a48911c8592252915d9cd/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .tox/ 2 | **/__pycache__/ 3 | .coverage 4 | dist/ 5 | .idea/* 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | cache: 3 | directories: 4 | - "$HOME/.cache/pip" 5 | - ".tox" 6 | 7 | python: 8 | - "3.6" 9 | - "3.5" 10 | 11 | # 3.7 needs Xenial image because of https://github.com/travis-ci/travis-ci/issues/9069 12 | matrix: 13 | include: 14 | - python: "3.7" 15 | dist: xenial 16 | 17 | install: 18 | - pip install --upgrade pip 19 | - pip install codecov 20 | - pip install tox-travis 21 | script: tox 22 | after_success: codecov 23 | -------------------------------------------------------------------------------- /KEGGDecoder/.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjtully/BioData/cb1d45a957eda783412a48911c8592252915d9cd/KEGGDecoder/.DS_Store -------------------------------------------------------------------------------- /KEGGDecoder/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | -------------------------------------------------------------------------------- /KEGGDecoder/Amphibactin/ABO2092homolog_ref.faa: -------------------------------------------------------------------------------- 1 | >WP_011589370.1 non-ribosomal peptide synthetase [Alcanivorax borkumensis] 2 | MSLDKAALARRFMALDETKQAVFLQKLGEKGIPFERLPIVAGNRPGRIPLAPAQQRLWTIHQLEPANTAY 3 | HLSAAFELNGPLDVPRLLRAMDAVVRRHESLRTRFVEEDGEGYQRIVADTLTVEQRDARALDDGDRQTLA 4 | DEHASRLFVLGSDSPLRVQLLQLSEQCWRLQLVMHHLVSDGWSMDVFFADLARAYLSDSPLPALAIQYAD 5 | YALWQKAWLDAGERDRQLSYWREQLGHDQQERAQPPLLIAHDRAPAQTALRSAASVQWSVPAHLQGPLQA 6 | LARDNDTTLFTVVLAAWQWALASVSGRRDIPVGVPVANRERAEVDALVGFFVNTLVLRGKPRAAFTVNQW 7 | VTQLHQTMLDAQAHQALPFDQLVAFLAPQREPGETPLFQVLFNYQRRDTKARNLSDNVTIVPLAQGVPHA 8 | LFDLALDAHEGEGGALLLTLTYAADRFHSETVERLKQAMNQVLTAFSQGQHTLGSISVAESDVSRLQEWG 9 | QGEGEWQATSFVSLFSQQAAERGDAIALVHGGTRVSFAELDARSNQLARYLMGQGVKADQVVGVSFERGV 10 | TMVEAFIAVMKAGGAFLPLDPGYPKDRLHYMLKDSGARLLLTSSALIGVLPEVATVAPVAVDRLSLNDFP 11 | ANALNNEPHPDQLAYVIYTSGSTGKPKGVSLTHAGLSMHVQTIGKRYGMTPDDVELQFASISFDGAVERW 12 | TVPLAFGSRVVIRDQQLWSAQQTCDALQKEGVTIACIPPSYMGPLLDWIEQEKPPLNVRSWTLGGEAFTR 13 | ETFERMQQVLKPQRILNGYGPTETVVTPMLWAAYEGDTLSSAYAPIGHAVGPRKLYVLDQDLNRVPVGVA 14 | GELYIGNEVGLARGYHQRPDLTAERFLPDPFGEPGERMYRTGDLVKYRDDGVMEYLGRVDQQVKIRGFRI 15 | ELGEIESQLLGHEQIREAVVVAQPSPTGDRLVGYIVMRNAAHSVPGEHEPQTILAALSESLPDYMVPSQL 16 | ITLEAMPLTPAGKVDRKALPVAQWHTASEGAPPQTDNEKVLADIWQSLLGRKGVSRDDNFFALGGDSILA 17 | LQVVSRARQQGLALTPKDLFEHPRLHDLAACAVNAEASIISQQPLQGELGLLPIQQHFFSLRPPSPSHWN 18 | QHLWLALDAPMDRQALNVTLQALYRQHDALRLQFTYEANGWRQTCLSADRPCDELLWWCQAADEAAVETL 19 | 
CEQTQQSLNIETGELFRVLYVQTPQQPDRMLLVIHHLAVDGVSWRILLDDLLNAYGQALTGAAITLPERS 20 | HNLSDWHQALEQWLQSKGEARLPFWQGQQPEKIDSALPTFAEKESVALSLGKDDTRRLLQDAPVKLGVSV 21 | PALLLTALTRTLKPSAMGASLAVRSAPEVGSESPSQDNQLVINLEGHGRETRLAPGLDLSRTLGWFTSLF 22 | PLRLPFDGDPRRNLFAVSQQLANTEADGGVGYGALRYLADSPLNDSTDAPVTFNYLGQYHNDALIQGFTP 23 | LSGGGKAQAEDNPMAAPLAINGQVVAGKLGLVWDFARSHYPRQQVQHLCSRYRQELLVLLDLADSPQEAR 24 | ADPALVTVLNSSADTPVLWCPHPVTGRTTGYQALAASLEGQWQVRGLQSRSFLESGWFDPSLSEMVERYY 25 | RTVRQAQPEGPYYLLGWSMGGAMSMELAHRLEQAGEEVAFVGLLDTYVPGHEVPEDQWSSPQAQQTLRAH 26 | LGMLLTCAPATALDDCMARLRDSKPAQWPQNFTQWLATQSIEAAVADSAAQLLHAWAVEQHLRDLCWGYQ 27 | LPALHTPVHSWWASEPAGRAQALQRGLALCAELAHTETVSADHLTIVREPSLLQGLSVLLKCCKGGNALM 28 | DIPD 29 | >WP_013032793.1 non-ribosomal peptide synthetase [Nitrosococcus halophilus] 30 | MSLDKQALAQRFLALGDADQERFLAALREKSLRFDRLPIVKGKRSNRAPLAPAQRRLWLLHQMEPDNSAY 31 | HMVGAFELSGVVQSEALRGALADIQRRHEALRTRFVEIDGEPWQVIDEHPPLALTEQDMTDADSSTIEHL 32 | ADDHAQAGFDLSRGPLLRIQLLRLTSQRWRLQVVMHHIISDGWSIGIFFKELEQAWRTRQAGNTEGLPAL 33 | AVQYADYAIWQRAWLNAGERERQLAYWRDQLEPGQPPLLLPYDTPVADSDLRQAAAERLSLPVSLVTALR 34 | MLARECDATLFTVLLAAWQWALAVQSGRRDIPVGVPVANRNRPEIAPLIGFFVNTLVLRGQPDPAASVRD 35 | WVRQLHTRMLDAQDHQALPFDQLVEAIAPRRKPGETPLFQVLFNYQRRDAQAFEVLPGVTLRPLSQGVPH 36 | ALFDLALDADEQADGQVSLTLTYAADRFKADTISRLLSMLEQALSGFTQDAQRPLGSLELLTPEQRERLK 37 | LWSGGSEAYGRGVTLAGLISAQAARRPESEALVSGEERVSYGELESRSERLGRWLRSQGVSAETVVGVLL 38 | ERGVGMIESFLGILKAGGAFLPLDPDYPEERLGYMLRDSGVELLLSESGLAGRLPAVEGLRVVALDRLDY 39 | GAEESGDLAVPVHPEQLAYVIYTSGSTGQPKGVGVTQGGLSMHVQSIGERYGMGPEDVELHFASISFDGA 40 | VERWAVPLAFGSRLVIRDQGLWSAERTCQVLEEEGVTIACFPPSYVGPLLDWIEHRRPQLKVRSWTLGGE 41 | AFTRELYERLQRVLKPRRVLNGYGPTETVVTPLLWEADEGTAMSSAYAPIGTAVGARRLYVLDGELNRVP 42 | PGVSGELYIGGEVGLARGYWGRAGQTAERFLPDRWGAPGERMYRTGDWVRWRADGVVEYLGRVDGQVKLR 43 | GFRIELGEIETRLLALAQVREAVVVMRRGPGGERLVGYVAAPPEVEGERLRAALAGQLPEYMVPSQVVRL 44 | EALPLTPAGKVDRQGLPEPRWSAEGYEPPQTEAEAVLAQVWGQLLGVERVGRQDRFFELGGDSIIALQVV 45 | SRARQAGWSLRPRDLFEQPTLSALAAVVQPAKASNAVQESLVGPLPLTPIQSHFFELQPPVPSHWNQHLW 46 | 
LALKVPLDVTALSAALQALMRHHDALRLHFRQQGGQWQQSFVDPAELNSELLWVRAAEGEEEVERWCNEV 47 | QRSLDIETGELLRALYVSSPSQPDRLLLCIHHLAVDGVSWRILLEDLLRAYQQYLAGEAIELPAKTHSLR 48 | DWSQALSAWAKTAQAESRLPFWQQQFTETTSCRTPFAKCGHWTMMLSKAETQKLLYQAPAQLDVNVPALL 49 | VTALVQVLADERQPRIAVNLEGHGRTEEVTPHLDVSRTVGWFTSFYPVALEWQPDLQRCMAGVAEQLRRA 50 | DQDGGISYGALRYQGHESSRRQLAGAAAAPVTFNYLGQYRSDGLEHWFSPLPGGGDPQAPENPMAAPLSV 51 | NAQVVAGQLSLDWLYSQDHYNPLQIKQLAQCYKQVLLTLLETPVSNVPAAADPRLLVPLAEASLEQPPVF 52 | CIHPVTGRVNGYQPLAMGLQGHRTLIGLQSRSFVDTLWFDPSLSQMADRYVATLRYRQPVGPYYLLGWSL 53 | GGALAMEIACRLEQAGEGIAFLGLLDSYVPGFEIPEDQWTSPQAQQKLADHLRILLPGVAEEGLQHCLAQ 54 | FSESQPRQWPEIFSAWLSTQGVDAVSAENAQQLLFSWALEQHYRRLCDGYTLPSVSTKAHAWWAGKPEGR 55 | GQQLQQALQEKVRLTFSRIVAADHLSIVRDAECLSDLHESLRI 56 | >WP_016784469.1 non-ribosomal peptide synthetase [Vibrio cyclitrophicus] 57 | MSDLNELNQPTSAVQAEATAKSNVVSKKNDTEKQSANSAGKIDKHALAKRFLQLGKPEQVKFIELLSNKG 58 | LDFEKLPIVSVGSEQKVGLSPAQQRLWDIYCLDKGNSAYHMSGTFDVKGKLDTTQLGNLVNKVLDRHEVL 59 | RTRFVHESDSAVVQFVDRSAVMFPVIVDAQGWSQQQIQDTQRDFIGNTFDLETELPIRMLCLKLSNGSWK 60 | LQLVMHHIVSDGWSIGVFVSELVSLYQQGDLPKLPIQYRDYSQWQTALLDAGKGAHHIDYWKRELKDSNS 61 | ELLFPWCCEIAPNQRRAADSISYELSKTQSNKINQVARQLSVTNSSLWLGVWQAALATVTHRSDISVGVP 62 | MANRTRHEVSNLIGFFVNTMVIQQTISPKCSIAKAIAGAHDKVLEVQEHQLLPFDQLVTALSDEHKLIER 63 | KAGQTPLFQVLFNHQNTLSGDIHLDNELILSAQDQKGEFALFDVALDVRESQSKTRVVLTYSKDRIDEDK 64 | MAELNAVLENLIRDLDVNLGCSLAQMKHLSPHDEAVLKNLSQPEGNWHFQSIIELLENQARVRPDAIALK 65 | HQDQQLTFAGLAHSSSDLASKLKAQGVRRDQAVGVLFERGVQMIVAMVAVMKAGGAFLPLDPDYPTERLA 66 | YMIQDSQARFVVTQHRLGSRWHSIQSTFKASLSQPNPLFYGVGESVDECELRSSNGELLLVRSKKQLREE 67 | EQTTAAKLHPEQLAYIIYTSGSTGKPKGVAVNHVGLSMHVQTIGHQYGMTPEDIELHFASISFDGAVERW 68 | AVPLAFGSKLVIRDQELWSAEQTCDVLSKEKVTIACFPPSYVGPLLDWIDYQKPPLSVRSWTLGGEAFTA 69 | ETYQRLQKVVNPPRIINGYGPTETVVTPMIWRAYPQDKLTSAYAPIGQPVGERRLYVLDAQLNKVAFGKV 70 | GELYIGAEVGLARGYLDQPDLTSERFLPDPFREHVGERMYRTGDLVRWNDDGVMEYIGRVDQQIKIRGFR 71 | VELGEIESRLQSISKVEHCVVALQEVGAQKQLFGYLQSSEPERFDLDDILDQLSTVLPDYMVPSQLLVLE 72 | 
KLPLTPAGKVDRASLPFLGSKFESEATFVTPETEQEALLVDTWQQLLGAEQVSCSDNFFALGGDSILCLQ 73 | MVSKVRVTGYNLTPQQVFEAKTLRDLASVLSIHKTAKERELSDLPFELMPIQAHFFAQEFPEPNHWNQHI 74 | CVELKQDMEIGYLESAIQALVKQHPSLRLSFSQESGKWKQQFQLYQKRQYLWVSNVDSEQAFSAFTQEIQ 75 | QSMDISSGRLIQAGIAKFSDAPNRLMIAIHHLAVDGVSWRILLDDLWKAYQQLVAGQDIDLMPNSSSLDE 76 | AVNAVERWSSSEKGKLMREAWHFLSEITDDLGSTELGLKLPLYRDKQSQKTELSNDLTQALLRISGSELG 77 | ANIQSALITALVSALASEEHPNVPIYLEGHGREPSVFANTDLSRMVGWTTSLYPIIGQYHPDIISMVDKT 78 | HDALERIKEDGGIGYGLRYLEQREQGTLEEQSMVATFNYLGQYSSHGFAQWCSPIEEGSSPQSELNQMLT 79 | PLVVNAQIVSGKLSLAWEYATTHYSHQKIEEIATRYVECLTEFVSVMRIVKEKPTVKPDKRLIEKLNTTS 80 | NEHEPVFCIHPVTGRVTGYQRLAQSLEGKRSVYGIKSKSFVLDNAFDTSLTDMADTYFQTIKQLQPRGPY 81 | RLVGWSLGGALTQEIASRLEKNGDEIRFAGLLDCYVPGTEIASDQWDSPSSKAKLLEHISLLLGTVTVQQ 82 | GARCIDLLDSVSPPMWPEVFNTWLAENHFDPHMADNAKQMLYSWSVEQHMRSLCRGYQLPSIKTKLHCWW 83 | AGQPNGRSDLLSKGLNKLNAQRYSVTVDTDHLGIVQNNTVIKGLYRHLYCE 84 | >WP_016789556.1 non-ribosomal peptide synthetase [Vibrio cyclitrophicus] 85 | MSDLNELNQPTSAVQAEATANVVSKKNDTEKQSANSAGKIDKHALAKRFLQLGKPEQVKFIELLSNKGLD 86 | FEKLPIVSVGAEQKVGLSPAQQRLWDIYCLDKGNSAYHMSGTFDVKGKLDTTQLGNLVNKVLDRHEVLRT 87 | RFVHESDSAVVQFVDRTAVMFPVIVDAQGWSQQQIQDTQRDFIGNAFDLETELPIRMLCLKLNNGSWKLQ 88 | LVMHHIVSDGWSIGVFVSELVSLYQQGDLPKLPIQYRDYSQWQTALLDAGKGAHHIDYWKRELKDSNSEL 89 | LFPWCCEIAPNQRRAADSISYELSKTQSNKINQVARQLSVTNSSLWLGVWQAALATVTHRSDISVGVPMA 90 | NRTRHEVSNLIGFFVNTMVIQQTISPKCSIAKAIAGAHDKVLEAQEHQLLPFDQLVIALSDEHKLIERKA 91 | GQTPLFQVLFNHQNTLSGDIHLDNELILSAQDQKGEFALFDVALDVRESQSKTRVVLTYSKDRIDEDKMA 92 | ELNAVLENLIRDLDVNLGCSLAQMKHLSPHDEAVLENLSQPEGNWHFQSIIELLENQARVRPDAIALKHQ 93 | DQQLTFAGLAHSSSDLASKLKAQGVRRDQAVGVLFERGVQMIVAMVAVMKAGGAFLPLDPDYPTERLAYM 94 | IQDSQARFVVTQHRLGSRWHSIQSTFKASLSQPNPLFYGVGESVDDCELRSSNGELLLVRSKKQQREEEQ 95 | TTAAKLHPEQLAYIIYTSGSTGKPKGVAVNHVGLSMHVQTIGHQYGMTPEDIELHFASISFDGAVERWAV 96 | PLAFGSKLVIRDQELWSAEQTCDVLSKEKVTIACFPPSYVGPLLDWIDYQKPPLSVRSWTLGGEAFTAET 97 | YQRLQKVVNPPRIINGYGPTETVVTPMIWRAYPQDKLTSAYAPIGQPVGERRLYVLDAQLNKVAFGKVGE 98 | 
LYIGAEVGLARGYLDQPDLTSERFLPDPFREHVGERMYRTGDLVRWNDDGVMEYIGRVDQQIKIRGFRVE 99 | LGEIESRLQSISKVEHCVVALQEVGAQKQLFGYLQSSEPERFDLDDILDQLSTVLPDYMVPSQLLVLEKL 100 | PLTPAGKVDRASLPFLGSKFESEATFVTPETEQEALLVDTWQQLLGAEQVSCSDNFFALGGDSILCLQMV 101 | SKVRVTGYNLTPQQVFEAKTLRDLASVLSIHKTAKERELSDLPFELMPIQAHFFAQEFPEPNHWNQHICV 102 | ELKQDMEIGYLESAIQALVKQHPSLRLSFSQESGKWKQQFQLYQKRQYLWVSNVDSEQAFSTFTQEIQQS 103 | MDISSGRLIQAGIAKFSDAPNRLMIAIHHLAVDGVSWRILLDDLWKAYQQLVAGQDIDLMPNSSSLDEAV 104 | NAVERWSSSEKGKSMQEAWHFVSEITDNLGSTELGLKLPLYRDKQSQKTELSNDLTQALLRISGSELGAN 105 | IQSALITALVSALASEEHPNVPIYLEGHGREPSVFANTDLSRMVGWTTSLYPIIGQYHPDIISMVDKTHD 106 | ALERIKEDGGIGYGLRYLEQREQGTLEEQSMVATFNYLGQYSSHGFAQWCSPIEEGSSPQSELNQMLTPL 107 | VVNAQIVSGKLSLAWEYATTHYSHQKIEEIATRYVECLTEFVSVMRIVKEKPTVKPDKRLIEKLNTTSNE 108 | HEPVFCIHPVTGRVTGYQRLAQSLEGKRSVYGIKSKSFVLDNAFDTSLTDMADTYFQTIKQLQPRGPYRL 109 | VGWSLGGALTQEIASRLEKNGDEIRFAGLLDCYVPGTEIASDQWDSPSSKAKLLEHISLLLGTVTVQQGA 110 | RCIDLLDSVSPPMWPEVFNTWLAENHFDPHMADNAKQMLYSWSVEQHMRSLCRGYQLPSIKTKLHCWWAG 111 | QPNGRSDLLSKGLNKLNAQRYSVTVDTDHLGIVQNNTVIKDLYRHLYCE 112 | >WP_017074986.1 non-ribosomal peptide synthetase [Vibrio splendidus] 113 | MNHLKTTFDSKTVNQSNDKNQPKTASYSKAKIDKQALAKRFLQLGKPEQIKFIELLSSKGLDFEKLPIVS 114 | VGHEQQVGLSPAQQRLWDIYCLDKGNSAYHMSGTFYVKGHLDIEPLSRIVNQVLQRHDVLRTRFITNANE 115 | SVVQFVDTAAVMCPLTVDAQSWSQDQIQESQREFIAKAFELEIDLPIRMQCLQLSESSWKLQLVMHHIVS 116 | DGWSIGVFLNELITLYQQGHLPELPIQYRDYSHWQSALLEAGKGQQHIDFWKNELGEADSHQLFPWYSEI 117 | APNQRRIAESISYELSESQSDKLKQIARQLSVTTSSLWLGVWQAALAKVTHRTDISVGVPMANRTRHEVS 118 | NLIGFFVNTMVIQQSISPSCSLSQAIEGAHNKVLEAQEHQLLPFDQLVQTLANEQRNIQQKSVEPTNSEP 119 | KGFDRKAGQTPLFQVLFNHQHAPADDIQLDNQLTLSAEQQNGEFALFDIALDVRESERKTRVVLTYSKDR 120 | IDQDNMVELNAVLEQLVSELESNIDRSLAQISHLTTHDASVLERLSQPEGDWTFHSIIELIAKQVKEQPD 121 | AIALKHQDIELSFKQLDLASSLLASKLIASNLVDSNNEDEGVYRDQVIGVLFERGVEMIVAMVAVMKAGG 122 | AFLPLDPDYPTERLTYMIQDSKAKFVITQQGLESRWQNIQSEFDVSSSQASSFVFGSQDLKHSLNDLDEK 123 | AARDQEDKQVQLETTILPEQLAYIIYTSGSTGKPKGVAINHVGLSMHVQTIGKQYGMTPEDVELHFASIS 124 | 
FDGAVERWTVPLAFGSKLVIRDQELWSAEKTCEVLCDDKITIACFPPSYVGPLLDWIEYQKPLLNVRSWT 125 | LGGEAFTAETYQRLQKVVNPPRIINGYGPTETVVTPMIWRAYPKDNLNSAYAPIGQPVGERRLYVLDAQL 126 | NKVAFGHIGELYIGGEVGLARGYLEQPDLTSERFIPDPFNTDDTTSRDSSHERAGQRMYRTGDLVRWNHG 127 | GVMEYIGRVDQQVKIRGFRVELGEIESRLQALSKVEHCVVAVKEVGQQKQLFGYLQSSEPDRFDLDDILE 128 | ALSRELPDYMVPNQLMVMDRLPLTPAGKIDRTSLPDINMETPSVVALVPPKTEQEVLLVKLWQQLLSIED 129 | VSCTDNFFALGGDSILCLQMVSKVRVAGYNLTPQQVFEAKSLSDLAGVLEIHKAKKERDLTTESFGLMPI 130 | QAHFLAQQFSQPDHWNQHICVELKQDMNTSYLESAIQALVKQHPSLRLAFNQHQGRWSQQFQDFQVRDYL 131 | WVSKVESEPAFSAFAQEIHQSMDIGMGRLIQAGYAEFENAPNRLMIAIHHLAVDGVSWRILLDDLWKAYQ 132 | QQVAGQTIELMPNSSAMDEAVHSLEEWSSTEQGKLMQEAWHELNVTSNDSAQDKPPALHQDKQVFKAELS 133 | NELTQTLLRISTSALAVDIQSVLITALVSTLANKDQPDVHIYLEGHGRESSVFSNIDLSRMVGWTTSLYP 134 | MVGRFQKDILSVLSSTSEYLESVKKDGGIGYGLRYLSQDCIEQSKASVTFNYLGQYSSDDFAHWCSPIEE 135 | ESLPQSGLNSMLTPLVVNSQVVAGKLSLSWEFATTHYSLAEIEQFTSRFIDSLTALVAASQVNDKSATGQ 136 | LGRNRMTTRLPDQCISDTRLVEKLNTTSDEHEPLFCIHPVTGRVTGYQRLAQALEGDRSVFGIKSKSFVS 137 | DNIFDSSFADMADAYYQTIKQIQPCGPYRLVGWSLGGALAQEIASRLEANGDEIAFIGLLDCYVPGTEIT 138 | EDQWDSPTSKSKLLEHLSLLLGPISEQQGQRCLRLLDSVPPPMWPEAFNTWLAENQFDRYMADNAKQMLY 139 | SWSVEQHMRALCHEYQLPSIKTQLHCWWAGLPSGRSELLSKGLNKHNKLQYSVVVDTDHLGVVQNNKVIK 140 | GLYRHLYCE -------------------------------------------------------------------------------- /KEGGDecoder/Amphibactin/ABO2093homolog_ref.faa: -------------------------------------------------------------------------------- 1 | >WP_011589371.1 non-ribosomal peptide synthetase [Alcanivorax borkumensis] 2 | MTWSANENLNPGPDWAPLSCSQQRLWLFTRLSAQSTAYNLGAMLWLEGELNPEALEQTINYVIARQDIVR 3 | VQFAERDGQGWQRIAPHEDRVLQCEDVSNRADPVAAAYALGHEFNATPFDLETDSLMRYRLVKLNEQRHV 4 | LLISLHHIVGDAWSLGVFMQEFLYAYSALREGLEPKLPSLETRYLDWVKTEVQWLASENAKSQLEYWVET 5 | LEHEGEPLALPRLSPQSGVSNAARYRDFSLSIEQTQRLKKFSRTQGVSRFTVLLSVLQYLLFRISGQSLV 6 | RVGVPSANRNSSNQLLMGFFVNNLVVQGEVLPAQSVSDWIAHIHHALSNAKKHRDIPFEKIVDELSDSRR 7 | AGSHPLFQVAFNYRQQGKGLSLNLGNLLVRVEDLPVTETPFDLVLDAWPDEHGGLTLRLVYGEGIFDDAF 8 | 
AERMVAGFEQVLDQWLQAPALPLTESAALVPGDVALLQEWGQGEGEWQATNFVSLFSQQAAKRGDAIALV 9 | HGGTRVSFAELEARSNQLARYLMGQGVKADQVVGVSFERGVTMVEAFIAVMKAGGAFLPLDPGYPADRLR 10 | YMLEDSGASLLLTSSDLVDTLPSVEAVNPIAVDELSLDDFAYGILNEEPHPDQLAYVIYTSGSTGKPKGV 11 | SLTHAGLSMHVQTIGERYGMTPDDVELQFASISFDGAVERWTVPLAFGSRVVIRDQALWSAEKCCEVLQE 12 | EGVTIACFPPSYVGPLLDWIEQEKPPLKVRSWTLGGEAFTRETFERMQQVLKPQRILNGYGPTETVVTPM 13 | LWAAYEGDTLSSAYAPIGHAVGPRKLYVLDQDLNRVPVGVAGELYIGNEVGLARGYHQRPDLTAERFLPD 14 | PFGEPGERMYRTGDLVKYRDDGVMEYLGRVDQQVKIRGFRIELGEIESQLLGHEQIREAAVVAQPSPTGD 15 | RLVGYIVMRNAAHSVPREHDPQTILAALSESLPDYMVPSQLITLEAMPLTPAGKVDRKALPVAQWHTASE 16 | GAPPQTDNEKMLADIWQSLLGREGVSRDDNFFALGGDSILALQVVSRARQQGLALTPKDLFEHPVLSQQA 17 | AVADPVNVTMASQEPLSGPVSLLPIQQRFIQQRGLSACNQYLRFSVQKHFDAQVLEQALQRLVAQHDALR 18 | VQFDNQQVGTAYCLPVNDAPLLQQIDSDDSAAIEAQMIAVQRSLNPAKGTLLGALYVTGASPQLLLSIHH 19 | LAVDGVSWRILLEDFFTAYQQIEVSGKASLPRKTHTLRDWQEILDQHFLPQAEQALPYWQAVCEPMPPLF 20 | STVQGGVASRIDRQTDAQTLSRWQHSADRYASLNLEEFLLIALAQTLADFSGRNAVRIHRESHGRAAGDA 21 | AVDLSRTVGWLTSLYPQRLDRAEDLTATIKQQKEQLRQPANGGLAYGLLVQHGKLEDGDHRLDVLFNYLG 22 | QFRHDDMPGVSLMDAGLWQETDALADAPLVINADQQNGALRIQVNVDGESLSQGQGDRLVSQWLEQCERL 23 | ARHCATQAPVLTPADMPLASLTQMQLDALHDRPQQILPLSPLQSGLLFHSQLSGNNSTYVNQLVLPLTGV 24 | QPARMCQAWQTLLERHGVLRTSLLPAAQADDRHQAVWSTQQVKLPWCELDLRGEGDSASALDKWCKQRRE 25 | NGFDLQAAPLWKVDLLRTSEDRFECVLTLHHILMDGWSTGLLLTELMALYHGKTLAAMTKSYADYLAWLQ 26 | QQDRAVTRAFWQQYLSPVQLPTRLVDAVGTGEKGAFRRHPIEFDAATSDGLRQAARDKGLTINTLVQAAW 27 | ARVLGRFTGQQRVVFGNTVAGRPAELAGSDSMLGLFINTLPMTVPLHGEQPVADWLMQLQADNAALREQG 28 | HAPLFEVQQDAGWGGEGLFDTLLVFENYPLDESLLGGGQSELQLGTPTSHEFTHYPLTVAVLPGDHLQML 29 | FAHDSQALPVPLVERMASAFKRTLLALSQSEGMTLAELNALGDDAAPLQQWSQGGGEWVADSFVSLFSQQ 30 | AAKRGDAIALVHGGTRVSFAELDARSNQLARYLMGQGVKADQVVGVSFERGVTMVEAFIAVMKAGGAFLP 31 | LDPGYPKDRLHYMLKDSGARLLLTSSALIGVLPEVATVAPVAVDRLSLNDFPANALNNEPHPDQLAYVIY 32 | TSGSTGKPKGVSLTHAGLSMHVQTIGKRYGMTPDDVELQFASISFDGAVERWTVPLAFGSRVVIRDQQLW 33 | SAQQTCDALQKEGVTIACIPPSYMGPLLDWIEQEKPPLNVRSWTLGGEAFTRETFERMQQVLKPQRILNG 34 | 
YGPTETVVTPMLWAAYEGDTLSSAYAPIGHAVGPRKLYVLDQDLNRVPVGVAGELYIGNEVGLARGYHQR 35 | PDLTAERFLPDPFGEPGERMYRTGDLVKYRDDGVMEYLGRVDQQVKIRGFRIELGEIESRLLAHPQVREA 36 | VVLAQPSPGGDRLVGYLVPRGPLSTDALIATLAESLPDYMVPSHLLTLEAMPLTPAGKVNRKALPLPQWQ 37 | ASRAGDAPQAGTETLLAEIWQSLLGQEKISRDDHFFNLGGHSLLAVQMVNRLRHQHQLDLPLNRIFEQPL 38 | LRQCALLCQPANAMPDIQPVPRHGDLPCSAAQRRLWFVQQLEPKNGAYHMPLGLEVRGELHQAALQQAVD 39 | TLVANHESLRTRFVDVGGEPRQRILDDAAVIVQWQDLSDEASSEDVHRCQHEVLTKPFDLAADALLRVQV 40 | VKLSAQRYQLMLVQHHIIGDGISMQLLLAELSKLYRLAHQGARLVPSPAAIQYADYAAWQQQWLNSDEAR 41 | AQTRWWVKQLGDGGEPLALPTDFPRGALPENENGGARLPVSISDRQLTQLKQRASELGSTVSTLLLTVWQ 42 | TLLHRYSGQAQVRVGVPVAGRLQSQTENLQGCFINTLVVPATYQQPQSFAQHVAATQRFMGKAQGRQSLP 43 | FEVLVDALGVDRNLDRHPLFQVVFNHQRLSQTFAPQWPEAVITPFDPGAAGAQFELALDILEDDEQLQGF 44 | IGYATALFKPQTIARLRDHFFILLEALLNDPQQLLADTALLTAEEISDQQHFNHTEKDWGDFTALPKRLS 45 | QQAGRTPDAVALSMGEQQLSYAQLDRKVNQLANRLRRAGVKEEVRVAIGLPRSLELVIGILAITRAGGAY 46 | VPLDPSYPQDRLSYILEASSPALLLTHSSLLGGWPKAVPMWCLDELDVSDQPTTPPPVQWHPDQAVYVIY 47 | TSGSTGKPKGVLNTQAALENRLLWMQNEYPLQAADCVLQKTPFSFDVSVWEFFWPLMVGARLAVAPPQAH 48 | GDPQWLQQVMADEGVTTLHFVPSMLKAFVDASGLQNVPRLKRLICSGEALDMELQKAVFTSRDDVELHNL 49 | YGPTEAAIDVSFWQCQPEGGHTVPIGAPISNIQLYVLDTDLNPVPRGVPGELYLAGIGLARGYFGRSDLT 50 | AERFVPNPYGDAGSRMYRTGDQVRQRGDGIIEYLGRLDHQVKIRGLRIELGEIEQQLKQLPDVNDAVVVA 51 | DHSDTGDQLVAYVSADSDNREVWQQALADALPEYMVPALFMVLEALPLSPNGKLDRKALPAPQWQAREYR 52 | APQTETEQQLASLWEELLGQPRVGLDDNFFVLGGHSLLATRVVAALRDCWGVDVPLRALFEADTLQALAA 53 | RVDENNGEAKQQEQEDLSAMADLLDDLEDL 54 | >WP_013032792.1 non-ribosomal peptide synthetase [Nitrosococcus halophilus] 55 | MSQSPSLVATDNHHGWYPLSAAQRQLWFFAQLEPESCAYNLGGWLWLEGELDRAALTQALNGLVERHEML 56 | RACFRDYQGEPLQTIMPMMPFTLGYEDLSTLSEPEAVAKERGRELVLQPYDLNEGEVFRYRLYYLGPKRH 57 | MLALGFHHIVFDAWSFGVFMGELMSRYESGITHKPLKLPRLRQHYVDYSLRQQQWLQGEQAADQLAYWQT 58 | RLGQHHGPLNLPMQRPSGGVQRAEHYPIQIAGDLTKRLDKLSRQVGASRFTVLLAVLKLLLARLSGQEEV 59 | RVGVPSANRSADTAKMVGFLVNNWVVAGRPLPSLSVRQWIEQVKNHLTEARHNGRLPFETLVEALAPERY 60 | 
PGLHPLFQVAFNYRQQGGQQTWSTGGLDARFEEMTAVETPFSLVLDVAPALDDGLTLRFIAGQGTFSDRF 61 | MEQLTEGYLRLLEQCTEQPNATLAMLDMLTASSRQQLKLWSGGSEAYGRGVTLAGLISAQAARRPESEAL 62 | VSGEERVSYGELESRSERLGRWLRSQGVSAETVVGVLLERGVGMIESFLGILKAGGAFLPLDPDYPEERL 63 | GYMLRDSGVELLLSESGLAGRLPAVEGLRVVALDRLDYGAEESGDLAVPVHPEQLAYVIYTSGSTGQPKG 64 | VGVTQGGLSMHVQSIGERYGMGPEDVELHFASISFDGAVERWAVPLAFGSRLVIRDQGLWSAERTCQVLE 65 | EEGVTIACFPPSYVGPLLDWIEHRRPQLKVRSWTLGGEAFTRELYERLQRVLKPRRVLNGYGPTETVVTP 66 | LLWEADEGTAMSSAYAPIGTAVGARRLYVLDGELNRVPPGVSGELYIGGEVGLARGYWGRAGQTAERFLP 67 | DRWGAPGERMYRTGDWVRWRADGVVEYLGRVDGQVKLRGFRIELGEIETRLLALAQVREAVVVMRRGPGG 68 | ERLVGYVAAPPEVEGERLRAALAGQLPEYMVPSQVVRLEALPLTPAGKVDRQGLPEPRWSAEGYEPPQTE 69 | AEAVLAQVWGQLLGVERVGRQDRFFELGGDSIIALQVVSRARQAGWSLRPRDLFEQPTLSALAAVAETTE 70 | TTMIEQTPLVGEVEMTPIQARFLEREGVAICNQYFWFQLDAPLNPEDLRAALQALCAHHDVLRSRFYRHK 71 | GQWHQIFQAPEKVKSELLWVREAQTKAEIQRFAASAQQSLDIESGELLRALYVSSPGQPDRLLLCIHHLA 72 | VDGVSWRILLEDLLLAYRQSAAGQPLHLPAKTHSLRDWTGALAAWAQDEAQAQQAFWQAMTADVPSLWNM 73 | NPQPTEAQTLRLVIPAEVTRRAMQVAQTHLRSNLDDLLVMTLARVLAQHSGQSAVRIYRESHGRDLSFSG 74 | LDLSRTVGWFTSLYPLLLRVPTAREEALKTLKEQLHAVQNQGLAFGALSHWSDWEPADHRIEVLFNYLGQ 75 | LQWDGQGMLSFLSAGLWRSPGSRRDAPLVINAHQQEGELIMALEFSPVHFEPSLQQALMAAFEAELKALM 76 | DCCEQHGPWLTPFDVPLSGLNQEALDQLSKLELENVLPLSPLQQGLLFHSELSQQTDTYVNQLSLPLSGL 77 | DSARFEQAWQRLVQRHSILRSALLAGEGKVEQPLLGVWRSITLPWSRQDLRGEEDAVQAMERVRAERRQQ 78 | GFNLYRPPLWHVDLLQTGDEDYHCVLTLHHLLMDGWSTGILLQELLQLYHGRALLPEPPPFSAFLQWLAD 79 | KDQMRARAFWQDYLSAIASPTWLASSVGQVSEAKDFRRHSVILDSKLQQHLQEQARQQGVTLSTLMQGAW 80 | ALILSRYTGQQQVVFGNTVAGRPPELPGSERMLGLFINTLPVAVKIPAVEPCGRWLLALQQAGLDTREHG 81 | HLPLFEIQQAAGWAGEGLFDTLMVFENYPLDDNLLNSKADGLTIGAPDSYEFTHYPLTLAVLPGTELQVV 82 | FAYNAAVLPPRVIAPLAEAFKQALMALATQPEAPLASIASITPEQRERLKLWSGGSEAYGRGVTLAGLIS 83 | AQAARRPESEALVSGEERVSYGELESRSERLGRWLRSQGVSAETVVGVLLERGVGMIESFLGILKAGGAF 84 | LPLDPDYPEERLGYMLRDSGVELLLSESGLAGRLPAVEGLRVVALDRLDYGAEESGDLAVPVHPEQLAYV 85 | IYTSGSTGQPKGVGVTQGGLSMHVQSIGERYGMGPEDVELHFASISFDGAVERWAVPLAFGSRLVIRDQG 86 | 
LWSAERTCQVLEEEGVTIACFPPSYVGPLLDWIEHRRPQLKVRSWTLGGEAFTRELYERLQRVLKPRRVL 87 | NGYGPTETVVTPLLWEADEGTAMSSAYAPIGTAVGARRLYVLDGELNRVPPGVSGELYIGGEVGLARGYW 88 | GRAGQTAERFLPDRWGAPGERMYRTGDWVRWRADGVVEYLGRVDGQVKLRGFRIELGEIETRLLALAQVR 89 | EAVVVMRRGPGGERLVGYVAAPPEVEGERLRAALAGQLPEYMVPSQVVRLEALPLTPAGKVDRQGLPEPR 90 | WSAEGYEPPQTEAEAVLAQVWGQLLGVERVGRQDRFFELGGHSLLAMQAVSLLRRDYQKQVPLQVLFDSP 91 | RLADCAARLIDAVDEDEIIVAAPRDQDLPTSSAQRRLWFVQQLNPEGGAYHLPLGLRLQGTLDLTALQAA 92 | LDHLVEQHEILRTRFVEHNGEPWQRILPEAPLKINYIDLREQPQPQDRAAELFQEWLRQPFDLARDPLLR 93 | LGVVRLDERCYQLLLVQHHIITDGRSTAHFLESLINAYRAIVAGELLSEAPPRLQYADYAIWQQRWLQGK 94 | KAQQQLDYWRQALGTDTEPLELPTDFPRSRTTPPQGARYHFRLTTVQAQGLQQLARAHETTLFTPLLSLW 95 | LLLLSRYSGRRDIRVGIPVAGRVRPETETLLGCFINTVVLGVQIDPNSSFAGLMEQVKGRSAEAQSHQEL 96 | PFEALVEALGVGHSLEYHPLFQVTFNHQQMVTEALAEWPHGSVTPFDPGAAGVQFDLAMDTELHSDGSIR 97 | GYLSYASQLFHKATIERLWGHYLNLLDAVLNDGSRPVANFPLLSQTEQQQLGRWNDSGVARDPFVPIPIR 98 | QSRQAMETPEAVALSFNGRQLSYGELERWVNQLAHRLQRAGVGPEIRVAISLHRSVELVVGILAITRAGG 99 | GYVPLDPSYPEERLRYILAAAEPALVLTQSTLALTGLEDPGCPCWSLDDLDCSDEPAYPPMVDWHPDQAL 100 | YVIYTSGSTGRPKGVVNTHAALENRLLWMQEQYSLGGDDCVLQKTPFSFDVSVWEFFWPFMVGARLAVAP 101 | PEAHRDPVALQRVIEAEQVTTLHFVPSMLQAFIAATHLVGCASLRKVICSGEALSMDLQQQVLQARPKLQ 102 | LHNLYGPTEAAIDVSYWQCQADNRHTVPIGMPISNIQLHVLDEQLNLVPIGIPGELYLAGVGLARGYFGR 103 | ADLTAERFLPNPFGAPGSRMYRTGDKVKRGVDGVLEYLGRLDHQVKIRGLRIELGEIESLLRQQPSVSDA 104 | LVIAQPYSIGDQLVAYVILDGVPAEDWREQLKAALSAQLPDYMVPALFMSLDGFPISPNGKLDRKALPAP 105 | EGQRKGYRAPQTPLQQQLAECWRCLLEQPRIGLDDNFFALGGHSLLAVRAVAQIREQLGLEISLRQFFQC 106 | ESLEALASQLEYESRAGEEEEQNELDAMAALLNELGEL 107 | >WP_017074987.1 non-ribosomal peptide synthetase [Vibrio splendidus] 108 | MSVDDEQILDSVSPWSPLSYSQQRLWLFQQLQPMSLAYNLGGLLWFEGKDITVEKLQHSLNEMVSAFPSL 109 | RSQFAEVEGKAVQRVMPFSPVEYDYIDMQGDSNTVEFINQDARQRWQQNFNLVESNLARCCIYQVTEHRF 110 | AVLLATHHIVTDAWSFQITIKMLVNSVAGKTYQMKEVQSYLDYAAEQASAEKSDFYEQQKSSWLQNHFIG 111 | EESLSLSNLNDQPQDTYGARHELVHLSNDINDSIKQLGSELGATKFEILASAMMLTLRQYSSNVHPSICV 112 | 
PALNRNAQNRRTVGFYVNSAVMGYRIDAEMTLSQLVNNTHASMKASLAYESTPLEALVGDLPLPTAALNF 113 | RNHGDKLSINTNGVSAEFEEFPVLETPFELVLDVINKGDAPLRFVYAKEKFTRPFITKFIESFKINLATI 114 | VQSPNAAVASVNALSSKDMRLIDQHGSGDYSWQYRPFTDLVTEQAMKCPNSIALKHQNESMSYLELETRS 115 | NQLAHSILSQGTAAKSPIGVMMERGVDMIVSMIAVLKAGSPFLPLDPDYPTERLSFMLEDSGAELLLTHS 116 | KSDDRCRDVLSGSDGVTQFCVENAELAHLPLDNSFNRPLAEELAYIIYTSGSTGKPKGVTISHEGLSMHV 117 | QTIGQRYGMSEQDVELHFASISFDGAIERWTVPLAFGSKLVIRDQELWTAEQTCHVLQQEGVTIACFPPS 118 | YVGPLLDWIEAARPSLALRSITLGGEAFTRETFDRIQDVLAPPRIINGYGPTETVITPMVWEAYADDTMN 119 | SAYAPIGTAVGDRKLYVLDSELSQVPFGCSGELYIGSEVGLAEGYLKQPNLTAERFLPDPFSSNGERMYR 120 | TGDLVQWRDDGVMEYLGRVDQQVKIRGFRVELGEIESQLQALSGAEFCAVVDHESPTGKKLVGYVQLSKV 121 | QQSKAPSNTSQLQSANDNEEAQWLELLGQTLPDYMVPACIVVQDKLPLTPASKVDRKELFAPDWTEVRSD 122 | QGALENNRQVMLAEIWCELLKVKSVGTNSQFFALGGDSIMALQLVGKLRQLGFMLSPKQVFDFPKLRDMA 123 | GSLEEAKLVVAEQGKRQGSVALLPIQQRYIEHFELNRCNQYIQFNWDYPVDVERLSRAVHHLVEHHDALR 124 | LRFSHAVSSSSPIEITANYQDDVEFAIHSFDGEININQVQTSIDLINGVIGAVGISSMDALDEETEHSEV 125 | LIAIHHLVVDALSWPVIIEDLVKLYVSEPTSEFSYEPREVSGPHRLTQKTHHQGNWANALNTLSISDEQQ 126 | AYWAEQTKEPLYSTLRGKPISTQWYTPLSKIEALVKAGFGFARLTQEQIVYLISALAVSTLNQGKALTIH 127 | RESHGRFTEATGLDLSRTVGWYTSLYPQAIPVLESLEEWVKSLKDSFETDHLGGVTFHAGVTQSLWSHVG 128 | DMDVLFNYLGNAAQQMNDAIEVTNTGLWRDESNTADAAIVINASVAKEALLWDIELDSACFSESEVEVLH 129 | AALSDSLEQLHEVFNDADPVLTRADAPLIDLNQIQLNELCKGAISPLSLPVTILPLSTLQQGLYFHAKLS 130 | DSDSTYVNQITLPLNNVNVPRMVNAWQGVMQRHQMLRSTLFSFDGSAYLAEWAELSLDYQFLDVRQRRQF 131 | DLDEYKQQRVERGFQLEQAVEHSKIKPLWCVDFIQTEEAQVQCVFTIHHILMDGWSTGVLLSDLFALYHD 132 | RSIAPVKAEFSDYLAWTQRQDKNQSNEYWQRYLQDMESPTRLAESYGNSDTSESNFQRYNDDFSQETISQ 133 | WLPRLNQVGVTLNTLTQAAWLLTLNRFTGQETPVFGNTVAGRPTELAHSDSMVGLFINTLPIAHYIDLSK 134 | PVSEWLLEIQTSSSEQREFSYSSLSDIQAQTGWSGENLFDTLVVFENYPLDEALLNDKNSNSSAFSIGEP 135 | ESYEFTHYPLTLAILPSESLRIVFAYDESKFTPQQIEALRATNRHYLNQLVDHLTEDLGNLSALADEQLS 136 | ELNAFERQPEPWTFSPFTDLVAQQTLLRPDSEALVSNVEVEDGQHRESLTYREVVEQSDAIAHRLIMEGV 137 | QRDDIVGVLFERDCNMLVAMMGVMKAGAAFLPLDPAYPQERLDFMVQDSGASVLIHDALSESLVNQIGNQ 138 
| AKTISYAAFDLTKNLNDKPVILPDQLAYMIYTSGSTGKPKGVCVSQQGLSMHVQTIGQRYGMTASDVELH 139 | FASISFDGAIERWTVPLAFGSRLVIRDQSLWSAQQTCDVLAREQVTVACFPPSYVLPLLEWIEGCNPELS 140 | VRSWTLGGEAFTRDTYFKLQHVLKPKRIINGYGPTETVVTPMIWEAYSDTELESAYAPIGKAVGNRTLYV 141 | LDSALNRVPAGVAGELYIGEEVGLARGYFDRPDLTSERFIPDPFSNNGERMYRTGDLVKWRLDGVMEYLG 142 | RSDEQVKIRGFRVELGEIESRLQKSAGSEQCAVVAYDSPTGKQLVAYIQSDNTLSIEDILQDLSKDLPDY 143 | MVPSQVVQMGKIPLTPASKVDKKRLPVPNWQAVAIQDYIPPIGEMEQVLAQQWRVLFDKELIGRDDDFFA 144 | LGGQSLLATQLVGRLKQQDDIRLSLQAVFDTSVLKELATHCIKDEAELVTLAQTPRLPYMPASAVQKRLW 145 | FVQQLLPTSAAYHMPLGLKFTGNVNVAALERAINVLVERHEILRTNFSQVEGELMQSIHADREICLGLHE 146 | SLASDEQRLLAYKELIAKPFDFSEGPLIRFDWIVGTTERPAGVKQADLLIVAHHIVSDGISMQLLLKELA 147 | HCYQSMISEYAESGRVSASEPAATHDLQYVDYVNWQQHWLESKEASDQKAWWLDALKYDIDPLVLHSDTP 148 | REQTKTQGNRLHFELSTEQISSITKLAISHKTTPFNVMLTLWHLLMHKYSGQEQVRVGVPVAGRTQPETQ 149 | SMQGCFINSLVIPAHFNGEQSFSDLLHQVKSFTEQALDRQDFPFEMLVESLGITGNLQYHPVFQTSFNFQ 150 | RFNERELFDWQELDVELFDPGVVNAQLEIAMDIQQMSEDKWLGFVSYVSPIFTKGFAQSLLEHWMLVLKE 151 | VATNNDTQISQLHLMDQAAMEQVGNFNGTSLDWGDFVTPSQEIQHQAEKTPNAVALTMQGETLTYQEFDE 152 | RVNQLANWLREKGVNSETRVGLGLERSFDLVIGLHAITRAGGAYVPLDPGYPTDRLQYILESANIACLLT 153 | DSNSMHLWPESNGCEYVDLNKLDVSKQSSQPPKVNWHPEQALYVIFTSGSTGLPKGVVNTQAALQNRLNW 154 | MQQEYVLNQSACVLQKTPFSFDVSVWEFFWPLMTGARLAIAPPEAHRQPSVLSGVIQQEKVTTIHFVPSM 155 | LNAFSIETNISDCTSLKRIICSGEALPADLVEQVLSHTPVELHNLYGPTEAAIDVTYWPCELPVSKRTPI 156 | GYAISNTQLYVLDENWNPVPIGVPGELYLAGIGLAREYLARPDLTADRFIPNPFGEAGSRMYRTGDQVVQ 157 | LADGRLEYLGRLDNQVKIRGLRIELEEIENVMNQLEWVEESAVIAFKHQTGEQLVGYVVDSNWSENKQAL 158 | VKQHLSEHLPDYMVPPILIGLDEMPLSPNGKRDRKALPAPEWQSIEYRAPETELEIWFATHWAEVLAIPQ 159 | VGLDDNFFALGGHSLLATRVVAKAHQELGLEVALKDFFEAKSLQALTDSLQSHYQTQNQHEQDEFDAMAA 160 | LMDELELL 161 | >WP_017078079.1 non-ribosomal peptide synthetase [Vibrio splendidus] 162 | MSVDDEQILDSVTPWNPLSYSQQRLWLFQQLQPMSLAYNLGGLLWFEGKDITAEKIQHSLNEMVSAFPSL 163 | RSQFAEVEGKAVQRVMPFSSVEYDYIDMQGDSNTVEFIHQDARQRWQQTFNLVESNLARCCIYQVTEHRF 164 | 
AVLLATHHIVTDAWSFQITIKMLVNSVAGKAYKTKEVQSYLDYAAEQASAEKSDFYEQQKSSWLQNHFIG 165 | EESLSLSNLNEQPQDTYGARHELVHLSNDINDSIKHLGSELGATKFEVLASAMMLTLRQYSSNVHPSICV 166 | PALNRNAKNRRTVGFYVNSAVMGYRIDAEMSLSQLVDNTHASMKASLAFESTPLEALVGDLPLPTTALNF 167 | RNHGDKLSINTNGVSAEFEEFPVLETPFELVLDVINKGDAPLRFVYAKEKFTRPFITKFIESFKINLASI 168 | VQSPNAAVASVNALSSKDMRLIDQHGSGDHSWQYRPFTDLVTEQAFKCPNSIALKHQNESMSYLELETRS 169 | NQLAHSILSQGTAAKSPIGVMMERGVDMIVSMIAVLKAGSPFLPLDPDYPTERLSFMLEDSGAELLLTHS 170 | KSNDRCRDVLSGGNGVTQFCIESAKLASLSSDNSFNRPLAEELAYIIYTSGSTGKPKGVTISHEGLSMHV 171 | QTIGQRYGMSEQDVELHFASISFDGAVERWTVPLAFGSKLVIRDQELWTAEQTCNVLQQEGVTIACFPPS 172 | YVGPLLDWIEATKPALALRSITLGGEAFTCETFVRIQNVLAPPRIINGYGPTETVITPMIWEAYADDTMN 173 | SAYAPIGTPVGDRKLYVLDSELSQVPFGCSGELYIGSEVGLAEGYLKQPNLTAERFLPDPFSSNGERMYR 174 | TGDLVQWRDDGVMEYLGRVDQQVKIRGFRVELGEIESQLQALSGAEFCAVVDHESPTGKKLVGYVQLSKV 175 | QQSNAQSNTPQLQSSNGNEESQWLGLLGQTLPDHMVPACIIVQDKLPLTPAGKVDRKELFAPDWTEVRSD 176 | QGALENNRQVMLAEIWCELLKVKSIGANSQFFALGGDSIMALQLVGKLRQLGFMLSPKQVFDFPKLRDMA 177 | GSLEEAKLVVAEQDKRQGSVALLPIQQRYIEHFELNRCNQYIQFKWDYPADLARLTNAFRGLIEHHDALR 178 | LRFSTDTAIGMTANYQGDAEFEIHMFEGEIDFNKVQATIDLANGVIGAVGIRSMEGLDDASEHSEVLIAI 179 | HHLVVDALSWPVIIEDLVKLYVSELTSEPRVISGPHLLTQKTHHQGNWVNALNAMSISDDQQAYWAEQQK 180 | DLLYLQSRGKPISTHWHTPLSKIEALTKAGFGFARLTQEQIVFLVSTLTVSTLNEGKALTIHRESHGRFT 181 | EDSGLDLSRTVGWYTSLYPQAIPVLESLEEWVKSLKDSFDADHLGGVTFHAGVMQNIWPHVGEMDVLFNY 182 | LGNAAQQINDAVEVTNTGLWRDESNTADAAIVINASVAKEALLWDIELDNGCFNEFEVDALHAAFDSSIE 183 | RLHDLFIEASPILTIADAPLVGLSQTQLKQLCGCASTAAELPSTILPLSTLQQGLYFHAKLSDSDSTYVN 184 | QITLPLNNVNVPKMVEAWQGVMQRHQMLRSTLFSFDGNTYLAEWAELCLDYEVLDVRKRSKFDIEEYKQQ 185 | RVERGFELEQIVERSKAAPLWRVDFIQTHDHQVQCIFTIHHILMDGWSTGVLLSDLFALYQGRAITPVKG 186 | EFSDYLAWTQRQDKSQSNEYWQHYLQDMESPTRLAESFGQSNPDAFDSKVSSFHRYNDDYSSETVSEWLP 187 | KLNQAGVTLNTLTQAAWLLTLNRFTGQEAPVFGNTVAGRPTELAHSDSMVGLFINTLPIAHRVDRSKPVS 188 | EWLLDIQTSSSEQREYSYSSLSDIQAQTGWSGENLFDTLVVFENYPLDEALLNSKGNDEFSIGKPESYEF 189 | THYPLTLAILPSDSLRIVFAYDESKFTSQQIEVLCATNRHYLTQLVGHLAEDLGRLSELAEDQLRELRSF 190 
| DRQPEPWAFEPFTDLVTKQMLLRPNDEALVSNVEVENGLQRQSLTYQQVVEQSDAIASRLITEGVKRDDI 191 | VGVLFERDCNMLVAMMGIMKAGAAFLPLDPSYPQERLDFMIQDSGARVLIHDALSESLGDQVGNQAKIIS 192 | YDSFDLTKNLNDTPVILPDQLAYMIYTSGSTGKPKGVCVSQQGLSMHVQTIGQRYGMTASDVELHFASIS 193 | FDGAIERWTVPLAFGSRLVIRDQSLWSAQQTCDVLAREQVTIACFPPSYVLPLLEWIEGSDPELYVRSWT 194 | LGGEAFTRDTYFRLQHVLKPKRIINGYGPTETVVTPMIWEAYSDTELESAYAPIGKTVGNRTLYVLDSAL 195 | NRVPAGVAGELYIGEEVGLARGYFERPHLTSERFIPDPFSNNGERMYRTGDVVKWRSDGVMEYLGRSDEQ 196 | VKIRGFRVELGEIESRLQKITGSEQCAVIACDSPSGKQLVAYIQSDNVLSSEGALQDLAKDLPDYMVPSQ 197 | VVKVDKIPLTPASKVDKKRLPMPNWQEVVSADYIAPIGEIELALAKQWQALFDKEQISREDDFFALGGQS 198 | LLVTQLVGRLSQQDDIRLSLQAVFDAPVLKDLAAQCMLTDVRNRGEVVSLARASRLPYMPASAVQKRLWF 199 | VQQLLPASAAYHMPLGLKFTGNVNVAALERAINVLVERHEILRTNFSQVEGELMQSIHPEREICLGMHES 200 | LVSDEQRLLAYKELIAKPFDFAEGQLIRFDWIVRASERSTGVEQAELLIVAHHIISDGISMQLLLKELAV 201 | CYQSMIFAYLESGESAAFETPAIHDVQYADYVNWQKQWLESEEASEQKSWWLDALKYDIDPLVLHSDVPR 202 | EQTETQGHRLHFELTNEQVSNITELATSHNTTPFTVMLTLWHLLMHKYSGQEQVRVGVPVAGRTQPETQT 203 | MQGCFINSLVIPAQFNGEQSFSGLLHQVKSFTEQALGRQDFPFEMLVESLGITGNLQYHPVFQTSFNFQR 204 | FDEAELFDWQGLDVEPFDPGVVNAQLEIGMDIQQMSENKWLGFVSYVSPIFKQDFAQALLGHWLLLLDRV 205 | ATNSDGLVSQLHLIDETAEQQNRAFNDTFLDWGGLVTPSHQVQHQAEKTPGSIALSMQGQILTYKEFDKR 206 | VNQLANWLRDKGVNSETRVGLGLERSFELVIGLHAITRAGGAYVPLDPSYPVERLQYILQSANIDLLLTD 207 | SNSVHLWPQSQDCEYVDLSKLDVSGQPSVPPLVHWHPEQALYVIFTSGSTGLPKGVVNTQAALQNRLNWM 208 | QQEYALNESDCVLQKTPFSFDVSVWEFFWPLMTGARLAIAPPEAHRQPSVLSEVIQQEQVTTLHFVPSML 209 | NAFSVETNISDCVSLKRIICSGEALPADLVEQVLSHAPVELHNLYGPTEAAIDVTYWPCDLPVSKRIPIG 210 | YAISNTQLYVLDDNWNSVPIGVPGELYLAGIGLAREYLARPDLTADRFIPNPFGEAGSRMYRTGDQVVQM 211 | ADGRLEYLGRLDNQVKIRGLRIELEEIENVINQLDWIEESAVIAFKHQTGDQLVGYVIDSRWEQDKQELV 212 | KQHLSEHVPDYMVPTILIGLGEMPLSPNGKRDRKALPAPEWQSIKYRAPETELELWFATNWEEVLEVPQV 213 | GLDDNFFALGGHSLLATRVVAKAHQELGLEVVLKDFFEANSLQALTDSLQNQYQTQNEHEQDEFDAMAAL 214 | MDELELL 215 | >WP_017110664.1 non-ribosomal peptide synthetase [Vibrio tasmaniensis] 216 | 
MSVDDEQILDSVTPWNPLSYSQQRLWLFQQLQPMSLAYNLGGLLWFEGKDITAEKIQHSLNEMVSAFPSL 217 | RSQFSEIEGKSAQRVLPFSPVDYDCIDMQGGSDAVEVINQDARKRWQQPFNLLEGNLARCCIYQVSEHRF 218 | AVLLATHHIVTDAWSFQIKIKMLVNSVAGKAYSGKEVQSYLDYAAEQASTEKSDIYKQQKAAWIENQFIG 219 | EESLSLSNLNDQSQDTYGARHELVHLSKEINDSIKLLGSELGATKFEVLASAMMLTLRQYSSNVHPSICV 220 | PALNRNAQNRRTVGFYVNSAVMGYRIDAEMPLSQLVNNTHASMKASLAFESTPLEALVGDLPLPTTALNF 221 | RNHGDKLSINTNGVFAEFEEFPVLETPFELVLDVINKGDAPLRFVYAKEKFTRPFITKFIESFKINLATI 222 | VQSPNAAVASVNALSSKDMRLIDQYGSGDHLWQYRPFTDLVTEQAVKCPNSIALKHQNESMSYLELETRS 223 | NQLAHSILSQGTAAKSPIGVMMERGVDMIVSMIAVLKAGSPFLPLDPDYPTERLSFMLEDSGAELLLTHS 224 | KSNDRCRDVLSGGNCVTQFCIENAKLASFPSDNSFNRPLAEELAYIIYTSGSTGKPKGVTISHEGLSMHV 225 | QTIGQRYGMSEEDVELHFASISFDGAVERWTVPLAFGSKLVIRDQELWTAERTCDALQQEKVTIACFPPS 226 | YVGPLLDWIEATKPALALRSITLGGEAFTRETFDRIQNVLAPPRIINGYGPTETVITPMIWEAYADDTMN 227 | SAYAPIGTPVGDRKLYVLDSELSQVPFGCSGELYIGSEVGLAEGYLKQPNLTAERFLPDPFSSNGERMYR 228 | TGDLVQWRDDGVMEYLGRVDQQVKIRGFRIELGEIESQLQALSSAEFCAVVDHESPTGKKLVGYVQLNKV 229 | QKSNAQSNTPQLQSSNGNEESQWLELLGQTLPDYMVPACIIVQDRLPLTPAGKVDRKELFAPDWTEIRSD 230 | QGALETNRQVMLAEVWCELLKVKSVGANSQFFALGGDSIMALQLVGKLRQQGYMLSPKQVFDFPKLQDMA 231 | ENLEEAQLVVAEQDKLQGEVALLPIQQRYIKHFELSRCNQYIQFKWDYPFDLARLTNAFRGLIEHHDALR 232 | LRFSADTAIGMTANYQDDVEFTIHSFDSEININQVQTSIDLINGVIGAVGIRSMDGLDESAENSEVLIAI 233 | HHLVVDALSWPVIIEDLVKLYVSEPTSELSYEPRGISGPHLLTQKTYHQGNWANALNTLSISDDQQAYWA 234 | EQTKEPLYSTPRGKPISTHWHTPLSKIEALTKAGFGFARLTQEQIVFLVSTLTVSTLNQGKALTIHRESH 235 | GRFTEDSGLDLSRTVGWYTSLYPQAIPVLDSLEEWVKSLKGSFDADQLGGVTFHAGVMQNLWPHVGDMDV 236 | LFNYLGNAAQQINDAVEVTNTGLWRDESNTADAAIVINASVVKEALLWDIELDNGCFNEFEIDVLHAAFD 237 | SSIERLHDLFIEASPILTKADAPLVDLSQIQFKQLCGCVSTAADLPSTILPLSTLQQGLYFHAKLSDSDS 238 | TYVNQITLPFNNVNVPKMVEAWQGVMQRHQMLRSTLFSFDGNAYLAEWAELGLDYEVLDVRKRSQFDIEE 239 | YKQLRVEGGFQLEQVVERSKVKPLWRVDFIQTHDHQVQCIFTIHHILMDGWSTGVLLSDLFALYQGRAIT 240 | PVKGEFSDYLAWTQRQDKSQSNEYWQNYLQGMESPTRLAESFGQSKPEASDPKVSSFHRYNDDYSSEVIS 241 | EWLLKLNQAGVTLNTLTQAAWLLTLNRFTGQETPVFGNTVAGRPTELAHSDSMVGLFINTLPIAQRVDLS 242 
| KRVSEWLLDIQTSSSEQREYSYSSLSDIQAQTGWSGENLFDTLVVFENYPLDEALLNSKGNDEFSIGEPE 243 | SYEFTHYPLTLAILPSDSLRIVFAYDESKFTSQQVEVLCATNRHYLTQLVGHLAEDLGRLSELAEDQLRE 244 | LRSFDRQPEPWAFEPFTDLVTKQMLLHPNDEALVSNVEVENGLQRQSLTYQQVVEQSDAIASRLITEGVK 245 | RDDIVGVLFERDCNMLVAMMGIMKAGAAFLPLDPSYPQERLDFMIQDSGARVLIHDALSESLGDQIGNQA 246 | KIISYDSFDLTKNLNDTPVILPDQLAYMIYTSGSTGKPKGVCVSQQGLSMHVQTIGQRYGMTASDVELHF 247 | ASISFDGAIERWTVPLAFGSRLVIRDQSLWSAQQTCDVLVREQVTIACFPPSYVLPLLEWIEGSDPELYV 248 | RSWTLGGEAFTRDTYFRLQHVLKPKRIINGYGPTETVVTPMIWEAYSDTELESAYAPIGKAVGNRTLYVL 249 | DSALNRVPTGVAGELYIGEEVGLARGYFERPHLTSERFIPDPFSNNGERMYRTGDVVKWRSDGVMEYLGR 250 | SDEQVKIRGFRVELGEIESRLQKITGSEQCAVIACDSPSGKQLVAYIQSDNVLSSECALQDLAKDLPDYM 251 | VPSQVVKMDKIPLTPASKVDKKRLPMPNWQEVVSADYIAPIGKIELTLAKQWQALFDKEQISREDDFFAL 252 | GGQSLLATQLVGRLSQQDDIRLSLQAVFDAPVLKDLAAQCMLTDVRNRGEVVSLARASRLPYMPASAVQK 253 | RLWFVQQLLPASAAYHMPLGLKFTGNVNVAALERAINVLVERHEILRTNFSQVEGELMQSIHPEREICLG 254 | MHEGLASDEQRLLAYKELIAKPFDFAEGQLIRFDWIVRASERSTGVEQAELLIVAHHIISDGISMQLLLK 255 | ELAVCYQSMISAYLESGESAAFETPAIHDVQYADYVNWQKQWLESEEASEHKSWWLDALKYDIDPLVLHS 256 | DVPREQTETQGHRLHFELTNEQVSNITQLATSHNTTPFTVMLTLWHLLMHKYSGQEQVRVGVPVAGRTQP 257 | ETQTMQGCFINSLVIPAQFNGEQSFSDLLHQVKSFTEQALGRQDFPFEMLVESLGITGNLQYHPVFQTSF 258 | NFQRFDEAELFDWQGGLDVEPFDPGVVNAQLEIGMDIQQMSESKWLGFVSYVSPIFKQDFVQALLGHWLL 259 | LLDRVATNSDGLVSQLHLIDETAEQQNRAFNDTSLDWGGFVTPSHKIQHQVEKTPGSIALSMQGQILTYK 260 | EFDKRVNQLANWLRDKGVNSETRVGLGLERSFELVIGLHAITRAGGAYVPLDPSYPVERLQYILQSANID 261 | LLLTDSNSVHLWPQSQDCEYVDLSKLDVSGQPSVPPLVHWHPEQALYVIFTSGSTGLPKGVVNTQAALQN 262 | RLNWMQQEYVLNESDCVLQKTPFSFDVSVWEFFWPLMTGARLAIAPPEAHRQPSVLSEVIQQEQVTTLHF 263 | VPSMLNAFSVETNISDCVSLKRIICSGEALPADLVEQVLSHAPVELHNLYGPTEAAIDVTYWPCELPVSK 264 | RIPIGYAISNTQLYVLDDNWNSVPIGVPGELYLAGIGLAREYLARPDLTADRFIPNPFGEAGSRMYRTGD 265 | QVVQMADGRLEYLGRLDNQVKIRGLRIELEEIENVINQLDWVEESAVIAFKHQTGDQLVGYVIDSHWEQD 266 | KQELLKKHLSEHVPDYMVPTILIGLGEMPLSPNGKRDRKALPAPEWQSIKYRAPETELELWFATNWEEVL 267 | EVPQVGLDDNFFALGGHSLLATRVVAKAHQELGLEVVLKDFFEANSLQALTDSLQNQYQTQNEHEQDEFD 
268 | AMAALMDELELL 269 | >WP_016789555.1 non-ribosomal peptide synthetase [Vibrio cyclitrophicus] 270 | MSVDDEQILDSVSPWSPLSYSQQRLWLFQQLQPMSLAYNLGGLLWFEGEEVTLDKLQHSLNEMVSAFPSL 271 | RSQFTEVEGKAVQRVLPLRRIEFDTVDLRNDTNSINIINQDARRRWQQPFNLMEGQLARCCIYQVTEHRF 272 | GVLLSTHHIVTDAWSFQLTIKMLVNSVAGKAYQTREAQSYVDYATEQVSEEKSERYSKQKSFWQQLQFLN 273 | EESVSISNLNAQSQDTYGARHELVELPSDIDESIKQLGKKLGVTKFEIFASAMMLTLRQYSSNVHPSICV 274 | PALNRNAKNRRTVGFYVNSTVMGYRIDAEMTLSQLVSQTRDSMKASLAFESTPLEALVGDLPLPTTALNF 275 | RNHGDKLSINTNGVSAQFEEFPVLETPFELVLDVINKSSAPLRFVYGKEKFTRPFITKFIESFKVNLEAI 276 | VQSPYTAVASVNAISSKDMRLIDQYGSGEHEWHYRPFTDLVTEQAKSSPDSIALKHQDESMSYLELETRS 277 | NQLAHSILSKRVTSQSPIGVMMERGVDMIVSMIAVLKSGSPFLPLDPDYPTERLSFMLEDSGAELLLTHP 278 | KSQDRCSDILESSDGVTPFCVERAALADFPSDNSFTQPLAEELAYIIYTSGSTGKPKGVTISHEGLSMHV 279 | QTIGQRYGMTAQDIELHFASISFDGAVERWTVPLAFGSRLVIRDQELWTAEQTCEALQKEKITIACFPPS 280 | YIGPLLDWVEQTKPSLSLRSITLGGEAFTRETFDRIQAVLAPPRIINGYGPTETVITPMIWEAYTNDTMN 281 | SAYAPIGTRVGDRKLYVLDSELSPVPLGSSGELYIGSEVGLAQGYLRQPNLTAERFLPDPFLANGERMYR 282 | TGDLVRWRDDGVMEYLGRVDQQVKIRGFRVELGEIESQLQTLSGAEFCSVVAHESQTGKKLVGYVQLRDT 283 | SSLDNKNIQETRWLDELAKALPDYMVPVCIIVQDEMPLTPAGKVDRKQLLAPDWSENLSNQGTLETKRQQ 284 | ILATVWCELLKLEGVGADSHFFALGGDSIMALQLVGKLRQQGFMLSPKQVFDFPKLQDMAELLEESQRVL 285 | ADQSKLQGRVALLPIQQRFIEQFELSRCNQYIQFTWRHALNIERLSHALNQLADHHDALRLNFSNSLSHN 286 | ASFGVTAEYQEEARFAIHPFDDEINIEQVQSSIDLEKAITGAVGIRNLKGSTEGSEVLIAIHHLVVDALS 287 | WPMIIEDLSKLYEALPENDEATNTLSPKTHNQGNWVNNLNTLLITEQRQAYWLDQMQAPIYSTERALPIS 288 | TQWLTPLSKIESLTKAGQSFARLTQEQVVFIVCALTVSTQNQGQALTIHRESHGRFTDNIGLDLSRTIGW 289 | YTSLYPQAIPELGTLMEWVKSLKDSFDSDHVGGITFHAGVAQNLWPHVGNMDVLFNYLGNATQQINGKVD 290 | ITGSGLWRDKSNVADAAVVINTSVVNDHLQWDVEFDSGCFNETGIEVLHAALNDSIERLHDLFIEVDPIL 291 | TKTDAPLVELTQSQLNQLCQGIKQPESLPNTILPLSTLQQGLYFHAKLSDSDSTYVNQITLPLNHVELPR 292 | MIEAWQGVMQRHQMLRSTLFSLDGNAYLAEWQQLDLSYAVLDVRLRSQFDMAEYKQQIIEQGFQLEQRLE 293 | RSQVNPLWRVDFVQTEDHQIQCVFTIHHILMDGWSTGVLLSDLFAIYQRRTITPVKGQFSDYLAWARQQD 294 | 
HQQSNAYWQRYLQNMESPTRLVESFGSSDSLESKYNRFNDDYSQETVGEWLPKLSAAGVTLNTLTQAAWL 295 | LTLHRFTGQQTPVFGNTVAGRPTDLAHSDSMVGLFINTLPIAHQVDLSKSVSQWLLDIQNSSSEQREFSY 296 | SSLSDIQAQTGWTGDNLFDTLVVFENYPLDEALLKSNREGELSIGEPESYEFTHYPLTLAILPSESLRIV 297 | FAYDESKFSQQQIETLCATNRHYLNKLVEHLSKELGSIPVLAKGQLNELAKFDRVPEPWTFEPFTDLVSK 298 | QMLLRPDNVALVSNVEHELGAHKASLTFKQLVEQSDAVAARLISEGITRDDIVGVLFERDCNMLVTMMGV 299 | MKAGGAFLPLDPAYPQERLEFMVKDSRARILVHDALSGVLANEICNQAKAVSFNALDLNAKLIDKPAILS 300 | DQLAYMIYTSGSTGKPKGVCVSQQGLSMHVQTIGQRYGMTPDDVELHFASISFDGAIERWTVPLAFGSRL 301 | VIRDQSLWSAQQTCDVLARERVTIACFPPSYVLPLLEWIEGTQPELSVRSWTLGGEAFTRDTYFKLQQVL 302 | KPQRIINGYGPTETVVTPMIWEAYLDTGLDSAYAPIGKAVGCRTLYVLDSALNRLPAGVAGELYVGEEVG 303 | LARGYFERPDLTSERFLPDPFASNGERMYRTGDVVKWRSDGVMEYLGRSDEQVKIRGFRVELGEIESRLQ 304 | KLTDSELCAVVACDSPSGKQLVAYLQSDSSLSMSDMRSDLAKDLPEYMVPSQFIKLDKIPLTPASKVDKK 305 | RLPAPDWLATNKSDYIEPVGKIEQALAKQWCALFEKDQIGREDDFFALGGQSLLATQLVGRLKQQDSIRL 306 | SLQAVFDTPVLKDLASQCISELSSSSAHGTQDLAVFARAPRLPNMPTSAVQQRLWFVQQLLPTSAAYHMP 307 | LGLKFTGRVNVAVLKQAINTLVSRHEILRTNFAQVDGELMQQVRPERYIELGIHEAAVNDQDRLSHYKDL 308 | IAKPFNFADGALIRFDWMPQTSDLDEFCKLGEQDEKAELLIVVHHIISDGISMQLLLKELSGCYQSLVDP 309 | TYKGSETPSELQYIDYAQWQKQWLESEGASRQKGWWLDALKYDIEPLVLHSDVPREQTETSGNRLHFELS 310 | REQIASISQLAKSHSTTPFNVMLTLWHLLMHKYSGQEQIRVGVPVAGRTQAETQVMQGCFINSLVIPAQF 311 | SESQTFSDLLMQVKSFSEKALERQDFPFEMLVESLGIKGNLQYHPVFQTSFNFQRFDENAIFDWNDIDVE 312 | PFDPGVVNAQLEIGLDIQQMSEHKWLGFISYASPIFTTDFAQALLDHWRLLLEKVSRDNRCSINQLHLID 313 | NTAKQQCIAFNNTELDWGSLVTPSQRIQQQAQTAPDSIALSMQDQALTYQEFDLRVNQMANWLRGKGVNS 314 | ETRVGLGLERSFDLVIGLHAITRAGGAYVPLDPSYPEDRLQYILRSANIDLLLTDSNSMHLWPETEGCEY 315 | IDLTKLDVSNQPFQAPHVDWQPDQALYVIFTSGSTGLPKGVVNTQVALQNRLNWMQQEYALNESDCVLQK 316 | TPFSFDVSVWEFFWPLMTGARLAIAPPEAHRQPRLLSEVVQKEKVTTIHFVPSMLNAFSIETTISECTSL 317 | RRIICSGEALPADLVEQVLSHAPVELHNLYGPTEAAIDVTYWPCELPVSKRIPIGYAISNTQLHVLDDNW 318 | NPVPVGVPGELYLAGVGLAREYLSRPDLTADRFVPNPFGGPGSRMYRTGDQVVQYADGRLEYLGRLDNQM 319 | KIRGLRIELEEIENVINQLDWVEESAVVAFKHQTGDQLIGYVVDPMWNELKQDNELKLEIVKQHLSEQLP 320 
| DYMVPTILLGLSEMPLSPNGKRDRKALPAPEWQSIEYRAPETELERWFAANWGEVLEISQVGLDDNFFAL 321 | GGHSLLATRVVAKAHQELGLEVALKDFFEAKSLQALSDSLQAQYQTQSQHEQDEFDAMAALMDELELL 322 | >WP_016784470.1 non-ribosomal peptide synthetase [Vibrio cyclitrophicus] 323 | MSVDDEQILDSVSPWSPLSYSQQRLWLFQQLQPMSLAYNLGGLLWFEGEEVTLDKLQHSLNEMVSAFPSL 324 | RSQFAEVEGKAVQRVLPLRRIEFDTVDLRNDTNSINIINQDARRRWQQPFNLMEGQLARCCIYQVTEHRF 325 | GVLLSTHHIVTDAWSFQLTIKMLVNSVAGKAYQTREAQSYVDYATEQVSEEKSERYSKQKSFWQQLQFLN 326 | EESVSISNLNAQSQDTYGARHELVELPSDIDESIKQLGKELGVTKFEIFASAMMLTLRQYSSNVHPSICV 327 | PALNRNAKNRRTVGFYVNSTVMGYRIDAEMTLSQLVSQTRDSMKASLAFESTPLEALVGDLPLPTTALNF 328 | RNHGDKLSINTNGVSAQFEEFPVLETPFELVLDVINKSSAPLRFVYGKEKFTRPFITKFIESFKVNLEAI 329 | VQSPYTAVASVNAISSKDMRLIDQYGSGEHEWHYRPFTDLVTEQAKSSPDSIALKHQDESMSYLELETRS 330 | NQLAHSILSKRVTSQSPIGVMMERGVDMIVSMIAVLKSGSPFLPLDPDYPTERLSFMLEDSGAELLLTHP 331 | KSQDRCSDILESSDGVTPFCVERAALADFPSDNSFTQPLAEELAYIIYTSGSTGKPKGVTISHEGLSMHV 332 | QTIGQRYGMTAQDIELHFASISFDGAVERWTVPLAFGSRLVIRDQELWTAEQTCEALQKEKITIACFPPS 333 | YIGPLLDWVEQTKPSLSLRSITLGGEAFTRETFDRIQAVLAPPRIINGYGPTETVITPMIWEAYTNDTMN 334 | SAYAPIGTRVGDRKLYVLDSELSPVPLGSSGELYIGSEVGLAQGYLRQPNLTAERFLPDPFLANGERMYR 335 | TGDLVRWRDDGVMEYLGRVDQQVKIRGFRVELGEIESQLQTLSGAEFCSVVAHESQTGKKLVGYVQLRDT 336 | SSLDDKNIQETRWLDELAKALPDYMVPVCIIVQDEMPLTPAGKVDRKQLLAPDWSENLSNQGTLETKRQQ 337 | ILATVWCELLKLEGVGVDSHFFALGGDSIMALQLVGKLRQQGFMLSPKQVFDFPKLQDMAELLEESQRVL 338 | ADQSKLQGRVALLPIQQRFIEQFELSRCNQYIQFTWRHALNIERLSHALNQLADHHDALRLNFSNSLSHN 339 | ASFGVTAEYQEEARFAIHPFDDEINIEQVQSSIDLEKAITGAVGIRNLKGSTEGSEVLIAIHHLVVDALS 340 | WPMIIEDLSKLYEALPENDEATNTLSPKTHNQGNWVNNLNTLLITEQRQAYWLDQMQAPIYSTERALPIS 341 | TQWLTPLSKIESLTKAGQSFARLTQEQVVFIVCALTVSTQNQGQALTIHRESHGRFTDNIGLDLSRTIGW 342 | YTSLYPQAIPELGTLMEWVKSLKDSFDSDHVGGITFHAGVAQNLWPNVGNMDVLFNYLGNATQQINGKVD 343 | ITGSGLWRDKSNVADAAVVINTSVVNDHLQWDVEFDSGCFNETEIEVLHAALNDSIERLHDLFIEVDPIL 344 | TKTDAPLVELTQSQLNQLCQGIKQPESLPNTILPLSTLQQGLYFHAKLSDSDSTYVNQITLPLNHVELPR 345 | MIEAWQGVMQRHQMLRSTLFSLDGNAYLAEWQQLELNYAVLDVRLRSQFDIAEYKKEIIEQGFQLEQRLE 
346 | RSQVTPLWRVDFVQTEEHQIQCVFTIHHILMDGWSTGVLLSDLFAIYQRRTITPVKGQFSDYLAWACEQD 347 | HQQSNAYWQRYLQNMESPTRLVESFGSSDSLESKYNRFNDDYSQETVGEWLPKLSAAGVTLNTLTQAAWL 348 | LTLHRFTGQQTPVFGNTVAGRPTDLTHSDSMVGLFINTLPIAHQVDLSKSVSQWLLDIQNSSSEQREFSY 349 | SSLSDIQAQTGWTGDNLFDTLVVFENYPLDEALLKSNREGELSIGEPESYEFTHYPLTLAILPSESLRIV 350 | FAYDESKFSQQQIETLCATNRHYLNKLVEHLSKELGSIPVLAKGQLNELAKFDRVSEPWTFEPFTDLVSK 351 | QMLLRPDNVALVSNVEHELGVHKASLTFKQLVEQSDAVAARLISEGITRDDIVGVLFERDCNMLVTMMGV 352 | MKAGGAFLPLDPAYPQERLEFMVKDSRARILVHDALSGLLANEICNQAKAVSFNTLDLNAKLIDKPAILS 353 | DQLAYMIYTSGSTGKPKGVCVSQQGLSMHVQTIGQRYGMTPDDVELHFASISFDGAIERWTVPLAFGSRL 354 | VIRDQSLWSAQQTCDVLARERVTIACFPPSYVIPLLEWIEGTQLELSVRSWTLGGEAFTRDTYFKLQQVL 355 | KPQRIINGYGPTETVVTPMIWEAYLDTGLDSAYAPIGKAVGCRTLYVLDSALNRLPAGVAGELYVGEEVG 356 | LARGYFERPDLTSERFLPDPFASNGERMYRTGDVVKWRSDGVMEYLGRSDEQVKIRGFRVELGEIESRLQ 357 | KLTDSELCAVVACDSPSGKQLVAYLQSDSSLSMSDMRSDLAKDLPEYMVPSQFIKLDKIPLTPASKVDKK 358 | RLPAPDWLATNKSDYIEPVGKIEQALAKQWCALFEKDQIGREDDFFALGGQSLLATQLVGRLKQQDSIRL 359 | SLQAVFDTPVLKDLASQCISELSSWSAHGTQDLAVFARAPRLPNMPTSAVQQRLWFVQQLLPTSAAYHMP 360 | LGLKFTGRVNVAVLKQAINTLVSRHEILRTNFAQVDGELMQHVRPERYIELGIHEAAVNDQDRLSHYKDL 361 | IAKPFNFADGALIRFDWMPQTSDLDEFCKLGEQDEKAELLIVVHHIISDGISMQLLLKELSGCYQSLVDP 362 | TYKGSETPSELQYIDYAQWQKQWLESEDASRQKGWWLNALKYDIEPLVLHSDVPREQTETSGNRLHFELS 363 | REQIASISQLAKSHSTTPFNVMLTLWHLLMHKYSGQEQIRVGVPVAGRTQPETQVMQGCFINSLVIPAQF 364 | SESQTFSDLLMQVKSFSEKALERQDFPFEMLVESLGIKGNLQYHPVFQTSFNFQRFDENAIFDWNDIDVE 365 | PFDPGVVNAQLEIGLDIQQMSEHKWLGFISYASPIFTTDFAQALLDHWQLLLEKVSRDNRCSINQLHLTD 366 | NTAKQQCIAFNNTELEWGSLVTPSQRIQQQAQTAPDSIALSMQDQALTYQEFDLRVNQMANWLRGKGVNS 367 | ETRVGLGLERSFDLVIGLHAITRAGGAYVPLDPSYPEDRLQYILRSANIDLLLTDSNSMHLWPETEGCEY 368 | IDLTKLDVSNQPFQAPHVDWQPDQALYVIFTSGSTGLPKGVVNTQVALQNRLNWMQQEYALNESDCVLQK 369 | TPFSFDVSVWEFFWPLMTGARLAIAPPEAHRQPRLLSEVVQKEKVTTIHFVPSMLNAFSIETTISECTSL 370 | RRIICSGEALPADLVEQVLSHAPVELHNLYGPTEAAIDVTYWPCELPVSKRIPIGYAISNTQLHVLDDNW 371 | 
NPVPVGVPGELYLAGVGLAREYLSRPDLTADRFVPNPFGGPGSRMYRTGDQVVQYADGRLEYLGRLDNQV 372 | KIRGLRIELEEIENVINQLDWVEESAVVAFKHQTGDQLIGYVVDPMWNELKQDNELKLEIVKQHLSEQLP 373 | DYMVPTILLGLSEMPLSPNGKRDRKALPAPEWQSIEYRAPKTELERWFAANWAEVLEISQVGLDDNFFAL 374 | GGHSLLATRVVAKAHQELGLEVALKDFFEAKSLQALSDSLQAQYQTQNQHEQDEFDAMAALMDELELL -------------------------------------------------------------------------------- /KEGGDecoder/Amphibactin/amphibactin2092_sfam_match.tblout: -------------------------------------------------------------------------------- 1 | # --- full sequence ---- --- best 1 domain ---- --- domain number estimation ---- 2 | # target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target 3 | #------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- --------------------- 4 | 1544 - WP_011589370.1 - 0 1352.6 14.2 0 1240.6 14.5 3.0 1 1 2 3 3 3 3 - 5 | 27549 - WP_011589370.1 - 0 1087.3 1.4 4.6e-297 992.8 0.7 2.0 1 1 1 2 2 2 2 - 6 | 34424 - WP_011589370.1 - 0 1034.0 10.2 1.7e-232 778.9 6.2 3.3 2 1 1 3 3 3 2 - 7 | 48029 - WP_011589370.1 - 0 1023.0 4.1 4.3e-279 932.8 0.0 2.0 1 1 1 2 2 2 2 - 8 | 19476 - WP_011589370.1 - 1.9e-296 990.2 0.6 2.4e-193 648.8 0.0 2.2 2 0 0 2 2 2 2 - 9 | 333435 - WP_011589370.1 - 2.2e-295 987.6 16.8 3.2e-295 987.1 16.8 1.0 1 0 0 1 1 1 1 - 10 | 68264 - WP_011589370.1 - 4.2e-287 959.9 6.9 4.8e-266 890.1 0.4 3.0 2 1 1 3 3 3 3 - 11 | 1544 - WP_013032793.1 - 0 1467.2 7.5 0 1274.2 1.3 4.0 3 1 1 4 4 4 3 - 12 | 27549 - WP_013032793.1 - 0 1156.0 0.0 0 1032.2 0.0 2.0 1 1 1 2 2 2 2 - 13 | 48029 - WP_013032793.1 - 0 1110.6 0.5 1.5e-297 993.9 0.0 2.1 1 1 1 2 2 2 2 - 14 | 333435 - WP_013032793.1 - 0 1104.7 21.3 0 1104.3 21.3 1.0 1 0 0 1 1 1 1 - 15 | 19476 - WP_013032793.1 - 0 1065.6 0.0 1.6e-201 676.0 0.0 2.2 2 0 0 2 2 2 2 - 16 | 68264 - WP_013032793.1 - 0 1060.4 0.9 3.2e-290 970.3 0.0 3.2 2 2 1 3 3 3 3 - 17 | 34424 - 
WP_013032793.1 - 0 1054.5 0.4 1.8e-227 762.2 0.0 3.0 2 1 1 3 3 3 2 - 18 | 158964 - WP_013032793.1 - 0 1020.3 0.0 0 1019.9 0.0 1.0 1 0 0 1 1 1 1 - 19 | 137155 - WP_013032793.1 - 2e-302 1010.8 0.0 1.2e-225 756.4 0.0 4.1 2 2 2 4 4 4 4 - 20 | 154051 - WP_013032793.1 - 2.9e-286 957.2 8.5 3.9e-286 956.8 8.5 1.1 1 0 0 1 1 1 1 - 21 | 52660 - WP_013032793.1 - 3.6e-281 940.6 0.0 4.8e-281 940.1 0.0 1.0 1 0 0 1 1 1 1 - 22 | 33515 - WP_013032793.1 - 1e-275 921.1 5.8 9.7e-214 716.1 0.0 2.1 2 0 0 2 2 2 2 - 23 | 82907 - WP_013032793.1 - 3.7e-273 913.7 0.2 1.5e-242 812.1 0.0 3.0 2 1 1 3 3 3 3 - 24 | 1544 - WP_016784469.1 - 0 1280.0 4.8 0 1116.7 1.6 3.4 1 1 2 3 3 3 3 - 25 | 27549 - WP_016784469.1 - 7.2e-302 1008.7 1.6 8.3e-270 902.5 0.1 2.2 1 1 1 2 2 2 2 - 26 | 48029 - WP_016784469.1 - 1.2e-281 941.2 5.3 5.3e-261 872.9 0.1 2.9 1 1 1 2 2 2 2 - 27 | 19476 - WP_016784469.1 - 2e-275 920.6 7.2 1.6e-188 632.9 0.1 2.8 2 1 0 2 2 2 2 - 28 | 34424 - WP_016784469.1 - 1.3e-269 902.0 3.8 6e-207 694.2 1.3 3.1 2 1 1 3 3 3 2 - 29 | 1544 - WP_016789556.1 - 0 1281.3 6.7 0 1116.8 2.5 3.4 1 1 2 3 3 3 3 - 30 | 27549 - WP_016789556.1 - 2.8e-301 1006.8 2.6 6.2e-270 902.9 0.2 2.2 1 1 1 2 2 2 2 - 31 | 48029 - WP_016789556.1 - 1e-282 944.8 6.1 2.5e-262 877.3 0.1 2.9 1 1 1 2 2 2 2 - 32 | 19476 - WP_016789556.1 - 1.3e-276 924.5 7.4 7.4e-190 637.3 0.2 2.8 2 1 0 2 2 2 2 - 33 | 34424 - WP_016789556.1 - 9.1e-270 902.4 4.6 1.2e-207 696.5 1.8 3.1 2 1 1 3 3 3 2 - 34 | 1544 - WP_017074986.1 - 0 1206.8 15.8 0 1018.1 6.8 3.7 1 1 2 3 3 3 3 - 35 | 27549 - WP_017074986.1 - 8.2e-287 958.8 16.4 2.3e-259 867.9 3.6 2.9 1 1 1 2 2 2 2 - 36 | 48029 - WP_017074986.1 - 9.4e-271 905.2 7.5 4e-252 843.5 0.1 2.3 1 1 0 2 2 2 2 - 37 | # 38 | # Program: hmmscan 39 | # Version: 3.1b2 (February 2015) 40 | # Pipeline mode: SCAN 41 | # Query file: ABO2092homolog_ref.faa 42 | # Target file: SFAM_database.hmm 43 | # Option settings: hmmscan --tblout amphibactin2092_sfam_match.tblout --noali --cpu 25 SFAM_database.hmm ABO2092homolog_ref.faa 44 | 
# Current dir: /media/eclipse/sfams 45 | # Date: Mon Apr 16 15:25:38 2018 46 | # [ok] 47 | -------------------------------------------------------------------------------- /KEGGDecoder/Amphibactin/amphibactin2093_sfam_match.tblout: -------------------------------------------------------------------------------- 1 | # --- full sequence ---- --- best 1 domain ---- --- domain number estimation ---- 2 | # target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target 3 | #------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- --------------------- 4 | 1544 - WP_011589371.1 - 0 3649.2 21.5 0 1268.4 6.7 4.0 1 1 3 4 4 4 4 - 5 | 27549 - WP_011589371.1 - 0 3036.4 5.3 0 1087.6 1.0 4.0 1 1 3 4 4 4 4 - 6 | 34424 - WP_011589371.1 - 0 2851.8 22.4 0 1270.2 6.2 3.0 2 1 1 3 3 3 3 - 7 | 19476 - WP_011589371.1 - 0 2727.0 0.0 1.2e-282 944.5 0.0 5.0 3 2 2 5 5 5 5 - 8 | 68264 - WP_011589371.1 - 0 2652.6 1.3 0 1035.4 0.2 4.0 2 2 1 4 4 4 4 - 9 | 137155 - WP_011589371.1 - 0 2514.2 0.0 1.4e-245 822.4 0.0 7.1 1 1 6 7 7 7 7 - 10 | 33515 - WP_011589371.1 - 0 2443.1 19.3 8.6e-250 835.3 0.2 5.1 5 0 0 5 5 5 5 - 11 | 22411 - WP_011589371.1 - 0 2439.2 0.0 6.7e-259 865.6 0.0 4.0 4 0 0 4 4 4 3 - 12 | 333435 - WP_011589371.1 - 0 2363.4 33.2 0 1125.2 16.6 3.0 1 1 2 3 3 3 3 - 13 | 48029 - WP_011589371.1 - 0 2354.9 13.7 3.1e-264 883.6 0.1 5.1 2 2 2 5 5 5 5 - 14 | 1544 - WP_013032792.1 - 0 3663.8 3.2 0 1303.4 0.1 4.0 1 1 2 4 4 4 4 - 15 | 27549 - WP_013032792.1 - 0 3070.4 0.0 0 1084.3 0.0 4.0 2 2 2 4 4 4 4 - 16 | 34424 - WP_013032792.1 - 0 2939.2 0.0 0 1313.5 0.0 3.1 1 1 1 3 3 3 3 - 17 | 19476 - WP_013032792.1 - 0 2781.5 0.0 2.3e-286 956.8 0.0 5.0 5 0 0 5 5 5 5 - 18 | 68264 - WP_013032792.1 - 0 2752.1 0.0 0 1039.7 0.0 4.0 2 2 2 4 4 4 4 - 19 | 137155 - WP_013032792.1 - 0 2541.1 0.0 2.9e-259 867.8 0.0 7.0 2 2 5 7 7 7 7 - 20 | 333435 - WP_013032792.1 - 0 
2539.7 35.3 0 1272.5 18.4 3.0 1 1 2 3 3 3 3 - 21 | 33515 - WP_013032792.1 - 0 2497.0 4.2 5.9e-255 852.4 0.0 5.0 5 0 0 5 5 5 5 - 22 | 22411 - WP_013032792.1 - 0 2444.8 0.0 1.1e-258 864.9 0.0 4.0 4 0 0 4 4 4 4 - 23 | 48029 - WP_013032792.1 - 0 2443.2 0.0 6.5e-287 958.7 0.0 5.0 1 1 4 5 5 5 5 - 24 | 1544 - WP_017074987.1 - 0 3092.0 11.0 0 1078.1 1.1 4.0 2 1 2 4 4 4 4 - 25 | 27549 - WP_017074987.1 - 0 2586.8 3.7 2.3e-298 997.1 0.2 4.0 1 1 3 4 4 4 4 - 26 | 34424 - WP_017074987.1 - 0 2429.2 7.7 0 1044.2 2.4 3.2 1 1 2 3 3 3 3 - 27 | 19476 - WP_017074987.1 - 0 2374.7 8.3 4e-251 840.2 0.0 5.2 5 1 0 5 5 5 5 - 28 | 68264 - WP_017074987.1 - 0 2334.0 0.3 1.2e-291 975.0 0.0 4.0 2 2 2 4 4 4 4 - 29 | 22411 - WP_017074987.1 - 0 2218.0 0.0 4.5e-238 796.7 0.0 3.8 4 0 0 4 4 4 3 - 30 | 137155 - WP_017074987.1 - 0 2192.6 0.4 3.8e-223 748.0 0.0 7.0 2 2 5 7 7 7 7 - 31 | 33515 - WP_017074987.1 - 0 2150.5 0.2 1.3e-215 722.3 0.0 5.1 5 0 0 5 5 5 5 - 32 | 48029 - WP_017074987.1 - 0 2050.8 6.0 3.8e-213 714.5 0.0 5.1 3 1 1 5 5 5 5 - 33 | 52660 - WP_017074987.1 - 0 2017.6 0.0 9e-208 697.0 0.0 3.0 1 1 2 3 3 3 3 - 34 | 1544 - WP_017078079.1 - 0 3053.2 2.3 0 1065.2 0.1 4.0 1 1 2 4 4 4 4 - 35 | 27549 - WP_017078079.1 - 0 2570.7 0.0 9.3e-296 988.5 0.1 4.0 1 1 3 4 4 4 4 - 36 | 34424 - WP_017078079.1 - 0 2432.9 1.0 0 1041.0 0.2 3.2 1 1 1 3 3 3 3 - 37 | 19476 - WP_017078079.1 - 0 2341.0 0.7 1.6e-246 824.9 0.0 5.1 4 1 1 5 5 5 5 - 38 | 68264 - WP_017078079.1 - 0 2293.6 0.0 5.9e-283 946.2 0.0 4.0 2 2 2 4 4 4 4 - 39 | 137155 - WP_017078079.1 - 0 2186.2 0.0 5.9e-225 754.0 0.0 7.0 2 2 5 7 7 7 7 - 40 | 22411 - WP_017078079.1 - 0 2180.0 0.0 1.7e-230 771.6 0.0 3.1 3 0 0 3 3 3 3 - 41 | 33515 - WP_017078079.1 - 0 2128.1 0.0 2.2e-213 714.9 0.0 5.1 5 0 0 5 5 5 5 - 42 | 48029 - WP_017078079.1 - 0 2007.3 0.9 2e-209 702.1 0.0 5.1 3 1 2 5 5 5 5 - 43 | 158964 - WP_017078079.1 - 0 1995.8 0.0 1.5e-267 895.0 0.0 3.7 2 1 1 3 3 3 3 - 44 | 1544 - WP_017110664.1 - 0 3051.6 0.1 0 1060.6 0.1 4.0 2 2 2 4 4 4 4 - 45 | 27549 - 
WP_017110664.1 - 0 2574.4 0.0 2.1e-296 990.6 0.0 4.0 1 1 3 4 4 4 4 - 46 | 34424 - WP_017110664.1 - 0 2418.9 0.0 0 1020.3 0.0 3.2 1 1 2 3 3 3 3 - 47 | 19476 - WP_017110664.1 - 0 2332.4 0.0 1.2e-246 825.4 0.0 5.0 5 0 0 5 5 5 5 - 48 | 68264 - WP_017110664.1 - 0 2297.5 0.0 1.4e-284 951.6 0.0 4.0 3 1 1 4 4 4 4 - 49 | 137155 - WP_017110664.1 - 0 2196.2 0.0 2.1e-224 752.2 0.0 7.0 2 2 5 7 7 7 7 - 50 | 22411 - WP_017110664.1 - 0 2188.5 0.0 2.8e-232 777.5 0.0 3.1 3 0 0 3 3 3 3 - 51 | 33515 - WP_017110664.1 - 0 2121.4 0.0 1.7e-213 715.3 0.0 5.1 5 0 0 5 5 5 5 - 52 | 48029 - WP_017110664.1 - 0 2020.4 0.0 1.7e-210 705.7 0.0 5.1 3 1 1 5 5 5 5 - 53 | 52660 - WP_017110664.1 - 0 1986.7 0.0 2.1e-205 689.2 0.0 3.0 1 1 2 3 3 3 3 - 54 | 1544 - WP_016789555.1 - 0 3088.6 1.6 0 1045.6 0.0 4.0 1 1 3 4 4 4 4 - 55 | 27549 - WP_016789555.1 - 0 2610.8 0.8 1.7e-294 984.3 0.0 4.0 1 1 3 4 4 4 4 - 56 | 34424 - WP_016789555.1 - 0 2433.8 1.1 4.1e-302 1009.7 0.7 3.1 1 1 2 3 3 3 3 - 57 | 19476 - WP_016789555.1 - 0 2387.5 2.2 3.6e-248 830.4 0.0 5.2 5 0 0 5 5 5 5 - 58 | 68264 - WP_016789555.1 - 0 2333.3 0.0 1.7e-285 954.6 0.0 4.0 2 2 2 4 4 4 4 - 59 | 22411 - WP_016789555.1 - 0 2208.1 0.0 9.3e-236 789.0 0.0 3.4 4 0 0 4 4 3 3 - 60 | 137155 - WP_016789555.1 - 0 2203.0 0.0 1.3e-223 749.5 0.0 7.0 2 2 5 7 7 7 7 - 61 | 33515 - WP_016789555.1 - 0 2167.8 0.0 1.4e-213 715.5 0.0 5.1 5 0 0 5 5 5 5 - 62 | 48029 - WP_016789555.1 - 0 2048.1 0.8 1.9e-214 718.7 0.0 5.0 4 1 1 5 5 5 5 - 63 | 101568 - WP_016789555.1 - 0 2015.9 0.0 6.2e-214 717.3 0.0 5.0 2 2 2 4 4 4 4 - 64 | 1544 - WP_016784470.1 - 0 3092.4 1.4 0 1053.6 0.0 4.0 1 1 3 4 4 4 4 - 65 | 27549 - WP_016784470.1 - 0 2615.0 1.0 1.3e-296 991.3 0.0 4.0 1 1 3 4 4 4 4 - 66 | 34424 - WP_016784470.1 - 0 2436.7 0.9 7.2e-301 1005.5 0.4 3.1 1 1 1 3 3 3 3 - 67 | 19476 - WP_016784470.1 - 0 2387.1 2.3 8.6e-248 829.1 0.0 5.2 5 0 0 5 5 5 5 - 68 | 68264 - WP_016784470.1 - 0 2335.2 0.0 4.3e-287 959.9 0.0 4.0 2 2 2 4 4 4 4 - 69 | 22411 - WP_016784470.1 - 0 2215.6 0.0 2.5e-237 794.2 
0.0 3.6 4 0 0 4 4 3 3 - 70 | 137155 - WP_016784470.1 - 0 2204.2 0.0 2.6e-224 751.9 0.0 7.0 2 2 4 7 7 7 7 - 71 | 33515 - WP_016784470.1 - 0 2166.7 0.0 7.4e-213 713.2 0.0 5.1 5 0 0 5 5 5 5 - 72 | 48029 - WP_016784470.1 - 0 2048.9 0.3 5.6e-215 720.5 0.0 5.0 3 1 1 5 5 5 5 - 73 | 52660 - WP_016784470.1 - 0 2034.1 0.0 2.8e-213 715.3 0.0 3.0 1 1 2 3 3 3 3 - 74 | # 75 | # Program: hmmscan 76 | # Version: 3.1b2 (February 2015) 77 | # Pipeline mode: SCAN 78 | # Query file: ABO2093homolog_ref.faa 79 | # Target file: SFAM_database.hmm 80 | # Option settings: hmmscan --tblout amphibactin2093_sfam_match.tblout --noali --cpu 25 SFAM_database.hmm ABO2093homolog_ref.faa 81 | # Current dir: /media/eclipse/sfams 82 | # Date: Mon Apr 16 15:38:34 2018 83 | # [ok] 84 | -------------------------------------------------------------------------------- /KEGGDecoder/DMSPLyase/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjtully/BioData/cb1d45a957eda783412a48911c8592252915d9cd/KEGGDecoder/DMSPLyase/.DS_Store -------------------------------------------------------------------------------- /KEGGDecoder/DMSPLyase/Pelagibacter_ref_dddK.faa: -------------------------------------------------------------------------------- 1 | >WP_013695448.1 mannose-6-phosphate isomerase [Candidatus Pelagibacter sp. 
IMCC9063] 2 | MLNNKPAIFKEKKIKSIKRFGTVVTKIFVNKNSGSKSMISGTTLIPKDKSINLHYHNCEEAVLVLKGTAL 3 | AEINKKKYTLKEGEACWIPAKVPHRFINNNKSNLKIYWTYANVNATRTDVLTKKTYKILDEHKKKL 4 | >WP_009359929.1 cupin domain-containing protein [alpha proteobacterium HIMB114] 5 | MKQIKEKIFKEQKIKPIKRFGSVKTKIFINKKQGSKKMISGITIIPQNKSINLHYHNCEEAVMILEGTAI 6 | AEINKKKYILKKGEVSWIPAKIPHRFMNKKKEKLKIYWTYANANATRTDVLTDKTNKILNEHK 7 | >WP_028037226.1 cupin [Candidatus Pelagibacter ubique] 8 | MIFVNNLKSVSDQEWSSTEKYPGVRWKFLIDADYTKSSGLSLGFAEIAPGGDLTLHYHSPAEIYVVTNGT 9 | GILNKSGQLEEIKKGDVVYIAGNAKHALKNNGKETLEFYWIFPTDRFSEVKYLS 10 | >WP_014953073.1 cupin [alpha proteobacterium HIMB5] 11 | MIFIKNMNSVSDQDWTTSEKYPGVRWKFLIDEDYNGSKGLSCGFAEIEPGGNLTLHHHAPDEIYVVTNGS 12 | GTLNKSGELEEIKKGDVVYIAGNAKHALQNNGKEVLGFYWVFPTNKFKDVEYISDE 13 | >WP_011281678.1 cupin [Candidatus Pelagibacter ubique] 14 | MIFVKNLASVLSQEWSSTEKYPGVRWKFLIDADFDGSSGLSLGFAEIAPGGDLTLHYHSPAEIYVVTNGK 15 | GILNKSGKLETIKKGDVVYIAGNAEHALKNNGKETLEFYWIFPTDRFSEVEYFPAKQKSG -------------------------------------------------------------------------------- /KEGGDecoder/DMSPSynthase/curson_dmsp_synthase.faa: -------------------------------------------------------------------------------- 1 | >WP_025312975.1 methyltransferase [Roseibacterium elongatum] 2 | MNALTNADEISEIAFGFMGSKALFAALEHGVFTSLAGGASDAASVAKASDLDTDRAETLLTALAGLGLVV 3 | PQGQGQFVNSPAAEAFLVKGAKYDFGDYLRLQVGRQMYGLLDQIDDALTGALPEEATASYAEWFADPDEA 4 | RLYSESQHSGSLGPARQLIKRLDLSQATRLLDVGGGTGAFAITLCEANPGLSATIVDFPTVAALGRSYVQ 5 | KAGLSDRISYIEGNALETDWPGGQDVILMSYLFSGVPGVTHDGLIRAAMERLNPGGLLLIHDFVVHADRT 6 | GPKLAALWQLQHTAFTPRARSLDEGWLVSALDRGGFEDVTVSEMIPEMTMLAEARKPA 7 | >WP_023852424.1 O-methyltransferase [Rhodobacteraceae bacterium PD-2] 8 | MAALETADQVSDIAFGFMGSKALFAALEVGVFTELSRQPSTAAQLAERTAVDADRAETLLTALAGLGLVV 9 | REDGIYSNAPAAEAFLVRGAKHDFGDYLRLQVGRQMYGLLDQIDHALTDRLPKDATASYAEWFSDPEQAR 10 | LYSRSQHAGSLGPARQMLRRVDLSGAERLLDVGGGTGAFAITLCAANPDLSATIVDFPNVAALGRDYVAE 11 | AGLSDRIAYVEGNALERDWPGGQDVVLMSYLFSGVPGEAHAELLRHAYDTLAPGGRLLLHDFVVRADRSG 12 | 
PKLAALWQLQHTAFTPRARSLDAGWLAEALGQAGFAGIEIDDLIPEMTMLAIAHKPA 13 | >WP_043748339.1 methyltransferase [Pseudooceanicola atlanticus] 14 | MTPATEADEISNIAFGFMGSKALFLAIDLDLFSKLAEGPKTADEVAEATDVHRDRVETLMTALAGLGLLT 15 | VEDGKFANSPAAESFLVKGAKYDFSDYLARQVGQQMYPLMDQLAPAVKGDLGEDATGSYEEWFSDPEEAR 16 | LYSESQHAGSLGPARQLARRLDLSEARSMLDVGGGTGAFAITLCKAFPELKTTIVEFPNVAELGRGYVEK 17 | AGLSDRIEYVDGNALKTGWPGGQDIVLMSYLLSGVPGDMHEGLIADAMKALKPGGQLLIHDFMVHADRSG 18 | PGLAALWQLQHTAFTPEARSVDSGTLATELTEAGFEDVSVDEMIPQMTMIAKATKPA 19 | >WP_043143384.1 methyltransferase [Mameliella alba] 20 | MPLLTEADEISQIAFGFMGSKALFAALELEVFTHLAKGAMTAGELAKAAEMHEDRAMTLLTALAGMGLVA 21 | VEEGRFSNAPAAEAFLVQGAKYDFSDYLRLQVGKQMYPLMEQLEGALSGEMSDEDTASYADWFSDPDEAK 22 | LYSESQHAGSLGPARQLARSLDLTGKTKMLDVGGGTAAFDITLCQKNPGLKATVLEFPNVAALGRGYVEK 23 | AGLSDRISYLDGNALETPWPEGQDVVLMSYLFSGVPGETHAGLIAQAFKVLNPGGMVLIHDFIVNEDRTG 24 | PGLAALWQLQHTAFTPEARSLDAGWLEGALAAAGFQDVSVGAMIPEMTMLAQGKKP 25 | >WP_051644456.1 methyltransferase [Labrenzia sp. DG1229] 26 | MLAQDTVATENTIESAEDLSSIAFGFMASKSLFAGLHVDIFSVLADGPKSAEELAKATSIPLNRIVILTT 27 | ALASVGLLAIGDDKKIWNSPAAQNFLSKQSQYDFGDYLRHQIDQQMYPFLLQLNAVMKGDLSEDAIASYS 28 | HWMTDEEQASVYSESQHAGSLGPGKTLARKVDLGSADTLLDVGGGTGAMTISLCNEYENLHATIIDFPNV 29 | AEIGWRFISEANLVDRVRYIPGNAIEVQWPSNQSAILMSYLMSGVPGDDVEGLLQKAFDTLSPGGKLMVH 30 | DFMVEENRRGPALAALWQLQHMAFTPEARSLSVGWLTEAGKRKGFKVADVDNLIPAMTKLVVFEKPS 31 | >ERR00112.1 acetylserotonin O-methyltransferase [Labrenzia sp. C1B70] 32 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLTPQQVAEESELDLDRATTLLTALTSLG 33 | LVRREGAGFTNSPAAEAFLVKGRKYDFGDYLRFQIDKQMYPFMTQLNDALTDSLEDDQVASYETWFSDPE 34 | EARLYSRSQHAGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDFPNVAKVGEEF 35 | IAEEGLQDRIRYAPGNALKDTWPDSADAVLMSYLFSGVPGTAIPGLVRKAFEVLTPGSDFMVHDFMVDEN 36 | RDGPKLAALWQLQHTAFNPEARSITSSYVAGLMEAAGFTDIAVEVMIPGMTMLVHGRKPE 37 | >ERP98606.1 acetylserotonin O-methyltransferase [Labrenzia sp. 
C1B10] 38 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLTPQQVAEESELDLDRATTLLTALTSLG 39 | LVRREGAGFTNSPAAEAFLVKGRKYDFGDYLRFQIDKQMYPFMTQLNDALTDSLEDDQVASYETWFSDPE 40 | EARLYSRSQHAGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDFPNVAKVGEEF 41 | IAEEGLQDRIRYAPGNALKDTWPDSADAVLMSYLFSGVPGTAIPGLVRKAFEVLTPGSDFMVHDFMVDEN 42 | RDGPKLAALWQLQHTAFNPEARSITSSYVAGLMEAAGFTDIAVEVMIPGMTMLVHGRKPE 43 | >AOR83342.1 DsyB [Labrenzia aggregata] 44 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLTPQHVAEESELDLDRATTLLTALTSLG 45 | LVRREGAGFTNSPAAEAFLVKGRKYDFGDYLRFQIDKQMYPFMTQLNDALTDSLEDDQVASYETWFSDPE 46 | EARLYSRSQHAGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDFPNVAKVGEEF 47 | IAEEGLQDRIRYAPGNALKDTWPDSADAVLMSYLFSGVPGTAIPGLVRKAFEVLTPGSDFMVHDFMVDEN 48 | RDGPKLAALWQLQHTAFNPEARSITSSYVAGLMEAAGFTDIAVEVMIPGMTMLVHGRKPE 49 | >CTQ43687.1 Demethylspheroidene O-methyltransferase [Labrenzia aggregata] 50 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLTPQQVAEESELDLDRATTLLTALTSLG 51 | LVRREGAGFTNSPAAEAFLVKGRKYDFGDYLRFQIDKQMYPFMTQLNDALTDSLEDDQVASYETWFSDPE 52 | EARLYSRSQHAGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDFPNVAKVGEEF 53 | IAEEGLQDRIRYAPGNALKDTWPDSADAVLMSYLFSGVPGTAIPGLVRKAFEVLTPGGDFMVHDFMVDEN 54 | RDGPKLAALWQLQHTAFNPEARSITSSYVAGLMEAAGFTDIAVEVMIPGMTMLVHGRKPE 55 | >638883374 OB2597_06780 O-methyltransferase, family 2 [Oceanicola batsensis HTCC2597, unfinished sequence: NZ_AAMO01000001] 56 | MHPATEADEISAIAFGFMGSKALFVALDLGVFTKLAGGSATAEEMAQATG 57 | IHRDRAETLLTALTGLGLLTVKAGRFANSPAADSFLVKGAKYDFGDYLRL 58 | QVGRQMYGLLDQLDAAVQGEMTEGATASYEQWFSDPDQARLYSESQHAGS 59 | LGPARQLAKKVDLSGARRLLDVGGGTGAFAITLCRAFPELTATVVEFPNV 60 | ATLGRKYVEEAGLSDRITYVEGNALSTDWPEGQDTVLMSYLFSGVPGDAH 61 | TDLIADARAALAPGGQVVIHDFMVEADRSGPELAALWQLQHTAFTPEARS 62 | VDTGTLAEELTQGGFEKVDIVEMIPQMTKVAVGRRAA 63 | 64 | >639943763 SIAM614_21095 probable acetylserotonin O-methyltransferase [Stappia aggregata IAM 12614, unfinished sequence: NZ_AAUW01000016] 65 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLSPDEVSR 66 | 
KSELDLDRATTLLTALASLGLVRREGTGFTNSPAAEAFLVKGRKYDFGDY 67 | LRFQIDKQMYPFMTQLNDALTDSLEDGQVASYEDWFSDPEEARLYSRSQH 68 | AGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDF 69 | PNVAKVGEEFIAEEGLQDRIQYAPGNALKDPWPDSADAVLMSYLFSGVPG 70 | TAIPGLVRKAFEVLTPGGDLMVHDFMVDENRDGPKLAALWQLQHTAFNPE 71 | ARSITSSYVAGLMEAAGFIDIAVEVMIPGMTMLVHGRKPD 72 | 73 | >640641694 SSE37_19772 probable acetylserotonin O-methyltransferase [Sagittula stellata E-37, unfinished sequence: NZ_AAYA01000001] 74 | MAVLTEAEDISDIAFGYMGSKALFAALEFGVFTALSQGNIGLTGIAGATG 75 | LPKERCRTLLSALVGLGLVTHDDAGFANSPAAESFLVKGARHDFGDYLRL 76 | QVGRQMYPLMDQIEKALTGDLEDDHTGSYAQWFADPEEARLYSESQHAGS 77 | LGPARGLAKRVDFSGIGSLLDVGGGTGAFAITLARRNPNLRITVLDFPNV 78 | AKLGEAYVADAGLSSQIGYCHGNALESGWPGGQDAVLMSYLFSGVPDHSH 79 | AGLLRKAHDALNPGGQVLIHDFIVDADLSGPKNTALWQLQHTAFTPEARS 80 | LDDDWLIGALEAAGFSDADVGPLIPGMTKLATARKA 81 | 82 | >641429164 BAL199_15988 probable acetylserotonin O-methyltransferase [alpha proteobacterium BAL199, unfinished sequence: NZ_ABHC01000001] 83 | MTLLTKAEEISDVAFGFMGSKALFAALHHQVFTRLADGPLTAEQAAEATG 84 | LHPERVRTLLTALAALGILSVEGGRFGNSPAADSFLVKGAKYDFGDYLRL 85 | QVDRQMYTLLDQIELALANKLPDDATASYAEWFSDPEEARLYSMSQHAGS 86 | LGPALGLAKSVDLSGARRLLDVGGGTGAFAITLCEAFPDLRPTVVDFPNV 87 | VSLGAKYAEDAGLADRITYVPGDALKTEWPGDQDAVLMSYLFSGVPGEAH 88 | DGLLARAFERLAPGGRILIHDFVVSADRTGPKLAALWQLQHTAFTPKARS 89 | LDDAWLAQQLTKAGFAEVSVTPMIPGMTMLAQGVRPG 90 | 91 | >648280724 RB2654_17946 tetracenomycin polyketide synthesis 8-o-methyltransferase, putative [Maritimibacter alkaliphilus HTCC2654 1099457000263: NZ_AAMT01000021] 92 | MAGLTEAEDISDIAFAYMGSKALFAAIKFDIFTTLAAGPLDAGTLAQRVG 93 | LPTERCRTLMTALTSLDLTTVDDDGTFANSPAAAAFLVKGAKYDFSDYLE 94 | RQVGQQMYPLMDQIDDALSGDMAEDATDSYDKWFSDPEEARLYSESQHAG 95 | SLGPARGLAKRVDLSGVRTLLDVGGGTGAFAITLCKANPDLTATVIDFPN 96 | VAALGETYVADAGLSDRVTYRHVNALEGDWPDGQDAILMSYLFSGVPDHA 97 | HEGLIAKAFAHLAPGGRLLIHDFVVDADLSGPKNTALWQLQHTAFTPEAR 98 | SLDDDWLAKAIETAGFTDVAVGPLIPGMTKLAQGTRP 99 | 100 | >648285806 R2601_26831 O-methyltransferase, family 2 [Pelagibaca 
bermudensis HTCC2601 1100011001360: NZ_AATQ01000019] 101 | MGPVTDADEISRIAFGFMGSQALFTALDHGLFTVLAEGALDAEALADRTG 102 | LHRDRAETLLTALAGLGLVTVSDGRFANSPAAEAFLVKGAKYDFGDYLRL 103 | QVGKQMYGLMGQLGDAVSGALGEGATASYEQWFSDPEQARLYSESQHAGS 104 | LGPARQLAKRIDLSGARQLLDVGGGTGAFAITLCKAFPELSATIVDFPNV 105 | AALGRRHVAEAGLSDRIAYVEGNALETDWPGGQDVVLMSYLFSGVPGSAH 106 | EGLLRAAHDRLMPGGRLLIHDFVVHADRSGPPLAALWQLQHTAFTPEARS 107 | VDAEGLARDLWAAGFAEVTVSEMIPQMTMLAEARRPE 108 | 109 | >2514595470 C357_08915 Dimerisation domain-containing protein [Citreicella sp. 357 : NZ_AJKJ01000067] 110 | MAQPDQPLTDADQVSRIAFGFMGSQALFAALESGVFTALAEEPGDAAAVA 111 | NRIGLHPDRAETLLTALAGLGLVAVHDGVFANSPAAAAFLVKGVKYDFGD 112 | YLRLQVGRQMYGLMGQLVPALTGTLPETATASYEQWFSDPEQARLYSESQ 113 | HAGSLGPARQLARRLDLSGARRMLDVGGGTGAFAITLCGAFPQLAATIVD 114 | FPNVATLGRGHVAEAGLSDRITYVEGNALDTPWPGGQDVILMSYLFSGVP 115 | GDAQAGLIARAFDCLAPGGRLLIHDFVVHADRTGPPLAALWQLQHTAFTP 116 | EARSLDTAGLSTALNGAGFVDVRIDEMIPQMTMLATAHRPG 117 | 118 | >2517312627 ladfl_04844 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Labrenzia alexandrii DFL-11 : ladfl_LADFL_1.1] 119 | LSAAKCLESAEDISDIAFGFMGSKALFSALNSDVFSLLSEKTLLPSEVAE 120 | GSNLDTERATTLLTALTALGLVRRDGDGFTNSPAAEAFLVKGKKYDFGDY 121 | LRFQIDRQMYPFMTQLNDALEGTLEEEQVASYEEWFSDAQEAELYSQSQH 122 | AGSLGPGRSLAKLVDLSSVRKLLDIGGGTGAFSISLCKAYPGLRSTILDF 123 | PNVAEVGKGFIEAEGLENRITYQPGNALKDTWPAQADAVLMSYLFSGVPG 124 | ASIPGLVRKSMDTLTPGGTYMVHDFMVDESREGPKLAALWQLQHTAFNPE 125 | AKSITSTYVSGLMEAAGFKDVTIKEMIPGMTSLVFGRKPA 126 | 127 | >2517908241 C507DRAFT_00326 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Amorphus coralli DSM 19760 : C507DRAFT_scaffold00001.1] 128 | MTLLTTAEEISDIAFGFMGSKALFAALHFGVFTHLAERPMTAEELGQAAG 129 | LPAERARTLLTAVASLGLVSVEDGRFANAPAAEAFLVKGAKYDFGDYLRL 130 | QVDRQMYGLLDQIEPALANRLPEDATGSYAEWFSDPEQARIYSESQHAGS 131 | LGPARGLAKSVDLAGARTLLDVGGGTGAYAITLCKANPDLAATVVDFPNV 132 | AALGREYVAEAGLADRVSYVDGNALETDWPAGQDSVLMSYLFSGVPGEEH 133 | DRLVRRAYDTLTPGGLYMVHDFVVDADRTGPKLAALWQLQHTAFTPTARS 134 | 
LDEATLADMMTGAGFEGVEVREMIPGLTMLATGRKPG 135 | 136 | >2520173307 C882_1540 acetylserotonin N-methyltransferase [Caenispirillum salinarum AK4 : ANHY01000002] 137 | MKPLGNVERISGIAFGFMASKALFSALHIGVFDQLAEGPKTVGELAEATG 138 | ATEHALETLLTGLVSLELVQPTSDGKAYKNAEDTNTFLVSTSRHYYGDYL 139 | RYQIDQQMYPFMDNLGHAIKGDTDKIEFDTYETWMADKEQAEIFSRSQHG 140 | GSLGPGAVVAKSWDLSDARTLLDVGGGTGAFSIMMCKRYPELKATVLDFP 141 | NVVALAEEYIAEADMSDRIDVIGGNGLTSQWPAERDVVLMSYLFSGVPAD 142 | GLETLVKNTWAALKPGGRVIIHDFMVDDDRTGPPLAAMWALQHMVFTPRA 143 | ASLTPGRVMELLKAQGFGDMEEKPLIPGLTRVVSAVKPQA 144 | 145 | >2523405058 G578DRAFT_0574 Dimerisation domain-containing protein [Thalassobaculum salexigens DSM 19539 : G578DRAFT_scaffold00001.1] 146 | MKPIESAEDISELAFGFMASKALFAALHVDVFGALSDGPKSITDLAAATK 147 | VPAQRMQTLVTALVSVGLLTRNDGKIANAPASDAYLVRDNTNYFGDYLRF 148 | QIDRQMYPFMENLDKVLLGDTDDIEYPDYASWMADRHHAELFSRSQHSGS 149 | LGPGAVLAKRLLKEGAVSEDVGSMLDVGGGSGAFSIMFCKRFPKLHATVL 150 | DFPNVIEVGKTFVAEEEMSDRIDFVAGDGTNANWPNDQDIVLMSYLFSGV 151 | PEEAIDKLCSDAFRVLKPGGLIAIHDFMVTDDRKGPALAALWQLQHMVYT 152 | PDGVGMTPGFVEKHLKKAGFEIEIDDDLIPGMTRVMTARKPG 153 | 154 | >2523510257 salmuc_01178 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Salipiger mucosus DSM 16094 (scaffold version) : salmuc_scaffold1_size1200394.1] 155 | MGLATQADEISDIAFGFMGSKALFVALDHKVFTHLAEAPLTADELAAKSG 156 | LHRDRAETLLTSLAGLGLLSVENGRYSNSPAAEAFLVQGAKYDFGDYLRL 157 | QVGRQMYALMDQLGPAISGEMPEGSTGSYEEWFSDPAEARLYSESQHAGS 158 | FGPARQLAKRIDLSGAKKLLDVGGGTGAFAITLCKAFPNLEATIVEFPNV 159 | ASLGREFVEKAGLSDRITYVDGNALETDWPGGQDVVLMSYLFSGVPGETH 160 | DGLIKGAYERLVPGGQLLIHDFIVHGDRSGPALAALWQLQHTAFTPQARS 161 | VDAASLASEMEAAGFSGVSVDEMIPQMTMLAQGKREG 162 | 163 | >2523943366 G568DRAFT_00664 O-methyltransferase [Sediminimonas qiaohouensis DSM 21189 : G568DRAFT_scaffold00002.2] 164 | MTLATKADEISEIAFGFMGSKALFAALQLKVFTHLAEGPLSAEELAQKAN 165 | VHPDRAQTLLTALASLGLIEVQDGNRFGNAPASQAFLVNGAKYDFGDYLR 166 | LQVGRQMYSLLDQIENALQGTMDADDTASYAEWFADPDEARLYSESQHSG 167 | SVGPARQLSGALDLSGARRMLDVGGGTGAFAITLCKDNPDLTASIVDFPN 168 | 
VAELGRGYVEKAGLSDRIAYIPGNALEAEWPADQDVILMSYLLSGVPGET 169 | HVDLIRRAYDHLVPGGRLLIHDFVVEKERTGPKLAALWQLQHTAFTPEAR 170 | SLDAGWLEVALDEIGFIDAQVAPLIPKMTMLAQGTKPAA 171 | 172 | >2524485630 G455DRAFT_02842 Dimerisation domain-containing protein [Donghicola xiamenensis DSM 18339 : G455DRAFT_scaffold00015.15] 173 | MTVLTEADQISEIAFGFMGSKALFAALGAGVFTHLADGPQSCAELAENCP 174 | LDEARTETLMTALAGLGLVQVQGDGRFANSPAADSFLVKGAKYDFGDYLR 175 | LQVGQQMYGLLDQIDDALQDNLPEEATASYAEWFSDPEEARLYSASQHSG 176 | SLGPARQLARQIDLSGAKTLLDVGGGTGAFAITLCQAFPDLTATIVDFPN 177 | VAALGRNYVARAGLSDRIAYVEGNALETAWPGAQDAVLMSYLFSGVPGET 178 | HDGLVGRAFDHLAPGGRFLLHDFVVRADRTGPKLAALWQLQHTAFTPRAR 179 | SLDEGWLKQALAKAGFSDIDVGVMIPKMTMLASAVKPAQEGEPAKLAGA 180 | 181 | >2525377636 K328DRAFT_2305 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Nisaea denitrificans DSM 18348 : K328DRAFT_scaffold00002.2] 182 | MTLLTNAEEISDIAFGFMGSKALFAALHHGVFTCLADGPLSVEEMAAATG 183 | LHPDRVQTLLTALASLGVVSAVEGRFANSPAAESFLVKGAKYDFGDYLRL 184 | QVDRQMYGLLDQIEDAIANNLPDDATSSYADWFSDPEQAKLYSNSQHAGS 185 | LGPARGLAKLIDLSGGKKLLDVGGGTGAFAITLCKAFADLAATIVDFPNV 186 | AALGKGYVEKAGLSDRIEYVIGDALRTEWPREQDAILMSYLFSGVAGDEH 187 | DSLLKRAYDHLVPGGRLLIHDFVVTADRTGPKLAALWQLQHTAFTPEARS 188 | LDDEWLAEQLKKTGFTDVKVGPMIPGMTMLAEAVRPE 189 | 190 | >2525928126 G572DRAFT_4219 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Stappia stellulata DSM 5886 : G572DRAFT_scaffold00007.7] 191 | MPTLTEAEEISDIAFGYMGSKALFAALHFGIFTQLAEAPATVDALAKTVG 192 | LPGERCRTLLTALAALGLVEQDDGTYRNSPAAERFLVKGAKYDFGDYLRL 193 | QVGQQMYPLMDQIEPALSGDLPDDATASYADWFSDPKEARLYSESQHAGS 194 | LGPALGLARRIDLSKARKLLDVGGGTGAFAITLCQSFPELSATVVEFPNV 195 | AKLGRSFVDKAGLSERISYLEGNALETEWPRDQDAILMSYLLSGVPDHAH 196 | EDLFKRAFDHLAPGGQLMVHDFVVDSDRAGPKNTALWQLQHTAFTPEARS 197 | IADDWLEKQMIKAGFENVGVEPLIPGMTKLALGTRPA 198 | 199 | >2527024186 S120_00996 Methylase involved in ubiquinone/menaquinone biosynthesis [Oceanicola sp. 
S124 : S120_gi339779149.57] 200 | MTLLTEADDVSRIAFGFMGSKALFAALELGIFTALAGGDKTAAELAEAAG 201 | VHEDRAMTLLTALAGLGLVSVHEGRFANSPAAGAFLVKGAKYDFGDYLRL 202 | QVGKQMYPLLDQIEGALKGELGEGDTASYADWFSDPDEAKLYSESQHAGS 203 | LGPARQLAKALDLSGARRMLDVGGGTAAFDIVLCGANPGLTSTVLEFPNV 204 | AALGRRYVEAAGLSDRITYLEGNALETDFPGGQDIVLMSYLFSGVPGEAH 205 | EALIARAY 206 | 207 | >2541035415 K224DRAFT_3290 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Oceanicola sp. HL-35 : K224DRAFT_scf7180000000020_quiver.5] 208 | MTLLTEADEISHIAFGFMGSKALFAALELKVFTHLKDGPRTAVDLAAKAG 209 | VHADRAETLLTALAGLGLVSVEEGRFGNSPAAEAFLVNGAKYDFGDYLRL 210 | QVGQQMYGLLDQIEGALQGDLGAEDTKSYAEWFADPDQARLYSASQHAGS 211 | LGPARQMARTLDLSGARQMLDVGGGTGAFAISLCKKNPDLRATIVEFPNV 212 | ATLGQDYVARAALSERIAYLPGNALEADWPDGQDVVLMSYLFSGVPGAAH 213 | GGLLAKALAALNPGGRLLIHDFVVRADRSGPVLAALWQLQHTAFTPKARS 214 | LDEGWLARALPDAGFAEVSIAPMIAEMTMLAQGVKPG 215 | 216 | >2553022978 C651DRAFT_03472 Methylase involved in ubiquinone/menaquinone biosynthesis [Donghicola sp. S598 : C651DRAFT_AMWC01001928_1.1523] 217 | AFGFMGSQALFTALDQQVFTHLADGALSAEEMAERTDLHRDRAETLLTAL 218 | AGMGLVTVDDGLFSNSPAAEAFLVKGAKYDFGDYIRLQVGKQMYGLMGQL 219 | GGAVSGTLSEEETGSYEQWFSDPEEARLYSESQHAGSHGPARQMTRRVDL 220 | SGAKTLLDVGGGTGAYAITFCNAFPDLTATIVDFPNVAALGRSYVDEAKL 221 | SARISYVEGN 222 | 223 | >2558678304 Q344DRAFT_1000 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Oceanicola nanhaiensis DSM 18065 : Q344DRAFT_scaffold00001.1] 224 | MTPLTEADEISDIAFGFMGSKALFAALKFGVFTKLSEGPGTAADLAGDEI 225 | HPDRMETLLTALAGLGLVTVEDGRFSNSPAAEAFLVRGAKYDFGEYLRLQ 226 | VGQQMYPLLDQIERALDGSLGAEATKSYAEWFSDPEEARLYSESQHAGSI 227 | GPARQLAKALEIEGPARLLDVGGGTGAFAITLCKAFPELTATIVDFPNVA 228 | ALGRRYVEEAGLSDRIAYADGDALEKGWPGGQDIVLMSYLFSGVPGDAHH 229 | GLIADAWTALRPGGRILVHDFVVDESREGPRLAALWQLQHTAFTPEARSL 230 | DDGWLKAALEGAGFTEVGVRPMIPEMTMLAEGVKPA 231 | 232 | >2565720611 OCH239_01665 Dimerisation domain-containing protein [Roseivivax halodurans JCM 10272 : JALZ01000001] 233 | MGPITEADQISDIAFGFMGSKTLFVALDHQIFTHLSKGAGDADEIAVKTG 234 | 
LHRDRAETLLTGLAGLGLLTVEEGRFANAPGAEAFLVHGAKYDFGDYLRL 235 | QVGQQMYALIHQLGAAIAGRMPANATASYEDWFEDAEEARLYSESQHAGS 236 | LGPARQVTRKVDLSDAKTLLDVGGGTGAFAITLCDAFPELTATVVDFPNV 237 | AKLGRGYVEEAGLSDRISYVDGNALRTDWPGGQDIVMMSYLLSGVPGAEH 238 | ETLIRRAYETLAPGGRLLVHDFVVEADRSGPKLAALWQLQHTAFTPEARS 239 | VDVAGLEHLLGANGFAEVETIEMIPQMTKLCVGRRSA 240 | 241 | >2579689465 U879_03735 Dimerisation domain-containing protein [Defluviimonas sp. 20V17 : AYXI01000038] 242 | MMESIVDSMLAQETPAPAGNPIESAEELSSIAFGFMASKALFAGLHVDLF 243 | TLLADGPKTAEELAKAAGIPLNRIVTLTTALASVGVVSISDRKLIQNSSA 244 | AQNFLAKQSKYDFGDYLRYQIDQQMYPFLLQLNAVMKGDLSDDAIASYQH 245 | WMADEEQASVYSESQHAGSLGPGRTLARKVDLAKASTLLDVGGGTGAMTI 246 | SLCNEYPDLHATIIDFPNVAEIGWRFVSEAGLVDRVRYIPGNAVQAQWPG 247 | RQDAILMSYLMSGVPGEAVADLLRKAFDALAPGGKLMVHDFMVEEDRRGP 248 | ALAALWQLQHMAFTPDAHSLSVGWLTEAGRKIGFAVDDVDNLIPAMTKLV 249 | VFAKPS 250 | 251 | >2593183274 N556DRAFT_08021 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Rhodospirillales bacterium URHD0017 : N556DRAFT_scaffold00054.54] 252 | MTIKPLDDVRQISALTYGFIASKALFAALDLDLFTRIATGTTDLAALAAA 253 | TGVAPNRLRTLLTALKTVGLVTETDGKFANAPAAGTYLVAGAPGDFRDYI 254 | SVVNGGFLYEGLRHLSKAMRGERIFPDKGFYEGIVYSEGGVGGEAFSRAQ 255 | HAGSLGPAQLMARRVELGNATTFLDVGGGSGAYSLAFLRKNPKLRATILD 256 | FPQTVDTARRYAAEAGMADRVTHVTGNALSTPWPRDQDVVLMSYVWSAVG 257 | GNDIRTLASRAFEALKPGGIVLVHDFMVDDDHDGPAFAAWYLLASLPDNP 258 | QAECLSPGLVERRLGDAGFAVGGTEPMLAEITALTRARKP 259 | 260 | >2597124009 JM93DRAFT_02836 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Roseibium hamelinense ATCC BAA-252 : JM93DRAFT_scaffold00006.6] 261 | MAKQPLETAEQISEIAFGFMGSKALFAALHVDLFSALSDKELAADEVAAE 262 | TGLDSDRATTLLTALNTLGLVEADDGRFKNAPAADAFLVTGRKYAFGDYL 263 | RFQIDRQMYPFLTQLNDALTDNLAPDQVSSYAQWFSDADEARLYSQSQHA 264 | GSLGPGRTLAKLVDLSQTRSLLDVGGGTGAFSISLCEAYPELKSTILDFP 265 | NVAKVGEEFIKKAGLSDRVWYQPGNALDDVWPAEADAVLMSYLFSGVPGK 266 | AIPGLVKNAMVTLPPGGHFMVHDFMVSGERTGPKLAALWQLQHTAFNPHA 267 | KSITSDYVSDLMEAAGFEDVNTREMIPGMTTLVWARKPQ 268 | 269 | >2599852567 Ga0056027_02933 Dimerisation 
domain-containing protein [Thalassobaculum litoreum DSM 18839 : Ga0056027_scaffold00008.8] 270 | VKPIESAEDISELAFGFMASKALFAALHVDVFGALSDGPKSITDLAAATK 271 | VPAQRMQTLVTALVSVGLLTRNDGKIANAPASDAYLVRDNTNYFGDYLRF 272 | QIDRQMYPFMENLDKVLLGDTDDIEYPDYASWMADRHHAELFSRSQHSGS 273 | LGPGAVMAKRLLKEGAVSEDVGSMLDVGGGSGAFSIMFCKRFPKLHATVL 274 | DFPNVIEVGKTFVAEEEMSDRIDFVAGDGTNANWPNDQDIVLMSYLFSGV 275 | PEEAIDKLCSDAFRVLKPGGLIAIHDFMVTDDRKGPALAALWQLQHMVYT 276 | PDGVGMTPGFVEKHLKKAGFEIEIDDDLIPGMTRVMTARKPG 277 | 278 | >2609105254 HLUCCA09_14310 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Rhodobacteraceae bacterium bin09 : ITZX_scaf_510] 279 | MAGLTEAEEISDIAFGYMGSRALFAALEFGIFTRLADGALPAAEIAAASG 280 | LPEERCRTLMTALTALGLTVRAGEGFANSPAAEAFLVKGARYDFGDYLRL 281 | QVGRQMYPLMDQIGEALAGRLPEGATASYADWFADAEEARLYSESQHAGS 282 | LGPARGLAKRVDLSGVARMLDIGGGTGAFAITLCRANPDLHVTVVDFPNV 283 | ALLGEAYVADAGLSDRITYVTGDALAAEWPGDQDAILMSYLLSGVPDHAH 284 | EGLIGRAFDHIGSGGRLLVHDFIVDEDRTGPKNTALWQLQHTAFTPEARS 285 | LSDGWLADRMCAAGFCGIEVEPLIPGMTKLASARRPDA 286 | 287 | >2609135787 HLUCCO17_14775 Dimerisation domain-containing protein [Rhizobiales bacterium HL-109 : ITZY_scaf_197] 288 | MLRTRADEISEIAFGFMASKALFSALHFDLFTHLNDGPKTAAELGKAAGL 289 | HPDRAETLLTALAGLGLVAVDDGKFSNAPASEAFLVKGAKYDFGDYLRRQ 290 | VGQQMYGLLDQVEDALSGNLPAEATGSYAEWFSDPAEARLYSESQHAGSL 291 | GPARQLVKALDLSGAKRMLDVGGGTGAFAITFCKAFPELGATVVDFPNVA 292 | AIGREFVDKAGLGDRIHYIEADGLKADWPRNQDVILMSYLLSGIPGETHE 293 | GLIKRAYDHLAPGGTLLIHDFIVCADRTGPKLAALWQLQHTAFTPQARSV 294 | DDAWLVESMERHGFRDASVETMIPEMTMLAKAVKPA 295 | 296 | >2616590712 Ga0070519_10532 Dimerisation domain-containing protein [Citreicella aestuarii DSM 22011 : Ga0070519_1053] 297 | LAQPDQPLTDADQVSRIAFGFMGSQALFAALECGVFTALAEEPGDAAAVA 298 | NRIGLHPDRAETLLTALAGLGLVAVHDGVFANSPAAAAFLVKGVKYDFGD 299 | YLRLQVGRQMYGLMGQLVPAVTGTLPETATASYEQWFSDPEQARLYSESQ 300 | HAGSLGPARQLARRLDLSGARRMLDVGGGTGAFAITLCGAFPQLAATIVD 301 | FPNVATLGRGHVAEAGLSDRITYVEGNALDTPWPGGQDVILMSYLFSGVP 302 | GDAQAGLIARAFDCLAPGGRLLIHDFVVHADRTGPPLAALWQLQHTAFTP 303 | 
EARSLDTAGLSTALNGAGFVDVLIDEMIPQMTMLATAHRPG 304 | 305 | >2616591799 Ga0070518_101426 Dimerisation domain-containing protein [Litorimicrobium taeanense DSM 22007 : Ga0070518_101] 306 | MLTQDSMPPSAKAIETAEDLSSIAFGFMASKALFAGLHVDIFSALADGPK 307 | SAEELAGAADIPINRIVMLTTALASVGLLTIGDDKKVQNSPAAQSFLSKQ 308 | TKYDFGDYLRYQIDQQMYPFLLQLNAVMKGDLSDDAIASYRHWMADEEQA 309 | SVYSESQHAGSLGPGRTLARKVDLATASTLLDVGGGTGAMTISLCREYPN 310 | LQATIIDFPNVTEIGWRFISEADLVDRVRYIPGNAVEVQWPGNQDAILMS 311 | YLMSGVPGDNVEELVHKAFDALAPGGKLMVHDFMVEEDRRGPALAALWQL 312 | QHMAFTPDAHSLSVGWLTQTGKRIGFDVDQVDNLIPAMTKLVVFSKPS 313 | 314 | >2617877652 Ga0070148_1037 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Yangia pacifica CGMCC 1.3455 : Ga0070148_103] 315 | MALLTEAHEISDIAFGFMASKALFAALEFGIFDTLAEGPLSAEELGAACD 316 | LHPERARTLLTALAGLGVVSVEGDRYSNSPAAASFLVKGEKYDFSDYLRL 317 | QVGRQMYPLMSQLEPALKDALPEEATGSYAEWFADPEEARLYSDSQHAGS 318 | LGPAGVLAKRLDLSEARSMLDVGGGTGAFSITFCKAFPHLRSTIVDFPNV 319 | VTVGREKVAAAGLSERITYLEADATSLDWPDGQDVVLMSYLLSGVPAEAH 320 | APLFDAAYRALRPGGQLLLHDFIVRGDRSGPHLTALWQLQHTAFTPQAAS 321 | LDAEGLAAALEAAGFAEVGVRAMIPEMTMLAEARRPE 322 | 323 | >2620103054 Ga0069992_1037 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Yangia pacifica DSM 26894 : Ga0069992_103] 324 | MALLTEAHEISDIAFGFMASKALFAALEFGIFDTLAEGPLSAEELGAACD 325 | LHPERARTLLTALAGLGVVSVEGDRYSNSPAAASFLVKGEKYDFSDYLRL 326 | QVGRQMYPLMSQLEPALKDALPEEATGSYAEWFADPEEARLYSDSQHAGS 327 | LGPAGVLAKRLDLSEARSMLDVGGGTGAFSITFCKAFPHLRSTIVDFPNV 328 | VTVGREKVAAAGLSERITYLEADATSLDWPDGQDVVLMSYLLSGVPAEAH 329 | APLFDAAYRALRPGGQLLLHDFIVRGDRSGPHLTALWQLQHTAFTPQAAS 330 | LDAEGLAAALEAAGFAEVGVRAMIPEMTMLAEARRPE 331 | 332 | >2620334468 Ga0069990_11421 Dimerisation domain-containing protein [Roseovarius indicus DSM 26383 : Ga0069990_114] 333 | MLDDDTPTTQVKAIETAEDLSSIAFSFMASKALFAGLHADIFSQLSDGPK 334 | SAAELAKAVDIPINRIVMLTTALASVGVLVIGDDEKIENSPAAQSFLSKQ 335 | SKYDFGDYLRYQIDQQMYPFLLQLNAVMKGDLTEDAISSYKHWMADEEQA 336 | SVYSESQHAGSLGPGRTLARKVDLSAARTLLDVGGGTGAMTISLCKAYEN 337 | 
LHATIIDFPNVSEIGWRFISEAGLVDRVRYIPGNAVEVQWPGNQDAILMS 338 | YLMSGVPGDDVSALLQNAYDSLAPGGKLLVHDFMVEEDRRGPALAALWQL 339 | QHMAFTPDARSLSVGWLTEAGKKLGFEVELADNLIPAMTKLVVFSKPA 340 | 341 | >2620496472 Ga0070504_10014 Dimerisation domain-containing protein [Antarctobacter heliothermus DSM 11445 : Ga0070504_1001] 342 | MSLLTDADEISQIAFGFMGSKALFAALELGIFTHLAEGAMTATELAQVSK 343 | IHEDRVTTLLTALAGMGLVAVDEDRFSNAPAAQAFLVQGAKYDFSDYLRL 344 | QVGRQMYPLMDQLGDALAGTMAEDATASYADWFSDPEQARLYSESQHSGS 345 | LGPARQLVRALDLSGKHKMLDLGGGTAAFDITLCKANPELTSTILEFPNV 346 | AALGRTYVDKAGLTDRITYLEGNALETPWPQGQDVVMMSYLLSGVPGDTH 347 | ARLIRQAFEVLNPGGIVLIHDFVVHEDRTGPSLAALWQLQHTAFTPEARS 348 | LDAGWLDGALAGAGFAGITVAPLIPEMTMLAQAHKP 349 | 350 | >2622865483 Ga0070237_103213 Dimerisation domain-containing protein [Thalassobius gelatinovorus DSM 5887 : Ga0070237_103] 351 | MTVLTEADQISDIAFGFMGSKALFAALGAGVFTHLDDGPLSCAELATLCP 352 | LDEIRTEILLTALAGLGLVEGENGKFKNSPAAESFLVKGAKYDFGDYLRL 353 | QVGQQMYGLLDQIDDALQDKLPDEATASYAEWFSDPEEAKIYSASQHSGS 354 | LGPARQLARKIDLSDARHLLDVGGGTGAFAITLCKAFPNLTATIVDFPNV 355 | AILGREYVARAGLSDRISYVEGNALETEWPGGQDVVLMSYLFSGVPGEAH 356 | DGLLGRARDQLKPGGRFLLHDFVVREDRTGPKLAALWQLQHTAFTPRARS 357 | LDEGWLRTSLDKAGFEDISVDVMIPEMTMLASAVKPA 358 | 359 | >2623181840 Ga0070585_101672 Dimerisation domain-containing protein [Tropicibacter naphthalenivorans DSM 19561 : Ga0070585_101] 360 | MPVLTEAEDISDIAFAYMGSKALFAALEFGVFSALSGGAVSLDELSAQTG 361 | LPRERCRTLMSALTSLGLTTRDAAGRFANSPAAEAFLVKGAKYDFGDYLR 362 | LQVGQQMYPLMDQIEKALPGDLGADETASYAQWFSDPEEARLYSQSQHAG 363 | SLGPARGLARRVDLSQARHLLDVGGGTGAFAITLCKAFPQLKATVIDFPN 364 | VAALGETYVADAGLSDRITYQHGNALEADWPSGVDAILMSYLFSGVPDHT 365 | HEGLLTKAFDRLHPGGKVLIHDFIVDADLSGPKNTALWQLQHTAFTPEAR 366 | SLDDAWLAQALGRAGFDAVEVAPLIPGMTKLAVGVKPA 367 | 368 | >2635168442 Ga0074799_106217 Dimerisation domain-containing protein [Roseivivax sediminis DSM 26472 : Ga0074799_106] 369 | MAAITEADQISDIAFGFMGSKALFVALDHQIFTHLSKGAADVDEISEKTG 370 | LHPDRAETLLTALAGLGLLTVSDGRFANAPGAEVFLVQGAKYDFGDYLRL 371 | 
QVGRQMYGLISQLGAAISGNMPPDATVSYEDWFEDAEEARLYSESQHAGS 372 | LGPARQVTKKVDLSGARTLLDVGGGTGAFAITLCQAFPELTATVVDFANV 373 | SKLGRRYVDEAGLSDRIRYVDGNALRTDWPDGQDIVMMSYLLSGVPGSEH 374 | DGLVARAYHHLVPGGRLLIHDFVVEADRSGPKLAALWQLQHTAFTPEARS 375 | VDAAGLSNLLSGAGFAETEVIEMIPAMTKLAVGRRPT -------------------------------------------------------------------------------- /KEGGDecoder/DMSPSynthase/img_dmsp_synthase_dsyB.faa: -------------------------------------------------------------------------------- 1 | >638883374 OB2597_06780 O-methyltransferase, family 2 [Oceanicola batsensis HTCC2597, unfinished sequence: NZ_AAMO01000001] 2 | MHPATEADEISAIAFGFMGSKALFVALDLGVFTKLAGGSATAEEMAQATG 3 | IHRDRAETLLTALTGLGLLTVKAGRFANSPAADSFLVKGAKYDFGDYLRL 4 | QVGRQMYGLLDQLDAAVQGEMTEGATASYEQWFSDPDQARLYSESQHAGS 5 | LGPARQLAKKVDLSGARRLLDVGGGTGAFAITLCRAFPELTATVVEFPNV 6 | ATLGRKYVEEAGLSDRITYVEGNALSTDWPEGQDTVLMSYLFSGVPGDAH 7 | TDLIADARAALAPGGQVVIHDFMVEADRSGPELAALWQLQHTAFTPEARS 8 | VDTGTLAEELTQGGFEKVDIVEMIPQMTKVAVGRRAA 9 | 10 | >639943763 SIAM614_21095 probable acetylserotonin O-methyltransferase [Stappia aggregata IAM 12614, unfinished sequence: NZ_AAUW01000016] 11 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLSPDEVSR 12 | KSELDLDRATTLLTALASLGLVRREGTGFTNSPAAEAFLVKGRKYDFGDY 13 | LRFQIDKQMYPFMTQLNDALTDSLEDGQVASYEDWFSDPEEARLYSRSQH 14 | AGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDF 15 | PNVAKVGEEFIAEEGLQDRIQYAPGNALKDPWPDSADAVLMSYLFSGVPG 16 | TAIPGLVRKAFEVLTPGGDLMVHDFMVDENRDGPKLAALWQLQHTAFNPE 17 | ARSITSSYVAGLMEAAGFIDIAVEVMIPGMTMLVHGRKPD 18 | 19 | >640641694 SSE37_19772 probable acetylserotonin O-methyltransferase [Sagittula stellata E-37, unfinished sequence: NZ_AAYA01000001] 20 | MAVLTEAEDISDIAFGYMGSKALFAALEFGVFTALSQGNIGLTGIAGATG 21 | LPKERCRTLLSALVGLGLVTHDDAGFANSPAAESFLVKGARHDFGDYLRL 22 | QVGRQMYPLMDQIEKALTGDLEDDHTGSYAQWFADPEEARLYSESQHAGS 23 | LGPARGLAKRVDFSGIGSLLDVGGGTGAFAITLARRNPNLRITVLDFPNV 24 | AKLGEAYVADAGLSSQIGYCHGNALESGWPGGQDAVLMSYLFSGVPDHSH 25 | AGLLRKAHDALNPGGQVLIHDFIVDADLSGPKNTALWQLQHTAFTPEARS 26 | 
LDDDWLIGALEAAGFSDADVGPLIPGMTKLATARKA 27 | 28 | >641429164 BAL199_15988 probable acetylserotonin O-methyltransferase [alpha proteobacterium BAL199, unfinished sequence: NZ_ABHC01000001] 29 | MTLLTKAEEISDVAFGFMGSKALFAALHHQVFTRLADGPLTAEQAAEATG 30 | LHPERVRTLLTALAALGILSVEGGRFGNSPAADSFLVKGAKYDFGDYLRL 31 | QVDRQMYTLLDQIELALANKLPDDATASYAEWFSDPEEARLYSMSQHAGS 32 | LGPALGLAKSVDLSGARRLLDVGGGTGAFAITLCEAFPDLRPTVVDFPNV 33 | VSLGAKYAEDAGLADRITYVPGDALKTEWPGDQDAVLMSYLFSGVPGEAH 34 | DGLLARAFERLAPGGRILIHDFVVSADRTGPKLAALWQLQHTAFTPKARS 35 | LDDAWLAQQLTKAGFAEVSVTPMIPGMTMLAQGVRPG 36 | 37 | >648280724 RB2654_17946 tetracenomycin polyketide synthesis 8-o-methyltransferase, putative [Maritimibacter alkaliphilus HTCC2654 1099457000263: NZ_AAMT01000021] 38 | MAGLTEAEDISDIAFAYMGSKALFAAIKFDIFTTLAAGPLDAGTLAQRVG 39 | LPTERCRTLMTALTSLDLTTVDDDGTFANSPAAAAFLVKGAKYDFSDYLE 40 | RQVGQQMYPLMDQIDDALSGDMAEDATDSYDKWFSDPEEARLYSESQHAG 41 | SLGPARGLAKRVDLSGVRTLLDVGGGTGAFAITLCKANPDLTATVIDFPN 42 | VAALGETYVADAGLSDRVTYRHVNALEGDWPDGQDAILMSYLFSGVPDHA 43 | HEGLIAKAFAHLAPGGRLLIHDFVVDADLSGPKNTALWQLQHTAFTPEAR 44 | SLDDDWLAKAIETAGFTDVAVGPLIPGMTKLAQGTRP 45 | 46 | >648285806 R2601_26831 O-methyltransferase, family 2 [Pelagibaca bermudensis HTCC2601 1100011001360: NZ_AATQ01000019] 47 | MGPVTDADEISRIAFGFMGSQALFTALDHGLFTVLAEGALDAEALADRTG 48 | LHRDRAETLLTALAGLGLVTVSDGRFANSPAAEAFLVKGAKYDFGDYLRL 49 | QVGKQMYGLMGQLGDAVSGALGEGATASYEQWFSDPEQARLYSESQHAGS 50 | LGPARQLAKRIDLSGARQLLDVGGGTGAFAITLCKAFPELSATIVDFPNV 51 | AALGRRHVAEAGLSDRIAYVEGNALETDWPGGQDVVLMSYLFSGVPGSAH 52 | EGLLRAAHDRLMPGGRLLIHDFVVHADRSGPPLAALWQLQHTAFTPEARS 53 | VDAEGLARDLWAAGFAEVTVSEMIPQMTMLAEARRPE 54 | 55 | >2514595470 C357_08915 Dimerisation domain-containing protein [Citreicella sp. 
357 : NZ_AJKJ01000067] 56 | MAQPDQPLTDADQVSRIAFGFMGSQALFAALESGVFTALAEEPGDAAAVA 57 | NRIGLHPDRAETLLTALAGLGLVAVHDGVFANSPAAAAFLVKGVKYDFGD 58 | YLRLQVGRQMYGLMGQLVPALTGTLPETATASYEQWFSDPEQARLYSESQ 59 | HAGSLGPARQLARRLDLSGARRMLDVGGGTGAFAITLCGAFPQLAATIVD 60 | FPNVATLGRGHVAEAGLSDRITYVEGNALDTPWPGGQDVILMSYLFSGVP 61 | GDAQAGLIARAFDCLAPGGRLLIHDFVVHADRTGPPLAALWQLQHTAFTP 62 | EARSLDTAGLSTALNGAGFVDVRIDEMIPQMTMLATAHRPG 63 | 64 | >2517312627 ladfl_04844 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Labrenzia alexandrii DFL-11 : ladfl_LADFL_1.1] 65 | LSAAKCLESAEDISDIAFGFMGSKALFSALNSDVFSLLSEKTLLPSEVAE 66 | GSNLDTERATTLLTALTALGLVRRDGDGFTNSPAAEAFLVKGKKYDFGDY 67 | LRFQIDRQMYPFMTQLNDALEGTLEEEQVASYEEWFSDAQEAELYSQSQH 68 | AGSLGPGRSLAKLVDLSSVRKLLDIGGGTGAFSISLCKAYPGLRSTILDF 69 | PNVAEVGKGFIEAEGLENRITYQPGNALKDTWPAQADAVLMSYLFSGVPG 70 | ASIPGLVRKSMDTLTPGGTYMVHDFMVDESREGPKLAALWQLQHTAFNPE 71 | AKSITSTYVSGLMEAAGFKDVTIKEMIPGMTSLVFGRKPA 72 | 73 | >2517908241 C507DRAFT_00326 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Amorphus coralli DSM 19760 : C507DRAFT_scaffold00001.1] 74 | MTLLTTAEEISDIAFGFMGSKALFAALHFGVFTHLAERPMTAEELGQAAG 75 | LPAERARTLLTAVASLGLVSVEDGRFANAPAAEAFLVKGAKYDFGDYLRL 76 | QVDRQMYGLLDQIEPALANRLPEDATGSYAEWFSDPEQARIYSESQHAGS 77 | LGPARGLAKSVDLAGARTLLDVGGGTGAYAITLCKANPDLAATVVDFPNV 78 | AALGREYVAEAGLADRVSYVDGNALETDWPAGQDSVLMSYLFSGVPGEEH 79 | DRLVRRAYDTLTPGGLYMVHDFVVDADRTGPKLAALWQLQHTAFTPTARS 80 | LDEATLADMMTGAGFEGVEVREMIPGLTMLATGRKPG 81 | 82 | >2520173307 C882_1540 acetylserotonin N-methyltransferase [Caenispirillum salinarum AK4 : ANHY01000002] 83 | MKPLGNVERISGIAFGFMASKALFSALHIGVFDQLAEGPKTVGELAEATG 84 | ATEHALETLLTGLVSLELVQPTSDGKAYKNAEDTNTFLVSTSRHYYGDYL 85 | RYQIDQQMYPFMDNLGHAIKGDTDKIEFDTYETWMADKEQAEIFSRSQHG 86 | GSLGPGAVVAKSWDLSDARTLLDVGGGTGAFSIMMCKRYPELKATVLDFP 87 | NVVALAEEYIAEADMSDRIDVIGGNGLTSQWPAERDVVLMSYLFSGVPAD 88 | GLETLVKNTWAALKPGGRVIIHDFMVDDDRTGPPLAAMWALQHMVFTPRA 89 | ASLTPGRVMELLKAQGFGDMEEKPLIPGLTRVVSAVKPQA 90 | 91 | >2523405058 G578DRAFT_0574 Dimerisation 
domain-containing protein [Thalassobaculum salexigens DSM 19539 : G578DRAFT_scaffold00001.1] 92 | MKPIESAEDISELAFGFMASKALFAALHVDVFGALSDGPKSITDLAAATK 93 | VPAQRMQTLVTALVSVGLLTRNDGKIANAPASDAYLVRDNTNYFGDYLRF 94 | QIDRQMYPFMENLDKVLLGDTDDIEYPDYASWMADRHHAELFSRSQHSGS 95 | LGPGAVLAKRLLKEGAVSEDVGSMLDVGGGSGAFSIMFCKRFPKLHATVL 96 | DFPNVIEVGKTFVAEEEMSDRIDFVAGDGTNANWPNDQDIVLMSYLFSGV 97 | PEEAIDKLCSDAFRVLKPGGLIAIHDFMVTDDRKGPALAALWQLQHMVYT 98 | PDGVGMTPGFVEKHLKKAGFEIEIDDDLIPGMTRVMTARKPG 99 | 100 | >2523510257 salmuc_01178 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Salipiger mucosus DSM 16094 (scaffold version) : salmuc_scaffold1_size1200394.1] 101 | MGLATQADEISDIAFGFMGSKALFVALDHKVFTHLAEAPLTADELAAKSG 102 | LHRDRAETLLTSLAGLGLLSVENGRYSNSPAAEAFLVQGAKYDFGDYLRL 103 | QVGRQMYALMDQLGPAISGEMPEGSTGSYEEWFSDPAEARLYSESQHAGS 104 | FGPARQLAKRIDLSGAKKLLDVGGGTGAFAITLCKAFPNLEATIVEFPNV 105 | ASLGREFVEKAGLSDRITYVDGNALETDWPGGQDVVLMSYLFSGVPGETH 106 | DGLIKGAYERLVPGGQLLIHDFIVHGDRSGPALAALWQLQHTAFTPQARS 107 | VDAASLASEMEAAGFSGVSVDEMIPQMTMLAQGKREG 108 | 109 | >2523943366 G568DRAFT_00664 O-methyltransferase [Sediminimonas qiaohouensis DSM 21189 : G568DRAFT_scaffold00002.2] 110 | MTLATKADEISEIAFGFMGSKALFAALQLKVFTHLAEGPLSAEELAQKAN 111 | VHPDRAQTLLTALASLGLIEVQDGNRFGNAPASQAFLVNGAKYDFGDYLR 112 | LQVGRQMYSLLDQIENALQGTMDADDTASYAEWFADPDEARLYSESQHSG 113 | SVGPARQLSGALDLSGARRMLDVGGGTGAFAITLCKDNPDLTASIVDFPN 114 | VAELGRGYVEKAGLSDRIAYIPGNALEAEWPADQDVILMSYLLSGVPGET 115 | HVDLIRRAYDHLVPGGRLLIHDFVVEKERTGPKLAALWQLQHTAFTPEAR 116 | SLDAGWLEVALDEIGFIDAQVAPLIPKMTMLAQGTKPAA 117 | 118 | >2524485630 G455DRAFT_02842 Dimerisation domain-containing protein [Donghicola xiamenensis DSM 18339 : G455DRAFT_scaffold00015.15] 119 | MTVLTEADQISEIAFGFMGSKALFAALGAGVFTHLADGPQSCAELAENCP 120 | LDEARTETLMTALAGLGLVQVQGDGRFANSPAADSFLVKGAKYDFGDYLR 121 | LQVGQQMYGLLDQIDDALQDNLPEEATASYAEWFSDPEEARLYSASQHSG 122 | SLGPARQLARQIDLSGAKTLLDVGGGTGAFAITLCQAFPDLTATIVDFPN 123 | VAALGRNYVARAGLSDRIAYVEGNALETAWPGAQDAVLMSYLFSGVPGET 124 | 
HDGLVGRAFDHLAPGGRFLLHDFVVRADRTGPKLAALWQLQHTAFTPRAR 125 | SLDEGWLKQALAKAGFSDIDVGVMIPKMTMLASAVKPAQEGEPAKLAGA 126 | 127 | >2525377636 K328DRAFT_2305 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Nisaea denitrificans DSM 18348 : K328DRAFT_scaffold00002.2] 128 | MTLLTNAEEISDIAFGFMGSKALFAALHHGVFTCLADGPLSVEEMAAATG 129 | LHPDRVQTLLTALASLGVVSAVEGRFANSPAAESFLVKGAKYDFGDYLRL 130 | QVDRQMYGLLDQIEDAIANNLPDDATSSYADWFSDPEQAKLYSNSQHAGS 131 | LGPARGLAKLIDLSGGKKLLDVGGGTGAFAITLCKAFADLAATIVDFPNV 132 | AALGKGYVEKAGLSDRIEYVIGDALRTEWPREQDAILMSYLFSGVAGDEH 133 | DSLLKRAYDHLVPGGRLLIHDFVVTADRTGPKLAALWQLQHTAFTPEARS 134 | LDDEWLAEQLKKTGFTDVKVGPMIPGMTMLAEAVRPE 135 | 136 | >2525928126 G572DRAFT_4219 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Stappia stellulata DSM 5886 : G572DRAFT_scaffold00007.7] 137 | MPTLTEAEEISDIAFGYMGSKALFAALHFGIFTQLAEAPATVDALAKTVG 138 | LPGERCRTLLTALAALGLVEQDDGTYRNSPAAERFLVKGAKYDFGDYLRL 139 | QVGQQMYPLMDQIEPALSGDLPDDATASYADWFSDPKEARLYSESQHAGS 140 | LGPALGLARRIDLSKARKLLDVGGGTGAFAITLCQSFPELSATVVEFPNV 141 | AKLGRSFVDKAGLSERISYLEGNALETEWPRDQDAILMSYLLSGVPDHAH 142 | EDLFKRAFDHLAPGGQLMVHDFVVDSDRAGPKNTALWQLQHTAFTPEARS 143 | IADDWLEKQMIKAGFENVGVEPLIPGMTKLALGTRPA 144 | 145 | >2527024186 S120_00996 Methylase involved in ubiquinone/menaquinone biosynthesis [Oceanicola sp. S124 : S120_gi339779149.57] 146 | MTLLTEADDVSRIAFGFMGSKALFAALELGIFTALAGGDKTAAELAEAAG 147 | VHEDRAMTLLTALAGLGLVSVHEGRFANSPAAGAFLVKGAKYDFGDYLRL 148 | QVGKQMYPLLDQIEGALKGELGEGDTASYADWFSDPDEAKLYSESQHAGS 149 | LGPARQLAKALDLSGARRMLDVGGGTAAFDIVLCGANPGLTSTVLEFPNV 150 | AALGRRYVEAAGLSDRITYLEGNALETDFPGGQDIVLMSYLFSGVPGEAH 151 | EALIARAY 152 | 153 | >2541035415 K224DRAFT_3290 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Oceanicola sp. 
HL-35 : K224DRAFT_scf7180000000020_quiver.5] 154 | MTLLTEADEISHIAFGFMGSKALFAALELKVFTHLKDGPRTAVDLAAKAG 155 | VHADRAETLLTALAGLGLVSVEEGRFGNSPAAEAFLVNGAKYDFGDYLRL 156 | QVGQQMYGLLDQIEGALQGDLGAEDTKSYAEWFADPDQARLYSASQHAGS 157 | LGPARQMARTLDLSGARQMLDVGGGTGAFAISLCKKNPDLRATIVEFPNV 158 | ATLGQDYVARAALSERIAYLPGNALEADWPDGQDVVLMSYLFSGVPGAAH 159 | GGLLAKALAALNPGGRLLIHDFVVRADRSGPVLAALWQLQHTAFTPKARS 160 | LDEGWLARALPDAGFAEVSIAPMIAEMTMLAQGVKPG 161 | 162 | >2553022978 C651DRAFT_03472 Methylase involved in ubiquinone/menaquinone biosynthesis [Donghicola sp. S598 : C651DRAFT_AMWC01001928_1.1523] 163 | AFGFMGSQALFTALDQQVFTHLADGALSAEEMAERTDLHRDRAETLLTAL 164 | AGMGLVTVDDGLFSNSPAAEAFLVKGAKYDFGDYIRLQVGKQMYGLMGQL 165 | GGAVSGTLSEEETGSYEQWFSDPEEARLYSESQHAGSHGPARQMTRRVDL 166 | SGAKTLLDVGGGTGAYAITFCNAFPDLTATIVDFPNVAALGRSYVDEAKL 167 | SARISYVEGN 168 | 169 | >2558678304 Q344DRAFT_1000 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Oceanicola nanhaiensis DSM 18065 : Q344DRAFT_scaffold00001.1] 170 | MTPLTEADEISDIAFGFMGSKALFAALKFGVFTKLSEGPGTAADLAGDEI 171 | HPDRMETLLTALAGLGLVTVEDGRFSNSPAAEAFLVRGAKYDFGEYLRLQ 172 | VGQQMYPLLDQIERALDGSLGAEATKSYAEWFSDPEEARLYSESQHAGSI 173 | GPARQLAKALEIEGPARLLDVGGGTGAFAITLCKAFPELTATIVDFPNVA 174 | ALGRRYVEEAGLSDRIAYADGDALEKGWPGGQDIVLMSYLFSGVPGDAHH 175 | GLIADAWTALRPGGRILVHDFVVDESREGPRLAALWQLQHTAFTPEARSL 176 | DDGWLKAALEGAGFTEVGVRPMIPEMTMLAEGVKPA 177 | 178 | >2565720611 OCH239_01665 Dimerisation domain-containing protein [Roseivivax halodurans JCM 10272 : JALZ01000001] 179 | MGPITEADQISDIAFGFMGSKTLFVALDHQIFTHLSKGAGDADEIAVKTG 180 | LHRDRAETLLTGLAGLGLLTVEEGRFANAPGAEAFLVHGAKYDFGDYLRL 181 | QVGQQMYALIHQLGAAIAGRMPANATASYEDWFEDAEEARLYSESQHAGS 182 | LGPARQVTRKVDLSDAKTLLDVGGGTGAFAITLCDAFPELTATVVDFPNV 183 | AKLGRGYVEEAGLSDRISYVDGNALRTDWPGGQDIVMMSYLLSGVPGAEH 184 | ETLIRRAYETLAPGGRLLVHDFVVEADRSGPKLAALWQLQHTAFTPEARS 185 | VDVAGLEHLLGANGFAEVETIEMIPQMTKLCVGRRSA 186 | 187 | >2579689465 U879_03735 Dimerisation domain-containing protein [Defluviimonas sp. 
20V17 : AYXI01000038] 188 | MMESIVDSMLAQETPAPAGNPIESAEELSSIAFGFMASKALFAGLHVDLF 189 | TLLADGPKTAEELAKAAGIPLNRIVTLTTALASVGVVSISDRKLIQNSSA 190 | AQNFLAKQSKYDFGDYLRYQIDQQMYPFLLQLNAVMKGDLSDDAIASYQH 191 | WMADEEQASVYSESQHAGSLGPGRTLARKVDLAKASTLLDVGGGTGAMTI 192 | SLCNEYPDLHATIIDFPNVAEIGWRFVSEAGLVDRVRYIPGNAVQAQWPG 193 | RQDAILMSYLMSGVPGEAVADLLRKAFDALAPGGKLMVHDFMVEEDRRGP 194 | ALAALWQLQHMAFTPDAHSLSVGWLTEAGRKIGFAVDDVDNLIPAMTKLV 195 | VFAKPS 196 | 197 | >2593183274 N556DRAFT_08021 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Rhodospirillales bacterium URHD0017 : N556DRAFT_scaffold00054.54] 198 | MTIKPLDDVRQISALTYGFIASKALFAALDLDLFTRIATGTTDLAALAAA 199 | TGVAPNRLRTLLTALKTVGLVTETDGKFANAPAAGTYLVAGAPGDFRDYI 200 | SVVNGGFLYEGLRHLSKAMRGERIFPDKGFYEGIVYSEGGVGGEAFSRAQ 201 | HAGSLGPAQLMARRVELGNATTFLDVGGGSGAYSLAFLRKNPKLRATILD 202 | FPQTVDTARRYAAEAGMADRVTHVTGNALSTPWPRDQDVVLMSYVWSAVG 203 | GNDIRTLASRAFEALKPGGIVLVHDFMVDDDHDGPAFAAWYLLASLPDNP 204 | QAECLSPGLVERRLGDAGFAVGGTEPMLAEITALTRARKP 205 | 206 | >2597124009 JM93DRAFT_02836 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Roseibium hamelinense ATCC BAA-252 : JM93DRAFT_scaffold00006.6] 207 | MAKQPLETAEQISEIAFGFMGSKALFAALHVDLFSALSDKELAADEVAAE 208 | TGLDSDRATTLLTALNTLGLVEADDGRFKNAPAADAFLVTGRKYAFGDYL 209 | RFQIDRQMYPFLTQLNDALTDNLAPDQVSSYAQWFSDADEARLYSQSQHA 210 | GSLGPGRTLAKLVDLSQTRSLLDVGGGTGAFSISLCEAYPELKSTILDFP 211 | NVAKVGEEFIKKAGLSDRVWYQPGNALDDVWPAEADAVLMSYLFSGVPGK 212 | AIPGLVKNAMVTLPPGGHFMVHDFMVSGERTGPKLAALWQLQHTAFNPHA 213 | KSITSDYVSDLMEAAGFEDVNTREMIPGMTTLVWARKPQ 214 | 215 | >2599852567 Ga0056027_02933 Dimerisation domain-containing protein [Thalassobaculum litoreum DSM 18839 : Ga0056027_scaffold00008.8] 216 | VKPIESAEDISELAFGFMASKALFAALHVDVFGALSDGPKSITDLAAATK 217 | VPAQRMQTLVTALVSVGLLTRNDGKIANAPASDAYLVRDNTNYFGDYLRF 218 | QIDRQMYPFMENLDKVLLGDTDDIEYPDYASWMADRHHAELFSRSQHSGS 219 | LGPGAVMAKRLLKEGAVSEDVGSMLDVGGGSGAFSIMFCKRFPKLHATVL 220 | DFPNVIEVGKTFVAEEEMSDRIDFVAGDGTNANWPNDQDIVLMSYLFSGV 221 | 
PEEAIDKLCSDAFRVLKPGGLIAIHDFMVTDDRKGPALAALWQLQHMVYT 222 | PDGVGMTPGFVEKHLKKAGFEIEIDDDLIPGMTRVMTARKPG 223 | 224 | >2609105254 HLUCCA09_14310 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Rhodobacteraceae bacterium bin09 : ITZX_scaf_510] 225 | MAGLTEAEEISDIAFGYMGSRALFAALEFGIFTRLADGALPAAEIAAASG 226 | LPEERCRTLMTALTALGLTVRAGEGFANSPAAEAFLVKGARYDFGDYLRL 227 | QVGRQMYPLMDQIGEALAGRLPEGATASYADWFADAEEARLYSESQHAGS 228 | LGPARGLAKRVDLSGVARMLDIGGGTGAFAITLCRANPDLHVTVVDFPNV 229 | ALLGEAYVADAGLSDRITYVTGDALAAEWPGDQDAILMSYLLSGVPDHAH 230 | EGLIGRAFDHIGSGGRLLVHDFIVDEDRTGPKNTALWQLQHTAFTPEARS 231 | LSDGWLADRMCAAGFCGIEVEPLIPGMTKLASARRPDA 232 | 233 | >2609135787 HLUCCO17_14775 Dimerisation domain-containing protein [Rhizobiales bacterium HL-109 : ITZY_scaf_197] 234 | MLRTRADEISEIAFGFMASKALFSALHFDLFTHLNDGPKTAAELGKAAGL 235 | HPDRAETLLTALAGLGLVAVDDGKFSNAPASEAFLVKGAKYDFGDYLRRQ 236 | VGQQMYGLLDQVEDALSGNLPAEATGSYAEWFSDPAEARLYSESQHAGSL 237 | GPARQLVKALDLSGAKRMLDVGGGTGAFAITFCKAFPELGATVVDFPNVA 238 | AIGREFVDKAGLGDRIHYIEADGLKADWPRNQDVILMSYLLSGIPGETHE 239 | GLIKRAYDHLAPGGTLLIHDFIVCADRTGPKLAALWQLQHTAFTPQARSV 240 | DDAWLVESMERHGFRDASVETMIPEMTMLAKAVKPA 241 | 242 | >2616590712 Ga0070519_10532 Dimerisation domain-containing protein [Citreicella aestuarii DSM 22011 : Ga0070519_1053] 243 | LAQPDQPLTDADQVSRIAFGFMGSQALFAALECGVFTALAEEPGDAAAVA 244 | NRIGLHPDRAETLLTALAGLGLVAVHDGVFANSPAAAAFLVKGVKYDFGD 245 | YLRLQVGRQMYGLMGQLVPAVTGTLPETATASYEQWFSDPEQARLYSESQ 246 | HAGSLGPARQLARRLDLSGARRMLDVGGGTGAFAITLCGAFPQLAATIVD 247 | FPNVATLGRGHVAEAGLSDRITYVEGNALDTPWPGGQDVILMSYLFSGVP 248 | GDAQAGLIARAFDCLAPGGRLLIHDFVVHADRTGPPLAALWQLQHTAFTP 249 | EARSLDTAGLSTALNGAGFVDVLIDEMIPQMTMLATAHRPG 250 | 251 | >2616591799 Ga0070518_101426 Dimerisation domain-containing protein [Litorimicrobium taeanense DSM 22007 : Ga0070518_101] 252 | MLTQDSMPPSAKAIETAEDLSSIAFGFMASKALFAGLHVDIFSALADGPK 253 | SAEELAGAADIPINRIVMLTTALASVGLLTIGDDKKVQNSPAAQSFLSKQ 254 | TKYDFGDYLRYQIDQQMYPFLLQLNAVMKGDLSDDAIASYRHWMADEEQA 255 | 
SVYSESQHAGSLGPGRTLARKVDLATASTLLDVGGGTGAMTISLCREYPN 256 | LQATIIDFPNVTEIGWRFISEADLVDRVRYIPGNAVEVQWPGNQDAILMS 257 | YLMSGVPGDNVEELVHKAFDALAPGGKLMVHDFMVEEDRRGPALAALWQL 258 | QHMAFTPDAHSLSVGWLTQTGKRIGFDVDQVDNLIPAMTKLVVFSKPS 259 | 260 | >2617877652 Ga0070148_1037 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Yangia pacifica CGMCC 1.3455 : Ga0070148_103] 261 | MALLTEAHEISDIAFGFMASKALFAALEFGIFDTLAEGPLSAEELGAACD 262 | LHPERARTLLTALAGLGVVSVEGDRYSNSPAAASFLVKGEKYDFSDYLRL 263 | QVGRQMYPLMSQLEPALKDALPEEATGSYAEWFADPEEARLYSDSQHAGS 264 | LGPAGVLAKRLDLSEARSMLDVGGGTGAFSITFCKAFPHLRSTIVDFPNV 265 | VTVGREKVAAAGLSERITYLEADATSLDWPDGQDVVLMSYLLSGVPAEAH 266 | APLFDAAYRALRPGGQLLLHDFIVRGDRSGPHLTALWQLQHTAFTPQAAS 267 | LDAEGLAAALEAAGFAEVGVRAMIPEMTMLAEARRPE 268 | 269 | >2620103054 Ga0069992_1037 Ubiquinone/menaquinone biosynthesis C-methylase UbiE [Yangia pacifica DSM 26894 : Ga0069992_103] 270 | MALLTEAHEISDIAFGFMASKALFAALEFGIFDTLAEGPLSAEELGAACD 271 | LHPERARTLLTALAGLGVVSVEGDRYSNSPAAASFLVKGEKYDFSDYLRL 272 | QVGRQMYPLMSQLEPALKDALPEEATGSYAEWFADPEEARLYSDSQHAGS 273 | LGPAGVLAKRLDLSEARSMLDVGGGTGAFSITFCKAFPHLRSTIVDFPNV 274 | VTVGREKVAAAGLSERITYLEADATSLDWPDGQDVVLMSYLLSGVPAEAH 275 | APLFDAAYRALRPGGQLLLHDFIVRGDRSGPHLTALWQLQHTAFTPQAAS 276 | LDAEGLAAALEAAGFAEVGVRAMIPEMTMLAEARRPE 277 | 278 | >2620334468 Ga0069990_11421 Dimerisation domain-containing protein [Roseovarius indicus DSM 26383 : Ga0069990_114] 279 | MLDDDTPTTQVKAIETAEDLSSIAFSFMASKALFAGLHADIFSQLSDGPK 280 | SAAELAKAVDIPINRIVMLTTALASVGVLVIGDDEKIENSPAAQSFLSKQ 281 | SKYDFGDYLRYQIDQQMYPFLLQLNAVMKGDLTEDAISSYKHWMADEEQA 282 | SVYSESQHAGSLGPGRTLARKVDLSAARTLLDVGGGTGAMTISLCKAYEN 283 | LHATIIDFPNVSEIGWRFISEAGLVDRVRYIPGNAVEVQWPGNQDAILMS 284 | YLMSGVPGDDVSALLQNAYDSLAPGGKLLVHDFMVEEDRRGPALAALWQL 285 | QHMAFTPDARSLSVGWLTEAGKKLGFEVELADNLIPAMTKLVVFSKPA 286 | 287 | >2620496472 Ga0070504_10014 Dimerisation domain-containing protein [Antarctobacter heliothermus DSM 11445 : Ga0070504_1001] 288 | MSLLTDADEISQIAFGFMGSKALFAALELGIFTHLAEGAMTATELAQVSK 289 | 
IHEDRVTTLLTALAGMGLVAVDEDRFSNAPAAQAFLVQGAKYDFSDYLRL 290 | QVGRQMYPLMDQLGDALAGTMAEDATASYADWFSDPEQARLYSESQHSGS 291 | LGPARQLVRALDLSGKHKMLDLGGGTAAFDITLCKANPELTSTILEFPNV 292 | AALGRTYVDKAGLTDRITYLEGNALETPWPQGQDVVMMSYLLSGVPGDTH 293 | ARLIRQAFEVLNPGGIVLIHDFVVHEDRTGPSLAALWQLQHTAFTPEARS 294 | LDAGWLDGALAGAGFAGITVAPLIPEMTMLAQAHKP 295 | 296 | >2622865483 Ga0070237_103213 Dimerisation domain-containing protein [Thalassobius gelatinovorus DSM 5887 : Ga0070237_103] 297 | MTVLTEADQISDIAFGFMGSKALFAALGAGVFTHLDDGPLSCAELATLCP 298 | LDEIRTEILLTALAGLGLVEGENGKFKNSPAAESFLVKGAKYDFGDYLRL 299 | QVGQQMYGLLDQIDDALQDKLPDEATASYAEWFSDPEEAKIYSASQHSGS 300 | LGPARQLARKIDLSDARHLLDVGGGTGAFAITLCKAFPNLTATIVDFPNV 301 | AILGREYVARAGLSDRISYVEGNALETEWPGGQDVVLMSYLFSGVPGEAH 302 | DGLLGRARDQLKPGGRFLLHDFVVREDRTGPKLAALWQLQHTAFTPRARS 303 | LDEGWLRTSLDKAGFEDISVDVMIPEMTMLASAVKPA 304 | 305 | >2623181840 Ga0070585_101672 Dimerisation domain-containing protein [Tropicibacter naphthalenivorans DSM 19561 : Ga0070585_101] 306 | MPVLTEAEDISDIAFAYMGSKALFAALEFGVFSALSGGAVSLDELSAQTG 307 | LPRERCRTLMSALTSLGLTTRDAAGRFANSPAAEAFLVKGAKYDFGDYLR 308 | LQVGQQMYPLMDQIEKALPGDLGADETASYAQWFSDPEEARLYSQSQHAG 309 | SLGPARGLARRVDLSQARHLLDVGGGTGAFAITLCKAFPQLKATVIDFPN 310 | VAALGETYVADAGLSDRITYQHGNALEADWPSGVDAILMSYLFSGVPDHT 311 | HEGLLTKAFDRLHPGGKVLIHDFIVDADLSGPKNTALWQLQHTAFTPEAR 312 | SLDDAWLAQALGRAGFDAVEVAPLIPGMTKLAVGVKPA 313 | 314 | >2635168442 Ga0074799_106217 Dimerisation domain-containing protein [Roseivivax sediminis DSM 26472 : Ga0074799_106] 315 | MAAITEADQISDIAFGFMGSKALFVALDHQIFTHLSKGAADVDEISEKTG 316 | LHPDRAETLLTALAGLGLLTVSDGRFANAPGAEVFLVQGAKYDFGDYLRL 317 | QVGRQMYGLISQLGAAISGNMPPDATVSYEDWFEDAEEARLYSESQHAGS 318 | LGPARQVTKKVDLSGARTLLDVGGGTGAFAITLCQAFPELTATVVDFANV 319 | SKLGRRYVDEAGLSDRIRYVDGNALRTDWPDGQDIVMMSYLLSGVPGSEH 320 | DGLVARAYHHLVPGGRLLIHDFVVEADRSGPKLAALWQLQHTAFTPEARS 321 | VDAAGLSNLLSGAGFAETEVIEMIPAMTKLAVGRRPT -------------------------------------------------------------------------------- 
/KEGGDecoder/DMSPSynthase/ncbi_dmsp_synthase_dsyB.faa: -------------------------------------------------------------------------------- 1 | >WP_025312975.1 methyltransferase [Roseibacterium elongatum] 2 | MNALTNADEISEIAFGFMGSKALFAALEHGVFTSLAGGASDAASVAKASDLDTDRAETLLTALAGLGLVV 3 | PQGQGQFVNSPAAEAFLVKGAKYDFGDYLRLQVGRQMYGLLDQIDDALTGALPEEATASYAEWFADPDEA 4 | RLYSESQHSGSLGPARQLIKRLDLSQATRLLDVGGGTGAFAITLCEANPGLSATIVDFPTVAALGRSYVQ 5 | KAGLSDRISYIEGNALETDWPGGQDVILMSYLFSGVPGVTHDGLIRAAMERLNPGGLLLIHDFVVHADRT 6 | GPKLAALWQLQHTAFTPRARSLDEGWLVSALDRGGFEDVTVSEMIPEMTMLAEARKPA 7 | >WP_023852424.1 O-methyltransferase [Rhodobacteraceae bacterium PD-2] 8 | MAALETADQVSDIAFGFMGSKALFAALEVGVFTELSRQPSTAAQLAERTAVDADRAETLLTALAGLGLVV 9 | REDGIYSNAPAAEAFLVRGAKHDFGDYLRLQVGRQMYGLLDQIDHALTDRLPKDATASYAEWFSDPEQAR 10 | LYSRSQHAGSLGPARQMLRRVDLSGAERLLDVGGGTGAFAITLCAANPDLSATIVDFPNVAALGRDYVAE 11 | AGLSDRIAYVEGNALERDWPGGQDVVLMSYLFSGVPGEAHAELLRHAYDTLAPGGRLLLHDFVVRADRSG 12 | PKLAALWQLQHTAFTPRARSLDAGWLAEALGQAGFAGIEIDDLIPEMTMLAIAHKPA 13 | >WP_043748339.1 methyltransferase [Pseudooceanicola atlanticus] 14 | MTPATEADEISNIAFGFMGSKALFLAIDLDLFSKLAEGPKTADEVAEATDVHRDRVETLMTALAGLGLLT 15 | VEDGKFANSPAAESFLVKGAKYDFSDYLARQVGQQMYPLMDQLAPAVKGDLGEDATGSYEEWFSDPEEAR 16 | LYSESQHAGSLGPARQLARRLDLSEARSMLDVGGGTGAFAITLCKAFPELKTTIVEFPNVAELGRGYVEK 17 | AGLSDRIEYVDGNALKTGWPGGQDIVLMSYLLSGVPGDMHEGLIADAMKALKPGGQLLIHDFMVHADRSG 18 | PGLAALWQLQHTAFTPEARSVDSGTLATELTEAGFEDVSVDEMIPQMTMIAKATKPA 19 | >WP_043143384.1 methyltransferase [Mameliella alba] 20 | MPLLTEADEISQIAFGFMGSKALFAALELEVFTHLAKGAMTAGELAKAAEMHEDRAMTLLTALAGMGLVA 21 | VEEGRFSNAPAAEAFLVQGAKYDFSDYLRLQVGKQMYPLMEQLEGALSGEMSDEDTASYADWFSDPDEAK 22 | LYSESQHAGSLGPARQLARSLDLTGKTKMLDVGGGTAAFDITLCQKNPGLKATVLEFPNVAALGRGYVEK 23 | AGLSDRISYLDGNALETPWPEGQDVVLMSYLFSGVPGETHAGLIAQAFKVLNPGGMVLIHDFIVNEDRTG 24 | PGLAALWQLQHTAFTPEARSLDAGWLEGALAAAGFQDVSVGAMIPEMTMLAQGKKP 25 | >WP_051644456.1 methyltransferase [Labrenzia sp. 
DG1229] 26 | MLAQDTVATENTIESAEDLSSIAFGFMASKSLFAGLHVDIFSVLADGPKSAEELAKATSIPLNRIVILTT 27 | ALASVGLLAIGDDKKIWNSPAAQNFLSKQSQYDFGDYLRHQIDQQMYPFLLQLNAVMKGDLSEDAIASYS 28 | HWMTDEEQASVYSESQHAGSLGPGKTLARKVDLGSADTLLDVGGGTGAMTISLCNEYENLHATIIDFPNV 29 | AEIGWRFISEANLVDRVRYIPGNAIEVQWPSNQSAILMSYLMSGVPGDDVEGLLQKAFDTLSPGGKLMVH 30 | DFMVEENRRGPALAALWQLQHMAFTPEARSLSVGWLTEAGKRKGFKVADVDNLIPAMTKLVVFEKPS 31 | >ERR00112.1 acetylserotonin O-methyltransferase [Labrenzia sp. C1B70] 32 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLTPQQVAEESELDLDRATTLLTALTSLG 33 | LVRREGAGFTNSPAAEAFLVKGRKYDFGDYLRFQIDKQMYPFMTQLNDALTDSLEDDQVASYETWFSDPE 34 | EARLYSRSQHAGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDFPNVAKVGEEF 35 | IAEEGLQDRIRYAPGNALKDTWPDSADAVLMSYLFSGVPGTAIPGLVRKAFEVLTPGSDFMVHDFMVDEN 36 | RDGPKLAALWQLQHTAFNPEARSITSSYVAGLMEAAGFTDIAVEVMIPGMTMLVHGRKPE 37 | >ERP98606.1 acetylserotonin O-methyltransferase [Labrenzia sp. C1B10] 38 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLTPQQVAEESELDLDRATTLLTALTSLG 39 | LVRREGAGFTNSPAAEAFLVKGRKYDFGDYLRFQIDKQMYPFMTQLNDALTDSLEDDQVASYETWFSDPE 40 | EARLYSRSQHAGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDFPNVAKVGEEF 41 | IAEEGLQDRIRYAPGNALKDTWPDSADAVLMSYLFSGVPGTAIPGLVRKAFEVLTPGSDFMVHDFMVDEN 42 | RDGPKLAALWQLQHTAFNPEARSITSSYVAGLMEAAGFTDIAVEVMIPGMTMLVHGRKPE 43 | >AOR83342.1 DsyB [Labrenzia aggregata] 44 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLTPQHVAEESELDLDRATTLLTALTSLG 45 | LVRREGAGFTNSPAAEAFLVKGRKYDFGDYLRFQIDKQMYPFMTQLNDALTDSLEDDQVASYETWFSDPE 46 | EARLYSRSQHAGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDFPNVAKVGEEF 47 | IAEEGLQDRIRYAPGNALKDTWPDSADAVLMSYLFSGVPGTAIPGLVRKAFEVLTPGSDFMVHDFMVDEN 48 | RDGPKLAALWQLQHTAFNPEARSITSSYVAGLMEAAGFTDIAVEVMIPGMTMLVHGRKPE 49 | >CTQ43687.1 Demethylspheroidene O-methyltransferase [Labrenzia aggregata] 50 | MPVARSLETAEEISDIAFGFMGSKALFSALHVDLFSLLSEKTLTPQQVAEESELDLDRATTLLTALTSLG 51 | LVRREGAGFTNSPAAEAFLVKGRKYDFGDYLRFQIDKQMYPFMTQLNDALTDSLEDDQVASYETWFSDPE 52 | EARLYSRSQHAGSLGPGRGLAKLVDLSAAKQLLDVGGGTGAFSISLCKAYPGLRSTVLDFPNVAKVGEEF 53 
| IAEEGLQDRIRYAPGNALKDTWPDSADAVLMSYLFSGVPGTAIPGLVRKAFEVLTPGGDFMVHDFMVDEN 54 | RDGPKLAALWQLQHTAFNPEARSITSSYVAGLMEAAGFTDIAVEVMIPGMTMLVHGRKPE 55 | -------------------------------------------------------------------------------- /KEGGDecoder/Decode_and_Expand.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | Combines the *.list format output files of KEGG-decoder.py and 5 | KEGG-expander.py to form a single figure of all functions 6 | 7 | Update Adds parameters to force labels to be printed on heatmap. 8 | ''' 9 | import matplotlib 10 | matplotlib.use('Agg') 11 | import argparse 12 | 13 | parser = argparse.ArgumentParser(description="Accepts HMM search results of expander_dbvX.hmm\ 14 | text file as input. Produces function\ 15 | list and heat map figure.") 16 | parser.add_argument('KOALA LIST', help="Input KOALA function list format. As generated from\ 17 | KEGG-decoder") 18 | parser.add_argument('HMM LIST', help="Input HMM function list format. 
As generated from\ 19 | KEGG-expander") 20 | args = parser.parse_args() 21 | arg_dict = vars(args) 22 | 23 | import matplotlib.pyplot as plt 24 | 25 | import pandas as pd 26 | 27 | 28 | koala = pd.read_csv(open(str(arg_dict['KOALA LIST']), "r"), index_col=0, sep='\t') 29 | hmm = pd.read_csv(open(str(arg_dict['HMM LIST']), "r"), index_col=0, sep='\t') 30 | output_df = koala.merge(hmm, left_index=True, right_index=True) 31 | 32 | #Reorganize column orientation to put like pathways together 33 | cols = output_df.columns.tolist() 34 | retinal_index = cols.index('Retinal biosynthesis') 35 | cols.insert(retinal_index+1, cols.pop(int(cols.index('beta-carotene 15,15-monooxygenase')))) 36 | cols.insert(retinal_index+2, cols.pop(int(cols.index('rhodopsin')))) 37 | trans_urea = cols.index('transporter: urea') 38 | cols.insert(trans_urea+1, cols.pop(int(cols.index('transporter: ammonia')))) 39 | nifH_index = cols.index('nitrogen fixation') 40 | cols.insert(nifH_index+1, cols.pop(int(cols.index('Vanadium-only nitrogenase')))) 41 | cols.insert(nifH_index+2, cols.pop(int(cols.index('Iron-only nitrogenase')))) 42 | dmsplyase_index = cols.index('DMSP demethylation') 43 | cols.insert(dmsplyase_index, cols.pop(int(cols.index('DMSP lyase (dddLQPDKW)')))) 44 | cols.insert(dmsplyase_index+1, cols.pop(int(cols.index('DMSP synthase (dsyB)')))) 45 | sulfitereductase_index = cols.index('dissimilatory sulfite < > sulfide') 46 | cols.insert(sulfitereductase_index+1, cols.pop(int(cols.index('DsrD dissimilatory sulfite reductase')))) 47 | cbtA_index = cols.index('Cobalt transporter CbtA') 48 | copper_index = cols.index('Copper transporter CopA') 49 | iron_index = cols.index('Fe-Mn transporter MntH') 50 | cols.insert(cbtA_index+1, cols.pop(int(cols.index('Cobalt transporter CbtB')))) 51 | cols.insert(copper_index+1, cols.pop(int(cols.index('Copper binding HMA protein')))) 52 | cols.insert(iron_index+1, cols.pop(int(cols.index('Fe-Zn-Mn permease ZupT')))) 53 | output_df = output_df[cols] 54 | 55 
| import seaborn as sns 56 | sns.set(font_scale=1.2) 57 | sns.set_style({"savefig.dpi": 200}) 58 | ax = sns.heatmap(output_df, cmap=plt.cm.YlOrRd, linewidths=2, linecolor='k', square=True, xticklabels=True, yticklabels=True) 59 | ax.xaxis.tick_top() 60 | #ax.set_yticklabels(ax.get_yticklabels(), rotation=90) 61 | plt.xticks(rotation=90) 62 | plt.yticks(rotation=0) 63 | # get figure (usually obtained via "fig,ax=plt.subplots()" with matplotlib) 64 | fig = ax.get_figure() 65 | # specify dimensions and save 66 | fig.set_size_inches(100, 100) 67 | fig.savefig("decode-expand_heatmap.svg") -------------------------------------------------------------------------------- /KEGGDecoder/DesferrioxamineBiosynthesis/Streptomycetes_ref_pathway.faa: -------------------------------------------------------------------------------- 1 | >SCO2782 DesA putative pyridoxal-dependent decarboxylase [MIBiG BGC0000940 Streptomyces coelicolor M145] 2 | MRSHLLNDTTAEQYRRSVTEGVERVAAKLATTDRPFTGVTVDALSPRIDAIDLDEPLHDTAAVLDELEDVYLRDAVYFHHPRYLAHLNCPVVIPALLGEAVLSAVNSSLDTWDQSAGGTLIERKLIDWTCARIGLGPAADGVFTSGGTQSNLQALLLAREEAKAEDFADLRIFASEASHFSVRKSAKLLGLGPDAVVSIPVDRDKRMQTVALARELERCARDGLVPMAVVATGGTTDFGSIDPLPEIAGLCEQYGVWMHVDAAYGCGLLASLKYRDRITGIERADSVTVDYHKSFFQPVSSSAVLVRDAATLRHATYHAEYLNPRRMVQERIPNQVDKSLQTTRRFDALKLWMTLRVMGADGIGVLFDEVCDLAAEGWKLLAADPRFDVVVQPSLSTLVFRHIPADVTDPAEIDRANLYARKALFASGDAVVAGTKVAGRHYLKFTLLNPETTPADIAAVLDLIAGHAEQYLGDSLDRAS 3 | >SCO2783 DesB putative monoxygenase [MIBiG BGC0000940 Streptomyces coelicolor M145] 4 | MGIGLGPFNLGLACLTEPVAELNGVFLESKPDFEWHAGMFLDGAHLQTPFMSDLVTLADPTSPYSFLNYLKEQGRLYSFYIRENFYPLRVEYDDYCRWAARKLSSVRFSTTVTEVTYDEREELYAVATTSGDTYRARRLVLGTGTPPHIPDACRGLAGDFLHNSRYVRHRAELVKKKSITLVGSGQSAAEIYQDLLSEIDVHGYGLNWVTRSPRFFPLEYTKLTLEMTSPEYVDYYHALPEDTRYRLTAEQKGLFKGIDGDLINEIFDLLYQKRLGGPVPTRLLTNSALTSARYADGTYTLGFRQEEQGTDFEIETEGLVLATGYRYTEPEFLKPVRDRLRYDSRGNFDIGRNYAVDVTGGGVFLQNAGVHAHSVTSPDLGMGAYRNSCIVRELLGREYYPVEQSIAFQEFAV 5 | >SCO2784 DesC putative acetyltransferase [MIBiG BGC0000940 
Streptomyces coelicolor M145] 6 | MSRLSTTTPVGALTLRPVDPLTDAVLLHGWLTHPKSAFWMMQDARLVDVERAYMELAADEHQQAHLGLHDGVPAFLTERYDPAHRELVGLYEPEPGDVGMHFLVAPTDRPVHGFTRAVITTVMTELFADPATRRVVVEPDVTNTAVHALNAAVGFVPEREIQKPEKKALLSFCTREQFAKAVSA 7 | >SCO2785 DesD putative siderophore biosynthetic protein [MIBiG BGC0000940 Streptomyces coelicolor M145] 8 | MSLADAVAHLTPERWEEANRLLVRKALAEFTHERLLTPEREPDDGGGQTYVVRSDDGQTAYRFTATVRALDHWQVDAASVTRHRDGAELPLAALDFFIELKQTLGLSDEILPVYLEEISSTLSGTCYKLTKPQLSSAELARSGDFQAVETGMTEGHPCFVANNGRLGFGIHEYLSYAPETASPVRLVWLAAHRSRAAFTAGVGIEYESFVRDELGAATVDRFHGVLRGRGLDPADYLLIPVHPWQWWNKLTVTFAAEVARGHLVCLGEGDDEYLAQQSIRTFFNASHPGKHYVKTALSVLNMGFMRGLSAAYMEATPAINDWLARLIEGDPVLKETGLSIIRERAAVGYRHLEYEQATDRYSPYRKMLAALWRESPVPSIREGETLATMASLVHQDHEGASFAGALIERSGLTPTEWLRHYLRAYYVPLLHSFYAYDLVYMPHGENVILVLADGVVRRAVYKDIAEEIAVMDPDAVLPPEVSRIAVDVPDDKKLLSIFTDVFDCFFRFLAANLAEEGIVTEDAFWRTVAEVTREYQESVPELADKFERYDMFAPEFALSCLNRLQLRDNRQMVDLADPSGALQLVGTLKNPLAGR -------------------------------------------------------------------------------- /KEGGDecoder/DesferrioxamineBiosynthesis/ferrioxamine_sfam_match.tblout: -------------------------------------------------------------------------------- 1 | # --- full sequence ---- --- best 1 domain ---- --- domain number estimation ---- 2 | # target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target 3 | #------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- --------------------- 4 | 2219 - SCO2782 - 2.3e-164 551.6 0.0 2.6e-164 551.5 0.0 1.0 1 0 0 1 1 1 1 - 5 | 41158 - SCO2782 - 2.6e-124 418.8 0.2 3.5e-124 418.4 0.2 1.1 1 0 0 1 1 1 1 - 6 | 25128 - SCO2782 - 3.8e-113 383.3 0.0 6.4e-113 382.5 0.0 1.3 1 1 0 1 1 1 1 - 7 | 2732 - SCO2783 - 2.4e-168 564.5 0.0 2.7e-168 564.3 0.0 1.0 1 0 0 1 1 1 1 - 8 | 89100 - SCO2783 - 3.4e-61 211.6 0.0 3.9e-61 211.5 0.0 1.0 1 0 0 1 1 1 1 - 9 | 276595 - SCO2783 - 5.1e-57 197.2 0.0 
7.8e-57 196.6 0.0 1.2 1 0 0 1 1 1 1 - 10 | 9429 - SCO2784 - 1.3e-76 260.2 0.3 1.4e-76 260.1 0.3 1.0 1 0 0 1 1 1 1 - 11 | 51934 - SCO2784 - 4.8e-55 191.4 0.0 5.1e-55 191.3 0.0 1.0 1 0 0 1 1 1 1 - 12 | 158444 - SCO2784 - 2.7e-32 115.8 0.8 3.4e-32 115.5 0.8 1.0 1 0 0 1 1 1 1 - 13 | 51934 - SCO2785 - 1.9e-271 907.1 0.0 2.1e-271 907.0 0.0 1.0 1 0 0 1 1 1 1 - 14 | 3347 - SCO2785 - 1.5e-222 744.7 0.0 1.6e-222 744.5 0.0 1.0 1 0 0 1 1 1 1 - 15 | 88599 - SCO2785 - 5.3e-80 273.8 0.0 6.5e-80 273.5 0.0 1.0 1 0 0 1 1 1 1 - 16 | # 17 | # Program: hmmscan 18 | # Version: 3.1b2 (February 2015) 19 | # Pipeline mode: SCAN 20 | # Query file: Streptomycetes_ref_pathway.faa 21 | # Target file: SFAM_database.hmm 22 | # Option settings: hmmscan --tblout ferrioxamine_sfam_match.tblout --noali --cpu 25 SFAM_database.hmm Streptomycetes_ref_pathway.faa 23 | # Current dir: /media/eclipse/sfams 24 | # Date: Mon Apr 16 15:26:38 2018 25 | # [ok] 26 | -------------------------------------------------------------------------------- /KEGGDecoder/HMM_Models/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjtully/BioData/cb1d45a957eda783412a48911c8592252915d9cd/KEGGDecoder/HMM_Models/.DS_Store -------------------------------------------------------------------------------- /KEGGDecoder/KEGG_clustering.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | This script is a hierarchical clustering module for the KEGG-decoder.py 5 | versions after V.0.8 6 | Runs scipy clustering with various metrics on the KEGG_Decoder genome DataFrame 7 | ''' 8 | from scipy.cluster.hierarchy import ward, complete, average, dendrogram, fcluster, linkage 9 | 10 | def hClust_euclidean(genome_df): 11 | linkage_matrix = linkage(genome_df, method='average', metric='euclidean') 12 | #linkage_matrix = linkage(df, metric='braycurtis') 13 | names = genome_df.index.tolist() 14 | #clust =
dendrogram(linkage_matrix, orientation="right", labels=names, get_leaves=True) 15 | clust = dendrogram(linkage_matrix, no_plot=True, labels=names, get_leaves=True) 16 | leaves = clust['ivl'] 17 | leave_order = list(leaves) 18 | genome_df = genome_df.reindex(leave_order) 19 | 20 | return genome_df 21 | 22 | def hClust_correlation(genome_df): 23 | linkage_matrix = linkage(genome_df, method='single', metric='correlation') 24 | #linkage_matrix = linkage(df, metric='braycurtis') 25 | names = genome_df.index.tolist() 26 | #clust = dendrogram(linkage_matrix, orientation="right", labels=names, get_leaves=True) 27 | clust = dendrogram(linkage_matrix, no_plot=True, labels=names, get_leaves=True) 28 | leaves = clust['ivl'] 29 | leave_order = list(leaves) 30 | genome_df = genome_df.reindex(leave_order) 31 | 32 | return genome_df 33 | 34 | def hClust_most_least(genome_df): 35 | sort_dex = genome_df.sum(axis=1).sort_values(ascending=True).index 36 | genome_df = genome_df.loc[sort_dex] 37 | 38 | return genome_df 39 | 40 | def hClust_least_most(genome_df): 41 | sort_dex = genome_df.sum(axis=1).sort_values(ascending=False).index 42 | genome_df = genome_df.loc[sort_dex] 43 | 44 | return genome_df 45 | 46 | if __name__ == '__main__': 47 | import argparse 48 | parser = argparse.ArgumentParser(description="This file is intended as a Plotly module for the KEGG_decoder") 49 | args = parser.parse_args() 50 | arg_dict = vars(args) -------------------------------------------------------------------------------- /KEGGDecoder/KEGG_clustering.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjtully/BioData/cb1d45a957eda783412a48911c8592252915d9cd/KEGGDecoder/KEGG_clustering.pyc -------------------------------------------------------------------------------- /KEGGDecoder/KEGG_expander.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | KEGG-expander.py 
V.0.5 5 | V.0.5 Removal of Amphibactin biosynthesis components. Additions of dsrD to help distinguish 6 | oxidative and reductive dsrAB 7 | V.0.4.1 Adds argument that should allow script to generate figure on most Unix systems 8 | V.0.4 Adds amphibactin biosynthesis, ferrioxamine biosynthesis 9 | V.0.3.1 Added parameters to force labels to be printed. 10 | KEGG-expander.py V.0.3 11 | V.0.3. Adds DMSP lyase (dddQ, dddP, dddD, dddK, dddW), DMSP synthase (dsyB) 12 | Usage: python KEGG-expander.py HMM_TBLOUT OUTPUT_LIST 13 | 14 | Designed to parse through the hmmsearch results table generated from 15 | the expander_dbvX.hmm to generate a heatmap figure similar to 16 | KEGG-decoder.py 17 | 18 | Recommended to run hmmsearch as follows: 19 | hmmsearch --tblout NAME.tbl -T 75 expander_dbvX.hmm INPUT.faa 20 | bit score cutoff of 75, equivalent to e-value < 10^-20 21 | 22 | Dependencies: 23 | Pandas - http://pandas.pydata.org/pandas-docs/stable/install.html 24 | Seaborn - http://seaborn.pydata.org/installing.html 25 | matplotlib - http://matplotlib.org/users/installing.html 26 | 27 | For extended information about HMM assignments, genes and pathways, 28 | please see accompanying document "Pfam_definitions.txt" 29 | 30 | 31 | ''' 32 | 33 | 34 | import matplotlib 35 | matplotlib.use('Agg') 36 | import argparse 37 | 38 | parser = argparse.ArgumentParser(description="Accepts HMM search results of expander_dbvX.hmm\ 39 | text file as input. Produces function\ 40 | list and heat map figure.") 41 | parser.add_argument('Input', help="Input HMM table file.
See documentation\ 42 | for correct format") 43 | parser.add_argument('Output', help="List version of the final heat\ 44 | map figure") 45 | args = parser.parse_args() 46 | arg_dict = vars(args) 47 | 48 | genome_data = {} 49 | 50 | for line in open(str(arg_dict['Input']), "r"): 51 | if line[0] != "#": 52 | line = line.rstrip() 53 | info = line.split() 54 | genome_id = info[0].split("_")[0] 55 | #Sfams for DMSP lyase dddP and dddD require a more stringent bit score cutoff (>500) 56 | if info[3].split(".")[0] == "14591" or info[3].split(".")[0] == "25993": 57 | if float(info[5]) > 500: 58 | try: 59 | genome_data[genome_id].append(info[3].split(".")[0]) 60 | except KeyError: 61 | genome_data[genome_id] = [info[3].split(".")[0]] 62 | else: 63 | continue 64 | if info[3].split(".")[0] == "4254": 65 | if float(info[5]) > 260: 66 | try: 67 | genome_data[genome_id].append(info[3].split(".")[0]) 68 | except KeyError: 69 | genome_data[genome_id] = [info[3].split(".")[0]] 70 | else: 71 | continue 72 | else: 73 | try: 74 | genome_data[genome_id].append(info[3].split(".")[0]) 75 | except KeyError: 76 | genome_data[genome_id] = [info[3].split(".")[0]] 77 | #Sfams for amphibactin biosynthesis require a more stringent bit score cutoff (>1000) 78 | # if info[3].split(".")[0] == "1544" or info[3].split(".")[0] == "27549": 79 | # if float(info[5]) > 1000: 80 | # try: 81 | # genome_data[genome_id].append(info[3].split(".")[0]) 82 | # except KeyError: 83 | # genome_data[genome_id] = [info[3].split(".")[0]] 84 | # else: 85 | # continue 86 | #Sfams for ferrioxamine biosynthesis require a more stringent bit score cutoff (>200). NOTE(review): these IDs appear to be appended already by the else branch of the 4254 check above, regardless of score, so this cutoff may never filter anything - confirm 87 | if info[3].split(".")[0] == "2219" or info[3].split(".")[0] == "2732" or info[3].split(".")[0] == "9429" or info[3].split(".")[0] == "51934": 88 | if float(info[5]) > 200: 89 | try: 90 | genome_data[genome_id].append(info[3].split(".")[0]) 91 | except KeyError: 92 | genome_data[genome_id] = [info[3].split(".")[0]] 93 | else: 94 | continue 95 | 96 | def
rhodopsin(hmm_match): 97 | out_data = {'beta-carotene 15,15-monooxygenase': 0, 'rhodopsin': 0} 98 | if 'PF01036' in hmm_match: 99 | out_data['rhodopsin'] = 1 100 | if 'TIGR03753' in hmm_match: 101 | out_data['beta-carotene 15,15-monooxygenase'] = 1 102 | return out_data 103 | 104 | #def peptidases(hmm_match): 105 | # out_data = {'Peptidase family C25': 0, 'Bacterial pre-peptidase C-terminal domain': 0, 106 | # 'Clostripain family': 0, 'Peptidase family M28': 0, 'Peptidase family M50': 0, 107 | # 'Di- and tripeptidases': 0, 'Leucyl aminopeptidase': 0, 'Xaa-Pro aminopeptidase': 0, 108 | # 'Peptidase propeptide and YPEB domain': 0, 'Oligoendopeptidase F': 0, 109 | # 'Phosphoserine aminotransferase': 0, 'Lipoprotein signal peptidase': 0, 110 | # 'Aminopeptidase N': 0, 'Zinc carboxypeptidase': 0, 'Peptidase S24-like': 0, 111 | # 'Peptidase S26': 0, 'D-aminopeptidase': 0, 'M61 glycyl aminopeptidase': 0} 112 | # if 'PF01364' in hmm_match: 113 | # out_data['Peptidase family C25'] = 1 114 | # if 'PF04151' in hmm_match: 115 | # out_data['Bacterial pre-peptidase C-terminal domain'] = 1 116 | # if 'PF03415' in hmm_match: 117 | # out_data['Clostripain family'] = 1 118 | # if 'PF04389' in hmm_match: 119 | # out_data['Peptidase family M28'] = 1 120 | # if 'PF02163' in hmm_match: 121 | # out_data['Peptidase family M50'] = 1 122 | # if 'PF01546' in hmm_match: 123 | # out_data['Di- and tripeptidases'] = 1 124 | # if 'PF02073' in hmm_match: 125 | # out_data['Leucyl aminopeptidase'] = 1 126 | # if 'PF00557' in hmm_match: 127 | # out_data['Xaa-Pro aminopeptidase'] = 1 128 | # if 'PF03413' in hmm_match: 129 | # out_data['Peptidase propeptide and YPEB domain'] = 1 130 | # if 'PF01432' in hmm_match: 131 | # out_data['Oligoendopeptidase F'] = 1 132 | # if 'PF00266' in hmm_match: 133 | # out_data['Phosphoserine aminotransferase'] = 1 134 | # if 'PF01252' in hmm_match: 135 | # out_data['Lipoprotein signal peptidase'] = 1 136 | # if 'PF01433' in hmm_match: 137 | # out_data['Aminopeptidase N'] 
= 1 138 | # if 'PF00246' in hmm_match: 139 | # out_data['Zinc carboxypeptidase'] = 1 140 | # if 'PF00717' in hmm_match: 141 | # out_data['Peptidase S24-like'] = 1 142 | # if 'PF10502' in hmm_match: 143 | # out_data['Peptidase S26'] = 1 144 | # if 'PF04951' in hmm_match: 145 | # out_data['D-aminopeptidase'] = 1 146 | # if 'PF05299' in hmm_match: 147 | # out_data['M61 glycyl aminopeptidase'] = 1 148 | # return out_data 149 | 150 | def alt_nitrogenase(hmm_match): 151 | out_data = {'Vanadium-only nitrogenase': 0, 'Iron-only nitrogenase': 0} 152 | v_nitro = ['TIGR01860', 'TIGR02932', 'TIGR02930'] 153 | for i in v_nitro: 154 | if i in hmm_match: 155 | out_data['Vanadium-only nitrogenase'] += 0.33 156 | fe_nitro = ['TIGR01861', 'TIGR02931', 'TIGR02929'] 157 | for i in fe_nitro: 158 | if i in hmm_match: 159 | out_data['Iron-only nitrogenase'] += 0.33 160 | return out_data 161 | 162 | def amm_trans(hmm_match): 163 | out_data = {'transporter: ammonia': 0} 164 | if 'PF00909' in hmm_match: 165 | out_data['transporter: ammonia'] = 1 166 | return out_data 167 | 168 | def dmsplyase(hmm_match): 169 | out_data = {'DMSP lyase (dddLQPDKW)': 0} 170 | dmsp = ['PF16867', '14591', '25993', '94923', '274874'] 171 | for i in dmsp: 172 | if i in hmm_match: 173 | out_data['DMSP lyase (dddLQPDKW)'] = 1 174 | return out_data 175 | 176 | def dmspsynthase(hmm_match): 177 | out_data = {'DMSP synthase (dsyB)' : 0} 178 | if '4254' in hmm_match: 179 | out_data['DMSP synthase (dsyB)'] = 1 180 | return out_data 181 | 182 | #def amphibactin(hmm_match): 183 | # out_data = {'amphibactin ACO2092-3homolog':0} 184 | # if ('1544' in hmm_match) and ('27549' in hmm_match): 185 | # out_data['amphibactin ACO2092-3homolog'] = 1 186 | # return out_data 187 | 188 | def ferrioxamine(hmm_match): 189 | out_data = {'ferrioxamine biosynthesis':0} 190 | ferrioxamine = ["2219", "2732", "9429", "51934"] 191 | for i in ferrioxamine: 192 | if i in hmm_match: 193 | out_data['ferrioxamine biosynthesis'] += 0.25 194 | return 
out_data 195 | 196 | def dissim_sulfite(hmm_match): 197 | out_data = {'DsrD dissimilatory sulfite reductase':0} 198 | if 'PF08679' in hmm_match: 199 | out_data['DsrD dissimilatory sulfite reductase'] = 1 200 | return out_data 201 | 202 | def metal_transport(hmm_match): 203 | out_data = {'Cobalt transporter CbtB':0, 'Copper binding HMA protein':0, 204 | 'Fe-Zn-Mn permease ZupT':0} 205 | if 'TIGR02459' in hmm_match: 206 | out_data['Cobalt transporter CbtB'] = 1 207 | if 'TIGR00003' in hmm_match: 208 | out_data['Copper binding HMA protein'] = 1 209 | if 'TIGR00820' in hmm_match: 210 | out_data['Fe-Zn-Mn permease ZupT'] = 1 211 | 212 | 213 | return out_data 214 | 215 | function_order = ['beta-carotene 15,15-monooxygenase', 'rhodopsin', 'Vanadium-only nitrogenase', 216 | 'Iron-only nitrogenase', 'transporter: ammonia', 217 | 'DMSP lyase (dddLQPDKW)', 'DMSP synthase (dsyB)', 218 | 'ferrioxamine biosynthesis', 'DsrD dissimilatory sulfite reductase', 'Cobalt transporter CbtB', 219 | 'Copper binding HMA protein', 'Fe-Zn-Mn permease ZupT'] 220 | 221 | filehandle = str(arg_dict['Output']) 222 | out_file = open(filehandle, "w") 223 | out_file.write('Function'+"\t"+str("\t".join(function_order))+"\n") 224 | 225 | for k in genome_data: 226 | pathway_data = {} 227 | pathway_data.update(rhodopsin(genome_data[k])) 228 | # pathway_data.update(peptidases(genome_data[k])) 229 | pathway_data.update(alt_nitrogenase(genome_data[k])) 230 | pathway_data.update(amm_trans(genome_data[k])) 231 | pathway_data.update(dmsplyase(genome_data[k])) 232 | pathway_data.update(dmspsynthase(genome_data[k])) 233 | # pathway_data.update(amphibactin(genome_data[k])) 234 | pathway_data.update(ferrioxamine(genome_data[k])) 235 | pathway_data.update(dissim_sulfite(genome_data[k])) 236 | pathway_data.update(metal_transport(genome_data[k])) 237 | 238 | out_string = str(k)+"\t" 239 | out_list = [k] 240 | for i in function_order: 241 | out_list.append(pathway_data[i]) 242 | out_string = str(out_list).strip('[]') 
243 | tab_string = "" 244 | for l in out_string: 245 | if l == "\'": 246 | continue 247 | if l == ",": 248 | tab_string = tab_string + "\t" 249 | else: 250 | tab_string = tab_string + l 251 | out_file.write(tab_string+"\n") 252 | 253 | out_file.close() 254 | 255 | import matplotlib.pyplot as plt 256 | 257 | import pandas as pd 258 | 259 | file_in = open(filehandle, "r") 260 | genome = pd.read_csv(file_in, index_col=0, sep='\t') 261 | import seaborn as sns 262 | sns.set(font_scale=1.2) 263 | sns.set_style({"savefig.dpi": 200}) 264 | ax = sns.heatmap(genome, cmap=plt.cm.YlOrRd, linewidths=2, linecolor='k', square=True, xticklabels=True, yticklabels=True) 265 | ax.xaxis.tick_top() 266 | #ax.set_yticklabels(ax.get_yticklabels(), rotation=90) 267 | plt.xticks(rotation=90) 268 | plt.yticks(rotation=0) 269 | # get figure (usually obtained via "fig,ax=plt.subplots()" with matplotlib) 270 | fig = ax.get_figure() 271 | # specify dimensions and save 272 | fig.set_size_inches(100, 100) 273 | fig.savefig("hmm_heatmap.svg") 274 | -------------------------------------------------------------------------------- /KEGGDecoder/MakeTanglegram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | This script is an implementation for tanglegram for the KEGG-decoder.py 5 | versions after V.0.8 6 | Runs tanglegram on two DataFrames - one generated from clustered KEGG 7 | metabolisms & one phylogenetic newick file provided by the user 8 | Added by Taylor Reiter : tereiter@ucdavis.edu 9 | ''' 10 | 11 | def make_tanglegram(genome_df, newick, output_file, tanglegram_opt): 12 | import matplotlib.pyplot as plt 13 | import pandas as pd 14 | import itertools 15 | from Bio import Phylo 16 | import tanglegram as tg 17 | from scipy.spatial.distance import pdist, squareform 18 | 19 | # FORMAT KEGGDECODER OUTPUT 20 | # generate distance matrix for genome_df from pathway values 21 | # genome_df = pd.read_csv(genome_df, index_col=0, 
sep='\t') 22 | kegg_d = squareform(pdist(genome_df, metric='euclidean')) 23 | kegg_m = pd.DataFrame(kegg_d) 24 | kegg_m.columns = genome_df.index.tolist() 25 | kegg_m.index = genome_df.index.tolist() 26 | kegg_m = kegg_m.reindex(sorted(kegg_m.columns), axis=1) # reorder column names alphabetically 27 | kegg_m.sort_index(inplace=True) # reorder row names alphabetically 28 | 29 | # FORMAT NEWICK FILE 30 | # generate distance matrix from newick file 31 | tree = Phylo.read(newick, 'newick') 32 | 33 | tree_d = {} 34 | for x, y in itertools.combinations(tree.get_terminals(), 2): 35 | v = tree.distance(x, y) 36 | tree_d[x.name] = tree_d.get(x.name, {}) 37 | tree_d[x.name][y.name] = v 38 | tree_d[y.name] = tree_d.get(y.name, {}) 39 | tree_d[y.name][x.name] = v 40 | 41 | for x in tree.get_terminals(): 42 | tree_d[x.name][x.name] = 0 43 | 44 | tree_m = pd.DataFrame(tree_d) 45 | tree_m = tree_m.reindex(sorted(tree_m.columns), axis=1) # reorder column names alphabetically 46 | tree_m.sort_index(inplace=True) # reorder row names alphabetically 47 | 48 | # Plot and try to minimize cross-over 49 | yLen = len(genome_df.index.tolist()) * 0.4 50 | if len(genome_df.index.tolist()) <= 10: 51 | xLen = 10 52 | else: 53 | xLen = 10 + len(genome_df.index.tolist()) / 10 54 | fig = tg.gen_tangle(kegg_m, tree_m, optimize_order=tanglegram_opt, color_by_diff=True, 55 | link_kwargs={'method': 'complete'}) 56 | fig.set_size_inches(xLen, yLen) 57 | fig.savefig(output_file) 58 | 59 | if __name__ == "__main__": 60 | import argparse 61 | parser = argparse.ArgumentParser(description="This file is intended as a Tanglegram module for the KEGG_decoder") 62 | args = parser.parse_args() 63 | arg_dict = vars(args) 64 | -------------------------------------------------------------------------------- /KEGGDecoder/MethylphosphonateSynthase/methylphosphonate-synthase_ref.faa: -------------------------------------------------------------------------------- 1 | >AIF00981.1 helix-turn-helix domain-containing 
protein [uncultured marine thaumarchaeote KM3_13_H10] 2 | MQSMADEKTFTKEFRESLENKRLGSNFLGILNDIKRRPSDAAKELEISNEEIQDIINGKIMLPSEIVSKA 3 | IKIWPVNTRDFYIMHDDCPNGLKIMRCEDSVKSSRIMHRAGKPYYEYRDTAMSSVGPFRPEWIEQLCIVD 4 | DNEPSNKQVQWNNGHFMHQFTYFIGDVNFYYINENGEKKVAIMNTGDSNYITPFVPHSFATRKGAKKNGL 5 | ILALTYGNNLSGDSQHELSSVGKKLGKEFALDFSSKKSASSSLIKFHRNNSSLTLHELSKRTNLHIEKLR 6 | DFENGKIPAYSEYAILAECFNVNIRDLLPYDKISNKVVVQLHKNTEKWFYPEDTKNYELVELANSSSLPY 7 | SKALEINILNENDKTLDLKIGLHQYGYNIGDTDVSISYESEDGLKTDIIKPGDSFYLKPFVEHNFRGKAK 8 | ILILRISGKITGEPQRELSLIGQKKITRVINESLQWFDAKGKN 9 | >AIF09591.1 helix-turn-helix domain-containing protein [uncultured marine thaumarchaeote KM3_38_E02] 10 | MVETLKNSRGLELERKRIGSNFLGILNDLKRRPEDAAEELGVTLIEINSIIEGKQELSFDLVSKATSIWP 11 | VNTRDFFIVRDDCETGIKIMTAVESKDSGRIMNRAGKPYYEYRDTAMSSVAPFRPEWIMELCIVNDNDPN 12 | NKSVQWNNGHFMHQFTYFIGEVNFYYMSPTGEKKVALMNTGDSVYITPFVPHTFATRKGASKNGLILALT 13 | YGNKLTGEVQQELSSVSPILGKEYVLDFSNKNKAFGSLLSFHRNNANIPISDLATRVKISKEKIESFENG 14 | LASPSFEEITKFARALRINSRELFPNDLIENNVILQKYNEGEKWFFPESTKSYEFIELATTSNLPFSKAF 15 | EINVLDSNEDKFDLKIGLHQYVYNLGDKDIQLNWIFDGEKYQKILKPDDSAYIKPFIEHNFRGEGKILIL 16 | RVGGKSTGDAQIELSFVGKPNVERAINEMMLWFDPTGKN 17 | >AIF18123.1 helix-turn-helix domain-containing protein [uncultured marine thaumarchaeote KM3_82_A11] 18 | MNARDFYIMHDDCPSGLKIMRCEDSMKSSRIMHRGGKPYYEYRDTAMSSVGPFRPEWIEELCVVDDNELD 19 | NRQVQWNNGHFMHQFTYFVGDVNFYYIDENGEKKVDVMNTGDSNYITPFTPHSFATRKGASKNGLILALT 20 | YGNNLSGDSQHELSSIGKKLGKEFAFDFSSKEIASVSLIKFHRNNASLTLHELSKRTNMDIEKLKDFENG 21 | KIPTYSEYAILAECLQVNIRDLLPYDKISNKVIVQFYKNTKKWFYPEDTKNYKLVELANTISLPHSKALE 22 | VNVLSENDKTLDLKIGLHQYGYNIGDTDVSISYESEDGLKADMIKPGDSFYIKPFVAHNFRGKGKVLILR 23 | ISGKITGEPQRELSLIGKKNMARVINESTQWFNVNGKN 24 | >EBM92518.1 hypothetical protein GOS_8330300 [marine metagenome] 25 | MKTRNKIFSAKRLRQVLNDLKRRPEDAARELKISNKKIINILKGKDELKISLVKRMLKIWPVTLNNFINF 26 | KFFNDKDPEIIIFNENSSKKTSRLMQRKGRDYYEYRDTAMSRNAPFRPEWIRVLQIVDNNNPKNPKVVWN 27 | KGHLLHQFTYFVGNINFYYFENKIKKVKTMKTGDTMYISPYVPHSFTSRDNDRNFIIALTYLDKITNDVQ 28 | 
DNLGNLGIKNSMSAIFNENSNNDIIKRYTNNSFLNFEEIIKGYKIKNFKKNSTFHFSKKLSDKLNINLRD 29 | ILETENTKKVIINTQSKSRIWFYPTKKKRILQIRELASSKYSPESKSFELSVLKSNNIKHQNHSHQYFYV 30 | LGKYLKLKINSKIYNLKKNDTFYLKPFTKFSLMNNNAKILVLRVSDRISGDNLLQLSQIGKKNIKRVIKE 31 | SESWF 32 | >EDF86513.1 hypothetical protein GOS_880853, partial [marine metagenome] 33 | MKSNDAELKKSAHNLRGLLNDLKRRPEDAANELGFDIKDLEDYLSGTKPISSDFIKTACDKWPINERDFF 34 | VIKDDCPSGIKIMRSSDSNKSSRIMERAGKPYYDYRDTVITSSTTFRPEWIEELCIVDDNDPNNKSVQWN 35 | NGHFMHQFTYFIGDVNYYYVDENGKKCVGIMNTGDSCYITPFVPHSFTTRAGSKKPGLILALTFGSSLSG 36 | DAQQELAAISNPISASEFALDFTSYEKGIGSLIKYFRNSFSISLEELSRRSEISEKKLNDIENGSSSTLD 37 | ELKTISASMNINLRDILSNDLIEPATIIQKHDEGNQWKLANNAYQVVELANSSTLPYARSLEISVFSESD 38 | HEDYDQNIGLHQYIYNIGDSELAINFENGKEKLYPGDSCYIKPFLKHNFRGNGKILCLRI 39 | >EDA64276.1 hypothetical protein GOS_1964171, partial [marine metagenome] 40 | MISNIKFRKLLNDLKRRPEDAAKDLNITKKEILKFLNNKKEISHKILNQATKVWPVNHGDFFSFSDDTQN 41 | GFKIMRSDKSNKSKRIMIRGKKPYYLYKDTVMSKVSPFKPELITELVVVENDNPNNPKVKFNNGHFLHQF 42 | TYFIGPVNFYYLKNNKKKIARMNTGDSMYISPYVPHSFTTRKNKKNELGKILALTYSDQIDNETLNELNV 43 | LGFDIANKYRIDLTNENRAFWGNLDIFFNNSSISLDEFKKKTNIDLNKLRKKKNVPNLNTLKKISNFLNI 44 | GLRDLLPPKEKIEVKIQKYSQNRNWYFPSNKKKDYLFVELTNIPQLPYSRGYEFHILSNHKINRTLEVPS 45 | HQYIYNIGHTNVQAIINKKKIKIYPGDSLYIKPNLKHYFNQKGKLLILRLGGRISGDRLYQLSMLSKQNL 46 | FKTIDDIKPWFNK 47 | >ECZ75939.1 hypothetical protein GOS_2125985, partial [marine metagenome] 48 | FKIMRHNDSEKSKRIMKRGGKPYYLYKDTVMSKLSPFRPELITELEVVANDSPKNSKVRFNNGHFLHQFT 49 | YFIGPVNFYYIEKNKKKVAKMNTGDSMYISPYVCHSFTTRKNKKKLLGKILALTYSDKLDNETLNEITAL 50 | GFNITKKYKINLKDHKTAFWSNIENFFKNSSITFKEFKRNTSIDLDKIKKSKKIPSEKIIKKISHNLKLN 51 | YRDLFPPNNTLEVKIQKYSNCKNWFYPSMKKKDYLFKELTNIPQLPFSRGYELTLLNEKKHKSFLDVPSH 52 | QYIYNLGTTKINFIINGKKGYLNPSDSIYIKPNNKHIFYKKGKLLILRLGGRISGDSLYQLSMISEKNLK 53 | KTIDDNRPWFNK 54 | >ECZ75315.1 hypothetical protein GOS_2127099, partial [marine metagenome] 55 | MSISNIKFRKILNDLKRRPEDAARDLKISNKKLLQILNNKTKTDFDIIEKATKIWPVNYGDFFSFEDDTK 56 | 
NDFKIMRASISDKSSRIMSRGKKPYYLYKDTVMSKLSPFKPELITELQIVSDNNSNNIDVKFNNGHFLHQ 57 | FTYFIGPVNFYYMLNGKKKVAVMNTGDSMYISPYIPHSFATRKNNQEVLGKIIAITYSDKLDNETLNESS 58 | SIGFNLIKKLKINLKDEYYSFWSNLEKQINNSFISFNMLNDLLKYDLGSLKKNKKIPKINTIKKIAKYLN 59 | LNLRDLLPPNNLTDVKIQKYKDNRSWFYPSNIKRDYKIIELTNVSELPYSRGFEIKILKNNKNNCSLEVP 60 | THQYIYNIGKKDIKIEIDGINEKLNKNDSMYIKPNKKHKFISEGKVLVLRLGGRLSGDSLYQLSKMSDK 61 | >ECZ56278.1 hypothetical protein GOS_2161595, partial [marine metagenome] 62 | KELCIVDDNTPENDSIRWNNGHFMHQFTYFIGDVNFYYIDENGKRKTAEMKTGDSMYITPFIPHTFATRK 63 | GAKENGLILALTYGDKLAGDAKQELACLSKELGTKFSLDFSTKEKSSASLIKFHREISSITINELSNRTK 64 | IPKQQLLEFENGNLIPSSSEMSLLAHALNINVRELLPNDIQEKKVVVKTHDNCTQWNYPDDSKRYILTEL 65 | ASTSALPFSKALEIKVQNSDSSELDLEVGLHQYIYNVGNSELEINWKLEEKTYHKIIKPGDSIYIKPFVN 66 | HNFRGNGSLVALRVGGKIPGDSQRELSILGNKNVSRAINETKLWFNAGN 67 | >ECZ36112.1 hypothetical protein GOS_2197111, partial [marine metagenome] 68 | MKQEQLIKKIGKRILSESNDLKRTLESLANEIGVEKEKLKKVVDGQCELADSYSVIRKMGLVYPIDIADL 69 | FLIQDDCENGIKIMRAADSLKSSRIFNRIDANKIKTPYYEYRDTAMSSLGPFKPEWIKELRVVNDNDPKN 70 | PNVVYNNGHFMHQTTLFIGPVNFYWEVNGKKFCREMNTGDSNYITPFWPHSFTSRDANEEAYILAITFGG 71 | DVRRAQKELYALGEKTKNYVLDYRENNKAVRQLINHHMNNESITIDHLIDLAQQKSIDINLKDLLISDKP 72 | ISKRDLKIIADFLNIELENLIIPEYKPEDEVVIKHSNSKNRYYFPNKDNIAYKIETLARTSKMPLLK 73 | >ECZ36112.1 hypothetical protein GOS_2197111, partial [marine metagenome] 74 | MKQEQLIKKIGKRILSESNDLKRTLESLANEIGVEKEKLKKVVDGQCELADSYSVIRKMGLVYPIDIADL 75 | FLIQDDCENGIKIMRAADSLKSSRIFNRIDANKIKTPYYEYRDTAMSSLGPFKPEWIKELRVVNDNDPKN 76 | PNVVYNNGHFMHQTTLFIGPVNFYWEVNGKKFCREMNTGDSNYITPFWPHSFTSRDANEEAYILAITFGG 77 | DVRRAQKELYALGEKTKNYVLDYRENNKAVRQLINHHMNNESITIDHLIDLAQQKSIDINLKDLLISDKP 78 | ISKRDLKIIADFLNIELENLIIPEYKPEDEVVIKHSNSKNRYYFPNKDNIAYKIETLARTSKMPLLK 79 | >sp|A9A1T2.1|MPNS_NITMS Methylphosphonate synthase [Nitrosopumilus maritimus SCM1] 80 | MEKKIDFKPDSYLIRSGNNFLGILNDIKRRPEDAANELGVSIEEINSIISGKQKISPSLIEKAVNIWPVN 81 | ERDFYIVSDDCSSGILIMTSQDSIKSSRIMERAGKPYYEYRDTAMSKTAPFRPEWILELCKVENNDPENP 82 | 
KAQWNNGHFMHQFTYFIGEVNFYYKDPEGKKHVAIMNTGDSMYITPFTPHTFTTRDGASQNGLILALTYG 83 | SKLTGDIQQELSSLSLDCGSQYALDFTNHENASLSLLEYYFELSNLTKEKFAKRTNFSMETLADFFTKKK 84 | LPTFDELKIIAKALNVNSRDLMPNDLTESKVIVKTHDQCDHWKYPESGNYEFYELASTTALPHSKAFEID 85 | VSSSEDLNLDLKVGLHQYVYNIGDSALTINWNYENKTYQKSLNPGDSAYIKPFVPHNFRGNGKILILRIG 86 | GKISGDSQRELSFVGRENTQRAISETMQWFDPKGSNS 87 | >WP_029455269.1 hypothetical protein [Candidatus Pelagibacter ubique] 88 | MSISNIKFRKILNDLKRRPEDAARDLKISNKKLLQILNNKTKTDFDIIEKATKIWPVNYGDFFSFEDDTK 89 | NDFKIMRASISDKSSRIMSRGKKPYYLYKDTVMSKLSPFKPELITELQIVSDNYSNNIDVKFNNGHFLHQ 90 | FTYFIGPVNFYYMLNGKKKVAVMNTGDSMYISPYIPHSFATRKNNQGVLGKIIAITYSDKLDNETLNESS 91 | SIGFNLIKKLKVNLKDEYNSFWSNLEKQINNSFISFNMLNELLKYDLGSLKKNKKIPKINTIKKIAKYLN 92 | LNLRDLLPPNNLTDVKIQKYKDNRSWFYPSNTKRDYKIIELTNVSELPYSRGFEIKILKNNKKNCFLEVP 93 | THQYIYNIGKKDIKIKIDGINEKLNKNDSMYIKPNKKHKFISEGKVLVLRLGGRLSGDSLYQLSKMSDKN 94 | LKRTLNDNKPWFNK 95 | >WP_008298466.1 XRE family transcriptional regulator, partial [Candidatus Nitrosopumilus salaria] 96 | MEEEMSPVPDLYHVRSGNNFLGILNDIKRRPEDAANELGISLNEINSIISGRKKISPELIEKAVKIWPVN 97 | ERDFYIISDDCPTGVLIMSSDESKKTSRIMERAGKPYYEYRDTAMSKTAPFRPEWILELCKVKDNDPNNP 98 | EVQWNNGHFMHQFTYFIGQVNFYFKGDDGKKHVAVMNTGDSMYITPFTPHTFTTRDGSQSNGLILALTYG 99 | SKLTGDVQQEISLLPINSGSNYALDFSSKERSSSSLINYYYKISNLSMDEFEKRTNISKTSLTSYLEKNE 100 | IPSFEDLEKIAKALNVNCRDLMSNDKIESKVIVKHHDESNSWSYPENSKTYDFLQLASTTALPHSKAFEI 101 | ITKESQDETLDLNIGLHQYVYNVGDTPVLLSWHYDEKLFKKSLNPGDSAYIKPFVAHNFRNKGKLLNLRI 102 | GGKINGD -------------------------------------------------------------------------------- /KEGGDecoder/MethylphosphonateSynthase/methylphosphonate-synthase_sfam_match.tblout: -------------------------------------------------------------------------------- 1 | # --- full sequence ---- --- best 1 domain ---- --- domain number estimation ---- 2 | # target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target 3 | #------------------- ---------- -------------------- 
---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- --------------------- 4 | 11622 - AIF00981.1 - 2.1e-12 51.2 0.0 0.00017 25.4 0.0 2.2 2 0 0 2 2 2 2 - 5 | 1662 - AIF00981.1 - 1.5e-11 48.6 0.2 8.4e-07 33.1 0.0 3.5 3 1 0 3 3 2 2 - 6 | 4294 - AIF00981.1 - 1.8e-08 38.5 0.0 3.1e-06 31.2 0.0 3.1 4 0 0 4 4 3 1 - 7 | 142788 - AIF00981.1 - 8.4e-06 30.4 2.4 0.00049 24.7 0.6 2.9 2 1 0 2 2 2 1 - 8 | 11622 - AIF09591.1 - 5.3e-17 66.2 0.0 3.1e-07 34.4 0.0 2.2 2 0 0 2 2 2 2 - 9 | 1662 - AIF09591.1 - 5.4e-17 66.3 0.0 9.4e-10 42.7 0.1 3.2 2 1 0 2 2 2 2 - 10 | 4294 - AIF09591.1 - 2e-11 48.1 0.0 1e-06 32.7 0.0 3.3 2 2 0 2 2 2 2 - 11 | 16849 - AIF09591.1 - 1.6e-09 42.0 0.2 3.7e-07 34.4 0.1 2.4 2 0 0 2 2 2 1 - 12 | 8074 - AIF09591.1 - 2.6e-08 37.7 0.0 6.4e-07 33.1 0.0 2.1 2 0 0 2 2 2 1 - 13 | 60315 - AIF09591.1 - 3.5e-07 34.7 0.3 0.00041 24.9 0.1 2.3 2 0 0 2 2 2 1 - 14 | 4195 - AIF09591.1 - 9.4e-07 32.2 0.1 6.4e-05 26.2 0.1 2.1 2 0 0 2 2 2 1 - 15 | 189435 - AIF09591.1 - 2.6e-06 31.8 0.0 0.0021 22.5 0.0 2.6 2 0 0 2 2 2 1 - 16 | 17413 - AIF09591.1 - 4.3e-06 30.4 0.2 0.004 20.6 0.1 2.2 2 0 0 2 2 2 1 - 17 | 5033 - AIF09591.1 - 7.3e-06 30.0 0.0 0.00024 25.0 0.0 2.2 2 0 0 2 2 2 1 - 18 | 1662 - AIF18123.1 - 1.9e-10 45.0 0.1 1.5e-06 32.3 0.0 3.1 2 1 0 2 2 2 1 - 19 | 11622 - AIF18123.1 - 4.7e-09 40.3 0.2 1.9e-05 28.6 0.1 2.3 2 0 0 2 2 2 2 - 20 | 4294 - AIF18123.1 - 1.7e-08 38.5 0.1 7.9e-07 33.1 0.0 2.7 3 0 0 3 3 3 1 - 21 | 11622 - EBM92518.1 - 9.3e-11 45.9 1.6 9.4e-05 26.3 0.0 2.2 2 0 0 2 2 2 2 - 22 | 1662 - EBM92518.1 - 8.7e-10 42.8 4.7 0.00091 23.2 0.5 3.1 2 1 0 2 2 2 2 - 23 | 51372 - EBM92518.1 - 1.8e-06 31.9 0.9 0.29 15.1 0.1 2.6 2 0 0 2 2 2 2 - 24 | 1662 - EDF86513.1 - 1.9e-18 71.0 0.2 2e-15 61.2 0.1 2.2 2 0 0 2 2 2 1 - 25 | 11622 - EDF86513.1 - 2.4e-17 67.3 0.0 3e-09 40.9 0.0 2.1 2 0 0 2 2 2 2 - 26 | 1881 - EDF86513.1 - 6.9e-12 49.5 0.0 1.4e-11 48.5 0.0 1.4 1 0 0 1 1 1 1 - 27 | 4294 - EDF86513.1 - 1.9e-10 44.9 0.0 4e-09 40.6 0.0 2.2 2 0 0 2 2 2 1 - 
28 | 5033 - EDF86513.1 - 7.7e-10 42.9 0.0 1.4e-09 42.1 0.0 1.3 1 0 0 1 1 1 1 - 29 | 8074 - EDF86513.1 - 4.8e-08 36.8 0.1 1.3e-07 35.4 0.0 1.7 2 0 0 2 2 1 1 - 30 | 60315 - EDF86513.1 - 1.5e-06 32.7 0.5 7.9e-06 30.4 0.8 1.9 2 0 0 2 2 1 1 - 31 | 78136 - EDF86513.1 - 2.1e-06 31.5 0.1 3.1e-06 30.9 0.1 1.2 1 0 0 1 1 1 1 - 32 | 10910 - EDF86513.1 - 5.1e-06 30.4 0.0 1.2e-05 29.2 0.0 1.6 2 0 0 2 2 1 1 - 33 | 11495 - EDF86513.1 - 6.7e-06 29.8 0.4 0.00052 23.6 0.3 2.1 2 0 0 2 2 2 1 - 34 | 1662 - EDA64276.1 - 7.9e-13 52.7 1.1 0.00012 26.0 0.1 3.7 2 2 0 2 2 2 2 - 35 | 11622 - EDA64276.1 - 3.7e-12 50.4 0.3 1.5e-06 32.2 0.0 2.5 2 1 0 2 2 2 2 - 36 | 4294 - EDA64276.1 - 7.8e-08 36.4 0.0 0.018 18.9 0.0 3.0 2 2 0 2 2 2 2 - 37 | 1662 - ECZ75939.1 - 9.7e-05 26.4 0.1 0.52 14.2 0.0 2.4 2 1 0 2 2 2 2 - 38 | 1662 - ECZ75315.1 - 1.2e-12 52.1 0.5 7.5e-06 30.0 0.0 3.1 3 0 0 3 3 3 2 - 39 | 11622 - ECZ75315.1 - 1.2e-08 39.0 0.5 0.00052 23.9 0.0 2.5 2 1 0 2 2 2 2 - 40 | 4294 - ECZ75315.1 - 1.2e-06 32.5 0.0 0.17 15.7 0.0 3.7 3 1 0 3 3 3 1 - 41 | 1662 - ECZ56278.1 - 5.5e-17 66.3 0.4 3e-13 54.1 0.3 2.2 2 0 0 2 2 2 2 - 42 | 4294 - ECZ56278.1 - 3.4e-14 57.1 0.0 4.3e-11 47.0 0.0 2.3 2 0 0 2 2 2 1 - 43 | 11622 - ECZ56278.1 - 4.4e-13 53.4 0.2 1.8e-09 41.7 0.0 2.2 2 0 0 2 2 2 2 - 44 | 8074 - ECZ56278.1 - 6.4e-11 46.2 0.0 1.1e-08 38.9 0.0 2.2 2 0 0 2 2 2 1 - 45 | 1881 - ECZ56278.1 - 1.7e-07 35.2 0.2 3.5e-07 34.1 0.2 1.5 1 1 0 1 1 1 1 - 46 | 5033 - ECZ56278.1 - 2.8e-07 34.6 0.0 2e-05 28.5 0.0 2.1 2 0 0 2 2 2 1 - 47 | 4294 - ECZ36112.1 - 7.8e-08 36.4 0.0 3.9e-05 27.6 0.0 3.4 2 2 1 3 3 3 1 - 48 | 1662 - ECZ36112.1 - 1.4e-07 35.6 0.5 0.043 17.7 0.0 3.5 2 2 1 3 3 3 2 - 49 | 4294 - ECZ36112.1 - 7.8e-08 36.4 0.0 3.9e-05 27.6 0.0 3.4 2 2 1 3 3 3 1 - 50 | 1662 - ECZ36112.1 - 1.4e-07 35.6 0.5 0.043 17.7 0.0 3.5 2 2 1 3 3 3 2 - 51 | 11622 - sp|A9A1T2.1|MPNS_NITMS - 4.6e-16 63.2 0.0 5.4e-07 33.6 0.0 2.2 2 0 0 2 2 2 2 - 52 | 1662 - sp|A9A1T2.1|MPNS_NITMS - 3.3e-14 57.2 0.1 2.6e-08 38.0 0.0 3.4 2 2 0 2 2 2 2 - 53 | 
11495 - sp|A9A1T2.1|MPNS_NITMS - 9.7e-09 39.0 0.2 3.7e-06 30.6 0.0 2.4 2 0 0 2 2 2 1 - 54 | 4294 - sp|A9A1T2.1|MPNS_NITMS - 1e-08 39.3 0.0 7e-06 30.0 0.0 2.8 2 1 0 2 2 2 1 - 55 | 16849 - sp|A9A1T2.1|MPNS_NITMS - 1.7e-08 38.8 0.3 8.5e-07 33.2 0.1 2.3 2 0 0 2 2 2 1 - 56 | 6849 - sp|A9A1T2.1|MPNS_NITMS - 1.5e-07 35.7 1.0 4.6e-06 30.8 0.4 2.3 2 0 0 2 2 2 1 - 57 | 311835 - sp|A9A1T2.1|MPNS_NITMS - 4.4e-07 33.2 0.0 1.7e-06 31.3 0.0 2.0 1 1 0 1 1 1 1 - 58 | 1662 - WP_029455269.1 - 5.3e-12 50.0 0.3 2.9e-05 28.1 0.0 3.2 3 0 0 3 3 3 2 - 59 | 11622 - WP_029455269.1 - 1.1e-07 35.8 0.5 0.0023 21.8 0.0 2.7 2 1 0 2 2 2 2 - 60 | 4294 - WP_029455269.1 - 1.3e-06 32.4 0.0 0.18 15.6 0.0 3.5 2 2 0 2 2 2 2 - 61 | 1662 - WP_008298466.1 - 8.3e-17 65.7 2.0 9.7e-11 45.9 0.7 4.0 2 2 0 2 2 2 2 - 62 | 11622 - WP_008298466.1 - 3.3e-15 60.4 0.0 9.1e-07 32.8 0.1 2.3 2 1 0 2 2 2 2 - 63 | 4294 - WP_008298466.1 - 3.1e-10 44.2 0.2 4.5e-07 33.9 0.1 2.9 2 1 0 2 2 2 1 - 64 | 11495 - WP_008298466.1 - 9.4e-10 42.4 1.0 2.4e-07 34.5 0.3 2.3 2 0 0 2 2 2 1 - 65 | 6849 - WP_008298466.1 - 1.1e-09 42.7 3.0 3.1e-08 37.9 1.9 2.2 2 0 0 2 2 2 1 - 66 | 16849 - WP_008298466.1 - 2.5e-09 41.4 1.6 2.9e-07 34.7 0.7 2.2 2 0 0 2 2 2 1 - 67 | 1881 - WP_008298466.1 - 3e-08 37.6 0.1 2e-07 34.9 0.1 2.0 2 0 0 2 2 2 1 - 68 | 54712 - WP_008298466.1 - 2.5e-07 35.2 2.2 4.1e-05 28.0 0.9 2.2 2 0 0 2 2 2 1 - 69 | 4387 - WP_008298466.1 - 1.8e-06 31.8 0.2 0.0008 23.1 0.2 2.2 2 0 0 2 2 2 1 - 70 | 4195 - WP_008298466.1 - 8.2e-06 29.1 0.0 0.0033 20.6 0.1 2.1 2 0 0 2 2 2 1 - 71 | 31064 - WP_008298466.1 - 8.8e-06 29.8 0.7 0.00033 24.7 0.4 2.2 2 0 0 2 2 2 1 - 72 | # 73 | # Program: hmmscan 74 | # Version: 3.1b2 (February 2015) 75 | # Pipeline mode: SCAN 76 | # Query file: methylphosphonate-synthase_ref.faa 77 | # Target file: SFAM_database.hmm 78 | # Option settings: hmmscan --tblout methylphosphonate-synthase_sfam_match.tblout --noali --cpu 25 SFAM_database.hmm methylphosphonate-synthase_ref.faa 79 | # Current dir: /media/eclipse/sfams 80 | # 
Date: Mon Apr 16 15:30:57 2018 81 | # [ok] 82 | -------------------------------------------------------------------------------- /KEGGDecoder/Pfam_definitions.txt: -------------------------------------------------------------------------------- 1 | V.0.1 2 | 3 | Phototrophy 4 | TIGR03753 beta-carotene 15,15'-monooxygenase, Brp/Blh family 5 | pf01036 rhodopsin 6 | 7 | ####REMOVED DUE NON-DISCRIMINATORY MATCHES##### 8 | Peptidase activity 9 | pf01364 Peptidase family C25 10 | pf04151 Bacterial pre-peptidase C-terminal domain 11 | pf03415 Clostripain family 12 | pf04389 Peptidase family M28 13 | pf02163 Peptidase family M50 14 | pf01546 Di- and tripeptidases 15 | pf02073 Leucyl aminopeptidase (aminopeptidase T) 16 | pf00557 Xaa-Pro aminopeptidase 17 | pf03413 Peptidase propeptide and YPEB domain 18 | pf01432 Oligoendopeptidase F 19 | pf00266 Phosphoserine aminotransferase 20 | pf01252 Lipoprotein signal peptidase 21 | pf01433 Aminopeptidase N 22 | pf00246 Zinc carboxypeptidase 23 | pf00717 Peptidase S24-like 24 | pf10502 Peptidase S26 25 | pf04951 D-aminopeptidase 26 | pf05299 M61 glycyl aminopeptidase 27 | ##### 28 | 29 | Alternative nitrogenase 30 | TIGR01860 nitrogenase vanadium-iron protein, alpha chain vnfD 31 | TIGR01861 nitrogenase iron-iron protein, alpha chain anfD 32 | TIGR02931 Fe-only nitrogenase, beta subunit anfK [also potentially pf00148, Nitrogenase component 1 type Oxidoreductase] 33 | TIGR02932 V-containing nitrogenase, beta subunit vnfK 34 | TIGR02930 V-containing nitrogenase, delta subunit vnfG 35 | TIGR02929 Fe-only nitrogenase, delta subunit anfG 36 | pf03139 Vanadium/alternative nitrogenase delta subunit, targets vnfG (V) and anfG (Fe) alternatives 37 | 38 | Ammonia transporter 39 | pf00909 Ammonium Transporter Family (high-affinity ammonia transporter) 40 | 41 | DMSP Lyase 42 | PF16867 dddQ 43 | 14591 dddP - requires a much more strigent bit score cutoff (recommend >500) 44 | 25993 dddD - requires a much more strigent bit score cutoff 
(recommend >500) 45 | 94923 dddK 46 | 274874 dddW 47 | 48 | DMSP Synthase 49 | 4254 dsyB - requires a more stringent bit score cutoff (recommend >260) 50 | 51 | Methylphosphonate biosynthesis 52 | 53 | ####REMOVED DUE NON-DISCRIMINATORY MATCHES##### 54 | Amphibactin biosynthesis 55 | Multiple SFAM models have exceptionally high bit score matches to reference sequences (>>1000). Both ABO2092 and ABO2093 have common domains and as such share the highest SFAM models 56 | Recorded the best two models for all references and require both to successfully match for identification 57 | 1544 ABO2092/3 - requires a much more stringent bit score cutoff (recommend >1000) 58 | 27549 ABO2092/3 - requires a much more stringent bit score cutoff (recommend >1000) 59 | ##### 60 | 61 | 62 | Ferrioxamine biosynthesis 63 | Limited references for comparison. Known to occur as an operon in Streptomyces. Each gene matched an SFAM model with at least bit score >200, 3 of 4 top hits are over 500 bit score 64 | Will require a minimum of 200 bit score for all matches. 
def plotly_viz(genome_df, output_file):
    """Write an interactive heatmap of ``genome_df`` to ``output_file``.

    The same table is drawn four times, once per ordering returned by the
    ``KEGG_clustering`` helpers (Euclidean clustering, correlation
    clustering, most-to-least and least-to-most). Only the first trace is
    visible initially; a drop-down menu switches between them.

    Args:
        genome_df: table whose index supplies the y labels, whose columns
            supply the x labels and whose values are the cell colours.
            Presumably a pandas DataFrame of per-genome pathway
            completeness in the range 0-1 -- confirm against the caller.
        output_file: path passed to ``plotly.offline.plot`` as ``filename``.
    """
    # Imports stay function-local, exactly as in the original module.
    import plotly.graph_objs as go
    import plotly.offline as py

    from .KEGG_clustering import hClust_correlation
    from .KEGG_clustering import hClust_euclidean
    from .KEGG_clustering import hClust_least_most
    from .KEGG_clustering import hClust_most_least

    # Menu label -> function producing the re-ordered table. The order here
    # fixes both the trace order and the drop-down order.
    orderings = [
        ('Euclidean_Clustering', hClust_euclidean),
        ('Correlation_Clustering', hClust_correlation),
        ('Most_to_Least', hClust_most_least),
        ('Least_to_Most', hClust_least_most),
    ]

    # Figure size in pixels: 20 px per column; height per row depends on
    # how many genomes are plotted (tiers kept as in the original).
    xLen = len(genome_df.columns.values.tolist()) * 20
    len_genomes = len(genome_df.index.tolist())
    menL = 3.0
    if len_genomes >= 200:
        yLen = len_genomes * 80
    elif len_genomes >= 100:
        yLen = len_genomes * 60
    elif len_genomes >= 50:
        yLen = len_genomes * 50
    elif len_genomes >= 25:
        yLen = len_genomes * 100
    else:
        yLen = 1500

    # Five discrete colour bands, each covering one fifth of the 0-1 range.
    colorscale = [
        [0, '#f1eef6'],
        [0.2, '#f1eef6'],
        [0.2, '#bdc9e1'],
        [0.4, '#bdc9e1'],
        [0.4, '#74a9cf'],
        [0.6, '#74a9cf'],
        [0.6, '#2b8cbe'],
        [0.8, '#2b8cbe'],
        [0.8, '#045a8d'],
        [1, '#045a8d']]

    colorbar = {'tick0': 0, 'dtick': 0.2, 'lenmode': 'pixels', 'len': 500, 'y': 1}

    # '<br>' is the line break Plotly understands inside hover text; a raw
    # newline inside a single-quoted literal is a syntax error.
    hovertemplate = 'Sample: %{y}<br>Function: %{x}<br>Proportion: %{z}'

    data = []
    buttons = []
    for position, (label, reorder) in enumerate(orderings):
        ordered_df = reorder(genome_df)
        data.append(go.Heatmap(x=ordered_df.columns.values.tolist(),
                               y=ordered_df.index.tolist(),
                               z=ordered_df.values.tolist(),
                               colorscale=colorscale,
                               colorbar=colorbar,
                               hovertemplate=hovertemplate,
                               xgap=1,
                               ygap=1,
                               # Only the first ordering is shown on load.
                               visible=(position == 0)))
        # Each button shows exactly one trace and hides the other three.
        shown = [other == position for other in range(len(orderings))]
        buttons.append(dict(label=label, method='update',
                            args=[{'visible': shown}]))

    updatemenus = [dict(
        buttons=buttons,
        direction='down',
        pad={'r': 10, 't': 10},
        showactive=True,
        x=0.1,
        xanchor='left',
        y=menL,
        yanchor='top'
    )]

    layout = go.Layout(xaxis={'side': 'top'},
                       autosize=False,
                       width=xLen,
                       height=yLen,
                       plot_bgcolor='#000000',
                       margin=go.layout.Margin(t=500),
                       updatemenus=updatemenus,
                       )

    fig = go.Figure(data=data, layout=layout)
    py.plot(fig, filename=output_file, auto_open=False)
input. Produces function\ 14 | list and heat map figure.") 15 | parser.add_argument('KOALA LIST', help="Input KOALA function list format. As generated from\ 16 | KEGG-decoder") 17 | parser.add_argument('HMM LIST', help="Input HMM function list format. As generated from\ 18 | KEGG-expander") 19 | args = parser.parse_args() 20 | arg_dict = vars(args) 21 | 22 | import matplotlib.pyplot as plt 23 | 24 | import pandas as pd 25 | 26 | 27 | koala = pd.read_table(open(str(arg_dict['KOALA LIST']), "r"), index_col=0) 28 | hmm = pd.read_table(open(str(arg_dict['HMM LIST']), "r"), index_col=0) 29 | output_df = koala.merge(hmm, left_index=True, right_index=True) 30 | 31 | #Reorganize column orientation to put like pathways together 32 | cols = output_df.columns.tolist() 33 | retinal_index = cols.index('Retinal biosynthesis') 34 | cols.insert(retinal_index+1, cols.pop(int(cols.index('beta-carotene 15,15-monooxygenase')))) 35 | cols.insert(retinal_index+2, cols.pop(int(cols.index('rhodopsin')))) 36 | trans_urea = cols.index('transporter: urea') 37 | cols.insert(trans_urea+1, cols.pop(int(cols.index('transporter: ammonia')))) 38 | nifH_index = cols.index('nitrogen fixation') 39 | cols.insert(nifH_index+1, cols.pop(int(cols.index('Vanadium-only nitrogenase')))) 40 | cols.insert(nifH_index+2, cols.pop(int(cols.index('Iron-only nitrogenase')))) 41 | dmsplyase_index = cols.index('DMSP demethylation') 42 | cols.insert(dmsplyase_index, cols.pop(int(cols.index('DMSP lyase (dddLQPDKW)')))) 43 | cols.insert(dmsplyase_index+1, cols.pop(int(cols.index('DMSP synthase (dsyB)')))) 44 | output_df = output_df[cols] 45 | 46 | import seaborn as sns 47 | sns.set(font_scale=1.2) 48 | sns.set_style({"savefig.dpi": 200}) 49 | ax = sns.heatmap(output_df, cmap=plt.cm.YlOrRd, linewidths=2, linecolor='k', square=True) 50 | ax.xaxis.tick_top() 51 | #ax.set_yticklabels(ax.get_yticklabels(), rotation=90) 52 | plt.xticks(rotation=90) 53 | plt.yticks(rotation=0) 54 | # get figure (usually obtained via 
"fig,ax=plt.subplots()" with matplotlib) 55 | fig = ax.get_figure() 56 | # specify dimensions and save 57 | fig.set_size_inches(100, 100) 58 | fig.savefig("decode-expand_heatmap.svg") -------------------------------------------------------------------------------- /KEGGDecoder/PreviousKEGGDecoderVersions/KEGG-expanderV0.3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | KEGG-expander.py V.0.3 5 | V.0.3. Adds DMSP lyase (dddQ, dddP, dddD, dddK, dddW), DMSP synthase (dsyB) 6 | Usage: python KEGG-decoder.py 7 | 8 | Designed to parse through the hmmsearch results table generated from 9 | the expander_dbvX.hmm to generate a heatmap figure similar to 10 | KEGG-decoder.py 11 | 12 | Recommended to run hmmsearch as follows: 13 | hmmsearch --tblout -T 75 expander_dbvX.hmm 14 | bit score cutoff of 75, equivalent to e-value < 10^-20 15 | 16 | Dependencies: 17 | Pandas - http://pandas.pydata.org/pandas-docs/stable/install.html 18 | Seaborn - http://seaborn.pydata.org/installing.html 19 | matplotlib - http://matplotlib.org/users/installing.html 20 | 21 | For extended information about HMM assignments, genes and pathways, 22 | please see accompanying document "Pfam_definitions.txt" 23 | 24 | ''' 25 | 26 | 27 | 28 | import argparse 29 | 30 | parser = argparse.ArgumentParser(description="Accepts HMM search results of expander_dbvX.hmm\ 31 | text file as input. Produces function\ 32 | list and heat map figure.") 33 | parser.add_argument('Input', help="Input HMM table file. 
def rhodopsin(hmm_match):
    """Flag the two phototrophy marker models found in ``hmm_match``.

    ``hmm_match`` is any container supporting ``in`` that holds the model
    accessions matched for one genome. Returns a dict with one 0/1 entry
    per marker: TIGR03753 (beta-carotene 15,15-monooxygenase) and
    PF01036 (rhodopsin).
    """
    marker_labels = (
        ('TIGR03753', 'beta-carotene 15,15-monooxygenase'),
        ('PF01036', 'rhodopsin'),
    )
    return {label: (1 if accession in hmm_match else 0)
            for accession, label in marker_labels}
def alt_nitrogenase(hmm_match):
    """Score the vanadium-only and iron-only alternative nitrogenases.

    Each pathway is represented by three TIGRFAM models; every model found
    in ``hmm_match`` adds 0.33 to that pathway, so a pathway with all three
    models scores approximately 0.99 and one with none scores 0.
    """
    subunit_models = {
        'Vanadium-only nitrogenase': ('TIGR01860', 'TIGR02932', 'TIGR02930'),
        'Iron-only nitrogenase': ('TIGR01861', 'TIGR02931', 'TIGR02929'),
    }
    return {
        pathway: sum(0.33 for model in models if model in hmm_match)
        for pathway, models in subunit_models.items()
    }
def dmsplyase(hmm_match):
    """Report whether any DMSP lyase model is present in ``hmm_match``.

    A single hit to any one of the listed models is enough to set the
    'DMSP lyase (dddLQPDKW)' entry to 1; otherwise it stays 0.
    """
    lyase_models = ('PF16867', '14591', '25993', '94923', '274874')
    has_lyase = any(model in hmm_match for model in lyase_models)
    return {'DMSP lyase (dddLQPDKW)': 1 if has_lyase else 0}
out_string = str(out_list).strip('[]') 185 | tab_string = "" 186 | for l in out_string: 187 | if l == "\'": 188 | continue 189 | if l == ",": 190 | tab_string = tab_string + "\t" 191 | else: 192 | tab_string = tab_string + l 193 | out_file.write(tab_string+"\n") 194 | 195 | out_file.close() 196 | 197 | import matplotlib.pyplot as plt 198 | 199 | import pandas as pd 200 | 201 | file_in = open(filehandle, "r") 202 | genome = pd.read_table(file_in, index_col=0) 203 | import seaborn as sns 204 | sns.set(font_scale=1.2) 205 | sns.set_style({"savefig.dpi": 200}) 206 | ax = sns.heatmap(genome, cmap=plt.cm.YlOrRd, linewidths=2, linecolor='k', square=True) 207 | ax.xaxis.tick_top() 208 | #ax.set_yticklabels(ax.get_yticklabels(), rotation=90) 209 | plt.xticks(rotation=90) 210 | plt.yticks(rotation=0) 211 | # get figure (usually obtained via "fig,ax=plt.subplots()" with matplotlib) 212 | fig = ax.get_figure() 213 | # specify dimensions and save 214 | fig.set_size_inches(100, 100) 215 | fig.savefig("hmm_heatmap.svg") -------------------------------------------------------------------------------- /KEGGDecoder/PreviousKEGGDecoderVersions/KEGG-expanderV0.4.1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | KEGG-expander.py V.0.4 5 | V.0.4 Adds amphibactin biosynthesis, ferrioxamine biosynthesis 6 | V.0.3.1 Added parameters to force labels to be printed. 7 | KEGG-expander.py V.0.3 8 | V.0.3. 
Adds DMSP lyase (dddQ, dddP, dddD, dddK, dddW), DMSP synthase (dsyB) 9 | Usage: python KEGG-decoder.py 10 | 11 | Designed to parse through the hmmsearch results table generated from 12 | the expander_dbvX.hmm to generate a heatmap figure similar to 13 | KEGG-decoder.py 14 | 15 | Recommended to run hmmsearch as follows: 16 | hmmsearch --tblout -T 75 expander_dbvX.hmm 17 | bit score cutoff of 75, equivalent to e-value < 10^-20 18 | 19 | Dependencies: 20 | Pandas - http://pandas.pydata.org/pandas-docs/stable/install.html 21 | Seaborn - http://seaborn.pydata.org/installing.html 22 | matplotlib - http://matplotlib.org/users/installing.html 23 | 24 | For extended information about HMM assignments, genes and pathways, 25 | please see accompanying document "Pfam_definitions.txt" 26 | 27 | ''' 28 | 29 | 30 | 31 | import argparse 32 | 33 | parser = argparse.ArgumentParser(description="Accepts HMM search results of expander_dbvX.hmm\ 34 | text file as input. Produces function\ 35 | list and heat map figure.") 36 | parser.add_argument('Input', help="Input HMM table file. 
# Models that need a stricter bit score than the global hmmsearch cutoff.
# A hit is kept only when its score (the sixth whitespace-separated column
# of the table) is strictly greater than the value listed here.
_STRICT_BIT_SCORE_CUTOFFS = {
    # DMSP lyase dddP and dddD
    "14591": 500,
    "25993": 500,
    # DMSP synthase dsyB
    "4254": 260,
    # amphibactin biosynthesis
    "1544": 1000,
    "27549": 1000,
    # ferrioxamine biosynthesis
    "2219": 200,
    "2732": 200,
    "9429": 200,
    "51934": 200,
}

# genome id -> list of model ids (fourth column, version suffix removed)
# that matched any sequence of that genome.
genome_data = {}

with open(str(arg_dict['Input']), "r") as hmm_table:
    for line in hmm_table:
        info = line.split()
        # Skip comment lines and blank lines (the latter used to raise
        # IndexError on info[0]).
        if not info or line[0] == "#":
            continue
        # The genome id is the part of the first column before the first "_".
        genome_id = info[0].split("_")[0]
        model_id = info[3].split(".")[0]
        cutoff = _STRICT_BIT_SCORE_CUTOFFS.get(model_id)
        if cutoff is not None and float(info[5]) <= cutoff:
            continue
        # Each accepted hit is recorded exactly once; the previous chain of
        # independent "if" blocks appended thresholded hits twice.
        genome_data.setdefault(genome_id, []).append(model_id)
rhodopsin(hmm_match): 93 | out_data = {'beta-carotene 15,15-monooxygenase': 0, 'rhodopsin': 0} 94 | if 'PF01036' in hmm_match: 95 | out_data['rhodopsin'] = 1 96 | if 'TIGR03753' in hmm_match: 97 | out_data['beta-carotene 15,15-monooxygenase'] = 1 98 | return out_data 99 | 100 | def peptidases(hmm_match): 101 | out_data = {'Peptidase family C25': 0, 'Bacterial pre-peptidase C-terminal domain': 0, 102 | 'Clostripain family': 0, 'Peptidase family M28': 0, 'Peptidase family M50': 0, 103 | 'Di- and tripeptidases': 0, 'Leucyl aminopeptidase': 0, 'Xaa-Pro aminopeptidase': 0, 104 | 'Peptidase propeptide and YPEB domain': 0, 'Oligoendopeptidase F': 0, 105 | 'Phosphoserine aminotransferase': 0, 'Lipoprotein signal peptidase': 0, 106 | 'Aminopeptidase N': 0, 'Zinc carboxypeptidase': 0, 'Peptidase S24-like': 0, 107 | 'Peptidase S26': 0, 'D-aminopeptidase': 0, 'M61 glycyl aminopeptidase': 0} 108 | if 'PF01364' in hmm_match: 109 | out_data['Peptidase family C25'] = 1 110 | if 'PF04151' in hmm_match: 111 | out_data['Bacterial pre-peptidase C-terminal domain'] = 1 112 | if 'PF03415' in hmm_match: 113 | out_data['Clostripain family'] = 1 114 | if 'PF04389' in hmm_match: 115 | out_data['Peptidase family M28'] = 1 116 | if 'PF02163' in hmm_match: 117 | out_data['Peptidase family M50'] = 1 118 | if 'PF01546' in hmm_match: 119 | out_data['Di- and tripeptidases'] = 1 120 | if 'PF02073' in hmm_match: 121 | out_data['Leucyl aminopeptidase'] = 1 122 | if 'PF00557' in hmm_match: 123 | out_data['Xaa-Pro aminopeptidase'] = 1 124 | if 'PF03413' in hmm_match: 125 | out_data['Peptidase propeptide and YPEB domain'] = 1 126 | if 'PF01432' in hmm_match: 127 | out_data['Oligoendopeptidase F'] = 1 128 | if 'PF00266' in hmm_match: 129 | out_data['Phosphoserine aminotransferase'] = 1 130 | if 'PF01252' in hmm_match: 131 | out_data['Lipoprotein signal peptidase'] = 1 132 | if 'PF01433' in hmm_match: 133 | out_data['Aminopeptidase N'] = 1 134 | if 'PF00246' in hmm_match: 135 | out_data['Zinc 
carboxypeptidase'] = 1 136 | if 'PF00717' in hmm_match: 137 | out_data['Peptidase S24-like'] = 1 138 | if 'PF10502' in hmm_match: 139 | out_data['Peptidase S26'] = 1 140 | if 'PF04951' in hmm_match: 141 | out_data['D-aminopeptidase'] = 1 142 | if 'PF05299' in hmm_match: 143 | out_data['M61 glycyl aminopeptidase'] = 1 144 | return out_data 145 | 146 | def alt_nitrogenase(hmm_match): 147 | out_data = {'Vanadium-only nitrogenase': 0, 'Iron-only nitrogenase': 0} 148 | v_nitro = ['TIGR01860', 'TIGR02932', 'TIGR02930'] 149 | for i in v_nitro: 150 | if i in hmm_match: 151 | out_data['Vanadium-only nitrogenase'] += 0.33 152 | fe_nitro = ['TIGR01861', 'TIGR02931', 'TIGR02929'] 153 | for i in fe_nitro: 154 | if i in hmm_match: 155 | out_data['Iron-only nitrogenase'] += 0.33 156 | return out_data 157 | 158 | def amm_trans(hmm_match): 159 | out_data = {'transporter: ammonia': 0} 160 | if 'PF00909' in hmm_match: 161 | out_data['transporter: ammonia'] = 1 162 | return out_data 163 | 164 | def dmsplyase(hmm_match): 165 | out_data = {'DMSP lyase (dddLQPDKW)': 0} 166 | dmsp = ['PF16867', '14591', '25993', '94923', '274874'] 167 | for i in dmsp: 168 | if i in hmm_match: 169 | out_data['DMSP lyase (dddLQPDKW)'] = 1 170 | return out_data 171 | 172 | def dmspsynthase(hmm_match): 173 | out_data = {'DMSP synthase (dsyB)' : 0} 174 | if '4254' in hmm_match: 175 | out_data['DMSP synthase (dsyB)'] = 1 176 | return out_data 177 | 178 | def amphibactin(hmm_match): 179 | out_data = {'amphibactin ACO2092-3homolog':0} 180 | if ('1544' in hmm_match) and ('27549' in hmm_match): 181 | out_data['amphibactin ACO2092-3homolog'] = 1 182 | return out_data 183 | 184 | def ferrioxamine(hmm_match): 185 | out_data = {'ferrioxamine biosynthesis':0} 186 | ferrioxamine = ["2219", "2732", "9429", "51934"] 187 | for i in ferrioxamine: 188 | if i in hmm_match: 189 | out_data['ferrioxamine biosynthesis'] += 0.25 190 | return out_data 191 | 192 | function_order = ['beta-carotene 15,15-monooxygenase', 'rhodopsin', 
'Peptidase family C25', 193 | 'Bacterial pre-peptidase C-terminal domain', 'Clostripain family', 194 | 'Peptidase family M28', 'Peptidase family M50', 'Di- and tripeptidases', 195 | 'Leucyl aminopeptidase', 'Xaa-Pro aminopeptidase', 196 | 'Peptidase propeptide and YPEB domain', 'Oligoendopeptidase F', 197 | 'Phosphoserine aminotransferase', 'Lipoprotein signal peptidase', 198 | 'Aminopeptidase N', 'Zinc carboxypeptidase', 'Peptidase S24-like', 'Peptidase S26', 199 | 'D-aminopeptidase', 'M61 glycyl aminopeptidase', 'Vanadium-only nitrogenase', 200 | 'Iron-only nitrogenase', 'transporter: ammonia', 201 | 'DMSP lyase (dddLQPDKW)', 'DMSP synthase (dsyB)', 'amphibactin ACO2092-3homolog', 202 | 'ferrioxamine biosynthesis'] 203 | 204 | filehandle = str(arg_dict['Output']) 205 | out_file = open(filehandle, "w") 206 | out_file.write('Function'+"\t"+str("\t".join(function_order))+"\n") 207 | 208 | for k in genome_data: 209 | pathway_data = {} 210 | pathway_data.update(rhodopsin(genome_data[k])) 211 | pathway_data.update(peptidases(genome_data[k])) 212 | pathway_data.update(alt_nitrogenase(genome_data[k])) 213 | pathway_data.update(amm_trans(genome_data[k])) 214 | pathway_data.update(dmsplyase(genome_data[k])) 215 | pathway_data.update(dmspsynthase(genome_data[k])) 216 | pathway_data.update(amphibactin(genome_data[k])) 217 | pathway_data.update(ferrioxamine(genome_data[k])) 218 | 219 | out_string = str(k)+"\t" 220 | out_list = [k] 221 | for i in function_order: 222 | out_list.append(pathway_data[i]) 223 | out_string = str(out_list).strip('[]') 224 | tab_string = "" 225 | for l in out_string: 226 | if l == "\'": 227 | continue 228 | if l == ",": 229 | tab_string = tab_string + "\t" 230 | else: 231 | tab_string = tab_string + l 232 | out_file.write(tab_string+"\n") 233 | 234 | out_file.close() 235 | 236 | import matplotlib.pyplot as plt 237 | 238 | import pandas as pd 239 | 240 | file_in = open(filehandle, "r") 241 | genome = pd.read_table(file_in, index_col=0) 242 | import 
seaborn as sns 243 | sns.set(font_scale=1.2) 244 | sns.set_style({"savefig.dpi": 200}) 245 | ax = sns.heatmap(genome, cmap=plt.cm.YlOrRd, linewidths=2, linecolor='k', square=True, xticklabels=True, yticklabels=True) 246 | ax.xaxis.tick_top() 247 | #ax.set_yticklabels(ax.get_yticklabels(), rotation=90) 248 | plt.xticks(rotation=90) 249 | plt.yticks(rotation=0) 250 | # get figure (usually obtained via "fig,ax=plt.subplots()" with matplotlib) 251 | fig = ax.get_figure() 252 | # specify dimensions and save 253 | fig.set_size_inches(100, 100) 254 | fig.savefig("hmm_heatmap.svg") -------------------------------------------------------------------------------- /KEGGDecoder/PreviousKEGGDecoderVersions/KOALA_definitionsV0.5.txt: -------------------------------------------------------------------------------- 1 | V.0.5 2 | V.0.5 Adds functions for sulfolipid biosynthesis (key gene sqdB) and C-P lyase 3 | V.0.3. Adds retinal biosynthesis, sulfite dehydrogenase (quinone), hydrazine dehydrogenase, hydrazine synthase, DMSP/DMS/DMSO cycling, cobalamin biosynthesis, competence-related DNA transport, anaplerotic reactions 4 | Nitrogen metabolism 5 | Dissimilatory nitrate reduction requires narGH K00370 + K00371 AND/OR napAB K02567 + K02568 6 | Nitrite oxidation requires nxrAB K00370 + K00371 7 | DNRA requires nirBD (K00362 + K00363) and/or nrfAH K03385 + K15876 8 | Denitrification step of nitrite reduction NO2 > NO requires nirK K00368 OR nirS K15864 9 | Denitrification step of nitric oxide reduction NO > N2O requires norBC K04561 + K02305 10 | Denitrification step of nitrous-oxide reduction N2O > N2 requires nosZ K00376 11 | Nitrogen fixation requires the nifKDH (K02586 + K02591 + K02588) 12 | Nitrification by bacteria hydroxylamine to nitrite NH2OH > NO2 requires hao K10535 13 | Nitrification ammonium to hydroxylamine NH4 > N2OH for detection, amoA (K10944) is essential and amoBC (K10945 + K10946) supports functional potential 14 | Anammox requires hydroxylamine oxidoreductase 
(K10535) and nirK or nirS (K00368 or K15864) 15 | hydrazine dehydrogenase K20935 16 | hydrazine synthase (K20932 + K20933 + K20934) 17 | 18 | 19 | glycolysis as a pathway measured as a fraction of the following KOs: 20 | phosphoglucomutase K01835 21 | glucose-6-phosphate isomerase K01810 22 | 6-phosphofructokinase K00850 OR pyrophosphate--fructose-6-phosphate 1-phosphotransferase K00895 23 | fructose-bisphosphate aldolase K01623 24 | glyceraldehyde 3-phosphate dehydrogenase K00134 OR glyceraldehyde-3-phosphate dehydrogenase (NAD(P)) K00150 25 | phosphoglycerate kinase K00927 26 | 2,3-bisphosphoglycerate-dependent phosphoglycerate mutase K01834 OR 2,3-bisphosphoglycerate-independent phosphoglycerate mutase K15633 27 | enolase K01689 28 | pyruvate kinase K00873 OR pyruvate, orthophosphate dikinase K01006 29 | 30 | gluconeogenesis requires fructose-1,6-bisphosphatase K03841 to reverse reaction, also requires all other parts of glycolysis EXCEPT for K00850/K00895 31 | 32 | TCA cycle as measured as a fraction of the following KOs: 33 | aconitate hydratase - 2 subunits K01681 & K01682 34 | isocitrate dehydrogenase K00031 OR isocitrate dehydrogenase (NAD+) K00030 OR isocitrate--homoisocitrate dehydrogenase K17753 35 | 2-oxoglutarate/2-oxoacid ferredoxin oxidoreductase korAB essential K00174 + K00175 ALSO check for korCD K00177 & K00176 36 | succinyl-CoA synthetase K01899 + K01900 OR succinyl-CoA synthetase K01902 + K01903 OR succinyl-CoA:acetate CoA-transferase K18118 37 | fumarate reductase (K00244 + K00245 + K00246 + K00247) OR succinate dehydrogenase / fumarate reductase (K00239 + K00240 + K00241 + K00242) OR succinate dehydrogenase (ubiquinone) (K00234 + K00235 + K00236 + K00237) 38 | fumarate hydratase (K01677 + K01678 + K01679) 39 | malate dehydrogenase (quinone) K00116 OR K00025 OR K00026 OR K00024 40 | citrate synthase K01647 41 | 42 | CBB cycle measured as a fraction of the following KOs: 43 | FIRST check and report presence of RuBisCo - ribulose-bisphosphate 
carboxylase (Type 1) two subunits K01601 + K01602 44 | Then check for 3 essential steps in canonical cycle: 45 | phosphoglycerate kinase K00927 46 | glyceraldehyde 3-phosphate dehydrogenase K00134 OR glyceraldehyde-3-phosphate dehydrogenase (NADP+) K05298 OR glyceraldehyde-3-phosphate dehydrogenase (NAD(P)) K00150 47 | phosphoribulokinase K00855 48 | Then check for ribulose regeneration (several alternative pathways): 49 | Option 1 - ribulose-phosphate 3-epimerase K01783 AND xylulose-5-phosphate/fructose-6-phosphate phosphoketolase K01621 50 | Option 2 - transketolase K00615 AND ribulose-phosphate 3-epimerase K01783 51 | Option 3 - transketolase K00615 AND ribose 5-phosphate isomerase A K01807 52 | Option 4 - fructose-bisphosphate aldolase, class I K01623 OR fructose-bisphosphate aldolase, class II K01624 OR fructose-bisphosphate aldolase, class I K11645 53 | transketolase K00615 54 | Some combination of these genes: 55 | fructose-1,6-bisphosphatase II / sedoheptulose-1,7-bisphosphatase K11532 56 | fructose-1,6-bisphosphatase I K03841 57 | fructose-1,6-bisphosphatase II K02446 58 | 59 | Reverse TCA cycle display completeness of TCA cycle, check for essential gene components: 60 | Requires 61 | Option 1 - ATP-citrate lyase K15230 + K15231 62 | Option 2 - citryl-CoA synthetase K15232 + K15233 AND citryl-CoA lyase K15234 63 | 64 | Wood-Ljungdahl pathway requires some essential genes and pathway fraction: 65 | Essential: 66 | Option 1 (when fused) - acetyl-CoA decarbonylase/synthase complex subunit alpha K00192 AND carbon-monoxide dehydrogenase K00198 + K00196 OR carbon-monoxide dehydrogenase K03520 + K03518 + K03519 67 | Option 2 - CO-methylating acetyl-CoA synthase K14138 AND carbon-monoxide dehydrogenase K00198 + K03518 + K03519 + K03520 68 | Methyl branch: 69 | formate dehydrogenase K05299 + K15022 70 | formate--tetrahydrofolate ligase K01938 71 | methylenetetrahydrofolate dehydrogenase (NADP+) / methenyltetrahydrofolate cyclohydrolase K01491 72 |
methylenetetrahydrofolate reductase (NADPH) K00297 73 | 74 | 3-hydroxypropionate pathway measured as a fraction of the following KOs: 75 | Two separate pathways cycling from acetyl-CoA 76 | Pathway 1: 77 | pyruvate ferredoxin oxidoreductase K00169 + K00170 + K00171 + K00172 + K03737 78 | pyruvate, orthophosphate dikinase K01006 OR pyruvate, water dikinase K01007 79 | phosphoenolpyruvate carboxylase K01595 80 | malate dehydrogenase K00024 81 | succinyl-CoA:(S)-malate CoA-transferase K14471 + K14472 82 | malyl-CoA/(S)-citramalyl-CoA lyase K08691 83 | Pathway 2: 84 | acetyl-CoA carboxylase, biotin carboxylase K02160 + K01961 + K01962 + K01963 + K01964 + K15037 + K15036 + K18603 + K18604 + K18605 85 | malonyl-CoA reductase / 3-hydroxypropionate dehydrogenase (NADP+) K14468 + K15017 86 | 3-hydroxypropionate dehydrogenase (NADP+) K15039 87 | acrylyl-CoA reductase (NADPH) / 3-hydroxypropionyl-CoA dehydratase / 3-hydroxypropionyl-CoA synthetase K14469 + K15018 88 | 3-hydroxypropionyl-coenzyme A dehydratase K15019 89 | acryloyl-coenzyme A reductase K15020 90 | malyl-CoA/(S)-citramalyl-CoA lyase K08691 91 | 2-methylfumaryl-CoA hydratase K14449 92 | 2-methylfumaryl-CoA isomerase K14470 93 | 3-methylfumaryl-CoA hydratase K09709 94 | malyl-CoA/(S)-citramalyl-CoA lyase K08691 95 | 96 | 4-hydroxybutyrate/3-hydroxypropionate cycle measures as a fraction of the following KOs: 97 | Reference from Thaumarchaea -- incomplete in KOs 98 | acetyl-CoA carboxylase, biotin carboxylase K02160 + K01961 + K01962 + K01963 + K01964 + K15037 + K15036 + K18603 + K18604 + K18605 99 | malonic semialdehyde reductase K18602 100 | 3-hydroxypropionyl-CoA synthetase (ADP-forming) K18594 101 | acrylyl-CoA reductase (NADPH) / 3-hydroxypropionyl-CoA dehydratase / 3-hydroxypropionyl-CoA synthetase K14469 + K15019 102 | propionyl-CoA carboxylase K15052 + K01964 + K15037 + K15036 + K18603 + K18604 + K18605 103 | methylmalonyl-CoA/ethylmalonyl-CoA epimerase K05606 104 | methylmalonyl-CoA mutase K01847 + K01848 
+ K01849 105 | 4-hydroxybutyryl-CoA synthetase (ADP-forming) K18593 106 | 4-hydroxybutyryl-CoA dehydratase / vinylacetyl-CoA-Delta-isomerase K14534 107 | enoyl-CoA hydratase / 3-hydroxyacyl-CoA dehydrogenase K15016 108 | acetyl-CoA C-acetyltransferase K00626 109 | 110 | Carbon degradation - Each enzyme taken as a single entity: 111 | beta-glucosidase K05350 112 | beta-glucosidase K05349 113 | cellulase K01225 114 | cellulase K19668 115 | chitinase K01183 116 | bifunctional chitinase/lysozyme K13381 117 | basic endochitinase B K20547 118 | diacetylchitobiose deacetylase K03478 OR K18454 119 | beta-N-acetylhexosaminidase K01207 120 | pectinesterase K01051 121 | exo-poly-alpha-galacturonosidase K01213 122 | oligogalacturonide lyase K01730 123 | exopolygalacturonase K01184 124 | D-galacturonate isomerase K01812 125 | D-galacturonate epimerase K08679 126 | alpha-amylase K01176 127 | glucoamylase K01178 128 | pullulanase K01200 129 | 130 | Chemotaxis (as determined by Tara) KO fraction above a certain threshold: 131 | K13924 cheBR; two-component system, chemotaxis family, CheB/CheR fusion protein [EC:3.1.1.61 2.1.1.80] 132 | K00575 cheR; chemotaxis protein methyltransferase CheR [EC:2.1.1.80] 133 | K03413 cheY; two-component system, chemotaxis family, response regulator CheY 134 | K03412 cheB; two-component system, chemotaxis family, response regulator CheB [EC:3.1.1.61] 135 | K03406 mcp; methyl-accepting chemotaxis protein 136 | K03407 cheA; two-component system, chemotaxis family, sensor kinase CheA [EC:2.7.13.3] 137 | K03415 cheV; two-component system, chemotaxis family, response regulator CheV 138 | K03408 cheW; purine-binding chemotaxis protein CheW 139 | 140 | Flagellum biosynthesis (as determined by Tara) KO fraction above a certain threshold: 141 | K02409 flagellar M-ring protein FliF 142 | K02401 flagellar biosynthetic protein FlhB 143 | K02394 flagellar P-ring protein precursor FlgI 144 | K02397 flagellar hook-associated protein 3 FlgL 145 | K02396 flagellar 
hook-associated protein 1 FlgK 146 | K02391 flagellar basal-body rod protein FlgF 147 | K02390 flagellar hook protein FlgE 148 | K02393 flagellar L-ring protein precursor FlgH 149 | K02392 flagellar basal-body rod protein FlgG 150 | K02386 flagella basal body P-ring formation protein FlgA 151 | K02557 chemotaxis protein MotB 152 | K02556 chemotaxis protein MotA 153 | K02400 flagellar biosynthesis protein FlhA 154 | K02418 flagellar protein FliO/FliZ 155 | K02389 flagellar basal-body rod modification protein FlgD 156 | K02412 flagellum-specific ATP synthase [EC:3.6.3.14] 157 | K02387 flagellar basal-body rod protein FlgB 158 | K02410 flagellar motor switch protein FliG 159 | K02411 flagellar assembly protein FliH 160 | K02416 flagellar motor switch protein FliM 161 | K02417 flagellar motor switch protein FliN/FliY 162 | K02407 flagellar hook-associated protein 2 163 | K02406 flagellin 164 | 165 | Sulfur metabolism 166 | Assimilation 167 | K00392 sir; sulfite reductase (ferredoxin) [EC:1.8.7.1] 168 | K00380 + K00381 cysJ; sulfite reductase (NADPH) flavoprotein alpha-component [EC:1.8.1.2] + cysI; sulfite reductase (NADPH) hemoprotein beta-component [EC:1.8.1.2] 169 | Reversible Dissimilatory 170 | K00958 sat; sulfate adenylyltransferase 171 | K00395 + K00394 aprB; adenylylsulfate reductase, subunit B [EC:1.8.99.2] + aprA; adenylylsulfate reductase, subunit A [EC:1.8.99.2] 172 | K11180 + K11181 dsrA; sulfite reductase, dissimilatory-type alpha subunit [EC:1.8.99.3] + dsrB; sulfite reductase, dissimilatory-type beta subunit [EC:1.8.99.3] 173 | Thiosulfate oxidation measured as a fraction of KOs: 174 | soxA K17222 175 | soxB K17224 176 | soxC K17225 177 | soxX K17223 178 | soxY K17226 179 | soxZ K17227 180 | Alternative thiosulfate oxidation 181 | K16936 doxA; thiosulfate dehydrogenase [quinone] small subunit [EC:1.8.5.2] 182 | K16937 doxD; thiosulfate dehydrogenase [quinone] large subunit [EC:1.8.5.2] 183 | K19713 tsdA; thiosulfate dehydrogenase [EC:1.8.2.2] 184 | 
Sulfur reduction (reversible reaction) 185 | K17219 sreA; sulfur reductase molybdopterin subunit 186 | K17220 sreB; sulfur reductase FeS subunit 187 | K17221 sreC; sulfur reductase membrane anchor 188 | K08352 psrA; thiosulfate reductase / polysulfide reductase chain A 189 | K08353 psrB; thiosulfate reductase electron transport protein 190 | K08354 psrC; thiosulfate reductase cytochrome b subunit 191 | K17993 hydA; sulfhydrogenase subunit alpha 192 | K17996 hydB; sulfhydrogenase subunit beta (sulfur reductase) 193 | K17995 hydG; sulfhydrogenase subunit gamma (sulfur reductase) 194 | K17994 hydD; sulfhydrogenase subunit delta 195 | Aerobic sulfur disproportionation 196 | K16952 sor; sulfur oxygenase/reductase [EC:1.13.11.55] 197 | K17725 sdo; sulfur dioxygenase 198 | K05301 sorB; sulfite dehydrogenase 199 | Sulfide oxidation 200 | K17218 sqr; sulfide:quinone oxidoreductase [EC:1.8.5.4] 201 | K17229 fccB; sulfide dehydrogenase [flavocytochrome c] flavoprotein chain 202 | Sulfite dehydrogenase (quinone) 203 | K21307 + K21308 + K21309 soeABC 204 | DMSP demethylation 205 | K17486 dmdA; dimethylsulfoniopropionate demethylase 206 | DMS dehydrogenase 207 | K16964 ddhA; dimethylsulfide dehydrogenase subunit alpha 208 | K16965 ddhB; dimethylsulfide dehydrogenase subunit beta 209 | K16966 ddhC; dimethylsulfide dehydrogenase subunit gamma 210 | DMSO reductase 211 | K07306 dmsA; anaerobic dimethyl sulfoxide reductase subunit A 212 | K07307 dmsB; anaerobic dimethyl sulfoxide reductase subunit B (DMSO reductase iron- sulfur subunit) 213 | K07308 dmsC; anaerobic dimethyl sulfoxide reductase subunit C (DMSO reductase anchor subunit) 214 | 215 | Methanogenesis 216 | Methanogenesis via methanol 217 | K14080 mtaA; [methyl-Co(III) methanol-specific corrinoid protein]:coenzyme M methyltransferase 218 | K04480 mtaB; methanol---5-hydroxybenzumidazolylcobamide Co-methyltransferase 219 | K14081 mtaC; methanol corrinoid protein 220 | Methanogenesis via dimethylamine 221 | K14082 mtbA; 
[methyl-Co(III) methylamine-specific corrinoid protein]:coenzyme M methyltransferase 222 | K16178 mtbB; dimethylamine---corrinoid protein Co-methyltransferase 223 | Methanogenesis via dimethylsulfide, methanethiol, methylpropanoate 224 | K16954 mtsA; methylthiol:coenzyme M methyltransferase 225 | K16955 mtsB; methylated-thiol--corrinoid protein 226 | Methanogenesis via methylamine 227 | K16176 mtmB; monomethylamine methyltransferase 228 | Methanogenesis via trimethylamine 229 | K14083 mttB; trimethylamine methyltransferase 230 | Methanogenesis via acetate 231 | K00193 cdhC; acetyl-CoA decarbonylase/synthase complex subunit beta 232 | K00194 cdhD; acetyl-CoA decarbonylase/synthase complex subunit delta 233 | K00197 cdhE; acetyl-CoA decarbonylase/synthase complex subunit gamma 234 | Methanogenesis via CO2 235 | K00200 + K00201 + K00202 + K00203 + K11261 + K00205 fmdABCDEF; formylmethanofuran dehydrogenase 236 | K00200 + K00201 + K00202 + K00203 fwdABCD; tungsten-containing formylmethanofuran dehydrogenase 237 | K00672 ftr; formylmethanofuran--tetrahydromethanopterin N-formyltransferase 238 | K01499 mch; methenyltetrahydromethanopterin cyclohydrolase 239 | K13942 hmd; 5,10-methenyltetrahydromethanopterin hydrogenase 240 | K00320 mer; 5,10-methylenetetrahydromethanopterin reductase 241 | K00577 + K00578 + K00579 + K00580 + K00581 + K00582 + K00583 + K00584 mtrABCDEFGH; tetrahydromethanopterin S-methyltransferase 242 | Coenzyme M reduction to methane 243 | K00399 + K00401 + K00402 mcrABCD; methyl-coenzyme M reductase 244 | Coenzyme B/Coenzyme M regeneration 245 | K03388 + K03389 + K03390 + K08264 + K08265 hdrABCDE; CoB-CoM heterodisulfide reductase 246 | Dimethylamine/trimethylamine dehydrogenase 247 | K00317 dmd-tmd; dimethylamine/trimethylamine dehydrogenase 248 | 249 | Methane oxidation 250 | K16157 + K16158 + K16159 + K16161 mmoXYZC; soluble methane monooxygenase 251 | 252 | Transporters 253 | K02040 + K02037 + K02038 + K02036 pstABCS; phosphate 254 | K02044 + 
K02042 + K02041 phnDEC; phosphonate 255 | K02064 + K02063 + K02062 tbpA,thiPQ; thiamin 256 | K06858 + K06073 + K06074 btuFCD; vitamin B12 257 | K11959 + K11960 + K11961 + K11962 + K11963 urtABCED; urea 258 | 259 | 260 | Thiamin biosynthesis 261 | Generation of 2-[(2R,5Z)-2-carboxy-4-methylthiazol-5(2H)-ylidene]ethyl phosphate 262 | K03148 thiF; sulfur carrier protein ThiS adenylyltransferase 263 | K04487 iscS; cysteine desulfurase 264 | K03150 thiH; 2-iminoacetate synthase OR K03153 thiO; glycine oxidase 265 | K03151 thiI; thiamine biosynthesis protein ThiI 266 | K01662 dxs; 1-deoxy-D-xylulose-5-phosphate synthase 267 | K03149 thiG; thiazole synthase 268 | K10810 tenI; thiazole tautomerase OR THI4 K03146; thiamine thiazole synthase 269 | Generation of 4-amino-2-methyl-5-(diphosphomethyl)pyrimidine 270 | K18278 THI5; pyrimidine precursor biosynthesis enzyme OR K03147 thiC; phosphomethylpyrimidine synthase OR K00877 THI20; hydroxymethylpyrimidine/phosphomethylpyrimidine kinase / thiaminase OR K00941 thiD; hydroxymethylpyrimidine/phosphomethylpyrimidine kinase OR K14153 thiDE; hydroxymethylpyrimidine kinase / phosphomethylpyrimidine kinase / thiamine-phosphate diphosphorylase 271 | K00877 THI20; hydroxymethylpyrimidine/phosphomethylpyrimidine kinase / thiaminase OR K00941 thiD; hydroxymethylpyrimidine/phosphomethylpyrimidine kinase 272 | Terminal biosynthesis 273 | K00788 thiE; thiamine-phosphate pyrophosphorylase OR K14153 thiDE; hydroxymethylpyrimidine kinase / phosphomethylpyrimidine kinase / thiamine-phosphate diphosphorylase OR K14154 THI6;thiamine-phosphate diphosphorylase / hydroxyethylthiazole kinase 274 | K00946 thiL; thiamine-monophosphate kinase 275 | 276 | Riboflavin biosynthesis 277 | K02858 ribB; 3,4-dihydroxy 2-butanone 4-phosphate synthase OR K14652 ribAB; 3,4-dihydroxy 2-butanone 4-phosphate synthase / GTP cyclohydrolase II 278 | K00082 ribD2; 5-amino-6-(5-phosphoribosylamino)uracil reductase OR K11752 ribD; diaminohydroxyphosphoribosylaminopyrimidine 
deaminase / 5-amino-6-(5-phosphoribosylamino)uracil reductase 279 | K00794 ribH; 6,7-dimethyl-8-ribityllumazine synthase 280 | K00793 ribE; riboflavin synthase 281 | Conversion to FMN and FAD (not included in script) 282 | K00861 RFK; riboflavin kinase OR K20884 FHY; riboflavin kinase / FMN hydrolase OR K11753 ribF; riboflavin kinase / FMN adenylyltransferase 283 | K14656 ribL; FAD synthetase OR K00953 FLAD1; FAD synthetase 284 | 285 | Cobalamin biosynthesis 286 | K00798 pduO; cob(I)alamin adenosyltransferase OR K19221 cobA; cob(I)alamin adenosyltransferase 287 | K02232 cobQ; adenosylcobyric acid synthase 288 | K02225 cobC; cobalamin biosynthetic protein CobC 289 | K02227 cobD; adenosylcobinamide-phosphate synthase 290 | K02231 cobU; adenosylcobinamide kinase / adenosylcobinamide-phosphate guanylyltransferase 291 | If cobU is present, this function in the biosynthesis process has already been counted: 292 | K19712 cobY; adenosylcobinamide-phosphate guanylyltransferase 293 | K02233 cobV; adenosylcobinamide-GDP ribazoletransferase 294 | K02226 cobC; alpha-ribazole phosphatase 295 | K00768 cobT; nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase 296 | 297 | Oxidative phosphorylation (Nuo, ATPases) 298 | F-type ATPase 299 | K02111 + K02112 + K02115 + K02113 + K02114 + K02108 + K02109 + K02110 atpFBCHGDAE 300 | V-type ATPase 301 | K02117 + K02118 + K02119 + K02120 + K02121 + K02122 + K02107 + K02123 + K02124 ntpABCDEFIK,ahaH 302 | NADH-quinone oxidoreductase 303 | K00330 + K00331 + K00332 + K00333 + K00334 + K00335 + K00336 + K00337 + K00338 + K00339 + K00340 + K00341 + K00342 + K00343 nuoABCDEFGHIJKLMN 304 | NAD(P)H-quinone oxidoreductase 305 | K05574 + K05582 + K05581 + K05579 + K05572 + K05580 + K05578 + K05576 + K05577 + K05575 + K05573 + K05583 + K05584 + K05585 ndcABCDEFGHIJKLMN 306 | Cytochrome c oxidase, cbb3-type 307 | K00404 + K00405 + K00407 + K00406 ccoPQNO 308 | Cytochrome bd complex 309 | K00425 + K00426 cydAB 310 | cytochrome o 
ubiquinol oxidase 311 | K02300 + K02299 + K02298 + K02297 cyoABCD 312 | cytochrome c oxidase 313 | K02277 + K02276 + K02274 + K02275 coxABCD 314 | cytochrome aa3-600 menaquinol oxidase 315 | K02829 + K02828 + K02827 + K02826 qoxABCD 316 | ubiquinol-cytochrome c reductase 317 | K00411 petA; ubiquinol-cytochrome c reductase iron-sulfur subunit 318 | K00410 fbcH; ubiquinol-cytochrome c reductase cytochrome b/c1 subunit 319 | 320 | Hydrogen redox 321 | K00437 + K18008 hydB2,hydA2; NiFe hydrogenase 322 | K18016 + K18017 + K18023 mbhLJK; membrane-bound hydrogenase 323 | K00533 + K00534 hupSL; ferredoxin hydrogenase 324 | K05922 + K05927 hydA3,hydB3; hydrogen:quinone oxidoreductase 325 | K00436 + K18005 + K18006 + K18007 hoxHFUY; NAD-reducing hydrogenase 326 | K17992 + K18330 + K18331 + K18332 hndABCD; NADP-reducing hydrogenase 327 | K06282 + K06281 + K03620 hyaABC; NiFe hydrogenase Hyd-1 328 | 329 | Photosynthesis 330 | Photosystem II (major reaction center proteins - 21 additional proteins possible as part of structure) 331 | K02703 + K02706 + K02705 + K02704 + K02707 + K02708 332 | Photosystem I 333 | K02689 + K02690 + K02691 + K02692 + K02693 + K02694 + K02696 + K02697 + K02698 + K02699 + K02700 + K08905 + K02695 + K02701 + K14332 + K02702 334 | Cytochrome b6/f complex 335 | K02635 + K02637 + K02634 + K02636 + K02642 + K02643 + K03689 + K02640 336 | Anoxygenic photosynthesis 337 | Anoxygenic reaction center II (pufML) 338 | K08929 + K08928 339 | Anoxygenic reaction center I (pscABCD) 340 | K08940 + K08941 + K08942 + K08943 341 | Retinal biosynthesis (necessary for proteorhodopsin) 342 | K06443 crtY; lycopene beta-cyclase 343 | K02291 crtB; 15-cis-phytoene/all-trans-phytoene synthase 344 | K10027 crtI; phytoene desaturase 345 | K13789 crtE; geranylgeranyl diphosphate synthase, type II 346 | 347 | Entner-Doudoroff Pathway (to generate pyruvate, alt to glycolysis) 348 | K00036 G6PD; glucose-6-phosphate 1-dehydrogenase 349 | K01057 PGLS; 6-phosphogluconolactonase OR
K07404 pgl; 6-phosphogluconolactonase 350 | Alternative to previous steps: K13937 H6PD; hexose-6-phosphate dehydrogenase 351 | K01690 edd; phosphogluconate dehydratase 352 | K01625 eda; 2-dehydro-3-deoxyphosphogluconate aldolase OR K17463 dgaF; 2-dehydro-3-deoxy-phosphogluconate aldolase OR K11395 kdpgA; 2-dehydro-3-deoxy-phosphogluconate/2-dehydro-3-deoxy-6-phosphogalactonate aldolase 353 | 354 | Mixed Acid Fermentation (conversion of pyruvate to fermentation products) 355 | Pyruvate to Lactate 356 | K00016 LDH; L-lactate dehydrogenase 357 | Pyruvate to Formate + Acetyl-CoA 358 | K00656 pflD; formate C-acetyltransferase 359 | Formate to CO2 & H2 360 | (K00122 + K00125 + K00126 + K00123 + K00124 + K00127 formate dehydrogenase) 361 | To Acetate 362 | Direct from pyruvate 363 | K00156 poxB; pyruvate dehydrogenase (quinone) 364 | Direct from pyruvate via Acetyl-P 365 | K00158 poxL; pyruvate oxidase + K01512 acyP; acylphosphatase 366 | Direct from acetyl-CoA 367 | K01067 ACH1; acetyl-CoA hydrolase 368 | From acetyl-CoA via Acetyl-P 369 | (K13788 pta; phosphate acetyltransferase OR K04020 eutD; phosphotransacetylase) + K01512 acyP; acylphosphatase 370 | From lactate 371 | K00467 lactate 2-monooxygenase 372 | To Ethanol 373 | Step 1 to Acetaldehyde from Acetate 374 | (K00128 + K14085 + K00149 aldehyde dehydrogenase (NAD+)) OR K00129 aldehyde dehydrogenase (NAD(P)+) OR K00138 aldB; aldehyde dehydrogenase 375 | Step 1 to Acetaldehyde from Acetyl-CoA (reversible) 376 | K00132 + K04072 + K04073 + K18366 + K04021 acetaldehyde dehydrogenase (acetylating) 377 | Step 2 to Ethanol from Acetaldehyde 378 | (K13951 OR K13980 OR K13952 OR K13953 OR K13954 OR K00001 OR K00121 OR K04072 OR K18857 alcohol dehydrogenase) OR (K14028 + K14029 methanol dehydrogenase (cytochrome c)) OR K00114 exaA; alcohol dehydrogenase (cytochrome c) OR K00002 adh; alcohol dehydrogenase (NADP+) OR K04022 eutG; alcohol dehydrogenase 379 | PEP to Succinate via OAA, malate & fumarate 380 | Step 1 381 | K01596
pckA; phosphoenolpyruvate carboxykinase (GTP) OR K20370 PEPCK; phosphoenolpyruvate carboxykinase (diphosphate) OR K01610 pckA; phosphoenolpyruvate carboxykinase (ATP) 382 | Step 2 383 | (K00025 + K00026 + K00024 malate dehydrogenase) OR K00051 malate dehydrogenase (NADP+) OR K00116 mqo; malate dehydrogenase (quinone) 384 | Step 3 385 | (K01676 + K01677 + K01678 + K01679 fumarate hydratase, class I) 386 | Step 4 387 | (K00244 + K00245 + K00246 + K00247 fumarate reductase flavoprotein) 388 | Naphthalene degradation to salicylate 389 | K14579 + K14580 + K14578 + K14581 nahAabcd; naphthalene 1,2-dioxygenase (4 subunits) 390 | K14582 nahB; cis-1,2-dihydro-1,2-dihydroxynaphthalene/dibenzothiophene dihydrodiol dehydrogenase 391 | K14583 nahC; 1,2-dihydroxynaphthalene dioxygenase 392 | K14584 nahD; 2-hydroxychromene-2-carboxylate isomerase 393 | K14585 nahE; trans-o-hydroxybenzylidenepyruvate hydratase-aldolase 394 | K00152 nahF; salicylaldehyde dehydrogenase 395 | 396 | Biofilm formation 397 | K11935 + K11931 + K11936 + K11937 pgaABCD; biofilm PGA synthesis protein 398 | K13654 mcbR; GntR family transcriptional regulator, colanic acid and biofilm gene transcriptional regulator 399 | K12148 bssS; biofilm regulator BssS 400 | K13650 mcbA; MqsR-controlled colanic acid and biofilm protein A 401 | K04335 + K04334 + K04336 csgABC; curli fimbriae biosynthesis 402 | K12687 flu; antigen 43 403 | 404 | Competence-related DNA transporter 405 | Core components 406 | K02237 comEA; competence protein ComEA 407 | K01493 comEB; dCMP deaminase [EC:3.5.4.12] 408 | K02238 comEC; competence protein ComEC 409 | K02239 comER; competence protein ComER 410 | K02240 comFA; competence protein ComFA 411 | K02241 comFB; competence protein ComFB 412 | K02242 comFC; competence protein ComFC 413 | K02243 comGA; competence protein ComGA 414 | K02244 comGB; competence protein ComGB 415 | K02245 comGC; competence protein ComGC 416 | K02246 comGD; competence protein ComGD 417 | K02247 comGE; competence 
protein ComGE 418 | K02248 comGF; competence protein ComGF 419 | K02249 comGG; competence protein ComGG 420 | Related components 421 | K02250 comK; competence protein ComK 422 | K02251 comQ; competence protein ComQ 423 | K02252 comS; competence protein ComS 424 | K02253 comX; competence protein ComX 425 | K02254 comZ; competence protein ComZ 426 | Additional competence factors 427 | K12292 comA; ATP-binding cassette, subfamily C, bacterial, competence factor transporting protein [EC:3.4.22.-] 428 | K07680 comP; two-component system, NarL family, sensor histidine kinase ComP [EC:2.7.13.3] 429 | K12293 comB; competence factor transport accessory protein ComB 430 | K12415 comC; competence-stimulating peptide 431 | K12294 comD; two-component system, AgrA family, sensor histidine kinase ComD [EC:2.7.13.3] 432 | K12295 comE; two-component system, AgrA family, response regulator ComE 433 | K12296 comX1_2; competence protein ComX 434 | 435 | Anaplerotic Reactions 436 | Glyoxylate shunt 437 | Requires the presence of both isocitrate lyase and malate synthase 438 | K01637 aceA; isocitrate lyase 439 | K01638 aceB; malate synthase 440 | Anaplerotic enzymes 441 | All reactions are reversible. 
Organisms can have 0-4 of the pathways 442 | K00029 maeB; malate dehydrogenase (oxaloacetate-decarboxylating) (NADP+) or 'malic' enzyme 443 | K01958 pyruvate carboxylase OR K01959 pycA + K01960 pycB; pyruvate carboxylase subunits A + B 444 | K01595 ppc; phosphoenolpyruvate carboxylase 445 | K01610 pckA; phosphoenolpyruvate carboxykinase (ATP) OR K01596 pckA; phosphoenolpyruvate carboxykinase (GTP) OR K20370 PEPCK; phosphoenolpyruvate carboxykinase (diphosphate) 446 | 447 | Sulfolipid biosynthesis 448 | Two components SQD1/sqdB and SQD2 (SQD2 may not be present in bacteria/archaea but belongs to the CAZy GT4 family) 449 | K06118 sqdB; UDP-sulfoquinovose synthase 450 | K06119 SQD2; sulfoquinovosyltransferase 451 | 452 | C-P lyase 453 | There is a key enzyme, PhnJ, that performs the cleavage of methane from the C-P bond, but the entire pathway requires an operon that leads to synthesis of a ribosyl phosphonate 454 | The transporter for phosphonate phnCED is represented in the transporter subcategory 455 | K02043 phnF; GntR family transcriptional regulator, phosphonate transport system regulatory protein 456 | K06166 phnG; alpha-D-ribose 1-methylphosphonate 5-triphosphate synthase subunit 457 | K06165 phnH; alpha-D-ribose 1-methylphosphonate 5-triphosphate synthase subunit 458 | K06164 phnI; alpha-D-ribose 1-methylphosphonate 5-triphosphate synthase subunit 459 | K06163 phnJ; alpha-D-ribose 1-methylphosphonate 5-phosphate C-P lyase 460 | K05781 phnK; putative phosphonate transport system ATP-binding protein 461 | K05780 phnL; alpha-D-ribose 1-methylphosphonate 5-triphosphate synthase subunit 462 | K06162 phnM; alpha-D-ribose 1-methylphosphonate 5-triphosphate diphosphatase 463 | K05774 phnN; ribose 1,5-bisphosphokinase 464 | K09994 phnO; aminoalkylphosphonate N-acetyltransferase 465 | K06167 phnP; phosphoribosyl 1,2-cyclic phosphate phosphodiesterase -------------------------------------------------------------------------------- /KEGGDecoder/README.md:
-------------------------------------------------------------------------------- 1 | KEGG-Decoder 2 | ================================================================ 3 | ### Description ### 4 | Designed to parse through KEGG-Koala outputs (including blastKOALA, ghostKOALA, KOFAMSCAN) to determine the completeness of various metabolic pathways. 5 | 6 | * This module was constructed using manually curated "canonical" pathways described as part of KEGG Pathway Maps. For information regarding which KOs are used to predict a metabolic pathway see the KOALA_definitions.txt 7 | 8 | * If you are interested in a certain pathway and the genes are listed in KEGG, it is possible to add it to the file (with some Python scripting) 9 | 10 | ### KEGG-Decoder Demonstration and Hands-on tutorial ### 11 | 12 | [YouTube video](https://youtu.be/1v4UzjE7K2g?t=962) on how KEGG-Decoder interfaces with KEGG and how the heatmap is organized. 13 | 14 | Hands-on tutorial [![Binder](https://mybinder.org/badge_logo.svg)](https://gesis.mybinder.org/binder/v2/gh/biovcnet/bvcn-binder-kegg-koala/master?urlpath=lab) 15 | 16 | **Developed as part of the [BVCN](https://biovcnet.github.io/)** 17 | 18 | ### Please Cite ### 19 | If you find that using KEGG Decoder to process your data has been useful, please cite this manuscript. If you are using KEGG Decoder to make figures then definitely cite this manuscript! 20 | 21 | * [Graham ED, Heidelberg JF, Tully BJ. (2018) Potential for primary productivity in a globally-distributed bacterial phototroph.
ISME J 350, 1–6](https://www.nature.com/articles/s41396-018-0091-3) 22 | 23 | 24 | ### Dependencies ### 25 | 26 | * [Pandas](http://pandas.pydata.org/pandas-docs/stable/install.html) 27 | 28 | * [Seaborn](http://seaborn.pydata.org/installing.html) 29 | 30 | * [matplotlib](http://matplotlib.org/users/installing.html) 31 | 32 | * [tanglegram](https://github.com/schlegelp/tanglegram) 33 | 34 | ## Installation ## 35 | Recommend installing KEGG-Decoder in its own virtual environment with PYTHON=3.6 (e.g., conda or python). 36 | 37 | ``` 38 | conda create -n keggdecoder python=3.6 39 | conda activate keggdecoder 40 | python3 -m pip install KEGGDecoder 41 | ``` 42 | The current pip install will set the various dependencies (matplotlib, seaborn, pandas, etc.) to versions that actively work with this version of the script. This is partially to avoid a bug in matplotlib=3.0.4 that would cut the top and bottom line of the `static` image output. 43 | 44 | ## Upgrade ## 45 | ``` 46 | conda activate keggdecoder 47 | pip install --upgrade KEGGDecoder 48 | ``` 49 | 50 | ## Procedure ## 51 | * Start with a protein FASTA file (INPUT_PROTEIN.fasta). This file can be multiple genomes combined.
Be sure your submitted FASTA file has headers that group genomes together, KEGG-decoder.py groups based on the name provided in FASTA header before the first underscore (_) 52 | ``` 53 | For example 54 | >NORP9_1 55 | >NORP9_2 56 | >NORP9_3 57 | >NORP10_1 58 | >NORP10_2 59 | >NORP10_3 60 | In the output this produces two rows of output, one for genome NORP9 and one for genome NORP10 in the list and heat map 61 | ``` 62 | * Process protein sequences through KEGG-KOALA ([GhostKoala](https://www.kegg.jp/ghostkoala/), [BlastKoala](https://www.kegg.jp/blastkoala/), or [KOFAMSCAN](https://www.genome.jp/tools/kofamkoala/)) and download the tab-delimited KO assignment text file (KOALA_OUTPUT.txt) 63 | * The KOALA output text file should look like this: 64 | ``` 65 | NORP9_1 K00370 66 | NORP9_2 K00371 67 | ``` 68 | 69 | * Run KEGG-decoder 70 | ``` 71 | KEGG-decoder --input (-i) --output (-o) --vizoption (-v) 72 | ``` 73 | 74 | * The FUNCTION_OUT.list generates a TSV version of the heat map. The first row contains pathway/process names, subsequent rows contain submitted groups/genomes and fractional percentage of pathway/process 75 | 76 | * 'static' figure output is an SVG file function_heatmap.svg. Each distinct identifier before the underscore in the FASTA file will have a row 77 | 78 | * 'interactive' figure output is an HTML file function_heatmap.html. Each distinct identifier before the underscore in the FASTA file will have a row, but can be loaded into a browser and value will be displayed by hovering over a cell with the mouse. Draw a box to zoom in on specific regions. Designed to allow easier parsing of larger sets of genomes. 79 | 80 | * 'tanglegram' -- For a little more advanced analysis, KEGGDecoder can generate a tanglegram to compare the order of two trees, one generated by the clustered KEGG metabolic outputs and a Newick format (presumably phylogenetic) tree provided by the user. At least 3 input genomes are required, but more is recommended. 
Genome names must match. 81 | 82 | KEGG-Expander 83 | ================================================================ 84 | ### UNDER CONSTRUCTION ### 85 | While KEGG-decoder is now a module, KEGG-expander and Decoder_and_Expand will still require running the Python scripts. Using the FUNCTION_OUT.list file will allow you to still make the intended final figure. 86 | 87 | ### Description ### 88 | Designed to expand on the output from KEGG-Decoder. Within KEGG there is a lack of information regarding several processes of interest. To overcome these shortcomings, a small targeted HMM database was created (and will be updated) to fill in gaps of information. 89 | 90 | HMM models are predominantly from the PFam database, but when necessary are pulled from TIGRfam and SFam. 91 | 92 | ### Dependencies ### 93 | * [HMMER3](http://www.hmmer.org/) 94 | 95 | ### Additional Information ### 96 | * Details as to which HMM models and genes are in each described pathway or process can be found in the supporting document, Pfam_definitions.txt 97 | * In version 0.7, KEGG-Expander targets several transporter subunits to link with metal transporter columns in KEGG-Decoder. Removed the peptidase entries due to ineffective interpretation. 98 | * In version 0.6, KEGG-Expander targets: phototrophy via proteorhodopsin, (some) peptidases, alternative nitrogenases, ammonia transport, DMSP lyase, and DMSP synthase, and ferrioxamine biosynthesis 99 | * Unfortunately, accuracy depends on the model used, using a bit score cutoff of 75 (approximately an E-value <10E-20) does not always capture the best matches. For example the rhodopsin model does not distinguish between proteorhodopsin and other light driven rhodopsins (we use a tree to determine the proteorhodopsins). Or several of the DMSP lyases at low bit scores will match metalloproteases; in this instance the script has been modified to look for a more stringent bit score (>500). 
Or the TIGRfam models for the Fe-only and Vanadium nitrogenases generally match the same protein. 100 | 101 | ## Procedure ## 102 | * Using a protein FASTA file with the same gene name set-up as described above - GENOMEID_Number - run a search against the custom HMM database 103 | ``` 104 | hmmsearch --tblout _expanderv0.7.tbl -T 75 /path/to/BioData/KEGGDecoder/HMM_Models/expander_dbv0.7.hmm 105 | ``` 106 | * The HMM results table is used to construct the heatmap by running KEGG-expander.py 107 | ``` 108 | python KEGG-expander.py _expanderv0.7.tbl 109 | ``` 110 | * The OUTPUT LIST generates a text version of the heat map. The first row contains pathway/process names, subsequent rows contain submitted groups/genomes and fractional percentage of pathway/process 111 | 112 | * Figure is output as hmm_heatmap.svg. Each distinct identifier before the underscore in the FASTA file will have a row 113 | 114 | Decoder and Expand 115 | ================================================================ 116 | ### Description ### 117 | Combines the KEGG and HMM heatmaps into a final heat map. 118 | 119 | ### Procedure ### 120 | * Run the script Decode_and_Expand.py 121 | ``` 122 | python Decode_and_Expand.py 123 | ``` 124 | * Figure is output as decode-expand_heatmap.py. Each distinct identifier before the underscore in the FASTA file will have a row 125 | 126 | Change Log 127 | ================================================================ 128 | ## V1.3 129 | Added several pathways associated with carotenoid biosynthesis, including 130 | end-products: astaxanthin, nostoxanthin, zeaxanthin diglucoside, & 131 | myxoxanthophylls. Plus, staphyloxanthin biosynthesis and the two pathways for 132 | terpenoid building blocks, the mevalonate pathway and the MEP/DOXP pathway. 133 | 134 | The pathways were provided by Dr. Tania Kurbessoian 135 | ## V1.2.1 136 | Fixed typo in determining reverse TCA cycle as identified by KEGG-Decoder 137 | user Cheng.
Issue #52 138 | 139 | Added all-trans-8'-apo-beta-carotenal 15,15'-oxygenase 140 | which will cleave apo-carotenals to generate retinal. Suggested by Eric Webb. 141 | Upstream pathway unknown 142 | 143 | ## V1.2 144 | Added several new pathways including: 145 | 146 | * PET degradation 147 | * carbon storage, related to starch/glycogen & polyhydroxybutyrate 148 | * phosphate storage, related to the reversible polyphosphate reaction. 149 | 150 | Part of summer research with Sheyla Aviles. 151 | 152 | ## V1.1 153 | Correcting typos identified by Chris Neely. Adding more complete 154 | pathway components for amino acid biosynthesis identified by 155 | Dr. Eric Webb 156 | 157 | * phenylalanine added K01713 pheC; cyclohexadienyl dehydratase OR K05359 ADT; arogenate/prephenate dehydratase OR K04518 pheA2; prephenate dehydratase 158 | * tyrosine added K00220 tyrC; cyclohexadienyl/prephenate dehydrogenase OR K24018; cyclohexadienyl/prephenate dehydrogenase OR K15226 tyrAa; arogenate dehydrogenase 159 | 160 | ## V1.0.10 ## 161 | Added the 20 amino acids. In most instances, only the last step in converting precursor to amino acid is assessed (except for valine, isoleucine, leucine, and tryptophan). The following amino acids share detection pathways: 162 | 163 | * serine & glycine 164 | * threonine & glycine 165 | * valine & isoleucine 166 | * phenylalanine & tyrosine 167 | * aspartate & glutamate 168 | 169 | ## V1.0.6-1.0.8 ## 170 | * Updates made as part of the Speeding Up Science Part 2 hackathon. Updates were made by Chris Neely, Jason Fell, and Marisa Lim. 171 | * Changes include reduction of white space in the `static` output, removal of a minimum requirement for the `interactive` output, and increased functioning of `tanglegram` output. Specifically, `tanglegram` now uses complete-linkage Euclidean distance to determine the clusters on the KEGG-Decoder tree. This provides the best resolution for visualizing possible groups with similar functional capacity.
172 | * In V1.0.8.2, a correction to determining the completeness of ubiquinol-cytochrome c reductase. Previously, only checked for the presence of K00411 and K00410. K00410 is a fusion of K00412 and K00413 only present in a subset of Proteobacteria. Identified by Grayson Chadwick. 173 | * In V1.0.8.1, a mismatch in the terms used to identify `bifunctional chitinase/lysozyme` would result in a `0` no matter whether K13381 was present. This has been corrected. Identified by Chris Neely. 174 | 175 | ## V1.0.5 ## 176 | Various upgrades to the tanglegram visualization and enhanced naming efficiency. 177 | 178 | ## V1.0.2 ## 179 | Fixed an issue with tanglegram support that should fix issue with pandas dependency 180 | V.1.0.2 Adds Na+-transporting NADH:ubiquinone oxidoreductase and several metal transporters. KEGG-Decoder added metal transporters for cobalt (CbiMQ), cobalt (CbtA), cobalt (CorA), nickel ABC-type transporter substrate-binding subunit (NirA), copper (copA), ferrous iron (FeoB), 181 | ferric iron ABC-type transporter substrate-binding subunit (AfuA), Fe/Mn transporter (MntH). Additional metal transporter components were added 182 | through KEGG-expander: Cobalt transporter (CbtB), Copper binding HMA (heavy-metal-associated) protein, Fe, Zn, Mn permease (ZupT) 183 | Removed 'peptidases' from KEGG-expander due to inability to discern intracellular from extracellular activity. Recommend using MetaSanity to 184 | identify extracellular peptidases. 185 | Updated KEGG-expander HMM set to V0.7. 186 | 187 | ## V1.0 ## 188 | KEGGDecoder can now be installed via pip install. KEGGDecoder now offers 2 visualization outputs - the classic 'static' version and 189 | the new 'interactive' version which will open a heatmap where you zoom and interact with the heatmap output 190 | Contributions to V1.0 occurred as part of the Moore Foundation funded 'Speeding Up Science' hackathon.
With contributions provided by: Taylor Reiter (UCDavis), Roth Conrad (GeorgiaTech), Jay Osvatic (UniVienna), Luiz Irber (UCDavis) 191 | 192 | ## V0.8 ## 193 | Add elements regarding arsenic reduction 194 | 195 | ## V0.7 ## 196 | Clarifies elements of methane oxidation and adds additional methanol/alcohol dehydrogenase 197 | to KEGG function search. Adds the serine pathway for formaldehyde assimilation 198 | 199 | ## V0.6 ## 200 | V.0.6 Adds Bacterial Secretion Systems as described by KEGG covering Type I, II, III, IV, Vabc, VI, Sec-SRP and Twin Arginine Targeting systems 201 | 202 | ## V0.5 ## 203 | Adds parameters to force labels to be printed on heatmap. Includes functions 204 | for sulfolipid biosynthesis (key gene sqdB) and C-P lyase 205 | 206 | ## V0.4 ## 207 | Adds sections that more accurately represent anoxygenic photosynthesis - type-II and type-I reaction centers, adds NiFe hydrogenase Hyd-1 hyaABC, corrected typo leading to missed assignment to hydrogen:quinone oxidoreductase 208 | 209 | ## V0.3 ## 210 | Latest version adds checks for: retinal biosynthesis, sulfite dehydrogenase (quinone), hydrazine dehydrogenase, hydrazine synthase, DMSP/DMS/DMSO cycling, cobalamin biosynthesis, competence-related DNA transport, anaplerotic reactions 211 | -------------------------------------------------------------------------------- /KEGGDecoder/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Designed to parse through a blastKoala or ghostKoala output to determine 3 | the completeness of various KEGG pathways 4 | """ 5 | 6 | 7 | __version__ = "1.3" 8 | 9 | -------------------------------------------------------------------------------- /KEGGDecoder/images/interactive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjtully/BioData/cb1d45a957eda783412a48911c8592252915d9cd/KEGGDecoder/images/interactive.png
-------------------------------------------------------------------------------- /KEGGDecoder/images/static.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjtully/BioData/cb1d45a957eda783412a48911c8592252915d9cd/KEGGDecoder/images/static.png -------------------------------------------------------------------------------- /KEGGDecoder/images/tanglegram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bjtully/BioData/cb1d45a957eda783412a48911c8592252915d9cd/KEGGDecoder/images/tanglegram.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019 Benjamin Tully 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BioData 2 | ======= 3 | 4 | KEGG-decoder 5 | ============ 6 | For parsing KEGG KOALA outputs and generating a metabolic function heat map -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["flit >= 1.1"] 3 | build-backend = "flit.buildapi" 4 | 5 | [tool.flit.metadata] 6 | module = "KEGGDecoder" 7 | author = "Benjamin Tully" 8 | author-email = "tully.bj@gmail.com" 9 | home-page = "https://github.com/bjtully/BioData/KEGGDecoder" 10 | classifiers = ["License :: OSI Approved :: MIT License"] 11 | requires = [ 12 | "matplotlib==3.0.3", 13 | "seaborn==0.9.0", 14 | "pandas==0.25.2", 15 | "numpy==1.17.3", 16 | "plotly==4.2.1", 17 | "tanglegram==0.1.0", 18 | "biopython==1.74", 19 | "scipy==1.3.1", 20 | ] 21 | 22 | [tool.flit.scripts] 23 | KEGG-decoder = "KEGGDecoder.KEGG_decoder:main" 24 | -------------------------------------------------------------------------------- /tests/test_decoder.py: -------------------------------------------------------------------------------- 1 | from KEGGDecoder import KEGG_decoder 2 | 3 | 4 | def test_nitrogen(): 5 | out = KEGG_decoder.nitrogen("K00368") 6 | assert 'nitrite reduction' in out 7 | assert out['nitrite reduction'] == 1 8 | 9 | def test_arsenic(): 10 | out = KEGG_decoder.arsenic(["K00537", "K03325"]) 11 | assert 'Arsenic reduction' in out 12 | assert out['Arsenic reduction'] >= 0 13 | assert out['Arsenic reduction'] <= 1 14 | # there are four arsenic KOs 15 | # should be divisible by .25 16 | # this line relies on the implementation 17 | # which may not be ideal in the long run 18 | assert out['Arsenic reduction'] % .25 == 0 19 | 20 | 21 | def test_command_line_default_viz(script_runner, 
tmp_path): 22 | p = tmp_path / "NORP_subset.txt" 23 | with open('tests/NORP_subset.txt', 'r') as f: 24 | p.write_text(f.read()) 25 | 26 | ret = script_runner.run('KEGG-decoder', 27 | '-i', str(p), 28 | '-o', 'test.txt', 29 | cwd=str(tmp_path)) 30 | 31 | print(ret.stdout) 32 | print(ret.stderr) 33 | assert ret.success 34 | #assert ret.stdout == '' 35 | #assert ret.stderr == '' 36 | 37 | 38 | def test_command_line_static(script_runner, tmp_path): 39 | p = tmp_path / "NORP_subset.txt" 40 | with open('tests/NORP_subset.txt', 'r') as f: 41 | p.write_text(f.read()) 42 | 43 | ret = script_runner.run('KEGG-decoder', 44 | '-i', str(p), 45 | '-o', 'test.txt', 46 | '-v', 'static', 47 | cwd=str(tmp_path)) 48 | 49 | print(ret.stdout) 50 | print(ret.stderr) 51 | assert ret.success 52 | #assert ret.stdout == '' 53 | #assert ret.stderr == '' 54 | 55 | 56 | def test_command_line_interactive(script_runner, tmp_path): 57 | p = tmp_path / "NORP_subset.txt" 58 | with open('tests/NORP_subset.txt', 'r') as f: 59 | p.write_text(f.read()) 60 | 61 | ret = script_runner.run('KEGG-decoder', 62 | '-i', str(p), 63 | '-o', 'test.txt', 64 | '-v', 'interactive', 65 | cwd=str(tmp_path)) 66 | print(ret.stdout) 67 | print(ret.stderr) 68 | assert ret.success 69 | #assert ret.stdout == '' 70 | #assert ret.stderr == '' 71 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | py36, 4 | black 5 | isolated_build = True 6 | 7 | [tox:.package] 8 | basepython = python3 9 | 10 | [testenv] 11 | deps = 12 | pytest 13 | pytest-cov 14 | pytest-console-scripts 15 | commands = 16 | pytest --cov {envsitepackagesdir}/KEGGDecoder 17 | 18 | [testenv:black] 19 | description = Tests for formatting errors with black. 
20 | basepython = python3.6 21 | deps = 22 | black 23 | commands = 24 | black KEGGDecoder 25 | black tests 26 | 27 | [coverage:run] 28 | branch = True 29 | --------------------------------------------------------------------------------