├── traitar ├── __init__.py ├── _version.py ├── data │ ├── models │ │ ├── phypat.tar.gz │ │ └── phypat+PGL.tar.gz │ ├── sample_data │ │ ├── traitar_out │ │ │ ├── annotation │ │ │ │ └── pfam │ │ │ │ │ ├── joblog_hmmer_recompute.txt │ │ │ │ │ ├── joblog_filter_and_aggregate_recompute.txt │ │ │ │ │ ├── joblog_generate_annotation_matrix.txt │ │ │ │ │ ├── joblog_hmmer.txt │ │ │ │ │ └── joblog_filter_and_aggregate.txt │ │ │ └── phenotype_prediction │ │ │ │ ├── phypat │ │ │ │ ├── feat_gffs │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Citrate_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Motile_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Motile_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Acetate_utilization_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Arginine_dihydrolase_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Starch_hydrolysis_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Starch_hydrolysis_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_D-Sorbitol_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Malonate_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Malonate_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_D-Mannitol_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Voges_Proskauer_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Voges_Proskauer_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Bacillus_or_coccobacillus_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Growth_in_KCN_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Growth_in_6.5%_NaCl_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Acetate_utilization_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Growth_in_6.5%_NaCl_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Alkaline_phosphatase_important_features.gff │ │ │ │ │ ├── 
Listeria_grayi_DSM_20601_Alkaline_phosphatase_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Coagulase_production_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Coagulase_production_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Growth_in_KCN_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Methyl_red_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Lactose_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_DNase_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Lactose_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Catalase_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Catalase_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Facultative_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Growth_at_42°C_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Gram_positive_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Gram_positive_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Growth_on_ordinary_blood_agar_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_D-Mannose_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Esculin_hydrolysis_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Growth_on_ordinary_blood_agar_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Facultative_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Maltose_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Growth_at_42°C_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Esculin_hydrolysis_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Maltose_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Trehalose_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Salicin_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Salicin_important_features.gff │ │ │ │ │ └── 
Listeria_ivanovii_WSLC3009_Trehalose_important_features.gff │ │ │ │ ├── predictions_single-votes.txt │ │ │ │ ├── predictions_conservative-vote.txt │ │ │ │ └── predictions_majority-vote.txt │ │ │ │ ├── heatmap_combined.png │ │ │ │ ├── heatmap_phypat.png │ │ │ │ ├── heatmap_phypat+PGL.png │ │ │ │ ├── phypat+PGL │ │ │ │ ├── feat_gffs │ │ │ │ │ ├── Listeria_grayi_DSM_20601_DNase_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Arginine_dihydrolase_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Malonate_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Voges_Proskauer_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_D-Mannitol_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Voges_Proskauer_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Growth_in_KCN_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Malonate_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Growth_in_KCN_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_D-Sorbitol_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Acetate_utilization_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Starch_hydrolysis_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Acetate_utilization_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Starch_hydrolysis_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Growth_in_6.5%_NaCl_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Coagulase_production_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Coagulase_production_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Gram_positive_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Growth_at_42°C_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Catalase_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Growth_in_6.5%_NaCl_important_features.gff │ │ │ │ │ ├── 
Listeria_grayi_DSM_20601_Citrate_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Gram_positive_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Catalase_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Alkaline_phosphatase_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Alkaline_phosphatase_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Growth_at_42°C_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Growth_on_ordinary_blood_agar_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Motile_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Lactose_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Motile_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Lactose_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Growth_on_ordinary_blood_agar_important_features.gff │ │ │ │ │ ├── Listeria_ivanovii_WSLC3009_Facultative_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Esculin_hydrolysis_important_features.gff │ │ │ │ │ ├── Listeria_grayi_DSM_20601_Facultative_important_features.gff │ │ │ │ │ └── Listeria_ivanovii_WSLC3009_Methyl_red_important_features.gff │ │ │ │ ├── predictions_majority-vote.txt │ │ │ │ ├── predictions_single-votes.txt │ │ │ │ └── predictions_conservative-vote.txt │ │ │ │ ├── predictions_single-votes_combined.txt │ │ │ │ ├── feature_track_commands.txt │ │ │ │ ├── predictions_majority-vote_combined.txt │ │ │ │ ├── predictions_flat_majority-votes_combined.txt │ │ │ │ └── predictions_flat_single-votes_combined.txt │ │ ├── samples.txt │ │ └── samples_gene_gff.txt │ └── colors.txt ├── get_external_data.py ├── domtblout2gene_generic.py ├── PhenotypeCollection.py ├── modify.py ├── merge_preds.py ├── hmmer2filtered_best.py └── predict.py ├── .gitignore ├── workflow.png ├── MANIFEST.in ├── setup.py ├── Dockerfile ├── traits.tsv └── INSTALL.md /traitar/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | config.json 3 | *.swp 4 | -------------------------------------------------------------------------------- /traitar/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.1.2" 2 | -------------------------------------------------------------------------------- /workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hzi-bifo/traitar/HEAD/workflow.png -------------------------------------------------------------------------------- /traitar/data/models/phypat.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hzi-bifo/traitar/HEAD/traitar/data/models/phypat.tar.gz -------------------------------------------------------------------------------- /traitar/data/models/phypat+PGL.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hzi-bifo/traitar/HEAD/traitar/data/models/phypat+PGL.tar.gz -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/annotation/pfam/joblog_hmmer_recompute.txt: -------------------------------------------------------------------------------- 1 | Seq Host Starttime Runtime Send Receive Exitval Signal Command 2 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Citrate_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | 
-------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/annotation/pfam/joblog_filter_and_aggregate_recompute.txt: -------------------------------------------------------------------------------- 1 | Seq Host Starttime Runtime Send Receive Exitval Signal Command 2 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | recursive-include traitar * 3 | recursive-exclude traitar config.json 4 | recursive-exclude traitar *.pyc 5 | recursive-exclude traitar *.swp 6 | -------------------------------------------------------------------------------- /traitar/data/sample_data/samples.txt: -------------------------------------------------------------------------------- 1 | sample_file_name sample_name category 2 | 1457190.3.RefSeq.faa Listeria_ivanovii_WSLC3009 environment_1 3 | 525367.9.RefSeq.faa Listeria_grayi_DSM_20601 environment_2 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/heatmap_combined.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hzi-bifo/traitar/HEAD/traitar/data/sample_data/traitar_out/phenotype_prediction/heatmap_combined.png -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/heatmap_phypat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hzi-bifo/traitar/HEAD/traitar/data/sample_data/traitar_out/phenotype_prediction/heatmap_phypat.png -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/heatmap_phypat+PGL.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hzi-bifo/traitar/HEAD/traitar/data/sample_data/traitar_out/phenotype_prediction/heatmap_phypat+PGL.png -------------------------------------------------------------------------------- /traitar/data/colors.txt: -------------------------------------------------------------------------------- 1 | r g b 2 | 147 170 0 3 | 244 200 0 4 | 206 162 98 5 | 83 55 122 6 | 193 0 32 7 | 0 83 138 8 | 166 189 215 9 | 129 112 102 10 | 255 142 0 11 | 0 125 52 12 | 255 104 0 13 | 246 118 142 14 | -------------------------------------------------------------------------------- /traitar/data/sample_data/samples_gene_gff.txt: -------------------------------------------------------------------------------- 1 | sample_file_name sample_name category gene_gff 2 | 1457190.3.RefSeq.faa Listeria_ivanovii_WSLC3009 environment_1 1457190.3.RefSeq.gff 3 | 525367.9.RefSeq.faa Listeria_grayi_DSM_20601 environment_2 525367.9.RefSeq.gff 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Motile_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 732068 732650 4.3999999999999995e-25 + . Parent=AX25_03545;Name=PF02154;Note=FliM;Description=Flagellar motor switch protein FliM 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Motile_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692932 HMMER phypat 212934 213516 7.1999999999999998e-25 + . 
Parent=HMPREF0556_1093;Name=PF02154;Note=FliM;Description=Flagellar motor switch protein FliM 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Acetate_utilization_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 101569 102724 5.5999999999999991e-95 + . Parent=AX25_00465;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Arginine_dihydrolase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 101569 102724 5.5999999999999991e-95 + . Parent=AX25_00465;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Starch_hydrolysis_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 165019 165232 2.8000000000000002e-13 + . Parent=AX25_00765;Name=PF00041;Note=fn3;Description=Fibronectin type III domain 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Starch_hydrolysis_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692935 HMMER phypat 227300 227513 2.2000000000000002e-11 + . 
Parent=HMPREF0556_2205;Name=PF00041;Note=fn3;Description=Fibronectin type III domain 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_D-Sorbitol_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 558315 558636 3.5000000000000004e-24 - . Parent=AX25_02695;Name=PF06923;Note=GutM;Description=Glucitol operon activator protein (GutM) 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_DNase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692932 HMMER phypat+PGL 662055 662640 8.3999999999999991e-70 + . Parent=HMPREF0556_1556;Name=PF02568;Note=ThiI;Description=Thiamine biosynthesis protein (ThiI) 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Arginine_dihydrolase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 101569 102724 5.5999999999999991e-95 + . Parent=AX25_00465;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Malonate_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692932 HMMER phypat 691105 691612 8.2999999999999998e-11 + . 
Parent=HMPREF0556_1583;Name=PF06833;Note=MdcE;Description=Malonate decarboxylase gamma subunit (MdcE) 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Malonate_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 1645836 1646325 2.1000000000000002e-10 - . Parent=AX25_08150;Name=PF06833;Note=MdcE;Description=Malonate decarboxylase gamma subunit (MdcE) 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Malonate_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 1645836 1646325 2.1000000000000002e-10 - . Parent=AX25_08150;Name=PF06833;Note=MdcE;Description=Malonate decarboxylase gamma subunit (MdcE) 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_D-Mannitol_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692934 HMMER phypat 237179 237542 5.5999999999999999e-27 + . Parent=HMPREF0556_2493;Name=PF01232;Note=Mannitol_dh;Description=Mannitol dehydrogenase Rossmann domain 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Voges_Proskauer_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 558881 559535 1.5999999999999999e-93 - . 
Parent=HMPREF0556_0655;Name=PF03306;Note=AAL_decarboxy;Description=Alpha-acetolactate decarboxylase 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Voges_Proskauer_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 2130427 2131081 2.4999999999999998e-94 + . Parent=AX25_10545;Name=PF03306;Note=AAL_decarboxy;Description=Alpha-acetolactate decarboxylase 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Voges_Proskauer_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 2130427 2131081 2.4999999999999998e-94 + . Parent=AX25_10545;Name=PF03306;Note=AAL_decarboxy;Description=Alpha-acetolactate decarboxylase 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_D-Mannitol_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692934 HMMER phypat+PGL 237179 237542 5.5999999999999999e-27 + . 
Parent=HMPREF0556_2493;Name=PF01232;Note=Mannitol_dh;Description=Mannitol dehydrogenase Rossmann domain 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Voges_Proskauer_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat+PGL 558881 559535 1.5999999999999999e-93 - . Parent=HMPREF0556_0655;Name=PF03306;Note=AAL_decarboxy;Description=Alpha-acetolactate decarboxylase 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/annotation/pfam/joblog_generate_annotation_matrix.txt: -------------------------------------------------------------------------------- 1 | Seq Host Starttime Runtime Send Receive Exitval Signal Command 2 | 1 : 1468755187.571 8.796 0 0 0 0 domtblout2gene_generic.py traitar_out/annotation/pfam/summary.dat <(ls traitar_out/annotation/pfam/*_filtered_best.dat) /home/aaron/traitar/traitar/data/models/phypat.tar.gz 3 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Bacillus_or_coccobacillus_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 1299564 1299777 4.3e-23 + . Parent=AX25_06440;Name=PF03880;Note=DbpA;Description=DbpA RNA binding domain 3 | accn|CP007172 HMMER phypat 1817280 1817532 9.4999999999999995e-11 + . 
Parent=AX25_09030;Name=PF08447;Note=PAS_3;Description=PAS fold 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Growth_in_KCN_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 847008 847890 1.0999999999999998e-93 + . Parent=AX25_04130;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 3 | accn|CP007172 HMMER phypat 2322150 2323038 2.4999999999999997e-90 - . Parent=AX25_11535;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Growth_in_KCN_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 847008 847890 1.0999999999999998e-93 + . Parent=AX25_04130;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 3 | accn|CP007172 HMMER phypat+PGL 2322150 2323038 2.4999999999999997e-90 - . Parent=AX25_11535;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Growth_in_6.5%_NaCl_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 1013674 1014205 1.6999999999999997e-67 - . Parent=AX25_04955;Name=PF13743;Note=Thioredoxin_5;Description=Thioredoxin 3 | accn|CP007172 HMMER phypat 2227380 2227959 4.0999999999999999e-12 - . 
Parent=AX25_11025;Name=PF03881;Note=Fructosamin_kin;Description=Fructosamine kinase 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Acetate_utilization_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 185775 185970 5.1e-10 + . Parent=HMPREF0556_0273;Name=PF06779;Note=DUF1228;Description=Protein of unknown function (DUF1228) 3 | accn|NZ_GG692934 HMMER phypat 187223 188375 5.6999999999999999e-91 + . Parent=HMPREF0556_2438;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Growth_in_6.5%_NaCl_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 271478 272273 3.1999999999999993e-89 + . Parent=HMPREF0556_0359;Name=PF03881;Note=Fructosamin_kin;Description=Fructosamine kinase 3 | accn|NZ_GG692932 HMMER phypat 340799 341327 1.8999999999999999e-69 - . Parent=HMPREF0556_1240;Name=PF13743;Note=Thioredoxin_5;Description=Thioredoxin 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Malonate_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat+PGL 143586 144711 1.8999999999999993e-178 - . Parent=HMPREF0556_0230;Name=PF09364;Note=XFP_N;Description=XFP N-terminal domain 3 | accn|NZ_GG692932 HMMER phypat+PGL 691105 691612 8.2999999999999998e-11 + . 
Parent=HMPREF0556_1583;Name=PF06833;Note=MdcE;Description=Malonate decarboxylase gamma subunit (MdcE) 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Alkaline_phosphatase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 177278 177740 1.6999999999999999e-40 - . Parent=AX25_00830;Name=PF02872;Note=5_nucleotid_C;Description=5'-nucleotidase, C-terminal domain 3 | accn|CP007172 HMMER phypat 2500086 2500479 4.4000000000000002e-14 - . Parent=AX25_12400;Name=PF02872;Note=5_nucleotid_C;Description=5'-nucleotidase, C-terminal domain 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Alkaline_phosphatase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692932 HMMER phypat 985368 985827 5.0999999999999997e-30 + . Parent=HMPREF0556_1869;Name=PF02872;Note=5_nucleotid_C;Description=5'-nucleotidase, C-terminal domain 3 | accn|NZ_GG692935 HMMER phypat 233709 234093 6.3999999999999992e-14 + . Parent=HMPREF0556_2208;Name=PF02872;Note=5_nucleotid_C;Description=5'-nucleotidase, C-terminal domain 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Coagulase_production_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 2009187 2009379 7.6000000000000002e-23 - . 
Parent=AX25_09950;Name=PF02742;Note=Fe_dep_repr_C;Description=Iron dependent repressor, metal binding and dimerisation domain 3 | accn|CP007172 HMMER phypat 2423494 2423710 1.0000000000000001e-20 - . Parent=AX25_11975;Name=PF01883;Note=DUF59;Description=Domain of unknown function DUF59 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Coagulase_production_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 660000 660189 8.1000000000000001e-23 + . Parent=HMPREF0556_0761;Name=PF02742;Note=Fe_dep_repr_C;Description=Iron dependent repressor, metal binding and dimerisation domain 3 | accn|NZ_GG692932 HMMER phypat 57888 58110 1.4999999999999998e-20 + . Parent=HMPREF0556_0932;Name=PF01883;Note=DUF59;Description=Domain of unknown function DUF59 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/annotation/pfam/joblog_hmmer.txt: -------------------------------------------------------------------------------- 1 | Seq Host Starttime Runtime Send Receive Exitval Signal Command 2 | 2 : 1468754761.237 407.424 0 0 0 0 hmmsearch --cpu 1 --cut_ga --domtblout traitar_out/annotation/pfam/Listeria_grayi_DSM_20601_domtblout.dat /home/aaron/hmms/Pfam-A.hmm > /dev/null ./525367.9.RefSeq.faa 3 | 1 : 1468754761.234 414.615 0 0 0 0 hmmsearch --cpu 1 --cut_ga --domtblout traitar_out/annotation/pfam/Listeria_ivanovii_WSLC3009_domtblout.dat /home/aaron/hmms/Pfam-A.hmm > /dev/null ./1457190.3.RefSeq.faa 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/annotation/pfam/joblog_filter_and_aggregate.txt: -------------------------------------------------------------------------------- 1 | Seq Host Starttime Runtime 
Send Receive Exitval Signal Command 2 | 1 : 1468755176.703 10.645 0 0 0 0 hmmer2filtered_best.py traitar_out/annotation/pfam/Listeria_ivanovii_WSLC3009_domtblout.dat traitar_out/annotation/pfam/Listeria_ivanovii_WSLC3009_filtered_best.dat pfam 3 | 2 : 1468755176.706 10.643 0 0 0 0 hmmer2filtered_best.py traitar_out/annotation/pfam/Listeria_grayi_DSM_20601_domtblout.dat traitar_out/annotation/pfam/Listeria_grayi_DSM_20601_filtered_best.dat pfam 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Growth_in_KCN_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 176224 176893 1.0999999999999999e-49 + . Parent=HMPREF0556_0259;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 3 | accn|NZ_GG692933 HMMER phypat 414016 414394 4.0999999999999993e-10 + . Parent=HMPREF0556_0503;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 4 | accn|NZ_GG692933 HMMER phypat 702457 702619 3.3000000000000002e-09 + . Parent=HMPREF0556_0803;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 5 | accn|NZ_GG692934 HMMER phypat 122588 122798 2.4e-09 - . Parent=HMPREF0556_2380;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 6 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Growth_in_KCN_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat+PGL 176224 176893 1.0999999999999999e-49 + . Parent=HMPREF0556_0259;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 3 | accn|NZ_GG692933 HMMER phypat+PGL 414016 414394 4.0999999999999993e-10 + . 
Parent=HMPREF0556_0503;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 4 | accn|NZ_GG692933 HMMER phypat+PGL 702457 702619 3.3000000000000002e-09 + . Parent=HMPREF0556_0803;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 5 | accn|NZ_GG692934 HMMER phypat+PGL 122588 122798 2.4e-09 - . Parent=HMPREF0556_2380;Name=PF03060;Note=NMO;Description=Nitronate monooxygenase 6 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Methyl_red_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 872833 873025 1.8e-24 - . Parent=AX25_04220;Name=PF06146;Note=PsiE;Description=Phosphate-starvation-inducible E 3 | accn|CP007172 HMMER phypat 1363517 1364072 8.299999999999998e-79 - . Parent=AX25_06785;Name=PF11922;Note=DUF3440;Description=Domain of unknown function (DUF3440) 4 | accn|CP007172 HMMER phypat 2442434 2443307 1.7999999999999993e-139 - . Parent=AX25_12055;Name=PF04227;Note=Indigoidine_A;Description=Indigoidine synthase A like protein 5 | accn|CP007172 HMMER phypat 2741684 2742632 1.6999999999999995e-55 - . Parent=AX25_13705;Name=PF02126;Note=PTE;Description=Phosphotriesterase family 6 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_D-Sorbitol_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 199702 200485 4.4999999999999991e-104 + . Parent=AX25_00930;Name=PF04951;Note=Peptidase_M55;Description=D-aminopeptidase 3 | accn|CP007172 HMMER phypat+PGL 558315 558636 3.5000000000000004e-24 - . 
Parent=AX25_02695;Name=PF06923;Note=GutM;Description=Glucitol operon activator protein (GutM) 4 | accn|CP007172 HMMER phypat+PGL 779761 780109 3.3999999999999999e-11 + . Parent=AX25_03790;Name=PF13577;Note=SnoaL_4;Description=SnoaL-like domain 5 | accn|CP007172 HMMER phypat+PGL 2104126 2105491 3.1999999999999995e-115 - . Parent=AX25_10435;Name=PF05935;Note=Arylsulfotrans;Description=Arylsulfotransferase (ASST) 6 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Acetate_utilization_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat+PGL 185775 185970 5.1e-10 + . Parent=HMPREF0556_0273;Name=PF06779;Note=DUF1228;Description=Protein of unknown function (DUF1228) 3 | accn|NZ_GG692933 HMMER phypat+PGL 249098 249422 1.3e-17 + . Parent=HMPREF0556_0335;Name=PF02517;Note=Abi;Description=CAAX protease self-immunity 4 | accn|NZ_GG692933 HMMER phypat+PGL 492426 492687 8.8000000000000009e-23 - . Parent=HMPREF0556_0581;Name=PF02517;Note=Abi;Description=CAAX protease self-immunity 5 | accn|NZ_GG692932 HMMER phypat+PGL 378393 378651 9.4999999999999999e-14 - . Parent=HMPREF0556_1276;Name=PF02517;Note=Abi;Description=CAAX protease self-immunity 6 | accn|NZ_GG692934 HMMER phypat+PGL 187223 188375 5.6999999999999999e-91 + . Parent=HMPREF0556_2438;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 7 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Lactose_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 552962 553409 1.3000000000000002e-10 - . 
Parent=AX25_02665;Name=PF09863;Note=DUF2090;Description=Uncharacterized protein conserved in bacteria (DUF2090) 3 | accn|CP007172 HMMER phypat 691219 691369 7.4000000000000001e-20 + . Parent=AX25_03305;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 4 | accn|CP007172 HMMER phypat 968773 969763 1.0999999999999999e-87 + . Parent=AX25_04705;Name=PF03773;Note=DUF318;Description=Predicted permease 5 | accn|CP007172 HMMER phypat 2567983 2568133 7.3e-23 - . Parent=AX25_12730;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 6 | accn|CP007172 HMMER phypat 2630722 2630863 4.8000000000000001e-16 - . Parent=AX25_13050;Name=PF00401;Note=ATP-synt_DE;Description=ATP synthase, Delta/Epsilon chain, long alpha-helix domain 7 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Starch_hydrolysis_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat+PGL 26740 26959 1.2999999999999999e-16 + . Parent=HMPREF0556_0126;Name=PF04854;Note=DUF624;Description=Protein of unknown function, DUF624 3 | accn|NZ_GG692933 HMMER phypat+PGL 28416 28653 1.4e-27 + . Parent=HMPREF0556_0127;Name=PF06580;Note=His_kinase;Description=Histidine kinase 4 | accn|NZ_GG692933 HMMER phypat+PGL 406518 406671 2.6e-24 + . Parent=HMPREF0556_0498;Name=PF02229;Note=PC4;Description=Transcriptional Coactivator p15 (PC4) 5 | accn|NZ_GG692932 HMMER phypat+PGL 161648 161990 2.0000000000000001e-25 - . Parent=HMPREF0556_1036;Name=PF04266;Note=ASCH;Description=ASCH domain 6 | accn|NZ_GG692932 HMMER phypat+PGL 307746 308292 5.5999999999999992e-65 + . Parent=HMPREF0556_1200;Name=PF09323;Note=DUF1980;Description=Domain of unknown function (DUF1980) 7 | accn|NZ_GG692935 HMMER phypat+PGL 227300 227513 2.2000000000000002e-11 + . 
Parent=HMPREF0556_2205;Name=PF00041;Note=fn3;Description=Fibronectin type III domain 8 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_DNase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692932 HMMER phypat 55503 55746 6.3000000000000004e-18 + . Parent=HMPREF0556_0929;Name=PF02588;Note=DUF161;Description=Uncharacterized BCR, YitT family COG1284 3 | accn|NZ_GG692932 HMMER phypat 535749 535989 3.4999999999999997e-13 - . Parent=HMPREF0556_1437;Name=PF02588;Note=DUF161;Description=Uncharacterized BCR, YitT family COG1284 4 | accn|NZ_GG692932 HMMER phypat 633125 633665 1.3e-15 + . Parent=HMPREF0556_1527;Name=PF01633;Note=Choline_kinase;Description=Choline/ethanolamine kinase 5 | accn|NZ_GG692932 HMMER phypat 910609 910852 8.9000000000000008e-19 - . Parent=HMPREF0556_1790;Name=PF02588;Note=DUF161;Description=Uncharacterized BCR, YitT family COG1284 6 | accn|NZ_GG692935 HMMER phypat 75325 75565 1.6999999999999999e-19 + . Parent=HMPREF0556_2069;Name=PF02588;Note=DUF161;Description=Uncharacterized BCR, YitT family COG1284 7 | accn|NZ_GG692935 HMMER phypat 169006 169681 5.4000000000000002e-94 + . Parent=HMPREF0556_2156;Name=PF06415;Note=iPGM_N;Description=BPG-independent PGAM N-terminus (iPGM_N) 8 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Lactose_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 354824 354974 2.9999999999999998e-18 + . Parent=HMPREF0556_0438;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 3 | accn|NZ_GG692933 HMMER phypat 504329 504479 5.5000000000000008e-20 - . 
Parent=HMPREF0556_0598;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 4 | accn|NZ_GG692932 HMMER phypat 26062 26503 3.7000000000000007e-10 - . Parent=HMPREF0556_0904;Name=PF09863;Note=DUF2090;Description=Uncharacterized protein conserved in bacteria (DUF2090) 5 | accn|NZ_GG692932 HMMER phypat 171895 172045 5.2999999999999997e-19 + . Parent=HMPREF0556_1045;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 6 | accn|NZ_GG692932 HMMER phypat 306837 307713 1.2999999999999998e-87 + . Parent=HMPREF0556_1199;Name=PF03773;Note=DUF318;Description=Predicted permease 7 | accn|NZ_GG692935 HMMER phypat 55457 55598 3.9999999999999999e-16 + . Parent=HMPREF0556_2049;Name=PF00401;Note=ATP-synt_DE;Description=ATP synthase, Delta/Epsilon chain, long alpha-helix domain 8 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import find_packages 3 | from setuptools import setup 4 | import re 5 | 6 | 7 | VERSIONFILE=os.path.join('traitar', '_version.py') 8 | verstrline = open(VERSIONFILE, "rt").read() 9 | VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" 10 | mo = re.search(VSRE, verstrline, re.M) 11 | if mo: 12 | verstr = mo.group(1) 13 | else: 14 | raise RuntimeError("Unable to find version string in %s." 
% (VERSIONFILE,)) 15 | 16 | long_description = open('README.rst', 'r').read() 17 | 18 | setup(name='traitar', 19 | version = verstr, 20 | description='traitar - The microbial trait analyzer', 21 | long_description = long_description, 22 | url = 'http://github.com/aweimann/traitar', 23 | author='Aaron Weimann', 24 | author_email='weimann@hhu.de', 25 | license='GNU General Public License, version 3 (GPL-3.0)', 26 | packages= ['traitar'], 27 | include_package_data = True, 28 | scripts = ['traitar/traitar', 'traitar/merge_preds.py', 'traitar/heatmap.py', 'traitar/domtblout2gene_generic.py', 'traitar/predict.py', 'traitar/hmmer2filtered_best.py', 'traitar/hmm2gff.py'], 29 | zip_safe=False, 30 | install_requires = ["pandas >= 0.13.1", "matplotlib >= 1.3.1", "numpy >= 1.6", "scipy >= 0.13.3"]) 31 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Catalase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 705988 706246 1.2999999999999999e-32 + . Parent=AX25_03395;Name=PF04241;Note=DUF423;Description=Protein of unknown function (DUF423) 3 | accn|CP007172 HMMER phypat 822340 822847 1.6e-52 - . Parent=AX25_04005;Name=PF04264;Note=YceI;Description=YceI-like domain 4 | accn|CP007172 HMMER phypat 1481672 1482032 5.2999999999999992e-51 - . Parent=AX25_07360;Name=PF02803;Note=Thiolase_C;Description=Thiolase, C-terminal domain 5 | accn|CP007172 HMMER phypat 2200814 2201657 1.0999999999999998e-63 + . Parent=AX25_10870;Name=PF02628;Note=COX15-CtaA;Description=Cytochrome oxidase assembly protein 6 | accn|CP007172 HMMER phypat 2207638 2207926 1.0000000000000001e-29 - . Parent=AX25_10905;Name=PF04234;Note=CopC;Description=CopC domain 7 | accn|CP007172 HMMER phypat 2376895 2377903 4.5999999999999988e-115 - . 
Parent=AX25_11740;Name=PF01208;Note=URO-D;Description=Uroporphyrinogen decarboxylase (URO-D) 8 | accn|CP007172 HMMER phypat 2915692 2915986 7.0999999999999982e-45 - . Parent=AX25_14595;Name=PF00199;Note=Catalase;Description=Catalase 9 | accn|CP007172 HMMER phypat 2916125 2916515 2.5000000000000001e-60 - . Parent=AX25_14600;Name=PF00199;Note=Catalase;Description=Catalase 10 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | # Dockerfile to run Traitar - the microbial trait analyzer 3 | # Based on Ubuntu Image 4 | ############################################################ 5 | 6 | # Set the base image to use to Ubuntu 7 | FROM ubuntu:trusty 8 | 9 | RUN apt-get update 10 | MAINTAINER Aaron Weimann (weimann@hhu.de) 11 | RUN sudo apt-get install -y python-scipy python-matplotlib python-pip python-pandas 12 | RUN echo "deb http://archive.ubuntu.com/ubuntu trusty-backports main restricted universe multiverse ">> /etc/apt/sources.list 13 | RUN sudo apt-get update 14 | RUN sudo apt-get install -y hmmer prodigal 15 | RUN sudo apt-get install -y wget 16 | RUN sudo apt-get install -y git 17 | RUN mkdir /home/traitar 18 | RUN wget -O /home/traitar/Pfam-A.hmm.gz ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam27.0/Pfam-A.hmm.gz 19 | RUN gunzip /home/traitar/Pfam-A.hmm.gz 20 | RUN sudo apt-get install -y parallel 21 | ENV shell /bin/bash 22 | WORKDIR /home/traitar 23 | ADD https://www.random.org/strings/?num=16&len=16&digits=on&upperalpha=on&loweralpha=on&unique=on&format=plain&rnd=new uuid 24 | RUN git clone https://github.com/aweimann/traitar 25 | WORKDIR /home/traitar/traitar 26 | RUN python setup.py sdist 27 | RUN pip install traitar --find-links file:///home/traitar/traitar/dist 28 | RUN traitar pfam --local /home/traitar 29 | 
-------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Acetate_utilization_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 101569 102724 5.5999999999999991e-95 + . Parent=AX25_00465;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 3 | accn|CP007172 HMMER phypat+PGL 464434 465547 6.2999999999999994e-35 + . Parent=AX25_02215;Name=PF02133;Note=Transp_cyt_pur;Description=Permease for cytosine/purines, uracil, thiamine, allantoin 4 | accn|CP007172 HMMER phypat+PGL 779761 780109 3.3999999999999999e-11 + . Parent=AX25_03790;Name=PF13577;Note=SnoaL_4;Description=SnoaL-like domain 5 | accn|CP007172 HMMER phypat+PGL 1044681 1044945 4.2000000000000004e-14 - . Parent=AX25_05120;Name=PF02517;Note=Abi;Description=CAAX protease self-immunity 6 | accn|CP007172 HMMER phypat+PGL 1523406 1523685 1.1999999999999999e-11 - . Parent=AX25_07555;Name=PF13362;Note=Toprim_3;Description=Toprim domain 7 | accn|CP007172 HMMER phypat+PGL 1576707 1576791 1.8e-09 - . Parent=AX25_07835;Name=PF07719;Note=TPR_2;Description=Tetratricopeptide repeat 8 | accn|CP007172 HMMER phypat+PGL 2217433 2217694 1.4e-22 + . Parent=AX25_10960;Name=PF02517;Note=Abi;Description=CAAX protease self-immunity 9 | accn|CP007172 HMMER phypat+PGL 2286724 2286991 5.4999999999999999e-14 + . Parent=AX25_11300;Name=PF02517;Note=Abi;Description=CAAX protease self-immunity 10 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Starch_hydrolysis_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 51982 52327 4.7000000000000005e-26 - . 
Parent=AX25_00235;Name=PF04266;Note=ASCH;Description=ASCH domain 3 | accn|CP007172 HMMER phypat+PGL 165019 165232 2.8000000000000002e-13 + . Parent=AX25_00765;Name=PF00041;Note=fn3;Description=Fibronectin type III domain 4 | accn|CP007172 HMMER phypat+PGL 596500 596653 6.6000000000000004e-23 + . Parent=AX25_02885;Name=PF02229;Note=PC4;Description=Transcriptional Coactivator p15 (PC4) 5 | accn|CP007172 HMMER phypat+PGL 969790 970339 1.4999999999999997e-64 + . Parent=AX25_04710;Name=PF09323;Note=DUF1980;Description=Domain of unknown function (DUF1980) 6 | accn|CP007172 HMMER phypat+PGL 2044479 2044818 7.6999999999999999e-12 + . Parent=AX25_10140;Name=PF07694;Note=5TM-5TMR_LYT;Description=5TMR of 5TMR-LYT 7 | accn|CP007172 HMMER phypat+PGL 2150855 2151098 3.9e-28 - . Parent=AX25_10640;Name=PF06580;Note=His_kinase;Description=Histidine kinase 8 | accn|CP007172 HMMER phypat+PGL 2151579 2151807 4.3999999999999998e-29 - . Parent=AX25_10645;Name=PF04854;Note=DUF624;Description=Protein of unknown function, DUF624 9 | accn|CP007172 HMMER phypat+PGL 784981 785938 2.1999999999999997e-57 + . Parent=AX25_03830;Name=PF00962;Note=A_deaminase;Description=Adenosine/AMP deaminase 10 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Catalase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat 37208 37565 5.7999999999999993e-46 - . Parent=HMPREF0556_0041;Name=PF02803;Note=Thiolase_C;Description=Thiolase, C-terminal domain 3 | accn|NZ_GG692933 HMMER phypat 174489 174840 4.6999999999999999e-40 + . Parent=HMPREF0556_0255;Name=PF02803;Note=Thiolase_C;Description=Thiolase, C-terminal domain 4 | accn|NZ_GG692933 HMMER phypat 454293 454800 5.0999999999999995e-53 - . 
Parent=HMPREF0556_0543;Name=PF04264;Note=YceI;Description=YceI-like domain 5 | accn|NZ_GG692933 HMMER phypat 505800 506088 1.2e-29 + . Parent=HMPREF0556_0601;Name=PF04234;Note=CopC;Description=CopC domain 6 | accn|NZ_GG692933 HMMER phypat 511717 512557 6.9000000000000001e-61 - . Parent=HMPREF0556_0605;Name=PF02628;Note=COX15-CtaA;Description=Cytochrome oxidase assembly protein 7 | accn|NZ_GG692932 HMMER phypat 92529 93537 3.3999999999999997e-113 + . Parent=HMPREF0556_0969;Name=PF01208;Note=URO-D;Description=Uroporphyrinogen decarboxylase (URO-D) 8 | accn|NZ_GG692932 HMMER phypat 184824 185085 6.1999999999999996e-34 + . Parent=HMPREF0556_1060;Name=PF04241;Note=DUF423;Description=Protein of unknown function (DUF423) 9 | accn|NZ_GG692934 HMMER phypat 137926 139069 1.9999999999999993e-176 - . Parent=HMPREF0556_2392;Name=PF00199;Note=Catalase;Description=Catalase 10 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/predictions_single-votes.txt: -------------------------------------------------------------------------------- 1 | Alkaline phosphatase Beta hemolysis Coagulase production Lipase Nitrate to nitrite Nitrite to gas Pyrrolidonyl-beta-naphthylamide Bile-susceptible Colistin-Polymyxin susceptible DNase Growth at 42°C Growth in 6.5% NaCl Growth in KCN Growth on MacConkey agar Growth on ordinary blood agar Mucate utilization Arginine dihydrolase Indole Lysine decarboxylase Ornithine decarboxylase Acetate utilization Citrate Malonate Tartrate utilization Gas from glucose Glucose fermenter Glucose oxidizer Methyl red Voges Proskauer Cellobiose D-Mannitol D-Mannose D-Sorbitol D-Xylose Esculin hydrolysis Glycerol Lactose L-Arabinose L-Rhamnose Maltose Melibiose myo-Inositol ONPG (beta galactosidase) Raffinose Salicin Starch hydrolysis Sucrose Trehalose Urea hydrolysis Bacillus or coccobacillus Coccus Coccus - clusters or groups predominate Coccus - pairs or chains 
predominate Gram negative Gram positive Motile Spore formation Yellow pigment Aerobe Anaerobe Capnophilic Facultative Catalase Oxidase Hydrogen sulfide Casein hydrolysis Gelatin hydrolysis 2 | Listeria_grayi_DSM_20601 4 0 5 0 0 0 0 0 0 3 5 5 5 0 5 0 2 0 0 0 5 4 5 0 0 5 0 0 5 0 4 3 0 0 5 0 5 0 0 5 0 0 0 0 5 5 0 5 0 2 1 0 0 0 5 5 0 0 0 0 0 5 5 0 0 0 0 3 | Listeria_ivanovii_WSLC3009 3 0 5 0 0 0 0 0 0 2 5 5 5 0 5 0 3 0 0 0 5 0 5 0 0 5 0 5 5 0 0 0 4 0 5 0 5 0 0 5 0 0 0 0 5 5 0 5 0 4 0 0 0 0 5 5 0 0 0 0 0 5 5 0 0 0 0 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/predictions_majority-vote.txt: -------------------------------------------------------------------------------- 1 | Alkaline phosphatase Beta hemolysis Coagulase production Lipase Nitrate to nitrite Nitrite to gas Pyrrolidonyl-beta-naphthylamide Bile-susceptible Colistin-Polymyxin susceptible DNase Growth at 42°C Growth in 6.5% NaCl Growth in KCN Growth on MacConkey agar Growth on ordinary blood agar Mucate utilization Arginine dihydrolase Indole Lysine decarboxylase Ornithine decarboxylase Acetate utilization Citrate Malonate Tartrate utilization Gas from glucose Glucose fermenter Glucose oxidizer Methyl red Voges Proskauer Cellobiose D-Mannitol D-Mannose D-Sorbitol D-Xylose Esculin hydrolysis Glycerol Lactose L-Arabinose L-Rhamnose Maltose Melibiose myo-Inositol ONPG (beta galactosidase) Raffinose Salicin Starch hydrolysis Sucrose Trehalose Urea hydrolysis Bacillus or coccobacillus Coccus Coccus - clusters or groups predominate Coccus - pairs or chains predominate Gram negative Gram positive Motile Spore formation Yellow pigment Aerobe Anaerobe Capnophilic Facultative Catalase Oxidase Hydrogen sulfide Casein hydrolysis Gelatin hydrolysis 2 | Listeria_grayi_DSM_20601 1 0 1 0 0 0 0 1 0 0 1 1 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 0 0 1 1 1 1 0 1 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 3 | 
Listeria_ivanovii_WSLC3009 1 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 1 1 0 0 1 1 1 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/predictions_single-votes.txt: -------------------------------------------------------------------------------- 1 | Alkaline phosphatase Beta hemolysis Coagulase production Lipase Nitrate to nitrite Nitrite to gas Pyrrolidonyl-beta-naphthylamide Bile-susceptible Colistin-Polymyxin susceptible DNase Growth at 42°C Growth in 6.5% NaCl Growth in KCN Growth on MacConkey agar Growth on ordinary blood agar Mucate utilization Arginine dihydrolase Indole Lysine decarboxylase Ornithine decarboxylase Acetate utilization Citrate Malonate Tartrate utilization Gas from glucose Glucose fermenter Glucose oxidizer Methyl red Voges Proskauer Cellobiose D-Mannitol D-Mannose D-Sorbitol D-Xylose Esculin hydrolysis Glycerol Lactose L-Arabinose L-Rhamnose Maltose Melibiose myo-Inositol ONPG (beta galactosidase) Raffinose Salicin Starch hydrolysis Sucrose Trehalose Urea hydrolysis Bacillus or coccobacillus Coccus Coccus - clusters or groups predominate Coccus - pairs or chains predominate Gram negative Gram positive Motile Spore formation Yellow pigment Aerobe Anaerobe Capnophilic Facultative Catalase Oxidase Hydrogen sulfide Casein hydrolysis Gelatin hydrolysis 2 | Listeria_grayi_DSM_20601 5 0 5 0 0 0 0 4 0 0 3 5 2 0 5 1 4 0 0 0 0 5 5 0 2 5 0 0 5 0 1 4 0 0 5 0 5 0 0 5 0 1 0 0 5 5 3 5 0 5 0 0 0 0 5 5 0 1 0 0 0 5 5 0 0 0 0 3 | Listeria_ivanovii_WSLC3009 5 0 5 0 0 0 0 1 0 0 5 5 2 0 5 0 2 0 0 0 0 4 5 2 2 5 0 5 5 0 0 3 3 4 5 0 5 0 0 5 0 0 0 0 5 5 0 5 0 5 0 0 1 0 5 5 0 0 0 0 0 5 5 0 0 0 0 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/predictions_conservative-vote.txt: 
-------------------------------------------------------------------------------- 1 | Alkaline phosphatase Beta hemolysis Coagulase production Lipase Nitrate to nitrite Nitrite to gas Pyrrolidonyl-beta-naphthylamide Bile-susceptible Colistin-Polymyxin susceptible DNase Growth at 42°C Growth in 6.5% NaCl Growth in KCN Growth on MacConkey agar Growth on ordinary blood agar Mucate utilization Arginine dihydrolase Indole Lysine decarboxylase Ornithine decarboxylase Acetate utilization Citrate Malonate Tartrate utilization Gas from glucose Glucose fermenter Glucose oxidizer Methyl red Voges Proskauer Cellobiose D-Mannitol D-Mannose D-Sorbitol D-Xylose Esculin hydrolysis Glycerol Lactose L-Arabinose L-Rhamnose Maltose Melibiose myo-Inositol ONPG (beta galactosidase) Raffinose Salicin Starch hydrolysis Sucrose Trehalose Urea hydrolysis Bacillus or coccobacillus Coccus Coccus - clusters or groups predominate Coccus - pairs or chains predominate Gram negative Gram positive Motile Spore formation Yellow pigment Aerobe Anaerobe Capnophilic Facultative Catalase Oxidase Hydrogen sulfide Casein hydrolysis Gelatin hydrolysis 2 | Listeria_grayi_DSM_20601 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 3 | Listeria_ivanovii_WSLC3009 0 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 0 1 0 1 0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/predictions_majority-vote.txt: -------------------------------------------------------------------------------- 1 | Alkaline phosphatase Beta hemolysis Coagulase production Lipase Nitrate to nitrite Nitrite to gas Pyrrolidonyl-beta-naphthylamide Bile-susceptible Colistin-Polymyxin susceptible DNase Growth at 42°C Growth in 6.5% NaCl Growth in KCN Growth on MacConkey agar Growth on ordinary 
blood agar Mucate utilization Arginine dihydrolase Indole Lysine decarboxylase Ornithine decarboxylase Acetate utilization Citrate Malonate Tartrate utilization Gas from glucose Glucose fermenter Glucose oxidizer Methyl red Voges Proskauer Cellobiose D-Mannitol D-Mannose D-Sorbitol D-Xylose Esculin hydrolysis Glycerol Lactose L-Arabinose L-Rhamnose Maltose Melibiose myo-Inositol ONPG (beta galactosidase) Raffinose Salicin Starch hydrolysis Sucrose Trehalose Urea hydrolysis Bacillus or coccobacillus Coccus Coccus - clusters or groups predominate Coccus - pairs or chains predominate Gram negative Gram positive Motile Spore formation Yellow pigment Aerobe Anaerobe Capnophilic Facultative Catalase Oxidase Hydrogen sulfide Casein hydrolysis Gelatin hydrolysis 2 | Listeria_grayi_DSM_20601 1 0 1 0 0 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 1 1 1 0 0 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 3 | Listeria_ivanovii_WSLC3009 1 0 1 0 0 0 0 0 0 0 1 1 1 0 1 0 1 0 0 0 1 0 1 0 0 1 0 1 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/predictions_conservative-vote.txt: -------------------------------------------------------------------------------- 1 | Alkaline phosphatase Beta hemolysis Coagulase production Lipase Nitrate to nitrite Nitrite to gas Pyrrolidonyl-beta-naphthylamide Bile-susceptible Colistin-Polymyxin susceptible DNase Growth at 42°C Growth in 6.5% NaCl Growth in KCN Growth on MacConkey agar Growth on ordinary blood agar Mucate utilization Arginine dihydrolase Indole Lysine decarboxylase Ornithine decarboxylase Acetate utilization Citrate Malonate Tartrate utilization Gas from glucose Glucose fermenter Glucose oxidizer Methyl red Voges Proskauer Cellobiose D-Mannitol D-Mannose D-Sorbitol D-Xylose Esculin hydrolysis Glycerol Lactose L-Arabinose L-Rhamnose 
Maltose Melibiose myo-Inositol ONPG (beta galactosidase) Raffinose Salicin Starch hydrolysis Sucrose Trehalose Urea hydrolysis Bacillus or coccobacillus Coccus Coccus - clusters or groups predominate Coccus - pairs or chains predominate Gram negative Gram positive Motile Spore formation Yellow pigment Aerobe Anaerobe Capnophilic Facultative Catalase Oxidase Hydrogen sulfide Casein hydrolysis Gelatin hydrolysis 2 | Listeria_grayi_DSM_20601 1 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 3 | Listeria_ivanovii_WSLC3009 1 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 1 0 1 0 1 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/predictions_single-votes_combined.txt: -------------------------------------------------------------------------------- 1 | Indole D-Xylose Starch hydrolysis Growth on MacConkey agar Catalase Gelatin hydrolysis Cellobiose Urea hydrolysis Growth at 42°C Glycerol Voges Proskauer Pyrrolidonyl-beta-naphthylamide Coccus - clusters or groups predominate Coccus Lipase D-Mannitol Trehalose Aerobe Lysine decarboxylase Motile Nitrite to gas Coccus - pairs or chains predominate Maltose Growth on ordinary blood agar Colistin-Polymyxin susceptible Gas from glucose Acetate utilization Melibiose Malonate Spore formation Yellow pigment DNase myo-Inositol Methyl red Nitrate to nitrite ONPG (beta galactosidase) D-Mannose Gram positive Anaerobe Growth in 6.5% NaCl Bile-susceptible Glucose fermenter Glucose oxidizer Gram negative Ornithine decarboxylase Salicin L-Arabinose Casein hydrolysis Capnophilic Oxidase Mucate utilization Alkaline phosphatase Beta hemolysis Lactose Growth in KCN Tartrate utilization Hydrogen sulfide Citrate Raffinose Facultative Esculin hydrolysis L-Rhamnose Bacillus or coccobacillus Sucrose 
D-Sorbitol Coagulase production Arginine dihydrolase 2 | Listeria_grayi_DSM_20601 0 0 10 0 10 0 0 0 8 0 10 0 0 1 0 5 10 0 0 10 0 0 10 10 0 2 5 0 10 0 1 3 1 0 0 0 7 10 0 10 4 10 0 0 0 10 0 0 0 0 1 9 0 10 7 0 0 9 0 10 10 0 7 3 0 10 6 3 | Listeria_ivanovii_WSLC3009 0 4 10 0 10 0 0 0 10 0 10 0 0 0 0 0 10 0 0 10 0 1 10 10 0 2 5 0 10 0 0 2 0 10 0 0 3 10 0 10 1 10 0 0 0 10 0 0 0 0 0 8 0 10 7 2 0 4 0 10 10 0 9 0 7 10 5 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Growth_in_6.5%_NaCl_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat+PGL 10543 10681 1.2e-20 + . Parent=HMPREF0556_0012;Name=PF08338;Note=DUF1731;Description=Domain of unknown function (DUF1731) 3 | accn|NZ_GG692933 HMMER phypat+PGL 271478 272273 3.1999999999999993e-89 + . Parent=HMPREF0556_0359;Name=PF03881;Note=Fructosamin_kin;Description=Fructosamine kinase 4 | accn|NZ_GG692933 HMMER phypat+PGL 334758 334977 1.9999999999999998e-32 + . Parent=HMPREF0556_0416;Name=PF07831;Note=PYNP_C;Description=Pyrimidine nucleoside phosphorylase C-terminal domain 5 | accn|NZ_GG692933 HMMER phypat+PGL 658916 659174 5.5000000000000006e-17 - . Parent=HMPREF0556_0759;Name=PF13456;Note=RVT_3;Description=Reverse transcriptase-like 6 | accn|NZ_GG692932 HMMER phypat+PGL 340799 341327 1.8999999999999999e-69 - . Parent=HMPREF0556_1240;Name=PF13743;Note=Thioredoxin_5;Description=Thioredoxin 7 | accn|NZ_GG692932 HMMER phypat+PGL 434865 435468 9.3999999999999994e-78 + . Parent=HMPREF0556_1334;Name=PF02436;Note=PYC_OADA;Description=Conserved carboxylase domain 8 | accn|NZ_GG692932 HMMER phypat+PGL 568714 568801 3.7e-12 + . 
Parent=HMPREF0556_1469;Name=PF13653;Note=GDPD_2;Description=Glycerophosphoryl diester phosphodiesterase family 9 | accn|NZ_GG692935 HMMER phypat+PGL 186483 187467 4.9999999999999992e-73 + . Parent=HMPREF0556_2173;Name=PF00282;Note=Pyridoxal_deC;Description=Pyridoxal-dependent decarboxylase conserved domain 10 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/feature_track_commands.txt: -------------------------------------------------------------------------------- 1 | hmm2gff.py traitar_out/annotation/pfam/Listeria_ivanovii_WSLC3009_filtered_best.dat traitar_out/phenotype_prediction/phypat/feat_gffs/ Listeria_ivanovii_WSLC3009 /home/aaron/traitar/traitar/data/models/phypat.tar.gz 8556,8567,8549,8548,8558,8489,8536,8537,8540,8550,8545,8530,8566,8563,8510,8488,8477,8524,8534,8517,8498,8486,8562,8478 --gene_gff ./1457190.3.RefSeq.gff --gene_gff_type refseq 2 | hmm2gff.py traitar_out/annotation/pfam/Listeria_ivanovii_WSLC3009_filtered_best.dat traitar_out/phenotype_prediction/phypat/feat_gffs/ Listeria_ivanovii_WSLC3009 /home/aaron/traitar/traitar/data/models/phypat+PGL.tar.gz 8556,8567,8549,8548,8558,8489,8536,8537,8540,8550,8545,8530,8566,8563,8510,8488,8477,8524,8534,8517,8498,8486,8562,8478 --gene_gff ./1457190.3.RefSeq.gff --gene_gff_type refseq 3 | hmm2gff.py traitar_out/annotation/pfam/Listeria_grayi_DSM_20601_filtered_best.dat traitar_out/phenotype_prediction/phypat/feat_gffs/ Listeria_grayi_DSM_20601 /home/aaron/traitar/traitar/data/models/phypat.tar.gz 8556,8567,8494,8549,8548,8558,8489,8537,8526,8540,8550,8530,8533,8547,8563,8510,8488,8477,8524,8534,8498,8486,8562,8478 --gene_gff ./525367.9.RefSeq.gff --gene_gff_type refseq 4 | hmm2gff.py traitar_out/annotation/pfam/Listeria_grayi_DSM_20601_filtered_best.dat traitar_out/phenotype_prediction/phypat/feat_gffs/ Listeria_grayi_DSM_20601 /home/aaron/traitar/traitar/data/models/phypat+PGL.tar.gz 
8556,8567,8494,8549,8548,8558,8489,8537,8526,8540,8550,8530,8533,8547,8563,8510,8488,8477,8524,8534,8498,8486,8562,8478 --gene_gff ./525367.9.RefSeq.gff --gene_gff_type refseq -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Facultative_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat 4608 4803 1.2e-25 - . Parent=HMPREF0556_0005;Name=PF04167;Note=DUF402;Description=Protein of unknown function (DUF402) 3 | accn|NZ_GG692936 HMMER phypat 33971 34277 2.2000000000000002e-16 - . Parent=HMPREF0556_0039;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 4 | accn|NZ_GG692933 HMMER phypat 115431 115746 5.3000000000000005e-17 + . Parent=HMPREF0556_0201;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 5 | accn|NZ_GG692933 HMMER phypat 117359 117704 7.8999999999999989e-41 + . Parent=HMPREF0556_0203;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 6 | accn|NZ_GG692933 HMMER phypat 335050 335260 6.8000000000000001e-14 + . Parent=HMPREF0556_0417;Name=PF08211;Note=dCMP_cyt_deam_2;Description=Cytidine and deoxycytidylate deaminase zinc-binding region 7 | accn|NZ_GG692933 HMMER phypat 416050 416767 4.7000000000000004e-61 + . Parent=HMPREF0556_0506;Name=PF01226;Note=Form_Nir_trans;Description=Formate/nitrite transporter 8 | accn|NZ_GG692932 HMMER phypat 881739 883500 1.6999999999999992e-212 - . Parent=HMPREF0556_1769;Name=PF02901;Note=PFL;Description=Pyruvate formate lyase 9 | accn|NZ_GG692932 HMMER phypat 1024139 1025051 1.2999999999999999e-122 - . Parent=HMPREF0556_1909;Name=PF08267;Note=Meth_synt_1;Description=Cobalamin-independent synthase, N-terminal domain 10 | accn|NZ_GG692934 HMMER phypat 80363 80681 1.1e-29 + . 
Parent=HMPREF0556_2339;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 11 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/predictions_majority-vote_combined.txt: -------------------------------------------------------------------------------- 1 | Indole D-Xylose Starch hydrolysis Growth on MacConkey agar Catalase Gelatin hydrolysis Cellobiose Urea hydrolysis Growth at 42°C Glycerol Voges Proskauer Pyrrolidonyl-beta-naphthylamide Coccus - clusters or groups predominate Coccus Lipase D-Mannitol Trehalose Aerobe Lysine decarboxylase Motile Nitrite to gas Coccus - pairs or chains predominate Maltose Growth on ordinary blood agar Colistin-Polymyxin susceptible Gas from glucose Acetate utilization Melibiose Malonate Spore formation Yellow pigment DNase myo-Inositol Methyl red Nitrate to nitrite ONPG (beta galactosidase) D-Mannose Gram positive Anaerobe Growth in 6.5% NaCl Bile-susceptible Glucose fermenter Glucose oxidizer Gram negative Ornithine decarboxylase Salicin L-Arabinose Casein hydrolysis Capnophilic Oxidase Mucate utilization Alkaline phosphatase Beta hemolysis Lactose Growth in KCN Tartrate utilization Hydrogen sulfide Citrate Raffinose Facultative Esculin hydrolysis L-Rhamnose Bacillus or coccobacillus Sucrose D-Sorbitol Coagulase production Arginine dihydrolase 2 | Listeria_grayi_DSM_20601 0.0 0.0 3.0 0.0 3.0 0.0 0.0 0.0 3.0 0.0 3.0 0.0 0.0 0.0 0.0 1.0 3.0 0.0 0.0 3.0 0.0 0.0 3.0 3.0 0.0 0.0 1.0 0.0 3.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 3.0 3.0 0.0 3.0 2.0 3.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 3.0 1.0 0.0 0.0 3.0 0.0 3.0 3.0 0.0 2.0 2.0 0.0 3.0 2.0 3 | Listeria_ivanovii_WSLC3009 0.0 2.0 3.0 0.0 3.0 0.0 0.0 0.0 3.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 3.0 0.0 0.0 3.0 3.0 0.0 0.0 1.0 0.0 3.0 0.0 0.0 0.0 0.0 3.0 0.0 0.0 2.0 3.0 0.0 3.0 0.0 3.0 0.0 0.0 0.0 3.0 0.0 0.0 0.0 0.0 0.0 3.0 0.0 3.0 1.0 0.0 0.0 2.0 0.0 3.0 3.0 0.0 3.0 0.0 3.0 3.0 
1.0 4 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Growth_at_42°C_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 101569 102724 5.5999999999999991e-95 + . Parent=AX25_00465;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 3 | accn|CP007172 HMMER phypat 363004 363649 1.7000000000000001e-09 + . Parent=AX25_01685;Name=PF13483;Note=Lactamase_B_3;Description=Beta-lactamase superfamily domain 4 | accn|CP007172 HMMER phypat 1132737 1133799 6.6999999999999999e-87 + . Parent=AX25_05545;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 5 | accn|CP007172 HMMER phypat 1137167 1138277 2.9999999999999991e-121 + . Parent=AX25_05565;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 6 | accn|CP007172 HMMER phypat 1138364 1139429 1.2999999999999998e-78 + . Parent=AX25_05570;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 7 | accn|CP007172 HMMER phypat 1179084 1179507 1.2999999999999998e-63 + . Parent=AX25_05820;Name=PF10662;Note=PduV-EutP;Description=Ethanolamine utilisation - propanediol utilisation 8 | accn|CP007172 HMMER phypat 1530819 1531284 4.8e-10 - . Parent=AX25_07590;Name=PF10662;Note=PduV-EutP;Description=Ethanolamine utilisation - propanediol utilisation 9 | accn|CP007172 HMMER phypat 1653345 1653918 1.5999999999999998e-30 - . Parent=AX25_08175;Name=PF13483;Note=Lactamase_B_3;Description=Beta-lactamase superfamily domain 10 | accn|CP007172 HMMER phypat 2411024 2411606 2.3999999999999998e-28 + . 
"""Fetch the Pfam-A HMM database and record its location in traitar's config.json."""
import gzip
import json
import os
import shutil
import sys
from contextlib import closing

try:  # Python 2
    from urllib2 import URLError, urlopen
except ImportError:  # Python 3
    from urllib.error import URLError
    from urllib.request import urlopen

import traitar

_PFAM_URL = "ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam27.0/Pfam-A.hmm.gz"
_MAX_ATTEMPTS = 3
_CHUNK = 1000000  # bytes copied per read while streaming the archive to disk


def _fetch_pfam(target_dir):
    """Download Pfam-A.hmm.gz into *target_dir* and decompress it to Pfam-A.hmm.

    The download is retried up to ``_MAX_ATTEMPTS`` times when ``urlopen`` or
    the transfer raises ``URLError``; each failure is printed to stdout.  If
    every attempt fails the function returns without raising, leaving it to
    the caller to notice that Pfam-A.hmm is missing.
    """
    archive_path = os.path.join(target_dir, "Pfam-A.hmm.gz")
    hmm_path = os.path.join(target_dir, "Pfam-A.hmm")
    for _ in range(_MAX_ATTEMPTS):
        try:
            # The archive is binary data, so the destination must be opened
            # in 'wb'; closing() releases the FTP connection on every path.
            with closing(urlopen(_PFAM_URL, timeout=5)) as response, \
                    open(archive_path, 'wb') as archive:
                shutil.copyfileobj(response, archive, _CHUNK)
        except URLError as e:
            sys.stdout.write("%s\n" % e)
            continue
        with gzip.open(archive_path, 'rb') as zf, open(hmm_path, 'wb') as out_f:
            shutil.copyfileobj(zf, out_f)
        return


def download(args):
    """Download Pfam HMMs and write the download destination into the config file.

    Unless ``args.local`` is set, the Pfam 27.0 ``Pfam-A.hmm.gz`` archive is
    fetched into the directory ``args.download`` and decompressed there.  In
    both cases the absolute path of ``args.download`` is then stored under the
    ``"hmms"`` key of ``config.json`` inside the traitar package directory.

    The process exits with a non-zero status if the download directory does
    not exist, or if ``Pfam-A.hmm`` is not present in it afterwards.
    """
    if not args.local:
        # Checked once up front: retrying cannot make the directory appear.
        if not os.path.exists(args.download):
            sys.stderr.write("directory %s doesn't exists; please create first\n" % args.download)
            sys.exit(1)
        _fetch_pfam(args.download)
    # Validate before touching config.json so a failed run does not truncate
    # a previously working configuration.
    if not os.path.exists(os.path.join(args.download, "Pfam-A.hmm")):
        sys.exit("something went wrong; make sure %s contains Pfam-A.hmm" % args.download)
    config_path = os.path.join(os.path.abspath(os.path.dirname(traitar.__file__)), "config.json")
    with open(config_path, 'w') as config:
        config.write(json.dumps({"hmms": os.path.abspath(args.download)}))
##gff-version 3 2 | accn|NZ_GG692932 HMMER phypat+PGL 33791 35081 6.4999999999999975e-193 - . Parent=HMPREF0556_0912;Name=PF03051;Note=Peptidase_C1_2;Description=Peptidase C1-like family 3 | accn|NZ_GG692932 HMMER phypat+PGL 88819 89053 1.4000000000000001e-15 - . Parent=HMPREF0556_0964;Name=PF12732;Note=YtxH;Description=YtxH-like protein 4 | accn|NZ_GG692932 HMMER phypat+PGL 504864 505254 1.2999999999999999e-27 + . Parent=HMPREF0556_1402;Name=PF01138;Note=RNase_PH;Description=3' exoribonuclease family, domain 1 5 | accn|NZ_GG692932 HMMER phypat+PGL 505311 505509 4.0999999999999996e-14 + . Parent=HMPREF0556_1402;Name=PF03725;Note=RNase_PH_C;Description=3' exoribonuclease family, domain 2 6 | accn|NZ_GG692932 HMMER phypat+PGL 645391 645682 8.9999999999999995e-24 + . Parent=HMPREF0556_1541;Name=PF12732;Note=YtxH;Description=YtxH-like protein 7 | accn|NZ_GG692932 HMMER phypat+PGL 958093 958189 5.2999999999999996e-12 - . Parent=HMPREF0556_1843;Name=PF13014;Note=KH_3;Description=KH domain 8 | accn|NZ_GG692932 HMMER phypat+PGL 957112 957358 3.5000000000000002e-16 - . Parent=HMPREF0556_1843;Name=PF03726;Note=PNPase;Description=Polyribonucleotide nucleotidyltransferase, RNA binding domain 9 | accn|NZ_GG692932 HMMER phypat+PGL 956434 956821 5.5000000000000006e-24 - . Parent=HMPREF0556_1843;Name=PF01138;Note=RNase_PH;Description=3' exoribonuclease family, domain 1 10 | accn|NZ_GG692932 HMMER phypat+PGL 956830 957019 3.6000000000000001e-19 - . Parent=HMPREF0556_1843;Name=PF03725;Note=RNase_PH_C;Description=3' exoribonuclease family, domain 2 11 | accn|NZ_GG692935 HMMER phypat+PGL 248495 248693 9.9999999999999993e-23 - . 
Parent=HMPREF0556_2227;Name=PF13244;Note=DUF4040;Description=Domain of unknown function (DUF4040) 12 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Gram_positive_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 278191 278365 1.4000000000000002e-21 - . Parent=HMPREF0556_0364;Name=PF13556;Note=HTH_30;Description=PucR C-terminal helix-turn-helix domain 3 | accn|NZ_GG692933 HMMER phypat 361353 361782 8.9999999999999997e-54 + . Parent=HMPREF0556_0445;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 4 | accn|NZ_GG692933 HMMER phypat 419983 420397 6.2000000000000008e-26 + . Parent=HMPREF0556_0510;Name=PF04203;Note=Sortase;Description=Sortase family 5 | accn|NZ_GG692933 HMMER phypat 536286 536502 9.9999999999999993e-35 + . Parent=HMPREF0556_0632;Name=PF04472;Note=DUF552;Description=Protein of unknown function (DUF552) 6 | accn|NZ_GG692933 HMMER phypat 637981 638476 2.8999999999999996e-42 + . Parent=HMPREF0556_0736;Name=PF13307;Note=Helicase_C_2;Description=Helicase C-terminal domain 7 | accn|NZ_GG692932 HMMER phypat 312906 313266 6.1000000000000005e-30 + . Parent=HMPREF0556_1204;Name=PF04203;Note=Sortase;Description=Sortase family 8 | accn|NZ_GG692932 HMMER phypat 334583 335018 2.2000000000000001e-48 - . Parent=HMPREF0556_1233;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 9 | accn|NZ_GG692932 HMMER phypat 574022 574268 4.0999999999999998e-37 + . Parent=HMPREF0556_1475;Name=PF03951;Note=Gln-synt_N;Description=Glutamine synthetase, beta-Grasp domain 10 | accn|NZ_GG692935 HMMER phypat 63871 64327 4.5000000000000002e-49 - . 
Parent=HMPREF0556_2058;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 11 | accn|NZ_GG692934 HMMER phypat 101528 101678 2.7000000000000001e-17 + . Parent=HMPREF0556_2360;Name=PF13556;Note=HTH_30;Description=PucR C-terminal helix-turn-helix domain 12 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Coagulase_production_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 1203906 1204119 2.3999999999999998e-24 + . Parent=AX25_05965;Name=PF07568;Note=HisKA_2;Description=Histidine kinase 3 | accn|CP007172 HMMER phypat+PGL 1271836 1272223 5.0000000000000002e-27 + . Parent=AX25_06330;Name=PF01138;Note=RNase_PH;Description=3' exoribonuclease family, domain 1 4 | accn|CP007172 HMMER phypat+PGL 1272280 1272478 3.6999999999999999e-13 + . Parent=AX25_06330;Name=PF03725;Note=RNase_PH_C;Description=3' exoribonuclease family, domain 2 5 | accn|CP007172 HMMER phypat+PGL 1387407 1387488 4.3999999999999998e-12 + . Parent=AX25_06890;Name=PF13014;Note=KH_3;Description=KH domain 6 | accn|CP007172 HMMER phypat+PGL 1386426 1386672 3.7e-16 + . Parent=AX25_06890;Name=PF03726;Note=PNPase;Description=Polyribonucleotide nucleotidyltransferase, RNA binding domain 7 | accn|CP007172 HMMER phypat+PGL 1385748 1386135 5.4000000000000001e-24 + . Parent=AX25_06890;Name=PF01138;Note=RNase_PH;Description=3' exoribonuclease family, domain 1 8 | accn|CP007172 HMMER phypat+PGL 1386144 1386333 1.1000000000000002e-19 + . Parent=AX25_06890;Name=PF03725;Note=RNase_PH_C;Description=3' exoribonuclease family, domain 2 9 | accn|CP007172 HMMER phypat+PGL 1681874 1682165 1.3000000000000002e-18 - . 
Parent=AX25_08310;Name=PF12732;Note=YtxH;Description=YtxH-like protein 10 | accn|CP007172 HMMER phypat+PGL 2381324 2381615 8.9000000000000007e-16 + . Parent=AX25_11765;Name=PF12732;Note=YtxH;Description=YtxH-like protein 11 | accn|CP007172 HMMER phypat+PGL 2440584 2441883 9.3999999999999964e-206 + . Parent=AX25_12045;Name=PF03051;Note=Peptidase_C1_2;Description=Peptidase C1-like family 12 | accn|CP007172 HMMER phypat+PGL 2481592 2481790 3.4999999999999999e-23 + . Parent=AX25_12280;Name=PF13244;Note=DUF4040;Description=Domain of unknown function (DUF4040) 13 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Gram_positive_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 193593 194091 1.9999999999999999e-39 + . Parent=AX25_00900;Name=PF13307;Note=Helicase_C_2;Description=Helicase C-terminal domain 3 | accn|CP007172 HMMER phypat 456659 457088 2.2999999999999996e-54 + . Parent=AX25_02185;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 4 | accn|CP007172 HMMER phypat 464070 464244 4.5000000000000001e-20 + . Parent=AX25_02210;Name=PF13556;Note=HTH_30;Description=PucR C-terminal helix-turn-helix domain 5 | accn|CP007172 HMMER phypat 978036 978396 1.3999999999999999e-30 + . Parent=AX25_04750;Name=PF04203;Note=Sortase;Description=Sortase family 6 | accn|CP007172 HMMER phypat 1069806 1070235 1.4999999999999998e-43 - . Parent=AX25_05260;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 7 | accn|CP007172 HMMER phypat 1353274 1353520 1.6000000000000001e-36 + . Parent=AX25_06730;Name=PF03951;Note=Gln-synt_N;Description=Glutamine synthetase, beta-Grasp domain 8 | accn|CP007172 HMMER phypat 2033547 2034039 3.2e-40 - . 
Parent=AX25_10075;Name=PF13307;Note=Helicase_C_2;Description=Helicase C-terminal domain 9 | accn|CP007172 HMMER phypat 2172185 2172398 2.5999999999999995e-35 - . Parent=AX25_10725;Name=PF04472;Note=DUF552;Description=Protein of unknown function (DUF552) 10 | accn|CP007172 HMMER phypat 2344837 2345332 5.5999999999999999e-22 - . Parent=AX25_11585;Name=PF04203;Note=Sortase;Description=Sortase family 11 | accn|CP007172 HMMER phypat 2621343 2621799 6.5999999999999995e-51 + . Parent=AX25_13000;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 12 | accn|CP007172 HMMER phypat 2831166 2831601 9.9999999999999985e-48 - . Parent=AX25_14155;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 13 | accn|CP007172 HMMER phypat 2869060 2869213 9.9e-20 + . Parent=AX25_14360;Name=PF13556;Note=HTH_30;Description=PucR C-terminal helix-turn-helix domain 14 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Growth_on_ordinary_blood_agar_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 107832 108099 9.3000000000000012e-26 + . Parent=AX25_00505;Name=PF04397;Note=LytTR;Description=LytTr DNA-binding domain 3 | accn|CP007172 HMMER phypat 172895 173240 2.7000000000000005e-14 - . Parent=AX25_00800;Name=PF06114;Note=DUF955;Description=Domain of unknown function (DUF955) 4 | accn|CP007172 HMMER phypat 797669 798038 5.0999999999999992e-34 + . Parent=AX25_03890;Name=PF00781;Note=DAGK_cat;Description=Diacylglycerol kinase catalytic domain 5 | accn|CP007172 HMMER phypat 987151 987574 5.2999999999999994e-40 - . 
Parent=AX25_04805;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 6 | accn|CP007172 HMMER phypat 1030468 1030753 1.2e-29 + . Parent=AX25_05050;Name=PF04397;Note=LytTR;Description=LytTr DNA-binding domain 7 | accn|CP007172 HMMER phypat 1878512 1878890 2.7000000000000002e-34 - . Parent=AX25_09305;Name=PF00781;Note=DAGK_cat;Description=Diacylglycerol kinase catalytic domain 8 | accn|CP007172 HMMER phypat 1982242 1982407 2.7999999999999996e-16 - . Parent=AX25_09810;Name=PF01471;Note=PG_binding_1;Description=Putative peptidoglycan binding domain 9 | accn|CP007172 HMMER phypat 2305272 2306166 3.5999999999999996e-84 + . Parent=AX25_11425;Name=PF03601;Note=Cons_hypoth698;Description=Conserved hypothetical protein 698 10 | accn|CP007172 HMMER phypat 2396123 2396483 5.2999999999999996e-13 + . Parent=AX25_11830;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 11 | accn|CP007172 HMMER phypat 2641606 2641990 4.1999999999999998e-33 - . Parent=AX25_13110;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 12 | accn|CP007172 HMMER phypat 2661552 2661900 4.5999999999999997e-31 - . Parent=AX25_13215;Name=PF00781;Note=DAGK_cat;Description=Diacylglycerol kinase catalytic domain 13 | accn|CP007172 HMMER phypat 2824411 2824798 1.6e-27 - . Parent=AX25_14130;Name=PF02706;Note=Wzz;Description=Chain length determinant protein 14 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_D-Mannose_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 116716 117160 3.7999999999999997e-55 + . 
Parent=HMPREF0556_0202;Name=PF03830;Note=PTSIIB_sorb;Description=PTS system sorbose subfamily IIB component 3 | accn|NZ_GG692933 HMMER phypat 117842 118280 5.5999999999999996e-59 + . Parent=HMPREF0556_0203;Name=PF03830;Note=PTSIIB_sorb;Description=PTS system sorbose subfamily IIB component 4 | accn|NZ_GG692933 HMMER phypat 120962 121064 3.7000000000000003e-18 - . Parent=HMPREF0556_0207;Name=PF00367;Note=PTS_EIIB;Description=phosphotransferase system, EIIB 5 | accn|NZ_GG692933 HMMER phypat 209317 209416 6.2000000000000001e-18 + . Parent=HMPREF0556_0295;Name=PF00367;Note=PTS_EIIB;Description=phosphotransferase system, EIIB 6 | accn|NZ_GG692933 HMMER phypat 235378 235474 4.9000000000000001e-17 + . Parent=HMPREF0556_0319;Name=PF00367;Note=PTS_EIIB;Description=phosphotransferase system, EIIB 7 | accn|NZ_GG692932 HMMER phypat 136060 136153 3.5000000000000002e-14 + . Parent=HMPREF0556_1014;Name=PF00367;Note=PTS_EIIB;Description=phosphotransferase system, EIIB 8 | accn|NZ_GG692932 HMMER phypat 991034 991133 6.9000000000000009e-14 - . Parent=HMPREF0556_1878;Name=PF00367;Note=PTS_EIIB;Description=phosphotransferase system, EIIB 9 | accn|NZ_GG692935 HMMER phypat 157607 158789 2.7999999999999991e-139 + . Parent=HMPREF0556_2145;Name=PF03611;Note=EIIC-GAT;Description=PTS system sugar-specific permease component 10 | accn|NZ_GG692934 HMMER phypat 2718 3870 2.1999999999999996e-100 - . Parent=HMPREF0556_2245;Name=PF03611;Note=EIIC-GAT;Description=PTS system sugar-specific permease component 11 | accn|NZ_GG692934 HMMER phypat 44549 44624 2.3e-14 + . Parent=HMPREF0556_2303;Name=PF00367;Note=PTS_EIIB;Description=phosphotransferase system, EIIB 12 | accn|NZ_GG692934 HMMER phypat 180180 180282 5.3999999999999999e-20 + . Parent=HMPREF0556_2432;Name=PF00367;Note=PTS_EIIB;Description=phosphotransferase system, EIIB 13 | accn|NZ_GG692934 HMMER phypat 237179 237542 5.5999999999999999e-27 + . 
Parent=HMPREF0556_2493;Name=PF01232;Note=Mannitol_dh;Description=Mannitol dehydrogenase Rossmann domain 14 | -------------------------------------------------------------------------------- /traits.tsv: -------------------------------------------------------------------------------- 1 | accession category 2 | Alkaline phosphatase Enzyme 3 | Beta hemolysis Enzyme 4 | Coagulase production Enzyme 5 | Lipase Enzyme 6 | Nitrate to nitrite Enzyme 7 | Nitrite to gas Enzyme 8 | Pyrrolidonyl-beta-naphthylamide Enzyme 9 | Bile-susceptible Growth 10 | Colistin-Polymyxin susceptible Growth 11 | DNase Growth 12 | Growth at 42°C Growth 13 | Growth in 6.5% NaCl Growth 14 | Growth in KCN Growth 15 | Growth on MacConkey agar Growth 16 | Growth on ordinary blood agar Growth 17 | Mucate utilization Growth 18 | Arginine dihydrolase Growth: Amino acid 19 | Indole Growth: Amino acid 20 | Lysine decarboxylase Growth: Amino acid 21 | Ornithine decarboxylase Growth: Amino acid 22 | Acetate utilization Growth: Carboxylic acid 23 | Citrate Growth: Carboxylic acid 24 | Malonate Growth: Carboxylic acid 25 | Tartrate utilization Growth: Carboxylic acid 26 | Gas from glucose Growth: Glucose 27 | Glucose fermenter Growth: Glucose 28 | Glucose oxidizer Growth: Glucose 29 | Methyl red Growth: Glucose 30 | Voges Proskauer Growth: Glucose 31 | Cellobiose Growth: Sugar 32 | D-Mannitol Growth: Sugar 33 | D-Mannose Growth: Sugar 34 | D-Sorbitol Growth: Sugar 35 | D-Xylose Growth: Sugar 36 | Esculin hydrolysis Growth: Sugar 37 | Glycerol Growth: Sugar 38 | Lactose Growth: Sugar 39 | L-Arabinose Growth: Sugar 40 | L-Rhamnose Growth: Sugar 41 | Maltose Growth: Sugar 42 | Melibiose Growth: Sugar 43 | myo-Inositol Growth: Sugar 44 | ONPG (beta galactosidase) Growth: Sugar 45 | Raffinose Growth: Sugar 46 | Salicin Growth: Sugar 47 | Starch hydrolysis Growth: Sugar 48 | Sucrose Growth: Sugar 49 | Trehalose Growth: Sugar 50 | Urea hydrolysis Growth: Sugar 51 | Bacillus or coccobacillus Morphology 52 | Coccus 
Morphology 53 | Coccus - clusters or groups predominate Morphology 54 | Coccus - pairs or chains predominate Morphology 55 | Gram negative Morphology 56 | Gram positive Morphology 57 | Motile Morphology 58 | Spore formation Morphology 59 | Yellow pigment Morphology 60 | Aerobe Oxygen 61 | Anaerobe Oxygen 62 | Capnophilic Oxygen 63 | Facultative Oxygen 64 | Catalase Oxygen:Enzyme 65 | Oxidase Oxygen:Enzyme 66 | Hydrogen sulfide Product 67 | Casein hydrolysis Proteolysis 68 | Gelatin hydrolysis Proteolysis 69 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Gram_positive_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat+PGL 361353 361782 8.9999999999999997e-54 + . Parent=HMPREF0556_0445;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 3 | accn|NZ_GG692933 HMMER phypat+PGL 419983 420397 6.2000000000000008e-26 + . Parent=HMPREF0556_0510;Name=PF04203;Note=Sortase;Description=Sortase family 4 | accn|NZ_GG692933 HMMER phypat+PGL 536286 536502 9.9999999999999993e-35 + . Parent=HMPREF0556_0632;Name=PF04472;Note=DUF552;Description=Protein of unknown function (DUF552) 5 | accn|NZ_GG692932 HMMER phypat+PGL 312906 313266 6.1000000000000005e-30 + . Parent=HMPREF0556_1204;Name=PF04203;Note=Sortase;Description=Sortase family 6 | accn|NZ_GG692932 HMMER phypat+PGL 334583 335018 2.2000000000000001e-48 - . Parent=HMPREF0556_1233;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 7 | accn|NZ_GG692932 HMMER phypat+PGL 574022 574268 4.0999999999999998e-37 + . Parent=HMPREF0556_1475;Name=PF03951;Note=Gln-synt_N;Description=Glutamine synthetase, beta-Grasp domain 8 | accn|NZ_GG692932 HMMER phypat+PGL 968838 969066 2.9999999999999998e-18 - . 
Parent=HMPREF0556_1857;Name=PF14480;Note=DNA_pol3_a_NI;Description=DNA polymerase III polC-type N-terminus I 9 | accn|NZ_GG692935 HMMER phypat+PGL 63871 64327 4.5000000000000002e-49 - . Parent=HMPREF0556_2058;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 10 | accn|NZ_GG692935 HMMER phypat+PGL 140111 140366 3.3999999999999998e-40 + . Parent=HMPREF0556_2129;Name=PF02650;Note=HTH_WhiA;Description=WhiA C-terminal HTH domain 11 | accn|NZ_GG692935 HMMER phypat+PGL 139826 140102 3.1e-41 + . Parent=HMPREF0556_2129;Name=PF14527;Note=LAGLIDADG_WhiA;Description=WhiA LAGLIDADG-like domain 12 | accn|NZ_GG692934 HMMER phypat+PGL 76140 76464 5.1999999999999995e-54 - . Parent=HMPREF0556_2334;Name=PF06153;Note=DUF970;Description=Protein of unknown function (DUF970) 13 | accn|NZ_GG692934 HMMER phypat+PGL 307439 307664 3.3999999999999997e-38 + . Parent=HMPREF0556_2569;Name=PF06257;Note=DUF1021;Description=Protein of unknown function (DUF1021) 14 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Growth_at_42°C_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat+PGL 495176 495338 1.2e-15 - . Parent=HMPREF0556_0584;Name=PF14659;Note=Phage_int_SAM_3;Description=Phage integrase, N-terminal SAM-like domain 3 | accn|NZ_GG692933 HMMER phypat+PGL 679389 679632 3.7999999999999999e-33 + . Parent=HMPREF0556_0783;Name=PF02583;Note=Trns_repr_metal;Description=Metal-sensitive transcriptional repressor 4 | accn|NZ_GG692932 HMMER phypat+PGL 57888 58110 1.4999999999999998e-20 + . Parent=HMPREF0556_0932;Name=PF01883;Note=DUF59;Description=Domain of unknown function DUF59 5 | accn|NZ_GG692932 HMMER phypat+PGL 296628 296898 8.2999999999999998e-11 + . 
Parent=HMPREF0556_1185;Name=PF14635;Note=HHH_7;Description=Helix-hairpin-helix motif 6 | accn|NZ_GG692932 HMMER phypat+PGL 313467 313932 6.7000000000000001e-11 + . Parent=HMPREF0556_1205;Name=PF13483;Note=Lactamase_B_3;Description=Beta-lactamase superfamily domain 7 | accn|NZ_GG692932 HMMER phypat+PGL 599575 600373 2.2e-56 + . Parent=HMPREF0556_1496;Name=PF00176;Note=SNF2_N;Description=SNF2 family N-terminal domain 8 | accn|NZ_GG692932 HMMER phypat+PGL 635521 635749 1.3000000000000001e-20 + . Parent=HMPREF0556_1530;Name=PF02583;Note=Trns_repr_metal;Description=Metal-sensitive transcriptional repressor 9 | accn|NZ_GG692932 HMMER phypat+PGL 683270 683840 2.0000000000000002e-29 + . Parent=HMPREF0556_1578;Name=PF13483;Note=Lactamase_B_3;Description=Beta-lactamase superfamily domain 10 | accn|NZ_GG692932 HMMER phypat+PGL 925257 925446 6.8000000000000003e-10 - . Parent=HMPREF0556_1805;Name=PF02737;Note=3HCDH_N;Description=3-hydroxyacyl-CoA dehydrogenase, NAD binding domain 11 | accn|NZ_GG692934 HMMER phypat+PGL 16707 16869 1.3000000000000001e-09 + . Parent=HMPREF0556_2261;Name=PF14659;Note=Phage_int_SAM_3;Description=Phage integrase, N-terminal SAM-like domain 12 | accn|NZ_GG692934 HMMER phypat+PGL 187223 188375 5.6999999999999999e-91 + . Parent=HMPREF0556_2438;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 13 | accn|NZ_GG692934 HMMER phypat+PGL 333503 333692 9.900000000000001e-10 + . Parent=HMPREF0556_2601;Name=PF02737;Note=3HCDH_N;Description=3-hydroxyacyl-CoA dehydrogenase, NAD binding domain 14 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Esculin_hydrolysis_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat 46498 46705 2.0999999999999996e-25 - . 
Parent=HMPREF0556_0045;Name=PF14310;Note=Fn3-like;Description=Fibronectin type III-like domain 3 | accn|NZ_GG692933 HMMER phypat 315906 316113 6.0999999999999993e-17 + . Parent=HMPREF0556_0399;Name=PF14310;Note=Fn3-like;Description=Fibronectin type III-like domain 4 | accn|NZ_GG692933 HMMER phypat 408406 408559 6.4999999999999995e-11 - . Parent=HMPREF0556_0500;Name=PF06347;Note=SH3_4;Description=Bacterial SH3 domain 5 | accn|NZ_GG692933 HMMER phypat 558881 559535 1.5999999999999999e-93 - . Parent=HMPREF0556_0655;Name=PF03306;Note=AAL_decarboxy;Description=Alpha-acetolactate decarboxylase 6 | accn|NZ_GG692932 HMMER phypat 22359 22695 6.7999999999999993e-43 - . Parent=HMPREF0556_0901;Name=PF06993;Note=DUF1304;Description=Protein of unknown function (DUF1304) 7 | accn|NZ_GG692932 HMMER phypat 304644 305013 4.8999999999999999e-11 + . Parent=HMPREF0556_1197;Name=PF14526;Note=Cass2;Description=Integron-associated effector binding protein 8 | accn|NZ_GG692932 HMMER phypat 404632 405235 1.5e-50 + . Parent=HMPREF0556_1305;Name=PF03932;Note=CutC;Description=CutC family 9 | accn|NZ_GG692932 HMMER phypat 749536 749680 2.7e-11 - . Parent=HMPREF0556_1637;Name=PF06347;Note=SH3_4;Description=Bacterial SH3 domain 10 | accn|NZ_GG692932 HMMER phypat 820111 820576 9.300000000000001e-10 + . Parent=HMPREF0556_1707;Name=PF10662;Note=PduV-EutP;Description=Ethanolamine utilisation - propanediol utilisation 11 | accn|NZ_GG692935 HMMER phypat 107066 107267 2.1999999999999999e-15 + . Parent=HMPREF0556_2098;Name=PF13345;Note=DUF4098;Description=Domain of unknown function (DUF4098) 12 | accn|NZ_GG692935 HMMER phypat 108433 108655 1.8000000000000001e-15 + . Parent=HMPREF0556_2099;Name=PF13345;Note=DUF4098;Description=Domain of unknown function (DUF4098) 13 | accn|NZ_GG692935 HMMER phypat 141592 141955 9.9000000000000012e-19 + . 
Parent=HMPREF0556_2131;Name=PF14526;Note=Cass2;Description=Integron-associated effector binding protein 14 | accn|NZ_GG692935 HMMER phypat 179563 179830 1.3999999999999999e-24 - . Parent=HMPREF0556_2165;Name=PF05973;Note=Gp49;Description=Phage derived protein Gp49-like (DUF891) 15 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Catalase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat+PGL 10543 10681 1.2e-20 + . Parent=HMPREF0556_0012;Name=PF08338;Note=DUF1731;Description=Domain of unknown function (DUF1731) 3 | accn|NZ_GG692936 HMMER phypat+PGL 54764 55613 3.7999999999999992e-119 - . Parent=HMPREF0556_0052;Name=PF04898;Note=Glu_syn_central;Description=Glutamate synthase central domain 4 | accn|NZ_GG692936 HMMER phypat+PGL 55793 56171 1.5999999999999999e-37 - . Parent=HMPREF0556_0052;Name=PF01645;Note=Glu_synthase;Description=Conserved region in glutamate synthase 5 | accn|NZ_GG692933 HMMER phypat+PGL 454293 454800 5.0999999999999995e-53 - . Parent=HMPREF0556_0543;Name=PF04264;Note=YceI;Description=YceI-like domain 6 | accn|NZ_GG692933 HMMER phypat+PGL 464878 465154 6.7000000000000005e-21 + . Parent=HMPREF0556_0552;Name=PF13806;Note=Rieske_2;Description=Rieske-like [2Fe-2S] domain 7 | accn|NZ_GG692933 HMMER phypat+PGL 511717 512557 6.9000000000000001e-61 - . Parent=HMPREF0556_0605;Name=PF02628;Note=COX15-CtaA;Description=Cytochrome oxidase assembly protein 8 | accn|NZ_GG692932 HMMER phypat+PGL 113466 113790 1.0999999999999999e-44 + . Parent=HMPREF0556_0989;Name=PF03960;Note=ArsC;Description=ArsC family 9 | accn|NZ_GG692932 HMMER phypat+PGL 419739 420426 3.4999999999999993e-91 + . 
Parent=HMPREF0556_1320;Name=PF00198;Note=2-oxoacid_dh;Description=2-oxoacid dehydrogenases acyltransferase (catalytic domain) 10 | accn|NZ_GG692932 HMMER phypat+PGL 921959 922634 1.1e-83 - . Parent=HMPREF0556_1802;Name=PF00198;Note=2-oxoacid_dh;Description=2-oxoacid dehydrogenases acyltransferase (catalytic domain) 11 | accn|NZ_GG692935 HMMER phypat+PGL 3331 3649 6.3999999999999995e-38 + . Parent=HMPREF0556_1995;Name=PF14748;Note=P5CR_dimer;Description=Pyrroline-5-carboxylate reductase dimerisation 12 | accn|NZ_GG692935 HMMER phypat+PGL 199661 199985 6.8000000000000001e-34 + . Parent=HMPREF0556_2180;Name=PF03960;Note=ArsC;Description=ArsC family 13 | accn|NZ_GG692934 HMMER phypat+PGL 137926 139069 1.9999999999999993e-176 - . Parent=HMPREF0556_2392;Name=PF00199;Note=Catalase;Description=Catalase 14 | accn|NZ_GG692934 HMMER phypat+PGL 312658 313243 4.9999999999999999e-13 - . Parent=HMPREF0556_2575;Name=PF02230;Note=Abhydrolase_2;Description=Phospholipase/Carboxylesterase 15 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Growth_in_6.5%_NaCl_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 1013674 1014205 1.6999999999999997e-67 - . Parent=AX25_04955;Name=PF13743;Note=Thioredoxin_5;Description=Thioredoxin 3 | accn|CP007172 HMMER phypat+PGL 1118353 1118956 1.4999999999999997e-78 + . Parent=AX25_05500;Name=PF02436;Note=PYC_OADA;Description=Conserved carboxylase domain 4 | accn|CP007172 HMMER phypat+PGL 1192979 1193468 4.099999999999999e-57 + . Parent=AX25_05920;Name=PF01923;Note=Cob_adeno_trans;Description=Cobalamin adenosyltransferase 5 | accn|CP007172 HMMER phypat+PGL 1211714 1212182 5.0999999999999997e-30 + . 
Parent=AX25_06005;Name=PF01923;Note=Cob_adeno_trans;Description=Cobalamin adenosyltransferase 6 | accn|CP007172 HMMER phypat+PGL 1233023 1233512 4.5999999999999989e-64 + . Parent=AX25_06140;Name=PF01923;Note=Cob_adeno_trans;Description=Cobalamin adenosyltransferase 7 | accn|CP007172 HMMER phypat+PGL 1346622 1346703 1.7000000000000001e-10 + . Parent=AX25_06695;Name=PF13653;Note=GDPD_2;Description=Glycerophosphoryl diester phosphodiesterase family 8 | accn|CP007172 HMMER phypat+PGL 1811759 1811900 1.3000000000000002e-18 + . Parent=AX25_09005;Name=PF08338;Note=DUF1731;Description=Domain of unknown function (DUF1731) 9 | accn|CP007172 HMMER phypat+PGL 1889659 1890529 9.4999999999999993e-37 + . Parent=AX25_09345;Name=PF00209;Note=SNF;Description=Sodium:neurotransmitter symporter family 10 | accn|CP007172 HMMER phypat+PGL 2010246 2010498 1.5e-17 + . Parent=AX25_09960;Name=PF13456;Note=RVT_3;Description=Reverse transcriptase-like 11 | accn|CP007172 HMMER phypat+PGL 2132196 2132415 5.9999999999999998e-30 - . Parent=AX25_10550;Name=PF07831;Note=PYNP_C;Description=Pyrimidine nucleoside phosphorylase C-terminal domain 12 | accn|CP007172 HMMER phypat+PGL 2227380 2227959 4.0999999999999999e-12 - . Parent=AX25_11025;Name=PF03881;Note=Fructosamin_kin;Description=Fructosamine kinase 13 | accn|CP007172 HMMER phypat+PGL 2458903 2459878 2.6000000000000001e-74 + . Parent=AX25_12120;Name=PF00282;Note=Pyridoxal_deC;Description=Pyridoxal-dependent decarboxylase conserved domain 14 | accn|CP007172 HMMER phypat+PGL 2524959 2525940 5.3999999999999996e-70 - . 
Parent=AX25_12540;Name=PF00282;Note=Pyridoxal_deC;Description=Pyridoxal-dependent decarboxylase conserved domain 15 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Growth_on_ordinary_blood_agar_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat 70750 71125 4.7000000000000003e-36 - . Parent=HMPREF0556_0071;Name=PF00781;Note=DAGK_cat;Description=Diacylglycerol kinase catalytic domain 3 | accn|NZ_GG692933 HMMER phypat 250847 251759 6.0999999999999987e-85 + . Parent=HMPREF0556_0337;Name=PF03601;Note=Cons_hypoth698;Description=Conserved hypothetical protein 698 4 | accn|NZ_GG692933 HMMER phypat 449903 450275 4.8999999999999993e-37 + . Parent=HMPREF0556_0537;Name=PF00781;Note=DAGK_cat;Description=Diacylglycerol kinase catalytic domain 5 | accn|NZ_GG692933 HMMER phypat 683478 683643 2.1000000000000001e-16 + . Parent=HMPREF0556_0786;Name=PF01471;Note=PG_binding_1;Description=Putative peptidoglycan binding domain 6 | accn|NZ_GG692932 HMMER phypat 77377 77755 9.1999999999999992e-11 - . Parent=HMPREF0556_0955;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 7 | accn|NZ_GG692932 HMMER phypat 318673 319087 7.7999999999999996e-40 - . Parent=HMPREF0556_1212;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 8 | accn|NZ_GG692932 HMMER phypat 438094 438343 2.3999999999999999e-13 - . Parent=HMPREF0556_1337;Name=PF04397;Note=LytTR;Description=LytTr DNA-binding domain 9 | accn|NZ_GG692932 HMMER phypat 511243 511642 2e-35 + . Parent=HMPREF0556_1410;Name=PF02706;Note=Wzz;Description=Chain length determinant protein 10 | accn|NZ_GG692935 HMMER phypat 44479 44821 5.8999999999999996e-23 + . 
Parent=HMPREF0556_2037;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 11 | accn|NZ_GG692935 HMMER phypat 99820 100111 2.8e-27 - . Parent=HMPREF0556_2094;Name=PF04397;Note=LytTR;Description=LytTr DNA-binding domain 12 | accn|NZ_GG692934 HMMER phypat 192563 192830 1.1000000000000001e-27 + . Parent=HMPREF0556_2446;Name=PF04397;Note=LytTR;Description=LytTr DNA-binding domain 13 | accn|NZ_GG692934 HMMER phypat 217815 218130 3.1999999999999999e-18 - . Parent=HMPREF0556_2473;Name=PF06114;Note=DUF955;Description=Domain of unknown function (DUF955) 14 | accn|NZ_GG692934 HMMER phypat 239650 239977 1.8999999999999998e-32 - . Parent=HMPREF0556_2496;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 15 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Citrate_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat+PGL 92888 93122 7.1999999999999993e-33 - . Parent=HMPREF0556_0090;Name=PF10397;Note=ADSL_C;Description=Adenylosuccinate lyase C-terminus 3 | accn|NZ_GG692933 HMMER phypat+PGL 167486 167636 3.7000000000000007e-10 - . Parent=HMPREF0556_0248;Name=PF00135;Note=COesterase;Description=Carboxylesterase family 4 | accn|NZ_GG692933 HMMER phypat+PGL 189027 189177 8.400000000000001e-10 + . Parent=HMPREF0556_0278;Name=PF00135;Note=COesterase;Description=Carboxylesterase family 5 | accn|NZ_GG692933 HMMER phypat+PGL 207893 208181 5.0999999999999999e-29 - . Parent=HMPREF0556_0294;Name=PF02746;Note=MR_MLE_N;Description=Mandelate racemase / muconate lactonizing enzyme, N-terminal domain 6 | accn|NZ_GG692933 HMMER phypat+PGL 400555 400714 1.6000000000000001e-09 + . 
Parent=HMPREF0556_0492;Name=PF00135;Note=COesterase;Description=Carboxylesterase family 7 | accn|NZ_GG692933 HMMER phypat+PGL 477969 478329 3.4000000000000002e-13 + . Parent=HMPREF0556_0566;Name=PF00135;Note=COesterase;Description=Carboxylesterase family 8 | accn|NZ_GG692933 HMMER phypat+PGL 537965 538256 1.8e-37 + . Parent=HMPREF0556_0635;Name=PF02829;Note=3H;Description=3H domain 9 | accn|NZ_GG692932 HMMER phypat+PGL 16371 17031 1.3000000000000002e-60 - . Parent=HMPREF0556_0895;Name=PF11975;Note=Glyco_hydro_4C;Description=Family 4 glycosyl hydrolase C-terminal domain 10 | accn|NZ_GG692932 HMMER phypat+PGL 21522 22185 5.5999999999999989e-48 + . Parent=HMPREF0556_0900;Name=PF11975;Note=Glyco_hydro_4C;Description=Family 4 glycosyl hydrolase C-terminal domain 11 | accn|NZ_GG692932 HMMER phypat+PGL 691105 691612 8.2999999999999998e-11 + . Parent=HMPREF0556_1583;Name=PF06833;Note=MdcE;Description=Malonate decarboxylase gamma subunit (MdcE) 12 | accn|NZ_GG692935 HMMER phypat+PGL 61412 61733 1.2e-15 + . Parent=HMPREF0556_2056;Name=PF02746;Note=MR_MLE_N;Description=Mandelate racemase / muconate lactonizing enzyme, N-terminal domain 13 | accn|NZ_GG692935 HMMER phypat+PGL 239695 240079 1.3000000000000002e-10 - . Parent=HMPREF0556_2217;Name=PF13434;Note=K_oxygenase;Description=L-lysine 6-monooxygenase (NADPH-requiring) 14 | accn|NZ_GG692934 HMMER phypat+PGL 312658 313243 4.9999999999999999e-13 - . Parent=HMPREF0556_2575;Name=PF02230;Note=Abhydrolase_2;Description=Phospholipase/Carboxylesterase 15 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Gram_positive_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 230901 231126 4.9000000000000004e-38 + . 
Parent=AX25_01065;Name=PF06257;Note=DUF1021;Description=Protein of unknown function (DUF1021) 3 | accn|CP007172 HMMER phypat+PGL 456659 457088 2.2999999999999996e-54 + . Parent=AX25_02185;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 4 | accn|CP007172 HMMER phypat+PGL 536207 536552 4.4999999999999995e-32 - . Parent=AX25_02595;Name=PF07739;Note=TipAS;Description=TipAS antibiotic-recognition domain 5 | accn|CP007172 HMMER phypat+PGL 978036 978396 1.3999999999999999e-30 + . Parent=AX25_04750;Name=PF04203;Note=Sortase;Description=Sortase family 6 | accn|CP007172 HMMER phypat+PGL 1069806 1070235 1.4999999999999998e-43 - . Parent=AX25_05260;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 7 | accn|CP007172 HMMER phypat+PGL 1353274 1353520 1.6000000000000001e-36 + . Parent=AX25_06730;Name=PF03951;Note=Gln-synt_N;Description=Glutamine synthetase, beta-Grasp domain 8 | accn|CP007172 HMMER phypat+PGL 1373339 1373561 6.8999999999999994e-23 + . Parent=AX25_06835;Name=PF14480;Note=DNA_pol3_a_NI;Description=DNA polymerase III polC-type N-terminus I 9 | accn|CP007172 HMMER phypat+PGL 2172185 2172398 2.5999999999999995e-35 - . Parent=AX25_10725;Name=PF04472;Note=DUF552;Description=Protein of unknown function (DUF552) 10 | accn|CP007172 HMMER phypat+PGL 2344837 2345332 5.5999999999999999e-22 - . Parent=AX25_11585;Name=PF04203;Note=Sortase;Description=Sortase family 11 | accn|CP007172 HMMER phypat+PGL 2570550 2570805 1.3999999999999997e-38 - . Parent=AX25_12745;Name=PF02650;Note=HTH_WhiA;Description=WhiA C-terminal HTH domain 12 | accn|CP007172 HMMER phypat+PGL 2570265 2570541 1e-41 - . Parent=AX25_12745;Name=PF14527;Note=LAGLIDADG_WhiA;Description=WhiA LAGLIDADG-like domain 13 | accn|CP007172 HMMER phypat+PGL 2621343 2621799 6.5999999999999995e-51 + . 
Parent=AX25_13000;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 14 | accn|CP007172 HMMER phypat+PGL 2799040 2799364 1.3999999999999998e-54 - . Parent=AX25_13995;Name=PF06153;Note=DUF970;Description=Protein of unknown function (DUF970) 15 | accn|CP007172 HMMER phypat+PGL 2831166 2831601 9.9999999999999985e-48 - . Parent=AX25_14155;Name=PF03816;Note=LytR_cpsA_psr;Description=Cell envelope-related transcriptional attenuator domain 16 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Facultative_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 153233 153578 5.7999999999999991e-39 + . Parent=AX25_00705;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 3 | accn|CP007172 HMMER phypat 305073 305376 7.1999999999999999e-32 + . Parent=AX25_01415;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 4 | accn|CP007172 HMMER phypat 585910 586861 2.5e-46 + . Parent=AX25_02840;Name=PF04245;Note=NA37;Description=37-kD nucleoid-associated bacterial protein 5 | accn|CP007172 HMMER phypat 614393 615107 5.5000000000000008e-60 + . Parent=AX25_02955;Name=PF01226;Note=Form_Nir_trans;Description=Formate/nitrite transporter 6 | accn|CP007172 HMMER phypat 805886 806225 2.3999999999999999e-37 - . Parent=AX25_03950;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 7 | accn|CP007172 HMMER phypat 808379 808688 5.9000000000000001e-13 - . Parent=AX25_03955;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 8 | accn|CP007172 HMMER phypat 958797 959532 3.2999999999999996e-87 + . 
Parent=AX25_04665;Name=PF01226;Note=Form_Nir_trans;Description=Formate/nitrite transporter 9 | accn|CP007172 HMMER phypat 1462276 1464034 3.5999999999999986e-211 + . Parent=AX25_07275;Name=PF02901;Note=PFL;Description=Pyruvate formate lyase 10 | accn|CP007172 HMMER phypat 1531703 1531940 8.7000000000000003e-13 - . Parent=AX25_07595;Name=PF08211;Note=dCMP_cyt_deam_2;Description=Cytidine and deoxycytidylate deaminase zinc-binding region 11 | accn|CP007172 HMMER phypat 1788797 1789724 1.4999999999999998e-123 - . Parent=AX25_08815;Name=PF08267;Note=Meth_synt_1;Description=Cobalamin-independent synthase, N-terminal domain 12 | accn|CP007172 HMMER phypat 1804276 1804471 1.6000000000000001e-25 - . Parent=AX25_08965;Name=PF04167;Note=DUF402;Description=Protein of unknown function (DUF402) 13 | accn|CP007172 HMMER phypat 1838406 1838706 3.9e-13 - . Parent=AX25_09140;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 14 | accn|CP007172 HMMER phypat 2044479 2044818 7.6999999999999999e-12 + . Parent=AX25_10140;Name=PF07694;Note=5TM-5TMR_LYT;Description=5TMR of 5TMR-LYT 15 | accn|CP007172 HMMER phypat 2050250 2052026 8.1999999999999975e-165 + . Parent=AX25_10165;Name=PF02901;Note=PFL;Description=Pyruvate formate lyase 16 | accn|CP007172 HMMER phypat 2135493 2135829 3.7000000000000002e-28 - . Parent=AX25_10570;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 17 | accn|CP007172 HMMER phypat 2805647 2805965 6.5999999999999998e-31 + . Parent=AX25_14035;Name=PF03610;Note=EIIA-man;Description=PTS system fructose IIA component 18 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Catalase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 417260 417575 3.6e-46 - . 
Parent=AX25_02010;Name=PF14748;Note=P5CR_dimer;Description=Pyrroline-5-carboxylate reductase dimerisation 3 | accn|CP007172 HMMER phypat+PGL 596879 597293 1.6999999999999999e-19 + . Parent=AX25_02890;Name=PF02230;Note=Abhydrolase_2;Description=Phospholipase/Carboxylesterase 4 | accn|CP007172 HMMER phypat+PGL 790095 790668 5.7000000000000003e-11 + . Parent=AX25_03850;Name=PF02230;Note=Abhydrolase_2;Description=Phospholipase/Carboxylesterase 5 | accn|CP007172 HMMER phypat+PGL 822340 822847 1.6e-52 - . Parent=AX25_04005;Name=PF04264;Note=YceI;Description=YceI-like domain 6 | accn|CP007172 HMMER phypat+PGL 1097013 1097700 3.6999999999999997e-90 + . Parent=AX25_05400;Name=PF00198;Note=2-oxoacid_dh;Description=2-oxoacid dehydrogenases acyltransferase (catalytic domain) 7 | accn|CP007172 HMMER phypat+PGL 1425108 1425777 9.4999999999999985e-82 + . Parent=AX25_07105;Name=PF00198;Note=2-oxoacid_dh;Description=2-oxoacid dehydrogenases acyltransferase (catalytic domain) 8 | accn|CP007172 HMMER phypat+PGL 1439514 1439817 7.1e-16 - . Parent=AX25_07175;Name=PF14748;Note=P5CR_dimer;Description=Pyrroline-5-carboxylate reductase dimerisation 9 | accn|CP007172 HMMER phypat+PGL 1811759 1811900 1.3000000000000002e-18 + . Parent=AX25_09005;Name=PF08338;Note=DUF1731;Description=Domain of unknown function (DUF1731) 10 | accn|CP007172 HMMER phypat+PGL 1856790 1857639 2.599999999999999e-112 - . Parent=AX25_09205;Name=PF04898;Note=Glu_syn_central;Description=Glutamate synthase central domain 11 | accn|CP007172 HMMER phypat+PGL 1857819 1858923 5.399999999999998e-159 - . Parent=AX25_09205;Name=PF01645;Note=Glu_synthase;Description=Conserved region in glutamate synthase 12 | accn|CP007172 HMMER phypat+PGL 2200814 2201657 1.0999999999999998e-63 + . Parent=AX25_10870;Name=PF02628;Note=COX15-CtaA;Description=Cytochrome oxidase assembly protein 13 | accn|CP007172 HMMER phypat+PGL 2315547 2316195 2.2999999999999999e-52 - . 
Parent=AX25_11490;Name=PF06283;Note=ThuA;Description=Trehalose utilisation 14 | accn|CP007172 HMMER phypat+PGL 2355151 2355475 5.999999999999999e-45 - . Parent=AX25_11635;Name=PF03960;Note=ArsC;Description=ArsC family 15 | accn|CP007172 HMMER phypat+PGL 2517718 2518045 9.1000000000000004e-35 - . Parent=AX25_12500;Name=PF03960;Note=ArsC;Description=ArsC family 16 | accn|CP007172 HMMER phypat+PGL 2915692 2915986 7.0999999999999982e-45 - . Parent=AX25_14595;Name=PF00199;Note=Catalase;Description=Catalase 17 | accn|CP007172 HMMER phypat+PGL 2916125 2916515 2.5000000000000001e-60 - . Parent=AX25_14600;Name=PF00199;Note=Catalase;Description=Catalase 18 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Maltose_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 33936 34089 1.8e-09 - . Parent=AX25_00175;Name=PF03633;Note=Glyco_hydro_65C;Description=Glycosyl hydrolase family 65, C-terminal domain 3 | accn|CP007172 HMMER phypat 137112 137520 1.5999999999999999e-38 - . Parent=AX25_00640;Name=PF07702;Note=UTRA;Description=UTRA domain 4 | accn|CP007172 HMMER phypat 264898 265066 9.3000000000000014e-18 + . Parent=AX25_01215;Name=PF11734;Note=TilS_C;Description=TilS substrate C-terminal domain 5 | accn|CP007172 HMMER phypat 392576 392975 6.6000000000000001e-35 - . Parent=AX25_01865;Name=PF07702;Note=UTRA;Description=UTRA domain 6 | accn|CP007172 HMMER phypat 590281 590689 2.7000000000000001e-40 + . Parent=AX25_02855;Name=PF07702;Note=UTRA;Description=UTRA domain 7 | accn|CP007172 HMMER phypat 950429 950825 1.6999999999999999e-40 + . Parent=AX25_04605;Name=PF07702;Note=UTRA;Description=UTRA domain 8 | accn|CP007172 HMMER phypat 1007768 1008182 9.7999999999999998e-43 + . 
Parent=AX25_04925;Name=PF07702;Note=UTRA;Description=UTRA domain 9 | accn|CP007172 HMMER phypat 1307774 1308185 2.2000000000000001e-33 + . Parent=AX25_06510;Name=PF07702;Note=UTRA;Description=UTRA domain 10 | accn|CP007172 HMMER phypat 1467528 1467867 8.8000000000000004e-14 + . Parent=AX25_07295;Name=PF11188;Note=DUF2975;Description=Protein of unknown function (DUF2975) 11 | accn|CP007172 HMMER phypat 1694455 1695190 5.0000000000000002e-63 - . Parent=AX25_08375;Name=PF01915;Note=Glyco_hydro_3_C;Description=Glycosyl hydrolase family 3 C-terminal domain 12 | accn|CP007172 HMMER phypat 1849330 1850020 1.4999999999999999e-53 - . Parent=AX25_09180;Name=PF01915;Note=Glyco_hydro_3_C;Description=Glycosyl hydrolase family 3 C-terminal domain 13 | accn|CP007172 HMMER phypat 2015284 2016757 2.299999999999999e-192 - . Parent=AX25_09990;Name=PF02074;Note=Peptidase_M32;Description=Carboxypeptidase Taq (M32) metallopeptidase 14 | accn|CP007172 HMMER phypat 2140440 2140803 9.9999999999999998e-13 - . Parent=AX25_10600;Name=PF07702;Note=UTRA;Description=UTRA domain 15 | accn|CP007172 HMMER phypat 2141129 2141528 2.7999999999999995e-34 - . Parent=AX25_10605;Name=PF07702;Note=UTRA;Description=UTRA domain 16 | accn|CP007172 HMMER phypat 2279595 2279742 6.4999999999999999e-15 - . Parent=AX25_11270;Name=PF03633;Note=Glyco_hydro_65C;Description=Glycosyl hydrolase family 65, C-terminal domain 17 | accn|CP007172 HMMER phypat 2658923 2659463 2.0999999999999999e-17 - . Parent=AX25_13205;Name=PF13439;Note=Glyco_transf_4;Description=Glycosyltransferase Family 4 18 | accn|CP007172 HMMER phypat 2840378 2840789 7.6000000000000004e-39 - . Parent=AX25_14200;Name=PF07702;Note=UTRA;Description=UTRA domain 19 | accn|CP007172 HMMER phypat 2816438 2816984 6.7000000000000001e-12 - . 
Parent=AX25_14095;Name=PF13439;Note=Glyco_transf_4;Description=Glycosyltransferase Family 4 20 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Alkaline_phosphatase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 29795 29918 2.7e-11 - . Parent=AX25_00160;Name=PF13905;Note=Thioredoxin_8;Description=Thioredoxin-like 3 | accn|CP007172 HMMER phypat+PGL 177278 177740 1.6999999999999999e-40 - . Parent=AX25_00830;Name=PF02872;Note=5_nucleotid_C;Description=5'-nucleotidase, C-terminal domain 4 | accn|CP007172 HMMER phypat+PGL 241414 241639 2.7000000000000005e-14 - . Parent=AX25_01120;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 5 | accn|CP007172 HMMER phypat+PGL 620510 620711 4.7000000000000002e-13 - . Parent=AX25_02980;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 6 | accn|CP007172 HMMER phypat+PGL 779433 779604 9.5999999999999995e-14 + . Parent=AX25_03785;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 7 | accn|CP007172 HMMER phypat+PGL 1217565 1218120 7.1999999999999999e-17 + . Parent=AX25_06050;Name=PF13500;Note=AAA_26;Description=AAA domain 8 | accn|CP007172 HMMER phypat+PGL 1231524 1232199 6.0000000000000002e-27 + . Parent=AX25_06135;Name=PF13500;Note=AAA_26;Description=AAA domain 9 | accn|CP007172 HMMER phypat+PGL 1266397 1266526 1.8000000000000002e-10 + . Parent=AX25_06305;Name=PF13905;Note=Thioredoxin_8;Description=Thioredoxin-like 10 | accn|CP007172 HMMER phypat+PGL 1305495 1305705 1.2e-16 + . Parent=AX25_06480;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 11 | accn|CP007172 HMMER phypat+PGL 1531703 1531940 8.7000000000000003e-13 - . 
Parent=AX25_07595;Name=PF08211;Note=dCMP_cyt_deam_2;Description=Cytidine and deoxycytidylate deaminase zinc-binding region 12 | accn|CP007172 HMMER phypat+PGL 1553144 1553444 2.7e-11 - . Parent=AX25_07710;Name=PF03848;Note=TehB;Description=Tellurite resistance protein TehB 13 | accn|CP007172 HMMER phypat+PGL 1655853 1656300 1.4999999999999999e-53 - . Parent=AX25_08185;Name=PF01262;Note=AlaDh_PNT_C;Description=Alanine dehydrogenase/PNT, C-terminal domain 14 | accn|CP007172 HMMER phypat+PGL 2293533 2293683 7.3000000000000002e-13 + . Parent=AX25_11360;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 15 | accn|CP007172 HMMER phypat+PGL 2320519 2320732 1.3000000000000001e-09 - . Parent=AX25_11515;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 16 | accn|CP007172 HMMER phypat+PGL 2500086 2500479 4.4000000000000002e-14 - . Parent=AX25_12400;Name=PF02872;Note=5_nucleotid_C;Description=5'-nucleotidase, C-terminal domain 17 | accn|CP007172 HMMER phypat+PGL 2812373 2812922 8.0999999999999998e-12 - . Parent=AX25_14080;Name=PF13519;Note=VWA_2;Description=von Willebrand factor type A domain 18 | accn|CP007172 HMMER phypat+PGL 2877259 2877472 1.6000000000000002e-10 - . 
#!/usr/bin/env python
"""Create a summary matrix and a gene2hmm mapping from filtered and aggregated hmmer output files."""
import pandas as ps

# 0-based position of the column holding the HMM accession in a filtered
# hmmer table, keyed by the name returned by pt_models.get_hmm_name().
_QUERY_COLUMN = {"pfam": 4, "dbcan": 3}

# Suffix stripped from a table's file name to obtain the sample name.
_TABLE_SUFFIX = "_filtered_best.dat"


def _sample_name(path):
    """Return the sample name for *path*: its base name without the table suffix."""
    return path.split("/")[-1].replace(_TABLE_SUFFIX, "")


def gene2hmm(domtblout_fs, pt_models, gene2hmm_out=None, is_gene2hmm=False):
    """Summarise filtered and aggregated hmmer output files.

    Parameters
    ----------
    domtblout_fs : str
        Path to a text file that lists one filtered hmmer table per line.
        Blank lines are ignored.
    pt_models : object
        Must provide ``get_pf2desc()`` (a table whose index holds the HMM
        accessions used as summary columns) and ``get_hmm_name()``
        (``"pfam"`` or ``"dbcan"``).
    gene2hmm_out : str, optional
        Where to write the tab separated summary matrix. Nothing is written
        when this is ``None`` or when ``is_gene2hmm`` is true.
    is_gene2hmm : bool, optional
        When true, also collect the per-sample ``gene -> [accession, ...]``
        mapping and skip writing the summary matrix.

    Returns
    -------
    tuple
        ``(sum_df, mapping)``: ``sum_df`` has one row per listed table (named
        after the sample) and one column per known accession, holding hit
        counts as floats. ``mapping`` is ``{sample: {gene: [accession, ...]}}``
        and has empty inner dicts unless ``is_gene2hmm`` is true.

    Raises
    ------
    ValueError
        If a table contains hits but ``pt_models.get_hmm_name()`` is neither
        ``"pfam"`` nor ``"dbcan"``.
    """
    accs = pt_models.get_pf2desc()
    with open(domtblout_fs, 'r') as list_f:
        domtblout_list = [line.strip() for line in list_f if line.strip()]
    # label the rows by sample name, i.e. get rid of the preceding path
    samples = [_sample_name(path) for path in domtblout_list]
    sum_df = ps.DataFrame(0.0, index=samples, columns=accs.index)
    known_accs = set(accs.index)
    hmm_name = pt_models.get_hmm_name()
    query_col = _QUERY_COLUMN.get(hmm_name)
    sample2genes = {}
    for path, sample in zip(domtblout_list, samples):
        gene_df = ps.read_csv(path, sep="\t")
        # keyed by sample name, so a sample listed twice keeps its genes
        genes = sample2genes.setdefault(sample, {})
        if gene_df.empty:
            continue
        if query_col is None:
            raise ValueError("unsupported HMM database %r, expected one of %s"
                             % (hmm_name, sorted(_QUERY_COLUMN)))
        counts = {}
        for gene, raw_query in zip(gene_df['target name'], gene_df.iloc[:, query_col]):
            # drop the version suffix of the accession, e.g. PF00001.5 -> PF00001
            query = raw_query.split('.')[0]
            if is_gene2hmm:
                genes.setdefault(gene, []).append(query)
            if query in known_accs:
                counts[query] = counts.get(query, 0) + 1
            else:
                # accession is not part of the model collection: report it
                print(query)
        # add annotations to the summary matrix, one update per accession
        for query, n_hits in counts.items():
            sum_df.loc[sample, query] += n_hits
    # write annotation summary to disk
    if not is_gene2hmm and gene2hmm_out is not None:
        sum_df.to_csv(gene2hmm_out, sep="\t")
    return sum_df, sample2genes


if __name__ == "__main__":
    import argparse
    # Only the command line entry point builds a PhenotypeCollection.
    from traitar.PhenotypeCollection import PhenotypeCollection
    parser = argparse.ArgumentParser("generate summary matrix from the filtered best hmmer annotation files")
    parser.add_argument("outfile", help='summary matrix output file')
    parser.add_argument("in_filtered_best_fs", help='file with filtered.best file name per row')
    parser.add_argument("archive_f", help="phenotype archive file")
    args = parser.parse_args()
    pt_models = PhenotypeCollection(args.archive_f)
    gene2hmm(args.in_filtered_best_fs, pt_models, gene2hmm_out=args.outfile)
Install locally by 12 | 13 | ``pip install traitar-<version>.tar.gz --user`` 14 | 15 | You need to add the following line to your ~/.bashrc to adjust the PATH variable so that bash finds the executables needed for running traitar. 16 | 17 | ``PATH=$PATH:~/.local/bin/`` 18 | 19 | You may need to run ``source ~/.bashrc`` once in your current session. 20 | 21 | You can also install globally with 22 | 23 | ``sudo pip install traitar-<version>.tar.gz`` 24 | ### Creating a virtual environment 25 | You may want to use virtualenv to create a clean environment for traitar, i.e. run 26 | 27 | ``` 28 | virtualenv <environment_name> --system-site-packages 29 | source <environment_name>/bin/activate 30 | 31 | pip install -U traitar-<version>.tar.gz 32 | PATH=$PATH:<environment_name>/bin/ 33 | source ~/.bashrc 34 | ``` 35 | 36 | ### Additional requirements 37 | traitar further needs prodigal and hmmsearch available on the command line. For parallel execution it further requires GNU parallel. 38 | All three are available as preconfigured packages for many Linux distributions, e.g. Debian / Ubuntu. For Ubuntu 14.04 they are available via trusty-backports. 39 | 40 | ``` 41 | #include trusty backports source in the sources.list 42 | echo "deb http://archive.ubuntu.com/ubuntu trusty-backports main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list 43 | 44 | #update sources 45 | sudo apt-get update 46 | 47 | #install packages 48 | sudo apt-get install parallel prodigal hmmer 49 | ``` 50 | 51 | ``traitar -h`` will provide help regarding the different options of traitar. 52 | traitar requires the Pfam 27.0 HMM models. These are not distributed with this package but need to be downloaded: 53 | 54 | ``traitar pfam <path_to_pfam_folder>`` 55 | 56 | will download and extract the Pfam models to the specified folder. The download can take a while depending on your internet connection. The Pfam models are extracted automatically to the same folder. This can slow down some machines. 
57 | You may also download and extract the Pfam models manually from ftp://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam27.0/Pfam-A.hmm.gz and run 58 | 59 | ``traitar pfam --local `` 60 | 61 | to let traitar know where. 62 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Growth_at_42°C_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 198272 199364 6.7999999999999987e-107 + . Parent=HMPREF0556_0286;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 3 | accn|NZ_GG692933 HMMER phypat 318457 319543 4.4999999999999999e-48 - . Parent=HMPREF0556_0402;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 4 | accn|NZ_GG692933 HMMER phypat 753629 754709 6.8999999999999996e-56 + . Parent=HMPREF0556_0853;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 5 | accn|NZ_GG692933 HMMER phypat 755774 756845 8.5999999999999999e-58 - . Parent=HMPREF0556_0854;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 6 | accn|NZ_GG692932 HMMER phypat 70729 71260 1.0999999999999999e-25 - . Parent=HMPREF0556_0948;Name=PF01865;Note=PhoU_div;Description=Protein of unknown function DUF47 7 | accn|NZ_GG692932 HMMER phypat 272146 272908 6.7000000000000006e-18 + . Parent=HMPREF0556_1153;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 8 | accn|NZ_GG692932 HMMER phypat 313467 313932 6.7000000000000001e-11 + . Parent=HMPREF0556_1205;Name=PF13483;Note=Lactamase_B_3;Description=Beta-lactamase superfamily domain 9 | accn|NZ_GG692932 HMMER phypat 447196 448285 2.9999999999999997e-87 + . 
Parent=HMPREF0556_1346;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 10 | accn|NZ_GG692932 HMMER phypat 450218 451277 4.4999999999999992e-82 + . Parent=HMPREF0556_1348;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 11 | accn|NZ_GG692932 HMMER phypat 453283 454291 1.8999999999999995e-75 + . Parent=HMPREF0556_1351;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 12 | accn|NZ_GG692932 HMMER phypat 533588 534689 9.0999999999999979e-94 + . Parent=HMPREF0556_1434;Name=PF04464;Note=Glyphos_transf;Description=CDP-Glycerol:Poly(glycerophosphate) glycerophosphotransferase 13 | accn|NZ_GG692932 HMMER phypat 670229 670688 3.1000000000000002e-10 + . Parent=HMPREF0556_1564;Name=PF01972;Note=SDH_sah;Description=Serine dehydrogenase proteinase 14 | accn|NZ_GG692932 HMMER phypat 683270 683840 2.0000000000000002e-29 + . Parent=HMPREF0556_1578;Name=PF13483;Note=Lactamase_B_3;Description=Beta-lactamase superfamily domain 15 | accn|NZ_GG692932 HMMER phypat 820111 820576 9.300000000000001e-10 + . Parent=HMPREF0556_1707;Name=PF10662;Note=PduV-EutP;Description=Ethanolamine utilisation - propanediol utilisation 16 | accn|NZ_GG692934 HMMER phypat 187223 188375 5.6999999999999999e-91 + . Parent=HMPREF0556_2438;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 17 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Alkaline_phosphatase_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat+PGL 67301 67514 9.9999999999999998e-17 + . 
Parent=HMPREF0556_0159;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 3 | accn|NZ_GG692933 HMMER phypat+PGL 269290 269737 3.2e-51 + . Parent=HMPREF0556_0356;Name=PF01262;Note=AlaDh_PNT_C;Description=Alanine dehydrogenase/PNT, C-terminal domain 4 | accn|NZ_GG692933 HMMER phypat+PGL 335050 335260 6.8000000000000001e-14 + . Parent=HMPREF0556_0417;Name=PF08211;Note=dCMP_cyt_deam_2;Description=Cytidine and deoxycytidylate deaminase zinc-binding region 5 | accn|NZ_GG692933 HMMER phypat+PGL 424146 424353 4.9000000000000003e-13 - . Parent=HMPREF0556_0514;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 6 | accn|NZ_GG692932 HMMER phypat+PGL 190014 190161 6.2000000000000003e-10 + . Parent=HMPREF0556_1066;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 7 | accn|NZ_GG692932 HMMER phypat+PGL 325954 326359 7.200000000000001e-10 - . Parent=HMPREF0556_1221;Name=PF07819;Note=PGAP1;Description=PGAP1-like protein 8 | accn|NZ_GG692932 HMMER phypat+PGL 353497 353719 3.3e-15 + . Parent=HMPREF0556_1254;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 9 | accn|NZ_GG692932 HMMER phypat+PGL 364462 364666 8.6999999999999997e-11 + . Parent=HMPREF0556_1263;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 10 | accn|NZ_GG692932 HMMER phypat+PGL 499741 499912 1.4999999999999999e-13 + . Parent=HMPREF0556_1397;Name=PF13905;Note=Thioredoxin_8;Description=Thioredoxin-like 11 | accn|NZ_GG692932 HMMER phypat+PGL 581249 581399 7.5999999999999996e-10 + . Parent=HMPREF0556_1483;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 12 | accn|NZ_GG692932 HMMER phypat+PGL 799555 799861 1e-10 + . Parent=HMPREF0556_1687;Name=PF03848;Note=TehB;Description=Tellurite resistance protein TehB 13 | accn|NZ_GG692932 HMMER phypat+PGL 985368 985827 5.0999999999999997e-30 + . 
Parent=HMPREF0556_1869;Name=PF02872;Note=5_nucleotid_C;Description=5'-nucleotidase, C-terminal domain 14 | accn|NZ_GG692937 HMMER phypat+PGL 5726 5999 3.4000000000000002e-13 - . Parent=HMPREF0556_1916;Name=PF12846;Note=AAA_10;Description=AAA-like domain 15 | accn|NZ_GG692937 HMMER phypat+PGL 32369 33386 1.0999999999999999e-53 - . Parent=HMPREF0556_1951;Name=PF12846;Note=AAA_10;Description=AAA-like domain 16 | accn|NZ_GG692935 HMMER phypat+PGL 233709 234093 6.3999999999999992e-14 + . Parent=HMPREF0556_2208;Name=PF02872;Note=5_nucleotid_C;Description=5'-nucleotidase, C-terminal domain 17 | accn|NZ_GG692934 HMMER phypat+PGL 217206 217425 1.4999999999999998e-24 + . Parent=HMPREF0556_2471;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 18 | accn|NZ_GG692934 HMMER phypat+PGL 260597 260810 1.5e-16 + . Parent=HMPREF0556_2518;Name=PF13545;Note=HTH_Crp_2;Description=Crp-like helix-turn-helix domain 19 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Growth_at_42°C_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 101569 102724 5.5999999999999991e-95 + . Parent=AX25_00465;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 3 | accn|CP007172 HMMER phypat+PGL 150785 151004 1.3999999999999999e-18 - . Parent=AX25_00690;Name=PF07885;Note=Ion_trans_2;Description=Ion channel 4 | accn|CP007172 HMMER phypat+PGL 363004 363649 1.7000000000000001e-09 + . Parent=AX25_01685;Name=PF13483;Note=Lactamase_B_3;Description=Beta-lactamase superfamily domain 5 | accn|CP007172 HMMER phypat+PGL 477006 477468 1.2999999999999999e-30 + . Parent=AX25_02285;Name=PF06445;Note=GyrI-like;Description=GyrI-like small molecule binding domain 6 | accn|CP007172 HMMER phypat+PGL 849834 850065 3.5000000000000005e-22 + . 
Parent=AX25_04140;Name=PF13246;Note=Hydrolase_like2;Description=Putative hydrolase of sodium-potassium ATPase alpha subunit 7 | accn|CP007172 HMMER phypat+PGL 882007 882256 9.5000000000000003e-24 + . Parent=AX25_04255;Name=PF13246;Note=Hydrolase_like2;Description=Putative hydrolase of sodium-potassium ATPase alpha subunit 8 | accn|CP007172 HMMER phypat+PGL 946608 946878 3.7000000000000001e-11 + . Parent=AX25_04575;Name=PF14635;Note=HHH_7;Description=Helix-hairpin-helix motif 9 | accn|CP007172 HMMER phypat+PGL 1364370 1364937 1.7000000000000002e-18 - . Parent=AX25_06790;Name=PF00176;Note=SNF2_N;Description=SNF2 family N-terminal domain 10 | accn|CP007172 HMMER phypat+PGL 1421633 1421822 2.3000000000000001e-10 + . Parent=AX25_07090;Name=PF02737;Note=3HCDH_N;Description=3-hydroxyacyl-CoA dehydrogenase, NAD binding domain 11 | accn|CP007172 HMMER phypat+PGL 1653345 1653918 1.5999999999999998e-30 - . Parent=AX25_08175;Name=PF13483;Note=Lactamase_B_3;Description=Beta-lactamase superfamily domain 12 | accn|CP007172 HMMER phypat+PGL 1699332 1699554 1.7999999999999999e-21 - . Parent=AX25_08395;Name=PF02583;Note=Trns_repr_metal;Description=Metal-sensitive transcriptional repressor 13 | accn|CP007172 HMMER phypat+PGL 1737825 1738632 2.8999999999999997e-59 - . Parent=AX25_08575;Name=PF00176;Note=SNF2_N;Description=SNF2 family N-terminal domain 14 | accn|CP007172 HMMER phypat+PGL 1839718 1840135 3.3e-10 - . Parent=AX25_09145;Name=PF00176;Note=SNF2_N;Description=SNF2 family N-terminal domain 15 | accn|CP007172 HMMER phypat+PGL 1985118 1985361 4.4999999999999999e-33 - . Parent=AX25_09825;Name=PF02583;Note=Trns_repr_metal;Description=Metal-sensitive transcriptional repressor 16 | accn|CP007172 HMMER phypat+PGL 2202181 2202400 4.4000000000000001e-22 + . Parent=AX25_10875;Name=PF07885;Note=Ion_trans_2;Description=Ion channel 17 | accn|CP007172 HMMER phypat+PGL 2423494 2423710 1.0000000000000001e-20 - . 
Parent=AX25_11975;Name=PF01883;Note=DUF59;Description=Domain of unknown function DUF59 18 | accn|CP007172 HMMER phypat+PGL 2793448 2793595 1.6999999999999999e-11 - . Parent=AX25_13960;Name=PF13246;Note=Hydrolase_like2;Description=Putative hydrolase of sodium-potassium ATPase alpha subunit 19 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Esculin_hydrolysis_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 150785 151004 1.3999999999999999e-18 - . Parent=AX25_00690;Name=PF07885;Note=Ion_trans_2;Description=Ion channel 3 | accn|CP007172 HMMER phypat 397608 398001 1.4999999999999998e-20 + . Parent=AX25_01895;Name=PF14526;Note=Cass2;Description=Integron-associated effector binding protein 4 | accn|CP007172 HMMER phypat 477015 477468 9.6999999999999989e-22 + . Parent=AX25_02285;Name=PF14526;Note=Cass2;Description=Integron-associated effector binding protein 5 | accn|CP007172 HMMER phypat 528103 528436 2.1000000000000001e-42 - . Parent=AX25_02560;Name=PF06993;Note=DUF1304;Description=Protein of unknown function (DUF1304) 6 | accn|CP007172 HMMER phypat 623224 623443 4.8999999999999999e-15 + . Parent=AX25_03000;Name=PF13345;Note=DUF4098;Description=Domain of unknown function (DUF4098) 7 | accn|CP007172 HMMER phypat 958182 958644 8.0999999999999991e-41 + . Parent=AX25_04660;Name=PF14526;Note=Cass2;Description=Integron-associated effector binding protein 8 | accn|CP007172 HMMER phypat 1062710 1063298 3.9999999999999999e-48 + . Parent=AX25_05220;Name=PF03932;Note=CutC;Description=CutC family 9 | accn|CP007172 HMMER phypat 1179084 1179507 1.2999999999999998e-63 + . Parent=AX25_05820;Name=PF10662;Note=PduV-EutP;Description=Ethanolamine utilisation - propanediol utilisation 10 | accn|CP007172 HMMER phypat 1530819 1531284 4.8e-10 - . 
Parent=AX25_07590;Name=PF10662;Note=PduV-EutP;Description=Ethanolamine utilisation - propanediol utilisation 11 | accn|CP007172 HMMER phypat 1588583 1588727 3.0999999999999999e-13 + . Parent=AX25_07890;Name=PF06347;Note=SH3_4;Description=Bacterial SH3 domain 12 | accn|CP007172 HMMER phypat 1695313 1695520 8.4000000000000013e-22 - . Parent=AX25_08375;Name=PF14310;Note=Fn3-like;Description=Fibronectin type III-like domain 13 | accn|CP007172 HMMER phypat 1850125 1850332 4.1999999999999991e-25 - . Parent=AX25_09180;Name=PF14310;Note=Fn3-like;Description=Fibronectin type III-like domain 14 | accn|CP007172 HMMER phypat 2039671 2040055 1.1999999999999999e-11 - . Parent=AX25_10115;Name=PF03435;Note=Saccharop_dh;Description=Saccharopine dehydrogenase 15 | accn|CP007172 HMMER phypat 2130427 2131081 2.4999999999999998e-94 + . Parent=AX25_10545;Name=PF03306;Note=AAL_decarboxy;Description=Alpha-acetolactate decarboxylase 16 | accn|CP007172 HMMER phypat 2202181 2202400 4.4000000000000001e-22 + . Parent=AX25_10875;Name=PF07885;Note=Ion_trans_2;Description=Ion channel 17 | accn|CP007172 HMMER phypat 2546319 2546787 1.3000000000000001e-09 - . Parent=AX25_12635;Name=PF08840;Note=BAAT_C;Description=BAAT / Acyl-CoA thioester hydrolase C terminal 18 | accn|CP007172 HMMER phypat 2585347 2585563 3.5999999999999998e-11 - . Parent=AX25_12835;Name=PF13345;Note=DUF4098;Description=Domain of unknown function (DUF4098) 19 | accn|CP007172 HMMER phypat 2586563 2586767 9.1999999999999996e-15 - . Parent=AX25_12840;Name=PF13345;Note=DUF4098;Description=Domain of unknown function (DUF4098) 20 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Maltose_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat 45700 46390 5.599999999999999e-56 - . 
Parent=HMPREF0556_0045;Name=PF01915;Note=Glyco_hydro_3_C;Description=Glycosyl hydrolase family 3 C-terminal domain 3 | accn|NZ_GG692933 HMMER phypat 123297 123699 3.3000000000000002e-37 - . Parent=HMPREF0556_0208;Name=PF07702;Note=UTRA;Description=UTRA domain 4 | accn|NZ_GG692933 HMMER phypat 158756 158909 1.5e-17 + . Parent=HMPREF0556_0241;Name=PF03633;Note=Glyco_hydro_65C;Description=Glycosyl hydrolase family 65, C-terminal domain 5 | accn|NZ_GG692933 HMMER phypat 207350 207710 5.0999999999999995e-24 + . Parent=HMPREF0556_0293;Name=PF07702;Note=UTRA;Description=UTRA domain 6 | accn|NZ_GG692933 HMMER phypat 229349 229745 9.4000000000000013e-24 + . Parent=HMPREF0556_0313;Name=PF07702;Note=UTRA;Description=UTRA domain 7 | accn|NZ_GG692933 HMMER phypat 272557 272956 7.4999999999999997e-36 - . Parent=HMPREF0556_0360;Name=PF07702;Note=UTRA;Description=UTRA domain 8 | accn|NZ_GG692933 HMMER phypat 296044 296731 1.9000000000000003e-21 + . Parent=HMPREF0556_0380;Name=PF04230;Note=PS_pyruv_trans;Description=Polysaccharide pyruvyl transferase 9 | accn|NZ_GG692933 HMMER phypat 315045 315801 1.8000000000000002e-54 + . Parent=HMPREF0556_0399;Name=PF01915;Note=Glyco_hydro_3_C;Description=Glycosyl hydrolase family 3 C-terminal domain 10 | accn|NZ_GG692933 HMMER phypat 331339 331561 1.7000000000000001e-29 + . Parent=HMPREF0556_0414;Name=PF01773;Note=Nucleos_tra2_N;Description=Na+ dependent nucleoside transporter N-terminus 11 | accn|NZ_GG692933 HMMER phypat 404678 405077 5.8999999999999996e-37 + . Parent=HMPREF0556_0495;Name=PF07702;Note=UTRA;Description=UTRA domain 12 | accn|NZ_GG692933 HMMER phypat 650317 651784 1.0999999999999997e-188 + . Parent=HMPREF0556_0750;Name=PF02074;Note=Peptidase_M32;Description=Carboxypeptidase Taq (M32) metallopeptidase 13 | accn|NZ_GG692932 HMMER phypat 268043 268727 4.8000000000000001e-16 + . 
Parent=HMPREF0556_1151;Name=PF04230;Note=PS_pyruv_trans;Description=Polysaccharide pyruvyl transferase 14 | accn|NZ_GG692932 HMMER phypat 332010 332424 2.9999999999999996e-39 + . Parent=HMPREF0556_1229;Name=PF07702;Note=UTRA;Description=UTRA domain 15 | accn|NZ_GG692932 HMMER phypat 532041 532848 5.7999999999999996e-26 + . Parent=HMPREF0556_1433;Name=PF04230;Note=PS_pyruv_trans;Description=Polysaccharide pyruvyl transferase 16 | accn|NZ_GG692935 HMMER phypat 25475 26015 8.6999999999999999e-18 + . Parent=HMPREF0556_2018;Name=PF13439;Note=Glyco_transf_4;Description=Glycosyltransferase Family 4 17 | accn|NZ_GG692934 HMMER phypat 21148 21970 5.8e-19 - . Parent=HMPREF0556_2266;Name=PF04230;Note=PS_pyruv_trans;Description=Polysaccharide pyruvyl transferase 18 | accn|NZ_GG692934 HMMER phypat 47226 47634 1.5999999999999999e-33 + . Parent=HMPREF0556_2305;Name=PF07702;Note=UTRA;Description=UTRA domain 19 | accn|NZ_GG692934 HMMER phypat 342869 343061 6.0999999999999993e-17 + . Parent=HMPREF0556_2608;Name=PF11734;Note=TilS_C;Description=TilS substrate C-terminal domain 20 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Trehalose_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat 54764 55613 3.7999999999999992e-119 - . Parent=HMPREF0556_0052;Name=PF04898;Note=Glu_syn_central;Description=Glutamate synthase central domain 3 | accn|NZ_GG692933 HMMER phypat 116716 117160 3.7999999999999997e-55 + . Parent=HMPREF0556_0202;Name=PF03830;Note=PTSIIB_sorb;Description=PTS system sorbose subfamily IIB component 4 | accn|NZ_GG692933 HMMER phypat 117842 118280 5.5999999999999996e-59 + . 
Parent=HMPREF0556_0203;Name=PF03830;Note=PTSIIB_sorb;Description=PTS system sorbose subfamily IIB component 5 | accn|NZ_GG692933 HMMER phypat 278191 278365 1.4000000000000002e-21 - . Parent=HMPREF0556_0364;Name=PF13556;Note=HTH_30;Description=PucR C-terminal helix-turn-helix domain 6 | accn|NZ_GG692933 HMMER phypat 354824 354974 2.9999999999999998e-18 + . Parent=HMPREF0556_0438;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 7 | accn|NZ_GG692933 HMMER phypat 504329 504479 5.5000000000000008e-20 - . Parent=HMPREF0556_0598;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 8 | accn|NZ_GG692932 HMMER phypat 22359 22695 6.7999999999999993e-43 - . Parent=HMPREF0556_0901;Name=PF06993;Note=DUF1304;Description=Protein of unknown function (DUF1304) 9 | accn|NZ_GG692932 HMMER phypat 171895 172045 5.2999999999999997e-19 + . Parent=HMPREF0556_1045;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 10 | accn|NZ_GG692932 HMMER phypat 183937 184093 3.1e-20 + . Parent=HMPREF0556_1058;Name=PF12464;Note=Mac;Description=Maltose acetyltransferase 11 | accn|NZ_GG692932 HMMER phypat 403025 403466 2.3999999999999999e-47 + . Parent=HMPREF0556_1303;Name=PF04069;Note=OpuAC;Description=Substrate binding domain of ABC-type glycine betaine transport system 12 | accn|NZ_GG692932 HMMER phypat 404632 405235 1.5e-50 + . Parent=HMPREF0556_1305;Name=PF03932;Note=CutC;Description=CutC family 13 | accn|NZ_GG692932 HMMER phypat 684201 684507 1.2e-34 + . Parent=HMPREF0556_1579;Name=PF07085;Note=DRTGG;Description=DRTGG domain 14 | accn|NZ_GG692932 HMMER phypat 853943 854735 7.2999999999999991e-63 + . Parent=HMPREF0556_1741;Name=PF04069;Note=OpuAC;Description=Substrate binding domain of ABC-type glycine betaine transport system 15 | accn|NZ_GG692932 HMMER phypat 857465 858260 1.1999999999999998e-63 - . 
Parent=HMPREF0556_1744;Name=PF04069;Note=OpuAC;Description=Substrate binding domain of ABC-type glycine betaine transport system 16 | accn|NZ_GG692932 HMMER phypat 867705 868431 4.7999999999999987e-67 + . Parent=HMPREF0556_1755;Name=PF04069;Note=OpuAC;Description=Substrate binding domain of ABC-type glycine betaine transport system 17 | accn|NZ_GG692932 HMMER phypat 881739 883500 1.6999999999999992e-212 - . Parent=HMPREF0556_1769;Name=PF02901;Note=PFL;Description=Pyruvate formate lyase 18 | accn|NZ_GG692934 HMMER phypat 78791 79682 2.8999999999999992e-114 + . Parent=HMPREF0556_2337;Name=PF02733;Note=Dak1;Description=Dak1 domain 19 | accn|NZ_GG692934 HMMER phypat 101528 101678 2.7000000000000001e-17 + . Parent=HMPREF0556_2360;Name=PF13556;Note=HTH_30;Description=PucR C-terminal helix-turn-helix domain 20 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_grayi_DSM_20601_Salicin_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat 32668 32818 2.2999999999999999e-09 + . Parent=HMPREF0556_0131;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 3 | accn|NZ_GG692933 HMMER phypat 59891 61319 2.5999999999999994e-135 + . Parent=HMPREF0556_0152;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 4 | accn|NZ_GG692933 HMMER phypat 63696 65103 8.2999999999999975e-181 - . Parent=HMPREF0556_0156;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 5 | accn|NZ_GG692933 HMMER phypat 71200 72544 2.8999999999999991e-144 + . Parent=HMPREF0556_0164;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 6 | accn|NZ_GG692933 HMMER phypat 94387 94927 6.4999999999999992e-71 + . 
Parent=HMPREF0556_0186;Name=PF03390;Note=2HCT;Description=2-hydroxycarboxylate transporter family 7 | accn|NZ_GG692933 HMMER phypat 96472 96682 8.6e-11 + . Parent=HMPREF0556_0188;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 8 | accn|NZ_GG692933 HMMER phypat 108714 110133 1.9999999999999998e-122 + . Parent=HMPREF0556_0197;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 9 | accn|NZ_GG692933 HMMER phypat 110448 110673 2.2999999999999999e-12 + . Parent=HMPREF0556_0198;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 10 | accn|NZ_GG692933 HMMER phypat 150500 150869 1.6999999999999998e-43 + . Parent=HMPREF0556_0236;Name=PF02903;Note=Alpha-amylase_N;Description=Alpha amylase, N-terminal ig-like domain 11 | accn|NZ_GG692933 HMMER phypat 200755 200926 3.8000000000000001e-09 + . Parent=HMPREF0556_0288;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 12 | accn|NZ_GG692933 HMMER phypat 240165 240312 6.3000000000000001e-17 + . Parent=HMPREF0556_0325;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 13 | accn|NZ_GG692933 HMMER phypat 273170 274535 9.9999999999999995e-127 + . Parent=HMPREF0556_0361;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 14 | accn|NZ_GG692933 HMMER phypat 402951 404328 1.3999999999999997e-137 + . Parent=HMPREF0556_0494;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 15 | accn|NZ_GG692932 HMMER phypat 135090 135231 4.1000000000000004e-15 + . Parent=HMPREF0556_1013;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 16 | accn|NZ_GG692932 HMMER phypat 863304 864681 7.0999999999999986e-126 - . Parent=HMPREF0556_1751;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 17 | accn|NZ_GG692935 HMMER phypat 185401 185569 8.1999999999999997e-27 + . Parent=HMPREF0556_2172;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 18 | accn|NZ_GG692934 HMMER phypat 126373 127774 4.2999999999999976e-174 + . 
Parent=HMPREF0556_2382;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 19 | accn|NZ_GG692934 HMMER phypat 177435 178845 1.5999999999999996e-136 + . Parent=HMPREF0556_2430;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 20 | accn|NZ_GG692934 HMMER phypat 234676 234868 3e-10 + . Parent=HMPREF0556_2491;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 21 | accn|NZ_GG692934 HMMER phypat 275564 275795 5.6999999999999999e-13 - . Parent=HMPREF0556_2538;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 22 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat/feat_gffs/Listeria_ivanovii_WSLC3009_Salicin_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat 75739 77149 1.6999999999999993e-140 + . Parent=AX25_00345;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 3 | accn|CP007172 HMMER phypat 159549 160992 2.3999999999999993e-120 + . Parent=AX25_00740;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 4 | accn|CP007172 HMMER phypat 333151 334585 5.5999999999999992e-135 + . Parent=AX25_01545;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 5 | accn|CP007172 HMMER phypat 340767 342171 5.4999999999999981e-174 - . Parent=AX25_01580;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 6 | accn|CP007172 HMMER phypat 366227 367613 9.1999999999999985e-146 + . Parent=AX25_01700;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 7 | accn|CP007172 HMMER phypat 367672 367834 2.6e-23 + . Parent=AX25_01705;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 8 | accn|CP007172 HMMER phypat 393179 394544 1.9999999999999999e-126 + . 
Parent=AX25_01870;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 9 | accn|CP007172 HMMER phypat 588561 589941 3.1999999999999992e-136 + . Parent=AX25_02850;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 10 | accn|CP007172 HMMER phypat 673648 673873 2.6000000000000001e-09 + . Parent=AX25_03220;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 11 | accn|CP007172 HMMER phypat 963377 964796 1.0999999999999997e-121 + . Parent=AX25_04690;Name=PF00232;Note=Glyco_hydro_1;Description=Glycosyl hydrolase family 1 12 | accn|CP007172 HMMER phypat 965111 965324 1.6e-12 + . Parent=AX25_04695;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 13 | accn|CP007172 HMMER phypat 1697627 1697879 4.9000000000000007e-16 - . Parent=AX25_08390;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 14 | accn|CP007172 HMMER phypat 2246497 2246725 4.8999999999999997e-12 - . Parent=AX25_11125;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 15 | accn|CP007172 HMMER phypat 2284373 2284742 4.9000000000000002e-48 - . Parent=AX25_11295;Name=PF02903;Note=Alpha-amylase_N;Description=Alpha amylase, N-terminal ig-like domain 16 | accn|CP007172 HMMER phypat 2297706 2297946 1.2e-10 - . Parent=AX25_11390;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 17 | accn|CP007172 HMMER phypat 2460381 2460624 3.3999999999999998e-22 - . Parent=AX25_12125;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 18 | accn|CP007172 HMMER phypat 2526690 2526861 1.7000000000000001e-26 - . Parent=AX25_12550;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 19 | accn|CP007172 HMMER phypat 2745214 2745439 5.4999999999999997e-11 - . Parent=AX25_13725;Name=PF05043;Note=Mga;Description=Mga helix-turn-helix domain 20 | accn|CP007172 HMMER phypat 2771415 2771619 1.1000000000000001e-08 - . 
import pandas as pd
import tarfile
import sys


class PhenotypeCollection:
    """Reader for a gzip-compressed tar archive of phenotype models.

    The archive is opened once in the constructor and kept open in
    ``self.tar``; every getter reads one tab-separated member from it.
    Members accessed by this class:

    * ``config.txt`` -- rows ``archive_name``, ``hmm_f`` and ``hmm_name``
      with their setting in the ``value`` column
    * ``pt2acc.txt`` -- phenotype table (first column is used as the id that
      prefixes the per-phenotype member names, second column as the label)
    * ``pf2acc_desc.txt`` -- feature description table
    * ``<pt>_bias.txt``, ``<pt>_feats.txt``, ``<id>_non-zero+weights.txt``

    Instances can be used as context managers so that the archive handle is
    released deterministically; ``close()`` does the same explicitly.
    """

    def __init__(self, archive_f):
        """Open *archive_f* and read its ``config.txt``.

        Raises whatever ``tarfile``/``pandas`` raise when the archive cannot
        be opened or the configuration is missing or incomplete; the archive
        handle is closed again in that case.
        """
        self.archive_f = archive_f
        self.tar = tarfile.open(archive_f, mode="r:gz")
        try:
            info_df = self._read_member("config.txt", index_col=0)
            self.name = info_df.loc["archive_name", "value"]
            self.hmm_f = info_df.loc["hmm_f", "value"]
            self.hmm_name = info_df.loc["hmm_name", "value"]
        except Exception:
            # do not leak the open handle when the config cannot be read
            self.tar.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close the underlying tar archive; safe to call more than once."""
        self.tar.close()

    def _read_member(self, member, **read_csv_kwargs):
        """Parse the tab-separated archive member *member* into a DataFrame."""
        return pd.read_csv(self.tar.extractfile(member), sep="\t", **read_csv_kwargs)

    def _read_pt2acc(self, index_col):
        """Read ``pt2acc.txt`` indexed by column *index_col*, index cast to text."""
        table = self._read_member("pt2acc.txt", index_col=index_col, encoding='utf-8')
        table.index = table.index.astype('U')
        return table

    def get_pt2acc(self):
        """Return ``pt2acc.txt`` indexed by its first column."""
        return self._read_pt2acc(0)

    def get_pt2id(self):
        """Return ``pt2acc.txt`` indexed by its second column."""
        return self._read_pt2acc(1)

    def get_acc2pt(self):
        """Return ``pt2acc.txt`` indexed by its second column (same table as get_pt2id)."""
        return self._read_pt2acc(1)

    def get_pf2desc(self):
        """Return ``pf2acc_desc.txt`` indexed by its first column."""
        return self._read_member("pf2acc_desc.txt", index_col=0)

    def get_name(self):
        """Return the ``archive_name`` value read from ``config.txt``."""
        return self.name

    def get_archive_f(self):
        """Return the archive path this collection was opened from."""
        return self.archive_f

    def get_hmm_f(self):
        """Return the ``hmm_f`` value read from ``config.txt``."""
        return self.hmm_f

    def get_hmm_name(self):
        """Return the ``hmm_name`` value read from ``config.txt``."""
        return self.hmm_name

    def get_bias(self, pt):
        """Return the header-less table ``<pt>_bias.txt`` indexed by its first column."""
        return self._read_member("%s_bias.txt" % (pt), index_col=0, header=None)

    def get_predictors(self, pt):
        """Return the table ``<pt>_feats.txt`` indexed by its first column."""
        return self._read_member("%s_feats.txt" % (pt), index_col=0)

    def get_selected_features(self, pt, strategy, include_negative):
        """Return the rows of ``<id>_non-zero+weights.txt`` selected for *pt*.

        *pt* is first looked up in the second column of ``pt2acc.txt`` and
        translated to the value of the first column; when that lookup fails
        *pt* itself is used as the member prefix.

        The result keeps the first six and the last two columns of the table.
        For every row the number of positive and of negative entries in the
        columns at positions 1 to 4 is counted. With ``strategy ==
        "non-zero"`` a row is selected when it has at least one positive
        entry; with any other strategy it needs more than three. If
        *include_negative* is true, rows reaching the same count of negative
        entries are selected as well.

        When the archive has no weights member for the phenotype a message is
        written to stderr and the process exits with status 1.
        """
        pt2id = self.get_pt2id()
        try:
            pt_id = pt2id.loc[pt, ].iloc[0]
        except (KeyError, IndexError, TypeError):
            # not a known label: assume the caller already passed the id
            pt_id = pt
        try:
            extracted_f = self.tar.extractfile("%s_non-zero+weights.txt" % pt_id)
        except KeyError:
            sys.stderr.write("target phenotype %s has no associated model in the phenotype collection\n" % pt)
            sys.exit(1)
        feats = pd.read_csv(extracted_f, sep="\t", index_col=0)
        # use the 5 best models
        # NOTE(review): six leading columns are kept but only positions 1-4
        # (four columns) are counted below -- confirm this window is intended.
        feats = feats.loc[:, feats.columns[0:6].tolist() + feats.columns[-2:].tolist()]
        weights = feats.iloc[:, 1:5]
        pos = (weights > 0).sum(axis=1)
        neg = (weights < 0).sum(axis=1)
        min_votes = 0 if strategy == "non-zero" else 3
        selected = pos > min_votes
        if include_negative:
            selected = selected | (neg > min_votes)
        return feats.loc[selected]
accn|CP007172 HMMER phypat 153698 154139 3.0000000000000001e-58 + . Parent=AX25_00705;Name=PF03830;Note=PTSIIB_sorb;Description=PTS system sorbose subfamily IIB component 3 | accn|CP007172 HMMER phypat 302981 303413 4e-41 + . Parent=AX25_01400;Name=PF03830;Note=PTSIIB_sorb;Description=PTS system sorbose subfamily IIB component 4 | accn|CP007172 HMMER phypat 464070 464244 4.5000000000000001e-20 + . Parent=AX25_02210;Name=PF13556;Note=HTH_30;Description=PucR C-terminal helix-turn-helix domain 5 | accn|CP007172 HMMER phypat 477006 477468 1.2999999999999999e-30 + . Parent=AX25_02285;Name=PF06445;Note=GyrI-like;Description=GyrI-like small molecule binding domain 6 | accn|CP007172 HMMER phypat 528103 528436 2.1000000000000001e-42 - . Parent=AX25_02560;Name=PF06993;Note=DUF1304;Description=Protein of unknown function (DUF1304) 7 | accn|CP007172 HMMER phypat 691219 691369 7.4000000000000001e-20 + . Parent=AX25_03305;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 8 | accn|CP007172 HMMER phypat 705119 705272 2.3999999999999999e-21 + . Parent=AX25_03385;Name=PF12464;Note=Mac;Description=Maltose acetyltransferase 9 | accn|CP007172 HMMER phypat 805395 805842 6.5000000000000003e-62 - . Parent=AX25_03945;Name=PF03830;Note=PTSIIB_sorb;Description=PTS system sorbose subfamily IIB component 10 | accn|CP007172 HMMER phypat 860701 861511 5.999999999999999e-45 + . Parent=AX25_04185;Name=PF06224;Note=HTH_42;Description=Winged helix DNA-binding domain 11 | accn|CP007172 HMMER phypat 1061160 1061592 3.3999999999999997e-48 + . Parent=AX25_05210;Name=PF04069;Note=OpuAC;Description=Substrate binding domain of ABC-type glycine betaine transport system 12 | accn|CP007172 HMMER phypat 1062710 1063298 3.9999999999999999e-48 + . Parent=AX25_05220;Name=PF03932;Note=CutC;Description=CutC family 13 | accn|CP007172 HMMER phypat 1462276 1464034 3.5999999999999986e-211 + . 
Parent=AX25_07275;Name=PF02901;Note=PFL;Description=Pyruvate formate lyase 14 | accn|CP007172 HMMER phypat 1490511 1491306 5.6999999999999994e-63 + . Parent=AX25_07400;Name=PF04069;Note=OpuAC;Description=Substrate binding domain of ABC-type glycine betaine transport system 15 | accn|CP007172 HMMER phypat 1494579 1495377 1.9e-62 - . Parent=AX25_07420;Name=PF04069;Note=OpuAC;Description=Substrate binding domain of ABC-type glycine betaine transport system 16 | accn|CP007172 HMMER phypat 1652228 1652534 2.7000000000000001e-33 - . Parent=AX25_08170;Name=PF07085;Note=DRTGG;Description=DRTGG domain 17 | accn|CP007172 HMMER phypat 1856790 1857639 2.599999999999999e-112 - . Parent=AX25_09205;Name=PF04898;Note=Glu_syn_central;Description=Glutamate synthase central domain 18 | accn|CP007172 HMMER phypat 2050250 2052026 8.1999999999999975e-165 + . Parent=AX25_10165;Name=PF02901;Note=PFL;Description=Pyruvate formate lyase 19 | accn|CP007172 HMMER phypat 2139659 2140106 9.6999999999999983e-52 - . Parent=AX25_10595;Name=PF03830;Note=PTSIIB_sorb;Description=PTS system sorbose subfamily IIB component 20 | accn|CP007172 HMMER phypat 2567983 2568133 7.3e-23 - . Parent=AX25_12730;Name=PF13906;Note=AA_permease_C;Description=C-terminus of AA_permease 21 | accn|CP007172 HMMER phypat 2711589 2712543 1.2999999999999999e-45 - . Parent=AX25_13480;Name=PF06224;Note=HTH_42;Description=Winged helix DNA-binding domain 22 | accn|CP007172 HMMER phypat 2804068 2804962 2.3999999999999996e-112 + . Parent=AX25_14025;Name=PF02733;Note=Dak1;Description=Dak1 domain 23 | accn|CP007172 HMMER phypat 2869060 2869213 9.9e-20 + . 
"""Create and edit archives (``.tar.gz``) of phenotype models."""
import io
import os
import sys
import tarfile

import pandas as pd

#file name patterns of the model files expected per phenotype; %s is the phenotype id
mfs = ["%s_bias.txt", "%s_feats.txt","%s_non-zero+weights.txt"]


def _csv_bytes(df):
    """Return df serialized as tab separated text, encoded as UTF-8 bytes."""
    text = df.to_csv(sep = "\t", encoding = "utf-8")
    #Python 2 already hands back a byte string, Python 3 hands back text
    if not isinstance(text, bytes):
        text = text.encode("utf-8")
    return text


def _add_bytes(tar, name, payload):
    """Add the byte string payload to the open archive tar as member name."""
    info = tarfile.TarInfo(name)
    #tarfile copies exactly info.size bytes from the stream
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))


def validate(model_dir, pts):
    """Check that every model file exists for each phenotype.

    model_dir -- directory that holds the model files
    pts -- object whose ``index`` lists the phenotype ids

    For each id every pattern in ``mfs`` is checked. The first missing file is
    reported on stderr and raises IOError (the original raised a bare
    Exception; IOError is a subclass, so existing handlers still match).
    """
    for pt in pts.index:
        for pattern in mfs:
            model_f = os.path.join(model_dir, pattern % pt)
            if not os.path.exists(model_f):
                msg = "%s does not exists" % model_f
                sys.stderr.write(msg + "\n")
                raise IOError(msg)


def remove(archive_f, phenotypes, out_f, keep = False):
    """Write a copy of a model archive with the given phenotypes removed.

    archive_f -- archive handed to PhenotypeCollection
    phenotypes -- header-less csv file; its first column holds phenotype ids
    out_f -- path of the gzipped tar archive to write
    keep -- if True, keep only the listed phenotypes instead of dropping them
    """
    #imported lazily so that the archive building helpers work without this import
    from traitar.PhenotypeCollection import PhenotypeCollection
    pts = pd.read_csv(phenotypes, index_col = 0, header = None)
    #compare ids as strings on both sides
    pts.index = pts.index.values.astype(str)
    ptc = PhenotypeCollection(archive_f)
    pt2acc = ptc.get_pt2acc()
    pt2acc.index = pt2acc.index.values.astype(str)
    if keep:
        pt2acc = pt2acc.loc[pts.index,]
    else:
        pt2acc = pt2acc.drop(pts.index)
    t = ptc.tar
    #resolve all members first, so a missing member fails before out_f is created
    names = ["pf2acc_desc.txt", "config.txt"]
    for pt in pt2acc.index:
        names.append("%s_non-zero+weights.txt" % pt)
        names.append("%s_bias.txt" % pt)
        names.append("%s_feats.txt" % pt)
    members = [(t.getmember(name), t.extractfile(name)) for name in names]
    with tarfile.open("%s" % out_f, "w:gz") as out_tar:
        _add_bytes(out_tar, "pt2acc.txt", _csv_bytes(pt2acc))
        for tarinfo, fileobj in members:
            out_tar.addfile(tarinfo, fileobj)


def extend(archive_f, model_dir, pf2acc_desc_f, pts, pfam_v):
    """Extend an existing model archive by additional models (unfinished stub).

    Only the validation step and opening the archive are implemented.
    """
    from traitar.PhenotypeCollection import PhenotypeCollection
    #validate new phenotype models
    #NOTE(review): the original passed the undefined name pt2acc_desc_f here;
    #pts is assumed to be indexed by phenotype id as in new() -- confirm
    validate(model_dir, pts)
    ptc = PhenotypeCollection(archive_f)
    pt2acc = ptc.get_pt2acc()
    #TODO check if phenotype models not already exist
    #TODO add phenotype models to the archive


def new(models_dir, pf2acc_f, pt2desc_f, hmm_name, hmm_model_f, archive_name):
    """Create a new archive with phenotype models.

    Reads the phenotype table pt2desc_f (tab separated, ids in the first
    column), validates that all model files exist in models_dir and then
    delegates to create_tar with the same arguments.
    """
    #read in pf and pt accessions
    pts = pd.read_csv(pt2desc_f, sep = "\t", index_col = 0)
    validate(models_dir, pts)
    create_tar(models_dir, pf2acc_f, pt2desc_f, hmm_name, hmm_model_f, archive_name)


def create_tar(models_dir, pf2acc_f, pt2desc_f, hmm_name, hmm_model_f, archive_name):
    """Write ``<archive_name>.tar.gz`` with mapping files, config and models.

    Members: pf2acc_f as ``pf2acc_desc.txt``, pt2desc_f as ``pt2acc.txt``, a
    generated ``config.txt`` (rows archive_name, hmm_name, hmm_f) and, for each
    phenotype id in pt2desc_f, the files named by ``mfs`` from models_dir.
    """
    pt2desc = pd.read_csv(pt2desc_f, sep = "\t", index_col = 0)
    config = [archive_name, hmm_name, hmm_model_f]
    config_df = pd.DataFrame(config, index = ["archive_name", "hmm_name", "hmm_f"], columns = ["value"])
    #the with block closes the archive even if adding a member fails
    with tarfile.open("%s.tar.gz" % archive_name, "w:gz") as t:
        t.add(pf2acc_f, arcname = "pf2acc_desc.txt")
        t.add(pt2desc_f, arcname = "pt2acc.txt")
        _add_bytes(t, "config.txt", _csv_bytes(config_df))
        for pt in pt2desc.index:
            for pattern in mfs:
                model_f = os.path.join(models_dir, pattern % pt)
                t.add(model_f, arcname = os.path.basename(model_f))
#!/usr/bin/env python
"""Combine the predictions of two phenotype model collections into joint tables."""
import numpy as np
import pandas as ps


def _ordered_union(first, second):
    """Return the labels of first, then those only in second, without duplicates."""
    seen = set()
    merged = []
    for label in list(first) + list(second):
        if label not in seen:
            seen.add(label)
            merged.append(label)
    return merged


def flatten_df(df1, df2, name1, name2, out):
    """Write the non-zero cells of two prediction tables as one long table.

    For every sample (row label) and phenotype (column label) found in either
    table, one line ``sample, phenotype, value, name`` is written per table
    that has the cell and whose value is not equal to 0. Rows follow the label
    order of df1 followed by the labels only present in df2.
    """
    samples = _ordered_union(df1.index, df2.index)
    phenotypes = _ordered_union(df1.columns, df2.columns)
    sources = [(df1, set(df1.index), set(df1.columns), name1),
               (df2, set(df2.index), set(df2.columns), name2)]
    with open(out, 'w') as f:
        #NOTE(review): the header names three columns but each row has four fields;
        #kept byte-identical because readers of this file are not visible here
        f.write("sample\tphenotype\tphenotype model\n")
        for i in samples:
            for j in phenotypes:
                for df, rows, cols, name in sources:
                    if i in rows and j in cols and not df.loc[i, j] == 0:
                        f.write("\t".join([str(i), str(j), str(df.loc[i, j]), name]) + "\n")


def comb_preds(phypat_dir, phypat_PGL_dir, primary_name, secondary_name, out_dir, k):
    """Merge the prediction tables of two model collections and write them to out_dir.

    phypat_dir, phypat_PGL_dir -- directories that each contain
        ``predictions_majority-vote.txt`` and ``predictions_single-votes.txt``
    primary_name, secondary_name -- labels used for the two collections in the flat files
    out_dir -- directory for the four combined output files
    k -- accepted for interface compatibility; not used by this function

    Single votes are summed per phenotype. The combined majority table is
    built on the samples of the first collection. For phenotypes present in
    both collections the codes are 1 = only first, 2 = only second, 3 = both.
    """
    m1_maj = ps.read_csv("%s/predictions_majority-vote.txt"%phypat_dir, index_col = 0, sep = "\t")
    m1 = ps.read_csv("%s/predictions_single-votes.txt"%phypat_dir, index_col = 0, sep = "\t")
    m2_maj = ps.read_csv("%s/predictions_majority-vote.txt"%phypat_PGL_dir, index_col = 0, sep = "\t")
    m2 = ps.read_csv("%s/predictions_single-votes.txt"%phypat_PGL_dir, index_col = 0, sep = "\t")
    #set NAs to zero
    m1 = m1.fillna(0)
    m2 = m2.fillna(0)
    #collect phenotypes that are shared / different in the two data sets;
    #lists instead of sets keep the column order reproducible
    m_columns = _ordered_union(m1.columns, m2.columns)
    in_m1 = set(m1.columns)
    in_m2 = set(m2.columns)
    only_m1 = [c for c in m1.columns if c not in in_m2]
    only_m2 = [c for c in m2.columns if c not in in_m1]
    shared = [c for c in m1.columns if c in in_m2]
    #combine predictions of both collections into one matrix
    m = ps.DataFrame(np.zeros((m1.shape[0], len(m_columns))), index = m1.index, columns = m_columns)
    m_maj = ps.DataFrame(np.zeros((m1.shape[0], len(m_columns))), index = m1.index, columns = m_columns)
    #combine single voters
    m.loc[:, m1.columns] = m1
    m.loc[:, m2.columns] = m.loc[:, m2.columns] + m2
    #combine majority votes
    #NOTE(review): exclusive phenotypes are coded 2 for the first and 1 for the second
    #collection, the reverse of the shared phenotype coding below -- confirm intent
    if only_m1:
        m_maj.loc[:, only_m1] = m1_maj.loc[:, only_m1] * 2
    if only_m2:
        m_maj.loc[:, only_m2] = m2_maj.loc[:, only_m2]
    if shared:
        #work on a copy so that the masks never write through a slice of m_maj
        m_temp = m_maj.loc[:, shared].copy()
        first = m1_maj.loc[:, shared]
        second = m2_maj.loc[:, shared]
        m_temp[(first == 1) & (second == 1)] = 3
        m_temp[(first == 1) & (second == 0)] = 1
        m_temp[(first == 0) & (second == 1)] = 2
        m_maj.loc[:, shared] = m_temp
    m.to_csv("%s/predictions_single-votes_combined.txt"%out_dir, sep = "\t")
    m_maj.to_csv("%s/predictions_majority-vote_combined.txt"%out_dir, index_label = None, sep = "\t")
    flatten_df(m1_maj, m2_maj, primary_name, secondary_name, "%s/predictions_flat_majority-votes_combined.txt"%out_dir)
    flatten_df(m1, m2, primary_name, secondary_name, "%s/predictions_flat_single-votes_combined.txt"%out_dir)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser("combine the misclassified samples of different phenotypes into data matrices")
    parser.add_argument("out_dir",help='the output directory')
    parser.add_argument("phypat_dir",help='directory with the phypat predictions')
    parser.add_argument("phypat_PGL_dir",help='directory with the phyapt+PGL predictions')
    parser.add_argument("primary_name",help='name of the primary phenotype model collection')
    parser.add_argument("secondary_name",help='name of the secondary phenotype collection')
    parser.add_argument("-k", "--voters", default = 5, help='number of classifiers used in the voting committee', type = int)
    args = parser.parse_args()
    comb_preds(args.phypat_dir, args.phypat_PGL_dir, args.primary_name, args.secondary_name, args.out_dir, args.voters)
#!/usr/bin/env python
"""parse hmmer output file and generated filtered best file"""
import sys
import pandas as ps
try:
    from StringIO import StringIO #Python 2
except ImportError:
    from io import StringIO #Python 3
hmmer_colnames = ['target name','target accession','tlen','query name','accession','qlen','E-value','score_overall','bias_overall','#','of','c-Evalue','i-Evalue','score_domain','bias_domain','from_hmm','to_hmm','ali_from','ali_to','env_from','env_to','acc','description of target']


def filter_dbcan(m):
    """Return a boolean mask of the hits in m that pass the dbcan thresholds.

    A hit passes if the value in column 13 (score_domain in hmmer_colnames) is
    at least 25 and the value in column 12 (i-Evalue) is at most 0.001 for
    alignments spanning at most 80 positions (ali_to - ali_from), or at most
    0.00001 for longer alignments.
    """
    ali_len = m.loc[:, "ali_to"] - m.loc[:, "ali_from"]
    evalue = m.iloc[:, 12]
    short_hit = (evalue <= 0.001) & (ali_len <= 80)
    long_hit = (evalue <= 0.00001) & (ali_len > 80)
    return (m.iloc[:, 13] >= 25) & (short_hit | long_hit)


def filter_pfam(m):
    """Return a boolean mask of the hits in m that pass the pfam thresholds.

    A hit passes if column 12 (i-Evalue) is at most 0.01 and column 13
    (score_domain) is at least 25.
    """
    #TODO check thresholds
    return (m.iloc[:, 12] <= 0.01) & (m.iloc[:, 13] >= 25)


#filter function per supported HMM database name
_FILTERS = {"pfam": filter_pfam, "dbcan": filter_dbcan}


def apply_thresholds(infile_f, hmm_name, out_filt_f, out_excl_f):
    """Parse a HMMER output file and apply thresholds for e-value and bit score.

    infile_f -- open text file with whitespace delimited HMMER output
    hmm_name -- "pfam" or "dbcan"; selects the filter that is applied
    out_filt_f -- path for the hits that pass the filter, or None
    out_excl_f -- path for the hits that fail the filter, or None

    Returns the data frame of passing hits, with hmmer_colnames as columns.
    Raises ValueError for an unknown hmm_name (the original failed later with
    an unbound local variable).
    """
    if hmm_name not in _FILTERS:
        raise ValueError("unknown HMM database name: %s" % hmm_name)
    #skip header
    for _ in range(3):
        infile_f.readline()
    #replace whitespace by tabs and skip lines which start with a # char;
    #at most 22 splits so that the free text description stays one field
    rows = []
    for line in infile_f:
        fields = line.split(None, 22)
        if not fields or fields[0].startswith("#"):
            continue
        #the last field keeps its line ending only if all 22 splits happened,
        #so strip it and add the row separator explicitly
        fields[-1] = fields[-1].rstrip("\r\n")
        rows.append("\t".join(fields))
    #account for cases where hmmer didn't return any hits
    if rows:
        m = ps.read_csv(StringIO("\n".join(rows) + "\n"), sep = "\t", header = None)
    else:
        m = ps.DataFrame(columns = hmmer_colnames)
    m.columns = hmmer_colnames
    keep = _FILTERS[hmm_name](m)
    #apply eval threshold
    m_eval = m.loc[keep, :]
    if out_filt_f is not None:
        m_eval.to_csv(out_filt_f, sep = "\t")
    if out_excl_f is not None:
        m.loc[~keep, :].to_csv(out_excl_f, sep = "\t")
    return m_eval


def aggregate_domain_hits(filtered_df, out_f):
    """Write the best scoring domain hit per target and query to out_f.

    Hits are grouped by "target name" and "query name"; per group the row with
    the highest value in column 13 is kept. The result is written tab
    separated with a header row and without the index, ordered by target and
    query name. Unlike the original, filtered_df is not sorted in place.
    """
    keys = ["target name", "query name"]
    score_col = filtered_df.columns[13]
    #best score first within each target/query pair, so the first row per pair wins
    ranked = filtered_df.sort_values(by = keys + [score_col], ascending = [True, True, False])
    best = ranked.drop_duplicates(subset = keys, keep = "first")
    best.to_csv(out_f, sep = "\t", index = False)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser("parse hmmer output file and generated filtered best file")
    parser.add_argument("infile_f", help='a space-delimited output file from hmmer', default = sys.stdin, type = argparse.FileType('r'))
    parser.add_argument("out_best_f", help='hmmer tab delimited file with hits removed according to the thresholds and only best domain hit retained')
    parser.add_argument("hmm_name", choices = ["pfam", "dbcan"], help ='name of the HMM database; this will determine the filtering applied')
    parser.add_argument("--out_excl_f", help='domain hits filtered due to the applied thresholds')
    parser.add_argument("--out_filt_f", help='hmmer tab delimited file with hits removed according to the thresholds')
    #parser.add_argument("aln_cov_thresh", help = 'threshold for the alignment length coverage', type = int)
    args = parser.parse_args()
    filtered_df = apply_thresholds(args.infile_f, args.hmm_name, args.out_filt_f, args.out_excl_f)
    aggregate_domain_hits(filtered_df, args.out_best_f)
Parent=AX25_04725;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 7 | accn|CP007172 HMMER phypat+PGL 987151 987574 5.2999999999999994e-40 - . Parent=AX25_04805;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 8 | accn|CP007172 HMMER phypat+PGL 1466126 1467164 2.1999999999999999e-12 + . Parent=AX25_07290;Name=PF05977;Note=MFS_3;Description=Transmembrane secretion effector 9 | accn|CP007172 HMMER phypat+PGL 1641791 1642112 2.2e-28 - . Parent=AX25_08135;Name=PF04186;Note=FxsA;Description=FxsA cytoplasmic membrane protein 10 | accn|CP007172 HMMER phypat+PGL 1815477 1816350 2.7000000000000003e-42 + . Parent=AX25_09020;Name=PF01545;Note=Cation_efflux;Description=Cation efflux family 11 | accn|CP007172 HMMER phypat+PGL 1878512 1878890 2.7000000000000002e-34 - . Parent=AX25_09305;Name=PF00781;Note=DAGK_cat;Description=Diacylglycerol kinase catalytic domain 12 | accn|CP007172 HMMER phypat+PGL 1932072 1932429 5.1999999999999999e-34 - . Parent=AX25_09575;Name=PF06470;Note=SMC_hinge;Description=SMC proteins Flexible Hinge Domain 13 | accn|CP007172 HMMER phypat+PGL 1964872 1965229 1.7e-45 - . Parent=AX25_09730;Name=PF02787;Note=CPSase_L_D3;Description=Carbamoyl-phosphate synthetase large chain, oligomerisation domain 14 | accn|CP007172 HMMER phypat+PGL 1982242 1982407 2.7999999999999996e-16 - . Parent=AX25_09810;Name=PF01471;Note=PG_binding_1;Description=Putative peptidoglycan binding domain 15 | accn|CP007172 HMMER phypat+PGL 2074202 2075129 7.3999999999999978e-81 - . Parent=AX25_10280;Name=PF00710;Note=Asparaginase;Description=Asparaginase 16 | accn|CP007172 HMMER phypat+PGL 2191398 2191740 1.3e-22 - . Parent=AX25_10820;Name=PF02620;Note=DUF177;Description=Uncharacterized ACR, COG1399 17 | accn|CP007172 HMMER phypat+PGL 2300084 2301173 5.6000000000000007e-49 + . 
Parent=AX25_11400;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 18 | accn|CP007172 HMMER phypat+PGL 2391831 2392377 4.5e-11 - . Parent=AX25_11810;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 19 | accn|CP007172 HMMER phypat+PGL 2396123 2396483 5.2999999999999996e-13 + . Parent=AX25_11830;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 20 | accn|CP007172 HMMER phypat+PGL 2396612 2397425 9.6999999999999981e-63 + . Parent=AX25_11835;Name=PF01545;Note=Cation_efflux;Description=Cation efflux family 21 | accn|CP007172 HMMER phypat+PGL 2453637 2454204 9.9999999999999998e-17 - . Parent=AX25_12105;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 22 | accn|CP007172 HMMER phypat+PGL 2515872 2516703 6.2999999999999984e-70 - . Parent=AX25_12485;Name=PF01545;Note=Cation_efflux;Description=Cation efflux family 23 | accn|CP007172 HMMER phypat+PGL 2571860 2572703 9.0999999999999967e-122 - . Parent=AX25_12755;Name=PF03668;Note=ATP_bind_2;Description=P-loop ATPase protein family 24 | accn|CP007172 HMMER phypat+PGL 2641606 2641990 4.1999999999999998e-33 - . Parent=AX25_13110;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 25 | accn|CP007172 HMMER phypat+PGL 2661552 2661900 4.5999999999999997e-31 - . Parent=AX25_13215;Name=PF00781;Note=DAGK_cat;Description=Diacylglycerol kinase catalytic domain 26 | accn|CP007172 HMMER phypat+PGL 2676899 2677706 8.299999999999999e-78 - . Parent=AX25_13305;Name=PF01545;Note=Cation_efflux;Description=Cation efflux family 27 | accn|CP007172 HMMER phypat+PGL 2874480 2875482 5.4999999999999996e-10 - . 
Parent=AX25_14400;Name=PF05977;Note=MFS_3;Description=Transmembrane secretion effector 28 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Motile_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692933 HMMER phypat+PGL 126629 127163 2.9999999999999998e-13 + . Parent=HMPREF0556_0210;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 3 | accn|NZ_GG692933 HMMER phypat+PGL 160625 161156 3.8000000000000002e-15 + . Parent=HMPREF0556_0243;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 4 | accn|NZ_GG692933 HMMER phypat+PGL 254394 254805 9.1999999999999992e-11 + . Parent=HMPREF0556_0341;Name=PF13527;Note=Acetyltransf_9;Description=Acetyltransferase (GNAT) domain 5 | accn|NZ_GG692933 HMMER phypat+PGL 608292 608826 1.5999999999999998e-82 + . Parent=HMPREF0556_0705;Name=PF01227;Note=GTP_cyclohydroI;Description=GTP cyclohydrolase I 6 | accn|NZ_GG692933 HMMER phypat+PGL 680939 681587 7.2999999999999994e-45 + . Parent=HMPREF0556_0784;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 7 | accn|NZ_GG692932 HMMER phypat+PGL 45677 46355 3.2000000000000001e-12 + . Parent=HMPREF0556_0924;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 8 | accn|NZ_GG692932 HMMER phypat+PGL 95603 95870 6.0999999999999996e-10 + . Parent=HMPREF0556_0972;Name=PF13527;Note=Acetyltransf_9;Description=Acetyltransferase (GNAT) domain 9 | accn|NZ_GG692932 HMMER phypat+PGL 182884 183622 3.9e-10 - . Parent=HMPREF0556_1057;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 10 | accn|NZ_GG692932 HMMER phypat+PGL 201322 201445 2.7000000000000001e-19 + . 
Parent=HMPREF0556_1080;Name=PF13677;Note=MotB_plug;Description=Membrane MotB of proton-channel complex MotA/MotB 11 | accn|NZ_GG692932 HMMER phypat+PGL 203132 203396 1.3000000000000002e-10 + . Parent=HMPREF0556_1082;Name=PF09976;Note=TPR_21;Description=Tetratricopeptide repeat 12 | accn|NZ_GG692932 HMMER phypat+PGL 207501 207747 5.1000000000000006e-27 + . Parent=HMPREF0556_1086;Name=PF07194;Note=P2;Description=P2 response regulator binding domain 13 | accn|NZ_GG692932 HMMER phypat+PGL 211879 212188 5.9000000000000003e-11 + . Parent=HMPREF0556_1091;Name=PF07559;Note=FlaE;Description=Flagellar basal body protein FlaE 14 | accn|NZ_GG692932 HMMER phypat+PGL 385382 386024 6.4000000000000005e-42 + . Parent=HMPREF0556_1283;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 15 | accn|NZ_GG692932 HMMER phypat+PGL 437759 438071 2.9999999999999999e-16 - . Parent=HMPREF0556_1336;Name=PF14501;Note=HATPase_c_5;Description=GHKL domain 16 | accn|NZ_GG692932 HMMER phypat+PGL 795241 795517 1.0000000000000001e-11 + . Parent=HMPREF0556_1680;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 17 | accn|NZ_GG692932 HMMER phypat+PGL 801572 801953 5.7000000000000002e-20 + . Parent=HMPREF0556_1690;Name=PF13567;Note=DUF4131;Description=Domain of unknown function (DUF4131) 18 | accn|NZ_GG692937 HMMER phypat+PGL 19405 20014 9.4e-35 + . Parent=HMPREF0556_1937;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 19 | accn|NZ_GG692935 HMMER phypat+PGL 51375 51561 2.0999999999999999e-12 + . Parent=HMPREF0556_2046;Name=PF09378;Note=HAS-barrel;Description=HAS barrel domain 20 | accn|NZ_GG692935 HMMER phypat+PGL 122148 122664 1.4999999999999999e-14 + . Parent=HMPREF0556_2114;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 21 | accn|NZ_GG692935 HMMER phypat+PGL 203380 203677 7.5e-10 + . 
Parent=HMPREF0556_2185;Name=PF14501;Note=HATPase_c_5;Description=GHKL domain 22 | accn|NZ_GG692935 HMMER phypat+PGL 234543 235173 1.1e-12 + . Parent=HMPREF0556_2210;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 23 | accn|NZ_GG692934 HMMER phypat+PGL 101777 102275 3.9000000000000001e-11 - . Parent=HMPREF0556_2361;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 24 | accn|NZ_GG692934 HMMER phypat+PGL 187223 188375 5.6999999999999999e-91 + . Parent=HMPREF0556_2438;Name=PF02274;Note=Amidinotransf;Description=Amidinotransferase 25 | accn|NZ_GG692934 HMMER phypat+PGL 191773 192082 2.9999999999999999e-21 + . Parent=HMPREF0556_2445;Name=PF14501;Note=HATPase_c_5;Description=GHKL domain 26 | accn|NZ_GG692934 HMMER phypat+PGL 251204 251891 4.0999999999999999e-12 + . Parent=HMPREF0556_2508;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 27 | accn|NZ_GG692934 HMMER phypat+PGL 332298 332547 2.8e-11 + . Parent=HMPREF0556_2599;Name=PF13527;Note=Acetyltransf_9;Description=Acetyltransferase (GNAT) domain 28 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Lactose_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat+PGL 13574 14024 3.4e-16 - . Parent=HMPREF0556_0014;Name=PF04892;Note=VanZ;Description=VanZ like family 3 | accn|NZ_GG692933 HMMER phypat+PGL 94387 94927 6.4999999999999992e-71 + . Parent=HMPREF0556_0186;Name=PF03390;Note=2HCT;Description=2-hydroxycarboxylate transporter family 4 | accn|NZ_GG692933 HMMER phypat+PGL 408673 408796 1.3999999999999999e-18 - . Parent=HMPREF0556_0500;Name=PF01476;Note=LysM;Description=LysM domain 5 | accn|NZ_GG692933 HMMER phypat+PGL 436269 436719 2.9999999999999997e-20 - . 
Parent=HMPREF0556_0527;Name=PF04892;Note=VanZ;Description=VanZ like family 6 | accn|NZ_GG692933 HMMER phypat+PGL 445325 445514 1.3e-14 + . Parent=HMPREF0556_0530;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 7 | accn|NZ_GG692933 HMMER phypat+PGL 448816 448990 3.8000000000000001e-09 - . Parent=HMPREF0556_0535;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 8 | accn|NZ_GG692933 HMMER phypat+PGL 499288 499414 4.5999999999999996e-19 + . Parent=HMPREF0556_0590;Name=PF01476;Note=LysM;Description=LysM domain 9 | accn|NZ_GG692933 HMMER phypat+PGL 601654 601798 8.3999999999999995e-13 + . Parent=HMPREF0556_0698;Name=PF01476;Note=LysM;Description=LysM domain 10 | accn|NZ_GG692933 HMMER phypat+PGL 618857 619091 2.0999999999999999e-13 + . Parent=HMPREF0556_0716;Name=PF12895;Note=Apc3;Description=Anaphase-promoting complex, cyclosome, subunit 3 11 | accn|NZ_GG692933 HMMER phypat+PGL 660000 660189 8.1000000000000001e-23 + . Parent=HMPREF0556_0761;Name=PF02742;Note=Fe_dep_repr_C;Description=Iron dependent repressor, metal binding and dimerisation domain 12 | accn|NZ_GG692933 HMMER phypat+PGL 708774 708972 8.8000000000000004e-14 + . Parent=HMPREF0556_0810;Name=PF07730;Note=HisKA_3;Description=Histidine kinase 13 | accn|NZ_GG692932 HMMER phypat+PGL 33791 35081 6.4999999999999975e-193 - . Parent=HMPREF0556_0912;Name=PF03051;Note=Peptidase_C1_2;Description=Peptidase C1-like family 14 | accn|NZ_GG692932 HMMER phypat+PGL 257290 259207 0.0 - . Parent=HMPREF0556_1142;Name=PF06874;Note=FBPase_2;Description=Firmicute fructose-1,6-bisphosphatase 15 | accn|NZ_GG692932 HMMER phypat+PGL 379956 380100 2.6e-14 + . Parent=HMPREF0556_1277;Name=PF01476;Note=LysM;Description=LysM domain 16 | accn|NZ_GG692932 HMMER phypat+PGL 407582 407786 3.3000000000000002e-23 + . Parent=HMPREF0556_1308;Name=PF07730;Note=HisKA_3;Description=Histidine kinase 17 | accn|NZ_GG692932 HMMER phypat+PGL 452147 452462 3.2000000000000001e-12 + . 
Parent=HMPREF0556_1350;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 18 | accn|NZ_GG692932 HMMER phypat+PGL 761273 761504 1e-10 + . Parent=HMPREF0556_1647;Name=PF12895;Note=Apc3;Description=Anaphase-promoting complex, cyclosome, subunit 3 19 | accn|NZ_GG692932 HMMER phypat+PGL 801572 801953 5.7000000000000002e-20 + . Parent=HMPREF0556_1690;Name=PF13567;Note=DUF4131;Description=Domain of unknown function (DUF4131) 20 | accn|NZ_GG692937 HMMER phypat+PGL 22006 22234 5.4999999999999998e-13 - . Parent=HMPREF0556_1940;Name=PF13154;Note=DUF3991;Description=Protein of unknown function (DUF3991) 21 | accn|NZ_GG692935 HMMER phypat+PGL 6004 6178 6.8999999999999994e-11 + . Parent=HMPREF0556_1999;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 22 | accn|NZ_GG692935 HMMER phypat+PGL 59821 59941 3.1999999999999996e-14 + . Parent=HMPREF0556_2054;Name=PF01476;Note=LysM;Description=LysM domain 23 | accn|NZ_GG692934 HMMER phypat+PGL 655 970 9.7e-37 - . Parent=HMPREF0556_2243;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 24 | accn|NZ_GG692934 HMMER phypat+PGL 1679 1994 8.8e-39 - . Parent=HMPREF0556_2244;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 25 | accn|NZ_GG692934 HMMER phypat+PGL 50963 51224 8.6999999999999995e-14 - . Parent=HMPREF0556_2309;Name=PF04205;Note=FMN_bind;Description=FMN-binding domain 26 | accn|NZ_GG692934 HMMER phypat+PGL 75886 76009 5.9000000000000009e-20 + . Parent=HMPREF0556_2333;Name=PF01476;Note=LysM;Description=LysM domain 27 | accn|NZ_GG692934 HMMER phypat+PGL 107615 108068 3.6000000000000003e-16 - . Parent=HMPREF0556_2367;Name=PF13506;Note=Glyco_transf_21;Description=Glycosyl transferase family 21 28 | accn|NZ_GG692934 HMMER phypat+PGL 279914 280295 4.7999999999999997e-30 - . 
#!/usr/bin/env python
"""predict new samples"""
import os.path
import sys

import numpy as np
import pandas as ps


def filter_pred(scores, is_majority, k):
    """Aggregate the scores of k voters into one value.

    scores -- pandas Series with one score per voter
    is_majority -- True for a majority vote, False for an all-or-nothing vote
    k -- number of voters

    Majority vote: if at least k // 2 + 1 scores are positive, return the mean
    of the scores that are >= 0, otherwise NaN. Conservative vote: return the
    mean of all scores if every score is positive, otherwise NaN.
    """
    if is_majority:
        #integer division: with "/" Python 3 would demand k / 2 + 1 = 3.5 votes for k = 5
        if (scores > 0).sum() >= (k // 2 + 1):
            #NOTE(review): votes are counted with > 0 but averaged over >= 0 -- confirm
            return scores[scores >= 0].mean()
        return np.nan
    if (scores > 0).all():
        return scores.mean()
    return np.nan


def aggregate(pred_df, k, out_dir, pt2acc):
    """Count positive votes per phenotype and write three prediction tables.

    pred_df -- raw scores; every run of k consecutive columns belongs to one
        phenotype and the part of the first column name before "_" is its id
    k -- number of voters per phenotype
    out_dir -- directory for the output files
    pt2acc -- frame indexed by phenotype id; its first column supplies the
        column labels of the output

    Writes ``predictions_single-votes.txt`` (number of positive votes),
    ``predictions_majority-vote.txt`` (1 if at least k // 2 + 1 votes are
    positive) and ``predictions_conservative-vote.txt`` (1 if all k are).
    Returns the vote count table. Trailing columns that do not fill a
    complete run of k are ignored.
    """
    out = ["majority-vote", "conservative-vote", "single-votes"]
    #integer division: "/" yields a float on Python 3, which breaks range() and shifts the threshold
    n_pt = pred_df.shape[1] // k
    positive = pred_df.values[:, :n_pt * k] > 0
    #one slice of k voters per phenotype, summed to the number of positive votes
    votes = positive.reshape(pred_df.shape[0], n_pt, k).sum(axis = 2)
    pt_ids = [pred_df.columns.values[i * k].split("_")[0] for i in range(n_pt)]
    maj_pred_dfs = ps.DataFrame(votes, index = pred_df.index)
    maj_pred_dfs.columns = pt2acc.loc[pt_ids, :].iloc[:, 0]
    majority = k // 2 + 1
    #single votes
    maj_pred_dfs.to_csv("%s/predictions_%s.txt"%(out_dir, out[2]), sep = "\t", float_format='%.0f', encoding = "utf-8")
    #majority vote
    (maj_pred_dfs >= majority).astype('int').to_csv("%s/predictions_%s.txt"%(out_dir, out[0]), sep = "\t", float_format='%.0f', encoding = "utf-8")
    #conservative vote
    (maj_pred_dfs == k).astype('int').to_csv("%s/predictions_%s.txt"%(out_dir, out[1]), sep = "\t", float_format='%.0f', encoding = "utf-8")
    return maj_pred_dfs


def majority_predict(pt, pt_models, test_data, k, bias_weight = 1):
    """Score all samples with the first k models of one phenotype.

    pt -- phenotype id
    pt_models -- object providing get_bias(pt) and get_predictors(pt)
    test_data -- samples x features matrix; values > 0 count as present
    k -- number of models to use
    bias_weight -- factor applied to the bias term

    Returns a samples x k frame with columns named ``<pt>_<model>``, where
    <model> is the predictor column name up to its first "_". Returns an empty
    frame if the lookups raise KeyError.
    """
    #TODO if the classification model is trained on non binary data we would require the same normalization applied to the training data
    #binarize
    test_data_n = (test_data > 0).astype('int')
    #check if classification model exists for the requested phenotype
    try:
        bias = pt_models.get_bias(pt)
        predictors = pt_models.get_predictors(pt)
    except KeyError:
        return ps.DataFrame()
    #the feature order is the same for every model, so select the columns once
    features_t = test_data_n.loc[:, predictors.index].T
    #build prediction matrix with the k best models
    preds = np.zeros((test_data.shape[0], k))
    for i in range(k):
        preds[:, i] = bias.iloc[i, 0] * bias_weight + predictors.iloc[:, i].dot(features_t)
    pred_df = ps.DataFrame(preds, index = test_data.index)
    #set column names
    pred_df.columns = ["%s_%s" %(pt, predictors.columns[i].split("_")[0]) for i in range(k)]
    return pred_df


def annotate_and_predict(pt_models, summary_f, out_dir, k):
    """Load an annotation matrix and predict all phenotypes of a collection.

    pt_models -- collection providing get_pf2desc(), get_pt2acc(), get_bias()
        and get_predictors()
    summary_f -- tab separated annotation matrix; first column is the sample id
    out_dir -- directory for predictions_raw.txt and the aggregated tables
    k -- number of voters per phenotype

    Returns the frame of raw scores.
    """
    #NOTE(review): the result is not used; the call is kept in case the getter has side effects
    pt_models.get_pf2desc()
    pt_mapping = pt_models.get_pt2acc()
    #read annotation file
    m = ps.read_csv(summary_f, sep="\t", index_col = 0)
    #predict per phenotype and join once; phenotypes without model yield empty frames
    frames = []
    for pt in pt_mapping.index:
        preds = majority_predict(pt, pt_models, m, k)
        if preds.shape[1] > 0:
            frames.append(preds)
    if frames:
        pred_df = ps.concat(frames, axis = 1)
    else:
        pred_df = ps.DataFrame(index = m.index)
    pred_df.index = m.index
    pred_df.to_csv("%s/predictions_raw.txt"%out_dir, sep = "\t", float_format='%.3f')
    #aggregate predictions
    aggregate(pred_df, k, out_dir, pt_mapping)
    return pred_df


if __name__ == "__main__":
    import argparse
    #only the command line entry point needs the collection class
    from traitar.PhenotypeCollection import PhenotypeCollection
    parser = argparse.ArgumentParser("predict phenotypes from hmmer Pfam annotation")
    parser.add_argument("pt_models", help='archive with the phenotype predictors')
    parser.add_argument("out_dir", help='directory for the phenotype predictions')
    #parser.add_argument("phenotype_f", help='file with ids of phenotypes that should be predicted')
    parser.add_argument("annotation_matrix", help='summary file with pfams')
    parser.add_argument("-k", "--voters", default = 5, help='use this number of voters for the classification', type = int)
    args = parser.parse_args()
    pt_models = PhenotypeCollection(args.pt_models)
    annotate_and_predict(pt_models, args.annotation_matrix, args.out_dir, args.voters)
Parent=AX25_00500;Name=PF14501;Note=HATPase_c_5;Description=GHKL domain 6 | accn|CP007172 HMMER phypat+PGL 194208 194895 3.1e-09 + . Parent=AX25_00905;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 7 | accn|CP007172 HMMER phypat+PGL 332610 332913 2.5000000000000001e-11 + . Parent=AX25_01540;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 8 | accn|CP007172 HMMER phypat+PGL 678877 679267 8.3999999999999998e-12 + . Parent=AX25_03245;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 9 | accn|CP007172 HMMER phypat+PGL 685335 685980 5.3999999999999996e-48 + . Parent=AX25_03285;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 10 | accn|CP007172 HMMER phypat+PGL 704067 704805 5.4999999999999997e-11 - . Parent=AX25_03380;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 11 | accn|CP007172 HMMER phypat+PGL 720428 720593 1.3e-23 + . Parent=AX25_03480;Name=PF13677;Note=MotB_plug;Description=Membrane MotB of proton-channel complex MotA/MotB 12 | accn|CP007172 HMMER phypat+PGL 726772 727012 1.7000000000000001e-26 + . Parent=AX25_03510;Name=PF07194;Note=P2;Description=P2 response regulator binding domain 13 | accn|CP007172 HMMER phypat+PGL 731008 731317 5.9999999999999997e-13 + . Parent=AX25_03535;Name=PF07559;Note=FlaE;Description=Flagellar basal body protein FlaE 14 | accn|CP007172 HMMER phypat+PGL 751016 751781 1.4000000000000001e-29 + . Parent=AX25_03645;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 15 | accn|CP007172 HMMER phypat+PGL 849690 850575 1.2e-40 + . Parent=AX25_04140;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 16 | accn|CP007172 HMMER phypat+PGL 881860 882778 1.8999999999999999e-36 + . Parent=AX25_04255;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 17 | accn|CP007172 HMMER phypat+PGL 1549451 1549838 1.9000000000000001e-17 - . 
Parent=AX25_07695;Name=PF13567;Note=DUF4131;Description=Domain of unknown function (DUF4131) 18 | accn|CP007172 HMMER phypat+PGL 1557664 1557934 1.3000000000000002e-10 - . Parent=AX25_07745;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 19 | accn|CP007172 HMMER phypat+PGL 1861062 1861431 6.5000000000000001e-14 - . Parent=AX25_09215;Name=PF13527;Note=Acetyltransf_9;Description=Acetyltransferase (GNAT) domain 20 | accn|CP007172 HMMER phypat+PGL 1984137 1984782 1.4999999999999997e-46 - . Parent=AX25_09820;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 21 | accn|CP007172 HMMER phypat+PGL 2066977 2067511 4.7999999999999989e-81 - . Parent=AX25_10245;Name=PF01227;Note=GTP_cyclohydroI;Description=GTP cyclohydrolase I 22 | accn|CP007172 HMMER phypat+PGL 2418307 2418835 8.6999999999999994e-16 - . Parent=AX25_11940;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 23 | accn|CP007172 HMMER phypat+PGL 2422573 2422909 4.0000000000000004e-21 - . Parent=AX25_11970;Name=PF13527;Note=Acetyltransf_9;Description=Acetyltransferase (GNAT) domain 24 | accn|CP007172 HMMER phypat+PGL 2498181 2498817 8.3999999999999995e-13 - . Parent=AX25_12390;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 25 | accn|CP007172 HMMER phypat+PGL 2514769 2515075 6.4999999999999994e-12 - . Parent=AX25_12475;Name=PF14501;Note=HATPase_c_5;Description=GHKL domain 26 | accn|CP007172 HMMER phypat+PGL 2581510 2581783 1.3000000000000001e-12 - . Parent=AX25_12810;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 27 | accn|CP007172 HMMER phypat+PGL 2633365 2633551 2.0999999999999999e-12 - . Parent=AX25_13065;Name=PF09378;Note=HAS-barrel;Description=HAS barrel domain 28 | accn|CP007172 HMMER phypat+PGL 2784683 2785370 7.4000000000000011e-38 - . 
Parent=AX25_13920;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 29 | accn|CP007172 HMMER phypat+PGL 2793271 2794111 2.2999999999999997e-24 - . Parent=AX25_13960;Name=PF00702;Note=Hydrolase;Description=haloacid dehalogenase-like hydrolase 30 | accn|CP007172 HMMER phypat+PGL 2863110 2863452 1.2e-09 + . Parent=AX25_14320;Name=PF13527;Note=Acetyltransf_9;Description=Acetyltransferase (GNAT) domain 31 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Lactose_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 414009 414483 2.5000000000000003e-12 - . Parent=AX25_01990;Name=PF13506;Note=Glyco_transf_21;Description=Glycosyl transferase family 21 3 | accn|CP007172 HMMER phypat+PGL 477006 477468 1.2999999999999999e-30 + . Parent=AX25_02285;Name=PF06445;Note=GyrI-like;Description=GyrI-like small molecule binding domain 4 | accn|CP007172 HMMER phypat+PGL 502970 503288 6.0000000000000001e-32 + . Parent=AX25_02445;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 5 | accn|CP007172 HMMER phypat+PGL 540135 540621 1.2e-16 + . Parent=AX25_02610;Name=PF13506;Note=Glyco_transf_21;Description=Glycosyl transferase family 21 6 | accn|CP007172 HMMER phypat+PGL 599231 599354 1.8999999999999999e-20 - . Parent=AX25_02900;Name=PF01476;Note=LysM;Description=LysM domain 7 | accn|CP007172 HMMER phypat+PGL 634442 634631 2.1999999999999999e-15 + . Parent=AX25_03060;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 8 | accn|CP007172 HMMER phypat+PGL 796601 796775 7.7000000000000006e-11 - . Parent=AX25_03885;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 9 | accn|CP007172 HMMER phypat+PGL 801402 801804 3e-23 + . 
Parent=AX25_03920;Name=PF04892;Note=VanZ;Description=VanZ like family 10 | accn|CP007172 HMMER phypat+PGL 869633 871553 0.0 - . Parent=AX25_04210;Name=PF06874;Note=FBPase_2;Description=Firmicute fructose-1,6-bisphosphatase 11 | accn|CP007172 HMMER phypat+PGL 931266 931389 2.3999999999999999e-14 + . Parent=AX25_04485;Name=PF01476;Note=LysM;Description=LysM domain 12 | accn|CP007172 HMMER phypat+PGL 1065643 1065847 1.1999999999999998e-23 + . Parent=AX25_05235;Name=PF07730;Note=HisKA_3;Description=Histidine kinase 13 | accn|CP007172 HMMER phypat+PGL 1134650 1134962 1.1999999999999999e-12 + . Parent=AX25_05555;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 14 | accn|CP007172 HMMER phypat+PGL 1483599 1483935 2e-12 - . Parent=AX25_07370;Name=PF04892;Note=VanZ;Description=VanZ like family 15 | accn|CP007172 HMMER phypat+PGL 1484064 1485273 3.2999999999999998e-71 - . Parent=AX25_07375;Name=PF11700;Note=ATG22;Description=Vacuole effluxer Atg22 like 16 | accn|CP007172 HMMER phypat+PGL 1549451 1549838 1.9000000000000001e-17 - . Parent=AX25_07695;Name=PF13567;Note=DUF4131;Description=Domain of unknown function (DUF4131) 17 | accn|CP007172 HMMER phypat+PGL 1576539 1576782 1.3000000000000001e-12 - . Parent=AX25_07835;Name=PF12895;Note=Apc3;Description=Anaphase-promoting complex, cyclosome, subunit 3 18 | accn|CP007172 HMMER phypat+PGL 1705151 1706234 1.3e-106 + . Parent=AX25_08435;Name=PF00872;Note=Transposase_mut;Description=Transposase, Mutator family 19 | accn|CP007172 HMMER phypat+PGL 1754364 1754472 8.7999999999999997e-12 - . Parent=AX25_08655;Name=PF01476;Note=LysM;Description=LysM domain 20 | accn|CP007172 HMMER phypat+PGL 1814753 1815212 8.8000000000000009e-18 - . Parent=AX25_09015;Name=PF04892;Note=VanZ;Description=VanZ like family 21 | accn|CP007172 HMMER phypat+PGL 2009187 2009379 7.6000000000000002e-23 - . 
Parent=AX25_09950;Name=PF02742;Note=Fe_dep_repr_C;Description=Iron dependent repressor, metal binding and dimerisation domain 22 | accn|CP007172 HMMER phypat+PGL 2056773 2057004 2.3999999999999999e-14 - . Parent=AX25_10190;Name=PF12895;Note=Apc3;Description=Anaphase-promoting complex, cyclosome, subunit 3 23 | accn|CP007172 HMMER phypat+PGL 2075807 2075951 1.6999999999999998e-12 - . Parent=AX25_10285;Name=PF01476;Note=LysM;Description=LysM domain 24 | accn|CP007172 HMMER phypat+PGL 2440584 2441883 9.3999999999999964e-206 + . Parent=AX25_12045;Name=PF03051;Note=Peptidase_C1_2;Description=Peptidase C1-like family 25 | accn|CP007172 HMMER phypat+PGL 2625445 2625568 6.8000000000000001e-15 - . Parent=AX25_13020;Name=PF01476;Note=LysM;Description=LysM domain 26 | accn|CP007172 HMMER phypat+PGL 2675499 2675667 2.2000000000000002e-10 - . Parent=AX25_13295;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 27 | accn|CP007172 HMMER phypat+PGL 2730869 2731142 1.8000000000000001e-15 - . Parent=AX25_13650;Name=PF04205;Note=FMN_bind;Description=FMN-binding domain 28 | accn|CP007172 HMMER phypat+PGL 2766935 2767250 1.6000000000000001e-36 - . Parent=AX25_13830;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 29 | accn|CP007172 HMMER phypat+PGL 2767962 2768274 3.1999999999999999e-37 - . Parent=AX25_13835;Name=PF08240;Note=ADH_N;Description=Alcohol dehydrogenase GroES-like domain 30 | accn|CP007172 HMMER phypat+PGL 2796907 2797033 1.8000000000000002e-20 + . Parent=AX25_13975;Name=PF01476;Note=LysM;Description=LysM domain 31 | accn|CP007172 HMMER phypat+PGL 2897498 2897843 9.499999999999999e-45 - . Parent=AX25_14495;Name=PF01661;Note=Macro;Description=Macro domain 32 | accn|CP007172 HMMER phypat+PGL 2236056 2236137 3.7000000000000006e-14 + . 
Parent=AX25_11070;Name=PF02661;Note=Fic;Description=Fic/DOC family 33 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Growth_on_ordinary_blood_agar_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat+PGL 14243 15122 7.1999999999999998e-43 + . Parent=HMPREF0556_0015;Name=PF01545;Note=Cation_efflux;Description=Cation efflux family 3 | accn|NZ_GG692936 HMMER phypat+PGL 26982 27882 9.9999999999999998e-13 + . Parent=HMPREF0556_0031;Name=PF05977;Note=MFS_3;Description=Transmembrane secretion effector 4 | accn|NZ_GG692936 HMMER phypat+PGL 70750 71125 4.7000000000000003e-36 - . Parent=HMPREF0556_0071;Name=PF00781;Note=DAGK_cat;Description=Diacylglycerol kinase catalytic domain 5 | accn|NZ_GG692933 HMMER phypat+PGL 449903 450275 4.8999999999999993e-37 + . Parent=HMPREF0556_0537;Name=PF00781;Note=DAGK_cat;Description=Diacylglycerol kinase catalytic domain 6 | accn|NZ_GG692933 HMMER phypat+PGL 519455 519794 1.8000000000000002e-20 + . Parent=HMPREF0556_0614;Name=PF02620;Note=DUF177;Description=Uncharacterized ACR, COG1399 7 | accn|NZ_GG692933 HMMER phypat+PGL 601891 602818 2.7999999999999999e-81 + . Parent=HMPREF0556_0699;Name=PF00710;Note=Asparaginase;Description=Asparaginase 8 | accn|NZ_GG692933 HMMER phypat+PGL 683478 683643 2.1000000000000001e-16 + . Parent=HMPREF0556_0786;Name=PF01471;Note=PG_binding_1;Description=Putative peptidoglycan binding domain 9 | accn|NZ_GG692933 HMMER phypat+PGL 699094 699451 1.6999999999999998e-48 + . Parent=HMPREF0556_0801;Name=PF02787;Note=CPSase_L_D3;Description=Carbamoyl-phosphate synthetase large chain, oligomerisation domain 10 | accn|NZ_GG692933 HMMER phypat+PGL 710462 711482 2.2000000000000002e-27 + . 
Parent=HMPREF0556_0812;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 11 | accn|NZ_GG692933 HMMER phypat+PGL 711581 712619 2e-41 + . Parent=HMPREF0556_0813;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 12 | accn|NZ_GG692933 HMMER phypat+PGL 742709 743066 9.4000000000000004e-32 + . Parent=HMPREF0556_0842;Name=PF06470;Note=SMC_hinge;Description=SMC proteins Flexible Hinge Domain 13 | accn|NZ_GG692933 HMMER phypat+PGL 768693 769212 1.8e-09 - . Parent=HMPREF0556_0867;Name=PF05977;Note=MFS_3;Description=Transmembrane secretion effector 14 | accn|NZ_GG692932 HMMER phypat+PGL 68635 68884 4.8e-10 - . Parent=HMPREF0556_0946;Name=PF01507;Note=PAPS_reduct;Description=Phosphoadenosine phosphosulfate reductase family 15 | accn|NZ_GG692932 HMMER phypat+PGL 77377 77755 9.1999999999999992e-11 - . Parent=HMPREF0556_0955;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 16 | accn|NZ_GG692932 HMMER phypat+PGL 132786 133866 2.2999999999999997e-53 - . Parent=HMPREF0556_1011;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 17 | accn|NZ_GG692932 HMMER phypat+PGL 174927 175356 1.6e-12 + . Parent=HMPREF0556_1049;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 18 | accn|NZ_GG692932 HMMER phypat+PGL 255186 255753 7.4999999999999996e-14 + . Parent=HMPREF0556_1139;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 19 | accn|NZ_GG692932 HMMER phypat+PGL 318673 319087 7.7999999999999996e-40 - . Parent=HMPREF0556_1212;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 20 | accn|NZ_GG692932 HMMER phypat+PGL 349894 350290 6.8000000000000003e-10 - . Parent=HMPREF0556_1251;Name=PF05977;Note=MFS_3;Description=Transmembrane secretion effector 21 | accn|NZ_GG692932 HMMER phypat+PGL 605225 606209 2.0999999999999999e-12 - . 
Parent=HMPREF0556_1501;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 22 | accn|NZ_GG692932 HMMER phypat+PGL 629347 630286 1.0000000000000001e-11 - . Parent=HMPREF0556_1524;Name=PF05977;Note=MFS_3;Description=Transmembrane secretion effector 23 | accn|NZ_GG692932 HMMER phypat+PGL 879088 880153 3.7999999999999998e-11 - . Parent=HMPREF0556_1766;Name=PF05977;Note=MFS_3;Description=Transmembrane secretion effector 24 | accn|NZ_GG692935 HMMER phypat+PGL 4653 5472 4.9999999999999992e-80 + . Parent=HMPREF0556_1997;Name=PF01545;Note=Cation_efflux;Description=Cation efflux family 25 | accn|NZ_GG692935 HMMER phypat+PGL 44479 44821 5.8999999999999996e-23 + . Parent=HMPREF0556_2037;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 26 | accn|NZ_GG692935 HMMER phypat+PGL 86861 87914 9.9999999999999986e-52 + . Parent=HMPREF0556_2079;Name=PF12698;Note=ABC2_membrane_3;Description=ABC-2 family transporter protein 27 | accn|NZ_GG692935 HMMER phypat+PGL 137619 138465 3.0999999999999997e-118 + . Parent=HMPREF0556_2127;Name=PF03668;Note=ATP_bind_2;Description=P-loop ATPase protein family 28 | accn|NZ_GG692935 HMMER phypat+PGL 200937 201771 2.2999999999999992e-77 + . Parent=HMPREF0556_2183;Name=PF01545;Note=Cation_efflux;Description=Cation efflux family 29 | accn|NZ_GG692934 HMMER phypat+PGL 217815 218130 3.1999999999999999e-18 - . Parent=HMPREF0556_2473;Name=PF06114;Note=DUF955;Description=Domain of unknown function (DUF955) 30 | accn|NZ_GG692934 HMMER phypat+PGL 239650 239977 1.8999999999999998e-32 - . 
Parent=HMPREF0556_2496;Name=PF01451;Note=LMWPc;Description=Low molecular weight phosphotyrosine protein phosphatase 31 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/predictions_flat_majority-votes_combined.txt: -------------------------------------------------------------------------------- 1 | sample phenotype phenotype model 2 | Listeria_grayi_DSM_20601 Starch hydrolysis 1 phypat 3 | Listeria_grayi_DSM_20601 Starch hydrolysis 1 phypat+PGL 4 | Listeria_grayi_DSM_20601 Catalase 1 phypat 5 | Listeria_grayi_DSM_20601 Catalase 1 phypat+PGL 6 | Listeria_grayi_DSM_20601 Growth at 42°C 1 phypat 7 | Listeria_grayi_DSM_20601 Growth at 42°C 1 phypat+PGL 8 | Listeria_grayi_DSM_20601 Voges Proskauer 1 phypat 9 | Listeria_grayi_DSM_20601 Voges Proskauer 1 phypat+PGL 10 | Listeria_grayi_DSM_20601 D-Mannitol 1 phypat 11 | Listeria_grayi_DSM_20601 Trehalose 1 phypat 12 | Listeria_grayi_DSM_20601 Trehalose 1 phypat+PGL 13 | Listeria_grayi_DSM_20601 Motile 1 phypat 14 | Listeria_grayi_DSM_20601 Motile 1 phypat+PGL 15 | Listeria_grayi_DSM_20601 Maltose 1 phypat 16 | Listeria_grayi_DSM_20601 Maltose 1 phypat+PGL 17 | Listeria_grayi_DSM_20601 Growth on ordinary blood agar 1 phypat 18 | Listeria_grayi_DSM_20601 Growth on ordinary blood agar 1 phypat+PGL 19 | Listeria_grayi_DSM_20601 Acetate utilization 1 phypat 20 | Listeria_grayi_DSM_20601 Malonate 1 phypat 21 | Listeria_grayi_DSM_20601 Malonate 1 phypat+PGL 22 | Listeria_grayi_DSM_20601 DNase 1 phypat 23 | Listeria_grayi_DSM_20601 D-Mannose 1 phypat 24 | Listeria_grayi_DSM_20601 D-Mannose 1 phypat+PGL 25 | Listeria_grayi_DSM_20601 Gram positive 1 phypat 26 | Listeria_grayi_DSM_20601 Gram positive 1 phypat+PGL 27 | Listeria_grayi_DSM_20601 Growth in 6.5% NaCl 1 phypat 28 | Listeria_grayi_DSM_20601 Growth in 6.5% NaCl 1 phypat+PGL 29 | Listeria_grayi_DSM_20601 Bile-susceptible 1 phypat+PGL 30 | Listeria_grayi_DSM_20601 Glucose fermenter 1 
phypat 31 | Listeria_grayi_DSM_20601 Glucose fermenter 1 phypat+PGL 32 | Listeria_grayi_DSM_20601 Salicin 1 phypat 33 | Listeria_grayi_DSM_20601 Salicin 1 phypat+PGL 34 | Listeria_grayi_DSM_20601 Alkaline phosphatase 1 phypat 35 | Listeria_grayi_DSM_20601 Alkaline phosphatase 1 phypat+PGL 36 | Listeria_grayi_DSM_20601 Lactose 1 phypat 37 | Listeria_grayi_DSM_20601 Lactose 1 phypat+PGL 38 | Listeria_grayi_DSM_20601 Growth in KCN 1 phypat 39 | Listeria_grayi_DSM_20601 Citrate 1 phypat 40 | Listeria_grayi_DSM_20601 Citrate 1 phypat+PGL 41 | Listeria_grayi_DSM_20601 Facultative 1 phypat 42 | Listeria_grayi_DSM_20601 Facultative 1 phypat+PGL 43 | Listeria_grayi_DSM_20601 Esculin hydrolysis 1 phypat 44 | Listeria_grayi_DSM_20601 Esculin hydrolysis 1 phypat+PGL 45 | Listeria_grayi_DSM_20601 Bacillus or coccobacillus 1 phypat+PGL 46 | Listeria_grayi_DSM_20601 Sucrose 1 phypat+PGL 47 | Listeria_grayi_DSM_20601 Coagulase production 1 phypat 48 | Listeria_grayi_DSM_20601 Coagulase production 1 phypat+PGL 49 | Listeria_grayi_DSM_20601 Arginine dihydrolase 1 phypat+PGL 50 | Listeria_ivanovii_WSLC3009 D-Xylose 1 phypat+PGL 51 | Listeria_ivanovii_WSLC3009 Starch hydrolysis 1 phypat 52 | Listeria_ivanovii_WSLC3009 Starch hydrolysis 1 phypat+PGL 53 | Listeria_ivanovii_WSLC3009 Catalase 1 phypat 54 | Listeria_ivanovii_WSLC3009 Catalase 1 phypat+PGL 55 | Listeria_ivanovii_WSLC3009 Growth at 42°C 1 phypat 56 | Listeria_ivanovii_WSLC3009 Growth at 42°C 1 phypat+PGL 57 | Listeria_ivanovii_WSLC3009 Voges Proskauer 1 phypat 58 | Listeria_ivanovii_WSLC3009 Voges Proskauer 1 phypat+PGL 59 | Listeria_ivanovii_WSLC3009 Trehalose 1 phypat 60 | Listeria_ivanovii_WSLC3009 Trehalose 1 phypat+PGL 61 | Listeria_ivanovii_WSLC3009 Motile 1 phypat 62 | Listeria_ivanovii_WSLC3009 Motile 1 phypat+PGL 63 | Listeria_ivanovii_WSLC3009 Maltose 1 phypat 64 | Listeria_ivanovii_WSLC3009 Maltose 1 phypat+PGL 65 | Listeria_ivanovii_WSLC3009 Growth on ordinary blood agar 1 phypat 66 | Listeria_ivanovii_WSLC3009 
Growth on ordinary blood agar 1 phypat+PGL 67 | Listeria_ivanovii_WSLC3009 Acetate utilization 1 phypat 68 | Listeria_ivanovii_WSLC3009 Malonate 1 phypat 69 | Listeria_ivanovii_WSLC3009 Malonate 1 phypat+PGL 70 | Listeria_ivanovii_WSLC3009 Methyl red 1 phypat 71 | Listeria_ivanovii_WSLC3009 Methyl red 1 phypat+PGL 72 | Listeria_ivanovii_WSLC3009 D-Mannose 1 phypat+PGL 73 | Listeria_ivanovii_WSLC3009 Gram positive 1 phypat 74 | Listeria_ivanovii_WSLC3009 Gram positive 1 phypat+PGL 75 | Listeria_ivanovii_WSLC3009 Growth in 6.5% NaCl 1 phypat 76 | Listeria_ivanovii_WSLC3009 Growth in 6.5% NaCl 1 phypat+PGL 77 | Listeria_ivanovii_WSLC3009 Glucose fermenter 1 phypat 78 | Listeria_ivanovii_WSLC3009 Glucose fermenter 1 phypat+PGL 79 | Listeria_ivanovii_WSLC3009 Salicin 1 phypat 80 | Listeria_ivanovii_WSLC3009 Salicin 1 phypat+PGL 81 | Listeria_ivanovii_WSLC3009 Alkaline phosphatase 1 phypat 82 | Listeria_ivanovii_WSLC3009 Alkaline phosphatase 1 phypat+PGL 83 | Listeria_ivanovii_WSLC3009 Lactose 1 phypat 84 | Listeria_ivanovii_WSLC3009 Lactose 1 phypat+PGL 85 | Listeria_ivanovii_WSLC3009 Growth in KCN 1 phypat 86 | Listeria_ivanovii_WSLC3009 Citrate 1 phypat+PGL 87 | Listeria_ivanovii_WSLC3009 Facultative 1 phypat 88 | Listeria_ivanovii_WSLC3009 Facultative 1 phypat+PGL 89 | Listeria_ivanovii_WSLC3009 Esculin hydrolysis 1 phypat 90 | Listeria_ivanovii_WSLC3009 Esculin hydrolysis 1 phypat+PGL 91 | Listeria_ivanovii_WSLC3009 Bacillus or coccobacillus 1 phypat 92 | Listeria_ivanovii_WSLC3009 Bacillus or coccobacillus 1 phypat+PGL 93 | Listeria_ivanovii_WSLC3009 D-Sorbitol 1 phypat 94 | Listeria_ivanovii_WSLC3009 D-Sorbitol 1 phypat+PGL 95 | Listeria_ivanovii_WSLC3009 Coagulase production 1 phypat 96 | Listeria_ivanovii_WSLC3009 Coagulase production 1 phypat+PGL 97 | Listeria_ivanovii_WSLC3009 Arginine dihydrolase 1 phypat 98 | -------------------------------------------------------------------------------- 
/traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Facultative_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 85626 85857 2.3000000000000001e-10 + . Parent=AX25_00395;Name=PF13273;Note=DUF4064;Description=Protein of unknown function (DUF4064) 3 | accn|CP007172 HMMER phypat+PGL 184710 184857 8.6999999999999997e-11 + . Parent=AX25_00865;Name=PF12911;Note=OppC_N;Description=N-terminal TM domain of oligopeptide transport permease C 4 | accn|CP007172 HMMER phypat+PGL 243373 244771 2.1999999999999992e-258 + . Parent=AX25_01130;Name=PF01289;Note=Thiol_cytolysin;Description=Thiol-activated cytolysin 5 | accn|CP007172 HMMER phypat+PGL 351046 351361 7.5000000000000003e-27 + . Parent=AX25_01625;Name=PF01228;Note=Gly_radical;Description=Glycine radical 6 | accn|CP007172 HMMER phypat+PGL 367672 367834 2.6e-23 + . Parent=AX25_01705;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 7 | accn|CP007172 HMMER phypat+PGL 382573 383137 1.8999999999999999e-10 + . Parent=AX25_01810;Name=PF03486;Note=HI0933_like;Description=HI0933-like protein 8 | accn|CP007172 HMMER phypat+PGL 423800 424097 4.2000000000000004e-14 + . Parent=AX25_02060;Name=PF13273;Note=DUF4064;Description=Protein of unknown function (DUF4064) 9 | accn|CP007172 HMMER phypat+PGL 764238 764535 3.6999999999999997e-17 + . Parent=AX25_03710;Name=PF13273;Note=DUF4064;Description=Protein of unknown function (DUF4064) 10 | accn|CP007172 HMMER phypat+PGL 833874 834153 6.1999999999999998e-13 + . Parent=AX25_04065;Name=PF02030;Note=Lipoprotein_8;Description=Hypothetical lipoprotein (MG045 family) 11 | accn|CP007172 HMMER phypat+PGL 899638 899995 9.7999999999999998e-11 + . Parent=AX25_04350;Name=PF05728;Note=UPF0227;Description=Uncharacterised protein family (UPF0227) 12 | accn|CP007172 HMMER phypat+PGL 999146 999500 1.1999999999999998e-27 + . 
Parent=AX25_04870;Name=PF13273;Note=DUF4064;Description=Protein of unknown function (DUF4064) 13 | accn|CP007172 HMMER phypat+PGL 1015408 1015579 8.3000000000000003e-10 - . Parent=AX25_04965;Name=PF13432;Note=TPR_16;Description=Tetratricopeptide repeat 14 | accn|CP007172 HMMER phypat+PGL 1039938 1040202 3.3000000000000005e-14 - . Parent=AX25_05100;Name=PF13273;Note=DUF4064;Description=Protein of unknown function (DUF4064) 15 | accn|CP007172 HMMER phypat+PGL 1217565 1218120 7.1999999999999999e-17 + . Parent=AX25_06050;Name=PF13500;Note=AAA_26;Description=AAA domain 16 | accn|CP007172 HMMER phypat+PGL 1231524 1232199 6.0000000000000002e-27 + . Parent=AX25_06135;Name=PF13500;Note=AAA_26;Description=AAA domain 17 | accn|CP007172 HMMER phypat+PGL 1360440 1360629 1.4999999999999998e-34 + . Parent=AX25_06765;Name=PF03672;Note=UPF0154;Description=Uncharacterised protein family (UPF0154) 18 | accn|CP007172 HMMER phypat+PGL 1464082 1464400 3.1999999999999995e-46 + . Parent=AX25_07275;Name=PF01228;Note=Gly_radical;Description=Glycine radical 19 | accn|CP007172 HMMER phypat+PGL 1482198 1482687 7.9000000000000004e-24 + . Parent=AX25_07365;Name=PF01154;Note=HMG_CoA_synt_N;Description=Hydroxymethylglutaryl-coenzyme A synthase N terminal 20 | accn|CP007172 HMMER phypat+PGL 1576926 1577100 1.0999999999999999e-09 - . Parent=AX25_07835;Name=PF13432;Note=TPR_16;Description=Tetratricopeptide repeat 21 | accn|CP007172 HMMER phypat+PGL 1652228 1652534 2.7000000000000001e-33 - . Parent=AX25_08170;Name=PF07085;Note=DRTGG;Description=DRTGG domain 22 | accn|CP007172 HMMER phypat+PGL 1804276 1804471 1.6000000000000001e-25 - . Parent=AX25_08965;Name=PF04167;Note=DUF402;Description=Protein of unknown function (DUF402) 23 | accn|CP007172 HMMER phypat+PGL 1821274 1821700 1e-42 + . Parent=AX25_09060;Name=PF01712;Note=dNK;Description=Deoxynucleoside kinase 24 | accn|CP007172 HMMER phypat+PGL 1824859 1825171 6.900000000000001e-10 + . 
Parent=AX25_09085;Name=PF07972;Note=Flavodoxin_NdrI;Description=NrdI Flavodoxin like 25 | accn|CP007172 HMMER phypat+PGL 1854361 1854463 7.0999999999999993e-10 - . Parent=AX25_09200;Name=PF03486;Note=HI0933_like;Description=HI0933-like protein 26 | accn|CP007172 HMMER phypat+PGL 2007170 2008835 1.3999999999999993e-268 - . Parent=AX25_09945;Name=PF01268;Note=FTHFS;Description=Formate--tetrahydrofolate ligase 27 | accn|CP007172 HMMER phypat+PGL 2044479 2044818 7.6999999999999999e-12 + . Parent=AX25_10140;Name=PF07694;Note=5TM-5TMR_LYT;Description=5TMR of 5TMR-LYT 28 | accn|CP007172 HMMER phypat+PGL 2052110 2052410 5.2e-22 + . Parent=AX25_10165;Name=PF01228;Note=Gly_radical;Description=Glycine radical 29 | accn|CP007172 HMMER phypat+PGL 2052824 2054054 1.5999999999999998e-157 - . Parent=AX25_10170;Name=PF03486;Note=HI0933_like;Description=HI0933-like protein 30 | accn|CP007172 HMMER phypat+PGL 2056848 2057016 1.4000000000000001e-09 - . Parent=AX25_10190;Name=PF13432;Note=TPR_16;Description=Tetratricopeptide repeat 31 | accn|CP007172 HMMER phypat+PGL 2087935 2089081 1.6000000000000001e-55 - . Parent=AX25_10350;Name=PF01676;Note=Metalloenzyme;Description=Metalloenzyme superfamily 32 | accn|CP007172 HMMER phypat+PGL 2356553 2356661 4.3e-11 - . Parent=AX25_11640;Name=PF08352;Note=oligo_HPY;Description=Oligopeptide/dipeptide transporter, C-terminal region 33 | accn|CP007172 HMMER phypat+PGL 2357515 2357704 2.2000000000000002e-20 - . Parent=AX25_11645;Name=PF08352;Note=oligo_HPY;Description=Oligopeptide/dipeptide transporter, C-terminal region 34 | accn|CP007172 HMMER phypat+PGL 2357988 2358165 1.2999999999999998e-19 - . Parent=AX25_11650;Name=PF12911;Note=OppC_N;Description=N-terminal TM domain of oligopeptide transport permease C 35 | accn|CP007172 HMMER phypat+PGL 2362543 2362783 1.7e-24 + . Parent=AX25_11665;Name=PF13038;Note=DUF3899;Description=Domain of unknown function (DUF3899) 36 | accn|CP007172 HMMER phypat+PGL 2526690 2526861 1.7000000000000001e-26 - . 
Parent=AX25_12550;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 37 | accn|CP007172 HMMER phypat+PGL 2549614 2551111 1.3999999999999995e-90 - . Parent=AX25_12655;Name=PF01676;Note=Metalloenzyme;Description=Metalloenzyme superfamily 38 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Esculin_hydrolysis_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat+PGL 46498 46705 2.0999999999999996e-25 - . Parent=HMPREF0556_0045;Name=PF14310;Note=Fn3-like;Description=Fibronectin type III-like domain 3 | accn|NZ_GG692933 HMMER phypat+PGL 150500 150869 1.6999999999999998e-43 + . Parent=HMPREF0556_0236;Name=PF02903;Note=Alpha-amylase_N;Description=Alpha amylase, N-terminal ig-like domain 4 | accn|NZ_GG692933 HMMER phypat+PGL 315906 316113 6.0999999999999993e-17 + . Parent=HMPREF0556_0399;Name=PF14310;Note=Fn3-like;Description=Fibronectin type III-like domain 5 | accn|NZ_GG692933 HMMER phypat+PGL 408406 408559 6.4999999999999995e-11 - . Parent=HMPREF0556_0500;Name=PF06347;Note=SH3_4;Description=Bacterial SH3 domain 6 | accn|NZ_GG692933 HMMER phypat+PGL 420962 421757 3.7999999999999996e-26 - . Parent=HMPREF0556_0511;Name=PF00561;Note=Abhydrolase_1;Description=alpha/beta hydrolase fold 7 | accn|NZ_GG692933 HMMER phypat+PGL 445802 446168 3.7000000000000003e-30 + . Parent=HMPREF0556_0530;Name=PF13602;Note=ADH_zinc_N_2;Description=Zinc-binding dehydrogenase 8 | accn|NZ_GG692933 HMMER phypat+PGL 542031 542352 5.4999999999999994e-26 + . Parent=HMPREF0556_0638;Name=PF01678;Note=DAP_epimerase;Description=Diaminopimelate epimerase 9 | accn|NZ_GG692933 HMMER phypat+PGL 558881 559535 1.5999999999999999e-93 - . 
Parent=HMPREF0556_0655;Name=PF03306;Note=AAL_decarboxy;Description=Alpha-acetolactate decarboxylase 10 | accn|NZ_GG692933 HMMER phypat+PGL 568104 568293 3e-10 - . Parent=HMPREF0556_0663;Name=PF00670;Note=AdoHcyase_NAD;Description=S-adenosyl-L-homocysteine hydrolase, NAD binding domain 11 | accn|NZ_GG692933 HMMER phypat+PGL 590600 591383 1.3999999999999998e-77 + . Parent=HMPREF0556_0687;Name=PF02784;Note=Orn_Arg_deC_N;Description=Pyridoxal-dependent decarboxylase, pyridoxal binding domain 12 | accn|NZ_GG692933 HMMER phypat+PGL 591428 591719 7.3999999999999996e-24 + . Parent=HMPREF0556_0687;Name=PF00278;Note=Orn_DAP_Arg_deC;Description=Pyridoxal-dependent decarboxylase, C-terminal sheet domain 13 | accn|NZ_GG692932 HMMER phypat+PGL 180528 180861 4.4999999999999998e-12 + . Parent=HMPREF0556_1054;Name=PF00561;Note=Abhydrolase_1;Description=alpha/beta hydrolase fold 14 | accn|NZ_GG692932 HMMER phypat+PGL 202634 202862 9.4999999999999999e-13 + . Parent=HMPREF0556_1082;Name=PF13704;Note=Glyco_tranf_2_4;Description=Glycosyl transferase family 2 15 | accn|NZ_GG692932 HMMER phypat+PGL 279346 279589 1.4e-11 + . Parent=HMPREF0556_1163;Name=PF00561;Note=Abhydrolase_1;Description=alpha/beta hydrolase fold 16 | accn|NZ_GG692932 HMMER phypat+PGL 404632 405235 1.5e-50 + . Parent=HMPREF0556_1305;Name=PF03932;Note=CutC;Description=CutC family 17 | accn|NZ_GG692932 HMMER phypat+PGL 437759 438071 2.9999999999999999e-16 - . Parent=HMPREF0556_1336;Name=PF14501;Note=HATPase_c_5;Description=GHKL domain 18 | accn|NZ_GG692932 HMMER phypat+PGL 463316 463781 2e-16 - . Parent=HMPREF0556_1362;Name=PF08713;Note=DNA_alkylation;Description=DNA alkylation repair enzyme 19 | accn|NZ_GG692932 HMMER phypat+PGL 749536 749680 2.7e-11 - . Parent=HMPREF0556_1637;Name=PF06347;Note=SH3_4;Description=Bacterial SH3 domain 20 | accn|NZ_GG692932 HMMER phypat+PGL 772637 773684 5.0999999999999991e-145 + . 
Parent=HMPREF0556_1656;Name=PF04371;Note=PAD_porph;Description=Porphyromonas-type peptidyl-arginine deiminase 21 | accn|NZ_GG692932 HMMER phypat+PGL 820111 820576 9.300000000000001e-10 + . Parent=HMPREF0556_1707;Name=PF10662;Note=PduV-EutP;Description=Ethanolamine utilisation - propanediol utilisation 22 | accn|NZ_GG692932 HMMER phypat+PGL 1014991 1015630 2.7000000000000002e-25 - . Parent=HMPREF0556_1902;Name=PF00561;Note=Abhydrolase_1;Description=alpha/beta hydrolase fold 23 | accn|NZ_GG692937 HMMER phypat+PGL 38207 38969 1.1e-28 - . Parent=HMPREF0556_1955;Name=PF02534;Note=T4SS-DNA_transf;Description=Type IV secretory system Conjugative DNA transfer 24 | accn|NZ_GG692935 HMMER phypat+PGL 6487 6913 9.9999999999999998e-17 + . Parent=HMPREF0556_1999;Name=PF13602;Note=ADH_zinc_N_2;Description=Zinc-binding dehydrogenase 25 | accn|NZ_GG692935 HMMER phypat+PGL 15434 15608 5.1000000000000006e-27 - . Parent=HMPREF0556_2009;Name=PF01361;Note=Tautomerase;Description=Tautomerase enzyme 26 | accn|NZ_GG692935 HMMER phypat+PGL 107066 107267 2.1999999999999999e-15 + . Parent=HMPREF0556_2098;Name=PF13345;Note=DUF4098;Description=Domain of unknown function (DUF4098) 27 | accn|NZ_GG692935 HMMER phypat+PGL 108433 108655 1.8000000000000001e-15 + . Parent=HMPREF0556_2099;Name=PF13345;Note=DUF4098;Description=Domain of unknown function (DUF4098) 28 | accn|NZ_GG692935 HMMER phypat+PGL 109469 110312 3.1999999999999996e-108 - . Parent=HMPREF0556_2102;Name=PF02065;Note=Melibiase;Description=Melibiase 29 | accn|NZ_GG692935 HMMER phypat+PGL 111425 111662 1.1e-32 - . Parent=HMPREF0556_2103;Name=PF02065;Note=Melibiase;Description=Melibiase 30 | accn|NZ_GG692935 HMMER phypat+PGL 161762 162677 1.1999999999999997e-97 + . Parent=HMPREF0556_2150;Name=PF01244;Note=Peptidase_M19;Description=Membrane dipeptidase (Peptidase family M19) 31 | accn|NZ_GG692935 HMMER phypat+PGL 203380 203677 7.5e-10 + . 
Parent=HMPREF0556_2185;Name=PF14501;Note=HATPase_c_5;Description=GHKL domain 32 | accn|NZ_GG692934 HMMER phypat+PGL 1198 1573 8.2000000000000001e-11 - . Parent=HMPREF0556_2243;Name=PF13602;Note=ADH_zinc_N_2;Description=Zinc-binding dehydrogenase 33 | accn|NZ_GG692934 HMMER phypat+PGL 2213 2603 1.2e-10 - . Parent=HMPREF0556_2244;Name=PF13602;Note=ADH_zinc_N_2;Description=Zinc-binding dehydrogenase 34 | accn|NZ_GG692934 HMMER phypat+PGL 61460 62072 8.0999999999999985e-40 - . Parent=HMPREF0556_2318;Name=PF00722;Note=Glyco_hydro_16;Description=Glycosyl hydrolases family 16 35 | accn|NZ_GG692934 HMMER phypat+PGL 191773 192082 2.9999999999999999e-21 + . Parent=HMPREF0556_2445;Name=PF14501;Note=HATPase_c_5;Description=GHKL domain 36 | accn|NZ_GG692934 HMMER phypat+PGL 209912 210644 5.4999999999999997e-11 - . Parent=HMPREF0556_2463;Name=PF00561;Note=Abhydrolase_1;Description=alpha/beta hydrolase fold 37 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_grayi_DSM_20601_Facultative_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|NZ_GG692936 HMMER phypat+PGL 4608 4803 1.2e-25 - . Parent=HMPREF0556_0005;Name=PF04167;Note=DUF402;Description=Protein of unknown function (DUF402) 3 | accn|NZ_GG692936 HMMER phypat+PGL 21307 21733 6.9999999999999984e-43 + . Parent=HMPREF0556_0024;Name=PF01712;Note=dNK;Description=Deoxynucleoside kinase 4 | accn|NZ_GG692936 HMMER phypat+PGL 24905 25217 1.5e-09 + . Parent=HMPREF0556_0029;Name=PF07972;Note=Flavodoxin_NdrI;Description=NrdI Flavodoxin like 5 | accn|NZ_GG692936 HMMER phypat+PGL 39008 39500 4.3999999999999998e-29 + . Parent=HMPREF0556_0042;Name=PF01154;Note=HMG_CoA_synt_N;Description=Hydroxymethylglutaryl-coenzyme A synthase N terminal 6 | accn|NZ_GG692936 HMMER phypat+PGL 50429 50531 3.4000000000000003e-09 - . 
Parent=HMPREF0556_0050;Name=PF03486;Note=HI0933_like;Description=HI0933-like protein 7 | accn|NZ_GG692933 HMMER phypat+PGL 77786 78101 9.6999999999999991e-29 + . Parent=HMPREF0556_0168;Name=PF01228;Note=Gly_radical;Description=Glycine radical 8 | accn|NZ_GG692933 HMMER phypat+PGL 189902 190457 1.6000000000000001e-09 + . Parent=HMPREF0556_0279;Name=PF03486;Note=HI0933_like;Description=HI0933-like protein 9 | accn|NZ_GG692933 HMMER phypat+PGL 217604 217778 2.2999999999999999e-16 + . Parent=HMPREF0556_0302;Name=PF08352;Note=oligo_HPY;Description=Oligopeptide/dipeptide transporter, C-terminal region 10 | accn|NZ_GG692933 HMMER phypat+PGL 218609 218717 2.7999999999999996e-10 + . Parent=HMPREF0556_0303;Name=PF08352;Note=oligo_HPY;Description=Oligopeptide/dipeptide transporter, C-terminal region 11 | accn|NZ_GG692933 HMMER phypat+PGL 225212 225509 3.6000000000000003e-20 - . Parent=HMPREF0556_0309;Name=PF13273;Note=DUF4064;Description=Protein of unknown function (DUF4064) 12 | accn|NZ_GG692933 HMMER phypat+PGL 240165 240312 6.3000000000000001e-17 + . Parent=HMPREF0556_0325;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 13 | accn|NZ_GG692933 HMMER phypat+PGL 332536 333682 2.7000000000000002e-58 + . Parent=HMPREF0556_0415;Name=PF01676;Note=Metalloenzyme;Description=Metalloenzyme superfamily 14 | accn|NZ_GG692933 HMMER phypat+PGL 470172 470409 2.6e-13 + . Parent=HMPREF0556_0559;Name=PF02030;Note=Lipoprotein_8;Description=Hypothetical lipoprotein (MG045 family) 15 | accn|NZ_GG692933 HMMER phypat+PGL 557069 557357 7.7000000000000006e-17 + . Parent=HMPREF0556_0653;Name=PF13273;Note=DUF4064;Description=Protein of unknown function (DUF4064) 16 | accn|NZ_GG692933 HMMER phypat+PGL 618836 619010 2.0000000000000001e-10 + . Parent=HMPREF0556_0716;Name=PF13432;Note=TPR_16;Description=Tetratricopeptide repeat 17 | accn|NZ_GG692933 HMMER phypat+PGL 621754 622981 2.8999999999999989e-155 + . 
Parent=HMPREF0556_0720;Name=PF03486;Note=HI0933_like;Description=HI0933-like protein 18 | accn|NZ_GG692933 HMMER phypat+PGL 660336 662001 1.899999999999999e-268 + . Parent=HMPREF0556_0762;Name=PF01268;Note=FTHFS;Description=Formate--tetrahydrofolate ligase 19 | accn|NZ_GG692932 HMMER phypat+PGL 106532 106775 1.4e-22 - . Parent=HMPREF0556_0983;Name=PF13038;Note=DUF3899;Description=Domain of unknown function (DUF3899) 20 | accn|NZ_GG692932 HMMER phypat+PGL 110179 110341 1.7999999999999999e-21 + . Parent=HMPREF0556_0986;Name=PF12911;Note=OppC_N;Description=N-terminal TM domain of oligopeptide transport permease C 21 | accn|NZ_GG692932 HMMER phypat+PGL 111832 112021 2.6000000000000002e-21 + . Parent=HMPREF0556_0987;Name=PF08352;Note=oligo_HPY;Description=Oligopeptide/dipeptide transporter, C-terminal region 22 | accn|NZ_GG692932 HMMER phypat+PGL 112901 113048 1.5e-11 + . Parent=HMPREF0556_0988;Name=PF08352;Note=oligo_HPY;Description=Oligopeptide/dipeptide transporter, C-terminal region 23 | accn|NZ_GG692932 HMMER phypat+PGL 127296 127611 7.5999999999999999e-13 + . Parent=HMPREF0556_1001;Name=PF07972;Note=Flavodoxin_NdrI;Description=NrdI Flavodoxin like 24 | accn|NZ_GG692932 HMMER phypat+PGL 135090 135231 4.1000000000000004e-15 + . Parent=HMPREF0556_1013;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 25 | accn|NZ_GG692932 HMMER phypat+PGL 160178 161405 1.6e-11 + . Parent=HMPREF0556_1035;Name=PF03486;Note=HI0933_like;Description=HI0933-like protein 26 | accn|NZ_GG692932 HMMER phypat+PGL 327645 327996 3.6000000000000001e-24 + . Parent=HMPREF0556_1223;Name=PF13273;Note=DUF4064;Description=Protein of unknown function (DUF4064) 27 | accn|NZ_GG692932 HMMER phypat+PGL 420472 420568 3e-09 + . Parent=HMPREF0556_1321;Name=PF03486;Note=HI0933_like;Description=HI0933-like protein 28 | accn|NZ_GG692932 HMMER phypat+PGL 530310 530598 2.8999999999999998e-13 + . 
Parent=HMPREF0556_1430;Name=PF13273;Note=DUF4064;Description=Protein of unknown function (DUF4064) 29 | accn|NZ_GG692932 HMMER phypat+PGL 579616 579802 1.5e-32 + . Parent=HMPREF0556_1481;Name=PF03672;Note=UPF0154;Description=Uncharacterised protein family (UPF0154) 30 | accn|NZ_GG692932 HMMER phypat+PGL 585708 585780 9.4999999999999995e-12 + . Parent=HMPREF0556_1487;Name=PF03672;Note=UPF0154;Description=Uncharacterised protein family (UPF0154) 31 | accn|NZ_GG692932 HMMER phypat+PGL 654032 654143 1.2e-09 + . Parent=HMPREF0556_1549;Name=PF03486;Note=HI0933_like;Description=HI0933-like protein 32 | accn|NZ_GG692932 HMMER phypat+PGL 684201 684507 1.2e-34 + . Parent=HMPREF0556_1579;Name=PF07085;Note=DRTGG;Description=DRTGG domain 33 | accn|NZ_GG692932 HMMER phypat+PGL 761549 761741 8.0999999999999992e-11 + . Parent=HMPREF0556_1647;Name=PF13432;Note=TPR_16;Description=Tetratricopeptide repeat 34 | accn|NZ_GG692932 HMMER phypat+PGL 883548 883866 3.2000000000000001e-45 - . Parent=HMPREF0556_1769;Name=PF01228;Note=Gly_radical;Description=Glycine radical 35 | accn|NZ_GG692935 HMMER phypat+PGL 168775 170272 1.1999999999999996e-91 + . Parent=HMPREF0556_2156;Name=PF01676;Note=Metalloenzyme;Description=Metalloenzyme superfamily 36 | accn|NZ_GG692935 HMMER phypat+PGL 185401 185569 8.1999999999999997e-27 + . Parent=HMPREF0556_2172;Name=PF03123;Note=CAT_RBD;Description=CAT RNA binding domain 37 | accn|NZ_GG692934 HMMER phypat+PGL 18919 19678 2.3e-42 - . Parent=HMPREF0556_2263;Name=PF10592;Note=AIPR;Description=AIPR protein 38 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/phypat+PGL/feat_gffs/Listeria_ivanovii_WSLC3009_Methyl_red_important_features.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | accn|CP007172 HMMER phypat+PGL 133910 134078 9.9999999999999997e-29 + . 
Parent=AX25_00625;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 3 | accn|CP007172 HMMER phypat+PGL 135512 135677 3.0000000000000001e-27 + . Parent=AX25_00630;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 4 | accn|CP007172 HMMER phypat+PGL 336138 336357 1.8e-09 + . Parent=AX25_01555;Name=PF12793;Note=SgrR_N;Description=Sugar transport-related sRNA regulator N-term 5 | accn|CP007172 HMMER phypat+PGL 437580 437754 5.6000000000000008e-21 + . Parent=AX25_02140;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 6 | accn|CP007172 HMMER phypat+PGL 440158 440323 4.4000000000000001e-21 + . Parent=AX25_02145;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 7 | accn|CP007172 HMMER phypat+PGL 445599 445761 5.0999999999999999e-26 + . Parent=AX25_02155;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 8 | accn|CP007172 HMMER phypat+PGL 449252 449417 3.9999999999999999e-19 + . Parent=AX25_02160;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 9 | accn|CP007172 HMMER phypat+PGL 541145 542735 7.9999999999999984e-64 + . Parent=AX25_02615;Name=PF03170;Note=BcsB;Description=Bacterial cellulose synthase subunit 10 | accn|CP007172 HMMER phypat+PGL 641115 641358 1.3e-31 + . Parent=AX25_03110;Name=PF11313;Note=DUF3116;Description=Protein of unknown function (DUF3116) 11 | accn|CP007172 HMMER phypat+PGL 709819 710242 6.5999999999999995e-71 - . Parent=AX25_03420;Name=PF12181;Note=MogR_DNAbind;Description=DNA binding domain of the motility gene repressor (MogR) 12 | accn|CP007172 HMMER phypat+PGL 731008 731317 5.9999999999999997e-13 + . Parent=AX25_03535;Name=PF07559;Note=FlaE;Description=Flagellar basal body protein FlaE 13 | accn|CP007172 HMMER phypat+PGL 1277477 1277645 4.0000000000000002e-25 + . 
Parent=AX25_06360;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 14 | accn|CP007172 HMMER phypat+PGL 1279213 1279381 1.5000000000000001e-26 + . Parent=AX25_06365;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 15 | accn|CP007172 HMMER phypat+PGL 1280575 1280743 2.9999999999999998e-25 + . Parent=AX25_06370;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 16 | accn|CP007172 HMMER phypat+PGL 1284329 1284494 1.2e-26 + . Parent=AX25_06375;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 17 | accn|CP007172 HMMER phypat+PGL 1285974 1286142 4.5999999999999997e-31 + . Parent=AX25_06380;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 18 | accn|CP007172 HMMER phypat+PGL 1287004 1287172 4.0999999999999994e-24 + . Parent=AX25_06385;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 19 | accn|CP007172 HMMER phypat+PGL 1288510 1288675 1.5000000000000001e-27 + . Parent=AX25_06390;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 20 | accn|CP007172 HMMER phypat+PGL 1292155 1292323 9.1999999999999996e-29 + . Parent=AX25_06400;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 21 | accn|CP007172 HMMER phypat+PGL 1293497 1293665 1.0000000000000001e-30 + . Parent=AX25_06405;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 22 | accn|CP007172 HMMER phypat+PGL 1299819 1300071 1.1e-38 + . Parent=AX25_06445;Name=PF11313;Note=DUF3116;Description=Protein of unknown function (DUF3116) 23 | accn|CP007172 HMMER phypat+PGL 1405241 1405610 7.4000000000000006e-30 + . Parent=AX25_07005;Name=PF01321;Note=Creatinase_N;Description=Creatinase/Prolidase N-terminal domain 24 | accn|CP007172 HMMER phypat+PGL 1654206 1654602 2.6e-25 + . 
Parent=AX25_08180;Name=PF01321;Note=Creatinase_N;Description=Creatinase/Prolidase N-terminal domain 25 | accn|CP007172 HMMER phypat+PGL 1829818 1829986 1.4999999999999998e-24 - . Parent=AX25_09110;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 26 | accn|CP007172 HMMER phypat+PGL 1831759 1831927 1.6e-27 - . Parent=AX25_09115;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 27 | accn|CP007172 HMMER phypat+PGL 2187832 2188057 2.0999999999999999e-12 + . Parent=AX25_10800;Name=PF12793;Note=SgrR_N;Description=Sugar transport-related sRNA regulator N-term 28 | accn|CP007172 HMMER phypat+PGL 2212826 2212988 1.3e-17 - . Parent=AX25_10945;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 29 | accn|CP007172 HMMER phypat+PGL 2350588 2350837 3.2000000000000002e-36 - . Parent=AX25_11615;Name=PF11313;Note=DUF3116;Description=Protein of unknown function (DUF3116) 30 | accn|CP007172 HMMER phypat+PGL 2442434 2443307 1.7999999999999993e-139 - . Parent=AX25_12055;Name=PF04227;Note=Indigoidine_A;Description=Indigoidine synthase A like protein 31 | accn|CP007172 HMMER phypat+PGL 2536285 2536414 2.3999999999999999e-12 - . Parent=AX25_12600;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 32 | accn|CP007172 HMMER phypat+PGL 2562339 2562507 1.1000000000000001e-18 - . Parent=AX25_12710;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 33 | accn|CP007172 HMMER phypat+PGL 2689901 2690141 3.4000000000000004e-17 - . Parent=AX25_13365;Name=PF11313;Note=DUF3116;Description=Protein of unknown function (DUF3116) 34 | accn|CP007172 HMMER phypat+PGL 2690568 2690736 1.3999999999999999e-30 - . Parent=AX25_13370;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 35 | accn|CP007172 HMMER phypat+PGL 2691889 2692057 2.5000000000000001e-28 - . 
Parent=AX25_13375;Name=PF12354;Note=Internalin_N;Description=Bacterial adhesion/invasion protein N terminal 36 | accn|CP007172 HMMER phypat+PGL 2741684 2742632 1.6999999999999995e-55 - . Parent=AX25_13705;Name=PF02126;Note=PTE;Description=Phosphotriesterase family 37 | -------------------------------------------------------------------------------- /traitar/data/sample_data/traitar_out/phenotype_prediction/predictions_flat_single-votes_combined.txt: -------------------------------------------------------------------------------- 1 | sample phenotype phenotype model 2 | Listeria_grayi_DSM_20601 Starch hydrolysis 5 phypat 3 | Listeria_grayi_DSM_20601 Starch hydrolysis 5 phypat+PGL 4 | Listeria_grayi_DSM_20601 Catalase 5 phypat 5 | Listeria_grayi_DSM_20601 Catalase 5 phypat+PGL 6 | Listeria_grayi_DSM_20601 Growth at 42°C 5 phypat 7 | Listeria_grayi_DSM_20601 Growth at 42°C 3 phypat+PGL 8 | Listeria_grayi_DSM_20601 Voges Proskauer 5 phypat 9 | Listeria_grayi_DSM_20601 Voges Proskauer 5 phypat+PGL 10 | Listeria_grayi_DSM_20601 Coccus 1 phypat 11 | Listeria_grayi_DSM_20601 D-Mannitol 4 phypat 12 | Listeria_grayi_DSM_20601 D-Mannitol 1 phypat+PGL 13 | Listeria_grayi_DSM_20601 Trehalose 5 phypat 14 | Listeria_grayi_DSM_20601 Trehalose 5 phypat+PGL 15 | Listeria_grayi_DSM_20601 Motile 5 phypat 16 | Listeria_grayi_DSM_20601 Motile 5 phypat+PGL 17 | Listeria_grayi_DSM_20601 Maltose 5 phypat 18 | Listeria_grayi_DSM_20601 Maltose 5 phypat+PGL 19 | Listeria_grayi_DSM_20601 Growth on ordinary blood agar 5 phypat 20 | Listeria_grayi_DSM_20601 Growth on ordinary blood agar 5 phypat+PGL 21 | Listeria_grayi_DSM_20601 Gas from glucose 2 phypat+PGL 22 | Listeria_grayi_DSM_20601 Acetate utilization 5 phypat 23 | Listeria_grayi_DSM_20601 Malonate 5 phypat 24 | Listeria_grayi_DSM_20601 Malonate 5 phypat+PGL 25 | Listeria_grayi_DSM_20601 Yellow pigment 1 phypat+PGL 26 | Listeria_grayi_DSM_20601 DNase 3 phypat 27 | Listeria_grayi_DSM_20601 myo-Inositol 1 phypat+PGL 28 | 
Listeria_grayi_DSM_20601 D-Mannose 3 phypat 29 | Listeria_grayi_DSM_20601 D-Mannose 4 phypat+PGL 30 | Listeria_grayi_DSM_20601 Gram positive 5 phypat 31 | Listeria_grayi_DSM_20601 Gram positive 5 phypat+PGL 32 | Listeria_grayi_DSM_20601 Growth in 6.5% NaCl 5 phypat 33 | Listeria_grayi_DSM_20601 Growth in 6.5% NaCl 5 phypat+PGL 34 | Listeria_grayi_DSM_20601 Bile-susceptible 4 phypat+PGL 35 | Listeria_grayi_DSM_20601 Glucose fermenter 5 phypat 36 | Listeria_grayi_DSM_20601 Glucose fermenter 5 phypat+PGL 37 | Listeria_grayi_DSM_20601 Salicin 5 phypat 38 | Listeria_grayi_DSM_20601 Salicin 5 phypat+PGL 39 | Listeria_grayi_DSM_20601 Mucate utilization 1 phypat+PGL 40 | Listeria_grayi_DSM_20601 Alkaline phosphatase 4 phypat 41 | Listeria_grayi_DSM_20601 Alkaline phosphatase 5 phypat+PGL 42 | Listeria_grayi_DSM_20601 Lactose 5 phypat 43 | Listeria_grayi_DSM_20601 Lactose 5 phypat+PGL 44 | Listeria_grayi_DSM_20601 Growth in KCN 5 phypat 45 | Listeria_grayi_DSM_20601 Growth in KCN 2 phypat+PGL 46 | Listeria_grayi_DSM_20601 Citrate 4 phypat 47 | Listeria_grayi_DSM_20601 Citrate 5 phypat+PGL 48 | Listeria_grayi_DSM_20601 Facultative 5 phypat 49 | Listeria_grayi_DSM_20601 Facultative 5 phypat+PGL 50 | Listeria_grayi_DSM_20601 Esculin hydrolysis 5 phypat 51 | Listeria_grayi_DSM_20601 Esculin hydrolysis 5 phypat+PGL 52 | Listeria_grayi_DSM_20601 Bacillus or coccobacillus 2 phypat 53 | Listeria_grayi_DSM_20601 Bacillus or coccobacillus 5 phypat+PGL 54 | Listeria_grayi_DSM_20601 Sucrose 3 phypat+PGL 55 | Listeria_grayi_DSM_20601 Coagulase production 5 phypat 56 | Listeria_grayi_DSM_20601 Coagulase production 5 phypat+PGL 57 | Listeria_grayi_DSM_20601 Arginine dihydrolase 2 phypat 58 | Listeria_grayi_DSM_20601 Arginine dihydrolase 4 phypat+PGL 59 | Listeria_ivanovii_WSLC3009 D-Xylose 4 phypat+PGL 60 | Listeria_ivanovii_WSLC3009 Starch hydrolysis 5 phypat 61 | Listeria_ivanovii_WSLC3009 Starch hydrolysis 5 phypat+PGL 62 | Listeria_ivanovii_WSLC3009 Catalase 5 phypat 63 | 
Listeria_ivanovii_WSLC3009 Catalase 5 phypat+PGL 64 | Listeria_ivanovii_WSLC3009 Growth at 42°C 5 phypat 65 | Listeria_ivanovii_WSLC3009 Growth at 42°C 5 phypat+PGL 66 | Listeria_ivanovii_WSLC3009 Voges Proskauer 5 phypat 67 | Listeria_ivanovii_WSLC3009 Voges Proskauer 5 phypat+PGL 68 | Listeria_ivanovii_WSLC3009 Trehalose 5 phypat 69 | Listeria_ivanovii_WSLC3009 Trehalose 5 phypat+PGL 70 | Listeria_ivanovii_WSLC3009 Motile 5 phypat 71 | Listeria_ivanovii_WSLC3009 Motile 5 phypat+PGL 72 | Listeria_ivanovii_WSLC3009 Coccus - pairs or chains predominate 1 phypat+PGL 73 | Listeria_ivanovii_WSLC3009 Maltose 5 phypat 74 | Listeria_ivanovii_WSLC3009 Maltose 5 phypat+PGL 75 | Listeria_ivanovii_WSLC3009 Growth on ordinary blood agar 5 phypat 76 | Listeria_ivanovii_WSLC3009 Growth on ordinary blood agar 5 phypat+PGL 77 | Listeria_ivanovii_WSLC3009 Gas from glucose 2 phypat+PGL 78 | Listeria_ivanovii_WSLC3009 Acetate utilization 5 phypat 79 | Listeria_ivanovii_WSLC3009 Malonate 5 phypat 80 | Listeria_ivanovii_WSLC3009 Malonate 5 phypat+PGL 81 | Listeria_ivanovii_WSLC3009 DNase 2 phypat 82 | Listeria_ivanovii_WSLC3009 Methyl red 5 phypat 83 | Listeria_ivanovii_WSLC3009 Methyl red 5 phypat+PGL 84 | Listeria_ivanovii_WSLC3009 D-Mannose 3 phypat+PGL 85 | Listeria_ivanovii_WSLC3009 Gram positive 5 phypat 86 | Listeria_ivanovii_WSLC3009 Gram positive 5 phypat+PGL 87 | Listeria_ivanovii_WSLC3009 Growth in 6.5% NaCl 5 phypat 88 | Listeria_ivanovii_WSLC3009 Growth in 6.5% NaCl 5 phypat+PGL 89 | Listeria_ivanovii_WSLC3009 Bile-susceptible 1 phypat+PGL 90 | Listeria_ivanovii_WSLC3009 Glucose fermenter 5 phypat 91 | Listeria_ivanovii_WSLC3009 Glucose fermenter 5 phypat+PGL 92 | Listeria_ivanovii_WSLC3009 Salicin 5 phypat 93 | Listeria_ivanovii_WSLC3009 Salicin 5 phypat+PGL 94 | Listeria_ivanovii_WSLC3009 Alkaline phosphatase 3 phypat 95 | Listeria_ivanovii_WSLC3009 Alkaline phosphatase 5 phypat+PGL 96 | Listeria_ivanovii_WSLC3009 Lactose 5 phypat 97 | Listeria_ivanovii_WSLC3009 Lactose 
5 phypat+PGL 98 | Listeria_ivanovii_WSLC3009 Growth in KCN 5 phypat 99 | Listeria_ivanovii_WSLC3009 Growth in KCN 2 phypat+PGL 100 | Listeria_ivanovii_WSLC3009 Tartrate utilization 2 phypat+PGL 101 | Listeria_ivanovii_WSLC3009 Citrate 4 phypat+PGL 102 | Listeria_ivanovii_WSLC3009 Facultative 5 phypat 103 | Listeria_ivanovii_WSLC3009 Facultative 5 phypat+PGL 104 | Listeria_ivanovii_WSLC3009 Esculin hydrolysis 5 phypat 105 | Listeria_ivanovii_WSLC3009 Esculin hydrolysis 5 phypat+PGL 106 | Listeria_ivanovii_WSLC3009 Bacillus or coccobacillus 4 phypat 107 | Listeria_ivanovii_WSLC3009 Bacillus or coccobacillus 5 phypat+PGL 108 | Listeria_ivanovii_WSLC3009 D-Sorbitol 4 phypat 109 | Listeria_ivanovii_WSLC3009 D-Sorbitol 3 phypat+PGL 110 | Listeria_ivanovii_WSLC3009 Coagulase production 5 phypat 111 | Listeria_ivanovii_WSLC3009 Coagulase production 5 phypat+PGL 112 | Listeria_ivanovii_WSLC3009 Arginine dihydrolase 3 phypat 113 | Listeria_ivanovii_WSLC3009 Arginine dihydrolase 2 phypat+PGL 114 | --------------------------------------------------------------------------------