├── scripts ├── __init__.py ├── sha256sum ├── intersect_items.py ├── join_tables.py ├── flu_regions.py ├── export_titers_for_auspice_v1.py ├── table_to_node_data.py ├── xls2csv.py ├── prune_reference.py ├── sanitize_trees.py ├── import_tip_clades.py ├── glyc.py └── sequence_export.py ├── config ├── h3n2 │ ├── ha │ │ ├── exclude-sites.txt │ │ ├── prioritized_seqs_file.tsv │ │ ├── genemap.gff │ │ ├── emerging_haplotypes.tsv │ │ └── reference.fasta │ ├── na │ │ ├── genemap.gff │ │ ├── prioritized_seqs_file.tsv │ │ └── reference.fasta │ ├── mp │ │ └── prioritized_seqs_file.tsv │ ├── np │ │ └── prioritized_seqs_file.tsv │ ├── ns │ │ └── prioritized_seqs_file.tsv │ ├── pa │ │ └── prioritized_seqs_file.tsv │ ├── pb1 │ │ └── prioritized_seqs_file.tsv │ └── pb2 │ │ └── prioritized_seqs_file.tsv ├── vic │ ├── ha │ │ ├── exclude-sites.txt │ │ ├── prioritized_seqs_file.tsv │ │ ├── emerging_haplotypes.tsv │ │ └── genemap.gff │ ├── mp │ │ └── prioritized_seqs_file.tsv │ ├── na │ │ ├── prioritized_seqs_file.tsv │ │ ├── genemap.gff │ │ └── reference.fasta │ ├── np │ │ └── prioritized_seqs_file.tsv │ ├── ns │ │ └── prioritized_seqs_file.tsv │ ├── pa │ │ └── prioritized_seqs_file.tsv │ ├── pb1 │ │ └── prioritized_seqs_file.tsv │ ├── pb2 │ │ └── prioritized_seqs_file.tsv │ ├── vaccine.json │ └── outliers.txt ├── yam │ ├── ha │ │ ├── exclude-sites.txt │ │ ├── clades.tsv │ │ ├── subclades.tsv │ │ ├── genemap.gff │ │ └── reference.fasta │ ├── na │ │ ├── genemap.gff │ │ ├── subclades.tsv │ │ └── reference.fasta │ ├── outliers.txt │ ├── vaccine.json │ └── reference_strains.txt ├── h1n1pdm │ ├── ha │ │ ├── exclude-sites.txt │ │ ├── prioritized_seqs_file.tsv │ │ ├── genemap.gff │ │ └── emerging_haplotypes.tsv │ ├── mp │ │ └── prioritized_seqs_file.tsv │ ├── na │ │ ├── prioritized_seqs_file.tsv │ │ ├── genemap.gff │ │ └── reference.fasta │ ├── np │ │ └── prioritized_seqs_file.tsv │ ├── ns │ │ └── prioritized_seqs_file.tsv │ ├── pa │ │ └── prioritized_seqs_file.tsv │ ├── pb1 │ │ └── 
prioritized_seqs_file.tsv │ └── pb2 │ │ └── prioritized_seqs_file.tsv ├── clades_for_titer_plots_h1n1pdm.txt ├── clades_for_titer_plots_vic.txt ├── clades_for_titer_plots_h3n2.txt ├── subclades_for_titer_plots_vic.txt ├── references_to_exclude_in_titer_plots_h3n2.txt ├── colors.tsv ├── subclades_for_titer_plots_h1n1pdm.txt ├── references_to_exclude_in_titer_plots_h1n1pdm.txt ├── hi_titer_count_colors.tsv ├── subclades_for_titer_plots_h3n2.txt ├── references_for_titer_plots │ ├── h3n2 │ │ ├── egg_fra.txt │ │ ├── cell_fra.txt │ │ ├── egg_hi.txt │ │ └── cell_hi.txt │ ├── vic │ │ ├── egg_hi.txt │ │ └── cell_hi.txt │ └── h1n1pdm │ │ ├── egg_hi.txt │ │ └── cell_hi.txt ├── references_to_exclude_in_titer_plots_vic.txt ├── references_to_include_in_titer_plots_h1n1pdm.txt ├── mask_config.tsv ├── nextstrain_clades_yam_ha.tsv ├── references_to_include_in_titer_plots_h3n2.txt ├── frequency_weights_by_region.json ├── distance_maps │ ├── h3n2 │ │ ├── ha │ │ │ ├── koel.json │ │ │ ├── welsh_epitope_sites.json │ │ │ ├── bush_epitope_A.json │ │ │ ├── bush_epitope_E.json │ │ │ ├── bush_epitope_B.json │ │ │ ├── bush_epitope_C.json │ │ │ ├── bush_epitope_D.json │ │ │ ├── luksza.json │ │ │ └── shih.json │ │ └── na │ │ │ └── munoz.json │ └── h1n1pdm │ │ └── ha │ │ └── canton.json ├── distance_maps.tsv ├── colors_for_titer_plots_vic.tsv ├── nextstrain_clades_vic_ha.tsv ├── references_to_include_in_titer_plots_vic.txt ├── colors_for_titer_plots_h1n1pdm.tsv ├── colors_for_titer_plots_h3n2.tsv ├── h2n2 │ └── reference_strains.txt ├── h1n1 │ └── reference_strains.txt ├── nextstrain_clades_h1n1pdm_ha.tsv └── nextstrain_clades_h3n2_ha.tsv ├── profiles ├── allflu │ ├── vic_include.txt │ ├── yam_include.txt │ ├── h1n1pdm_include.txt │ ├── config.yaml │ └── h3n2_include.txt ├── gisaid │ ├── config.yaml │ └── builds.yaml ├── europe │ └── config.yaml ├── example │ ├── config.yaml │ └── builds.yaml ├── nextstrain │ └── config.yaml ├── nextflu-private │ ├── README.md │ ├── vic │ │ └── ha │ │ │ └── 
clades.tsv │ └── deploy.smk ├── ci │ ├── prepare_data.smk │ └── builds.yaml ├── scicore │ ├── submit.sh │ ├── cluster.json │ └── config.yaml ├── nextstrain-public │ ├── deploy.smk │ └── rename.smk ├── nextclade.yaml ├── full-trees │ └── h1n1pdm_titer_strains.txt ├── neut-library.yaml └── nextflu-private-forecasts │ └── rename.smk ├── nextclade ├── dataset_config │ ├── yam │ │ ├── includes.txt │ │ └── ha │ │ │ └── JN993010 │ │ │ ├── annotation.gff │ │ │ ├── pathogen.json │ │ │ ├── README.md │ │ │ └── reference.fasta │ ├── h3n2 │ │ ├── na │ │ │ └── EPI1857215 │ │ │ │ ├── annotation.gff │ │ │ │ └── reference.fasta │ │ ├── ha │ │ │ ├── CY163680 │ │ │ │ ├── annotation.gff │ │ │ │ ├── pathogen.json │ │ │ │ └── reference.fasta │ │ │ └── EPI1857216 │ │ │ │ ├── annotation.gff │ │ │ │ └── reference.fasta │ │ ├── pb2 │ │ │ ├── annotation.gff │ │ │ └── README.md │ │ ├── np │ │ │ ├── annotation.gff │ │ │ ├── README.md │ │ │ └── reference.fasta │ │ ├── pb1 │ │ │ ├── annotation.gff │ │ │ └── README.md │ │ ├── mp │ │ │ ├── annotation.gff │ │ │ ├── README.md │ │ │ └── reference.fasta │ │ ├── ns │ │ │ ├── README.md │ │ │ ├── annotation.gff │ │ │ └── reference.fasta │ │ └── pa │ │ │ ├── annotation.gff │ │ │ ├── README.md │ │ │ └── pathogen.json │ ├── vic │ │ ├── ha │ │ │ ├── KX058884 │ │ │ │ ├── annotation.gff │ │ │ │ └── pathogen.json │ │ │ └── EPI1926632 │ │ │ │ ├── annotation.gff │ │ │ │ ├── pathogen.json │ │ │ │ └── README.md │ │ ├── na │ │ │ └── CY073894 │ │ │ │ ├── annotation.gff │ │ │ │ └── reference.fasta │ │ ├── includes.txt │ │ └── pa │ │ │ ├── README.md │ │ │ └── pathogen.json │ └── h1n1pdm │ │ ├── ha │ │ ├── CY121680 │ │ │ ├── annotation.gff │ │ │ ├── pathogen.json │ │ │ └── reference.fasta │ │ └── MW626062 │ │ │ ├── annotation.gff │ │ │ └── pathogen.json │ │ ├── na │ │ └── MW626056 │ │ │ ├── annotation.gff │ │ │ └── reference.fasta │ │ ├── pb1 │ │ ├── annotation.gff │ │ └── README.md │ │ ├── pb2 │ │ ├── annotation.gff │ │ └── README.md │ │ ├── np │ │ ├── annotation.gff │ 
│ ├── README.md │ │ └── reference.fasta │ │ ├── mp │ │ ├── README.md │ │ ├── annotation.gff │ │ └── reference.fasta │ │ ├── ns │ │ ├── README.md │ │ ├── annotation.gff │ │ └── reference.fasta │ │ └── pa │ │ ├── annotation.gff │ │ ├── README.md │ │ └── pathogen.json ├── config │ ├── human-nai-marker-table_for-publication_final_20240918.pdf │ ├── pa-marker-who-table_07-08-2024_updated_final-version.pdf │ ├── vic │ │ └── ha │ │ │ └── KX058884 │ │ │ └── founder_sequences_SigPep.fasta │ ├── h1n1pdm │ │ └── ha │ │ │ ├── CY121680 │ │ │ └── founder_sequences_SigPep.fasta │ │ │ └── MW626062 │ │ │ └── founder_sequences_SigPep.fasta │ ├── auspice_config.json │ └── h3n2 │ │ └── ha │ │ ├── CY163680 │ │ └── founder_sequences_SigPep.fasta │ │ └── EPI1857216 │ │ └── founder_sequences_SigPep.fasta └── scripts │ └── extract_founder_sequences.py ├── .gitattributes ├── images ├── 03-download-metadata.png ├── 04-download-sequences.png ├── 01-search-gisaid-for-h3n2.png └── 02-gisaid-search-results.png ├── ingest ├── vendored │ ├── s3-object-exists │ ├── .shellcheckrc │ ├── .github │ │ ├── workflows │ │ │ ├── ci.yaml │ │ │ └── pre-commit.yaml │ │ ├── pull_request_template.md │ │ └── dependabot.yml │ ├── sha256sum │ ├── .gitrepo │ ├── notify-on-job-fail │ ├── notify-on-diff │ ├── notify-on-job-start │ ├── .pre-commit-config.yaml │ ├── cloudfront-invalidate │ ├── trigger-on-new-data │ ├── download-from-s3 │ └── trigger ├── build-configs │ ├── manual-upload │ │ ├── config.yaml │ │ └── Snakefile │ └── nextstrain-automation │ │ └── config.yaml ├── defaults │ ├── strain_name_fixes.tsv │ ├── h3n2 │ │ └── prioritized_strain_ids.tsv │ └── final_annotations.tsv ├── scripts │ ├── lowercase-fields │ ├── dedup-by-gisaid-id │ └── annotate-with-gihsn └── Snakefile ├── example_data └── haplotypes.tsv ├── workflow ├── envs │ ├── notebook.yaml │ └── nextstrain.yaml └── snakemake_rules │ └── common.smk ├── nextstrain-pathogen.yaml ├── .github ├── workflows │ ├── ci.yaml │ ├── run-public-builds.yaml │ ├── 
run-nextflu-private-builds.yaml │ ├── run-private-nextflu-builds.yaml │ ├── run-nextclade.yaml │ └── ingest.yaml └── dependabot.yml ├── notebooks └── README.md ├── source-data ├── 2018_South_America_flu_vaccination_coverage.tsv └── 2018_Europe_flu_vaccination_coverage.tsv ├── models ├── welsh_escape.json ├── ne_star-lbi.json ├── cTiter_x-ne_star.json ├── fra_cTiter_x-ne_star.json ├── cell_fra_cTiter_x-ne_star.json ├── human_cell_fra_cTiter_x-ne_star.json └── human_cell_hi_cTiter_x-ne_star.json ├── .gitignore ├── flu-forecasting └── scripts │ ├── merge_weighted_distances_to_future.py │ └── calculate_clade_frequency_forecasts.py └── zoltar └── project.json /scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/h3n2/ha/exclude-sites.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/vic/ha/exclude-sites.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/yam/ha/exclude-sites.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /profiles/allflu/vic_include.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /profiles/allflu/yam_include.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /profiles/allflu/h1n1pdm_include.txt: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /config/h1n1pdm/ha/exclude-sites.txt: -------------------------------------------------------------------------------- 1 | 618 2 | -------------------------------------------------------------------------------- /config/vic/ha/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/vic/mp/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/vic/na/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/vic/np/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/vic/ns/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/vic/pa/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/vic/pb1/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/vic/pb2/prioritized_seqs_file.tsv: 
-------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/h1n1pdm/ha/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/h1n1pdm/mp/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/h1n1pdm/na/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/h1n1pdm/np/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/h1n1pdm/ns/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/h1n1pdm/pa/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/h1n1pdm/pb1/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | -------------------------------------------------------------------------------- /config/h1n1pdm/pb2/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | 
-------------------------------------------------------------------------------- /nextclade/dataset_config/yam/includes.txt: -------------------------------------------------------------------------------- 1 | B/Phuket/3073/2013 2 | -------------------------------------------------------------------------------- /config/clades_for_titer_plots_h1n1pdm.txt: -------------------------------------------------------------------------------- 1 | 5a.1 2 | 5a.2a 3 | 5a.2a.1 4 | -------------------------------------------------------------------------------- /config/clades_for_titer_plots_vic.txt: -------------------------------------------------------------------------------- 1 | V1A.3a.2 2 | V1A.3a.1 3 | V1A.3/133R 4 | V1A.3/155A 5 | -------------------------------------------------------------------------------- /config/clades_for_titer_plots_h3n2.txt: -------------------------------------------------------------------------------- 1 | 2a.1 2 | 2a.1b 3 | 2a.3 4 | 2a.3a.1 5 | 2a.3b 6 | 2b 7 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Always use LF line endings even on Windows. 
2 | * text=auto eol=lf 3 | *.png binary 4 | -------------------------------------------------------------------------------- /config/subclades_for_titer_plots_vic.txt: -------------------------------------------------------------------------------- 1 | C.3 2 | C.3.1 3 | C.5 4 | C.5.1 5 | C.5.6 6 | C.5.6.1 7 | C.5.7 8 | -------------------------------------------------------------------------------- /images/03-download-metadata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/images/03-download-metadata.png -------------------------------------------------------------------------------- /images/04-download-sequences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/images/04-download-sequences.png -------------------------------------------------------------------------------- /config/references_to_exclude_in_titer_plots_h3n2.txt: -------------------------------------------------------------------------------- 1 | A/Darwin/726/2019 2 | A/NorthCarolina/4/2016 3 | A/Yokohama/68/2020 4 | -------------------------------------------------------------------------------- /images/01-search-gisaid-for-h3n2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/images/01-search-gisaid-for-h3n2.png -------------------------------------------------------------------------------- /images/02-gisaid-search-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/images/02-gisaid-search-results.png -------------------------------------------------------------------------------- /profiles/gisaid/config.yaml: -------------------------------------------------------------------------------- 1 | 
cores: all 2 | use-conda: true 3 | conda-frontend: mamba 4 | configfile: profiles/gisaid/builds.yaml 5 | -------------------------------------------------------------------------------- /config/colors.tsv: -------------------------------------------------------------------------------- 1 | recency last week #d94701 2 | recency last month #fd8d3c 3 | recency last quarter #fdbe85 4 | recency older #feedde 5 | -------------------------------------------------------------------------------- /config/h3n2/na/genemap.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region CY114383 1 1436 3 | CY114383 feature gene 4 1413 . + . gene_name="NA" 4 | -------------------------------------------------------------------------------- /config/vic/na/genemap.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region CY018815.1 1 1516 3 | CY018815.1 feature gene 31 1431 . + . gene_name="NA" 4 | -------------------------------------------------------------------------------- /config/yam/na/genemap.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region CY019709.1 1 1516 3 | CY019709.1 feature gene 31 1431 . + . gene_name="NA" 4 | -------------------------------------------------------------------------------- /config/h1n1pdm/na/genemap.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region CY121682.1 1 1432 3 | CY121682.1 feature gene 9 1418 . + . 
gene_name="NA" 4 | -------------------------------------------------------------------------------- /config/subclades_for_titer_plots_h1n1pdm.txt: -------------------------------------------------------------------------------- 1 | C.1.9 2 | C.1.9.3 3 | D.3.1 4 | D.3.1:113K 5 | D.3.1:113K-139D-283K 6 | D.3.1:157L 7 | D.3.1:185T 8 | -------------------------------------------------------------------------------- /config/references_to_exclude_in_titer_plots_h1n1pdm.txt: -------------------------------------------------------------------------------- 1 | A/Brisbane/2/2018 2 | A/Brisbane/2/2018-egg 3 | A/Indiana/2/2020-egg 4 | A/Togo/881/2020-egg 5 | -------------------------------------------------------------------------------- /config/hi_titer_count_colors.tsv: -------------------------------------------------------------------------------- 1 | hi_titer_count 0 #999999 2 | hi_titer_count 1-5 #bdc9e1 3 | hi_titer_count 6-10 #74a9cf 4 | hi_titer_count >10 #0570b0 5 | -------------------------------------------------------------------------------- /profiles/europe/config.yaml: -------------------------------------------------------------------------------- 1 | configfile: 2 | - profiles/europe/builds.yaml 3 | 4 | keep-going: False 5 | printshellcmds: True 6 | show-failed-logs: True 7 | -------------------------------------------------------------------------------- /config/subclades_for_titer_plots_h3n2.txt: -------------------------------------------------------------------------------- 1 | J.2 2 | J.2.2 3 | J.2.3 4 | J.2.4 5 | J.2.5 6 | J.2:135A-145N 7 | J.2:135K 8 | J.2:145N-261Q 9 | J.2:223I 10 | J.2:8D 11 | -------------------------------------------------------------------------------- /profiles/allflu/config.yaml: -------------------------------------------------------------------------------- 1 | configfile: 2 | - profiles/allflu/builds.yaml 3 | 4 | cores: 8 5 | keep-going: False 6 | printshellcmds: True 7 | show-failed-logs: True 8 | 
-------------------------------------------------------------------------------- /profiles/example/config.yaml: -------------------------------------------------------------------------------- 1 | configfile: 2 | - profiles/example/builds.yaml 3 | 4 | cores: 8 5 | keep-going: False 6 | printshellcmds: True 7 | show-failed-logs: True 8 | -------------------------------------------------------------------------------- /config/references_for_titer_plots/h3n2/egg_fra.txt: -------------------------------------------------------------------------------- 1 | A/Croatia/10136RV/2023-egg # J.2, vaccine strain 2 | A/Singapore/GP20238/2024-egg # J.2.4 3 | A/Nepal/N042/2025-egg # J.2.4 4 | -------------------------------------------------------------------------------- /config/references_for_titer_plots/vic/egg_hi.txt: -------------------------------------------------------------------------------- 1 | B/Austria/1359417/2021-egg # C, vaccine strain 2 | B/Switzerland/329/2024-egg # C.5.6:199A 3 | B/Victoria/41/2024-egg # C.5.7 4 | -------------------------------------------------------------------------------- /profiles/nextstrain/config.yaml: -------------------------------------------------------------------------------- 1 | configfile: 2 | - profiles/nextstrain/builds.yaml 3 | 4 | cores: 8 5 | keep-going: True 6 | printshellcmds: True 7 | show-failed-logs: True 8 | -------------------------------------------------------------------------------- /config/h3n2/mp/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | A/Croatia/10136RV/2023 EPI3250713 3 | A/Croatia/10136RV/2023-egg EPI3356216 4 | A/DistrictOfColumbia/27/2023 EPI2990351 5 | -------------------------------------------------------------------------------- /config/h3n2/np/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | A/Croatia/10136RV/2023 EPI3250711 3 | 
A/Croatia/10136RV/2023-egg EPI3356214 4 | A/DistrictOfColumbia/27/2023 EPI2990339 5 | -------------------------------------------------------------------------------- /config/h3n2/ns/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | A/Croatia/10136RV/2023 EPI3250712 3 | A/Croatia/10136RV/2023-egg EPI3356215 4 | A/DistrictOfColumbia/27/2023 EPI2990345 5 | -------------------------------------------------------------------------------- /config/h3n2/pa/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | A/Croatia/10136RV/2023 EPI3250714 3 | A/Croatia/10136RV/2023-egg EPI3356217 4 | A/DistrictOfColumbia/27/2023 EPI2990331 5 | -------------------------------------------------------------------------------- /config/h3n2/pb1/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | A/Croatia/10136RV/2023 EPI3250716 3 | A/Croatia/10136RV/2023-egg EPI3356219 4 | A/DistrictOfColumbia/27/2023 EPI2990344 5 | -------------------------------------------------------------------------------- /config/h3n2/pb2/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | A/Croatia/10136RV/2023 EPI3250715 3 | A/Croatia/10136RV/2023-egg EPI3356218 4 | A/DistrictOfColumbia/27/2023 EPI2990348 5 | -------------------------------------------------------------------------------- /nextclade/config/human-nai-marker-table_for-publication_final_20240918.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/nextclade/config/human-nai-marker-table_for-publication_final_20240918.pdf -------------------------------------------------------------------------------- 
/nextclade/config/pa-marker-who-table_07-08-2024_updated_final-version.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/nextclade/config/pa-marker-who-table_07-08-2024_updated_final-version.pdf -------------------------------------------------------------------------------- /config/yam/na/subclades.tsv: -------------------------------------------------------------------------------- 1 | clade gene site alt 2 | 42Q NA 42 Q 3 | 42R NA 42 R 4 | 42R NA 186 R 5 | 42R NA 340 N 6 | 68A NA 68 A 7 | 68A NA 125 T 8 | 68A NA 463 D 9 | 68A NA 465 A 10 | 125N NA 125 N 11 | -------------------------------------------------------------------------------- /config/references_to_exclude_in_titer_plots_vic.txt: -------------------------------------------------------------------------------- 1 | B/Brisbane/35/2018 2 | B/Brisbane/35/2018-egg 3 | B/Colorado/6/2017 4 | B/Colorado/6/2017-egg 5 | B/Iowa/6/2017 6 | B/Maryland/15/2016-egg 7 | B/Nigeria/3352/2018 8 | -------------------------------------------------------------------------------- /profiles/nextflu-private/README.md: -------------------------------------------------------------------------------- 1 | # Monthly reports on seasonal influenza evolution 2 | 3 | [See the Nextstrain wiki](https://wiki.nextstrain.org/t/Seasonal+Influenza) for details about preparing monthly reports. 
4 | -------------------------------------------------------------------------------- /config/references_to_include_in_titer_plots_h1n1pdm.txt: -------------------------------------------------------------------------------- 1 | A/Iowa/22/2020-egg 2 | A/Iowa/23/2020 3 | A/NorthCarolina/1/2021 4 | A/NorthCarolina/1/2021-egg 5 | A/Louisiana/1/2020 6 | A/Togo/881/2020 7 | A/Wisconsin/588/2019 8 | -------------------------------------------------------------------------------- /ingest/vendored/s3-object-exists: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | url="${1#s3://}" 5 | bucket="${url%%/*}" 6 | key="${url#*/}" 7 | 8 | aws s3api head-object --bucket "$bucket" --key "$key" &>/dev/null 9 | -------------------------------------------------------------------------------- /example_data/haplotypes.tsv: -------------------------------------------------------------------------------- 1 | haplotype gene site alt 2 | Brisbane10 nuc 63 A 3 | Brisbane10 nuc 920 C 4 | Brisbane10 nuc 1313 C 5 | 6 | Perth16 HA1 144 K 7 | 8 | clade HA1 312 S 9 | 10 | recurrent_AA HA1 94 H 11 | -------------------------------------------------------------------------------- /config/h3n2/ha/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | A/Croatia/10136RV/2023 EPI3250718 3 | A/Croatia/10136RV/2023-egg EPI3356221 4 | A/DistrictOfColumbia/27/2023 EPI2990337 5 | A/DistrictOfColumbia/27/2023-egg EPI3391167 6 | -------------------------------------------------------------------------------- /config/h3n2/na/prioritized_seqs_file.tsv: -------------------------------------------------------------------------------- 1 | strain accession 2 | A/Croatia/10136RV/2023 EPI3250717 3 | A/Croatia/10136RV/2023-egg EPI3356220 4 | A/DistrictOfColumbia/27/2023 EPI2990330 5 | A/DistrictOfColumbia/27/2023-egg EPI3391166 6 | 
-------------------------------------------------------------------------------- /config/mask_config.tsv: -------------------------------------------------------------------------------- 1 | lineage segment attribute mask 2 | h3n2 ha ep wolf 3 | h3n2 ha ne wolf_nonepitope 4 | h3n2 ha rb koel 5 | h3n2 na ep bhatt 6 | h3n2 na ne bhatt_nonepitope 7 | h1n1pdm ha ep canton 8 | h1n1pdm ha ne canton_nonepitope 9 | -------------------------------------------------------------------------------- /config/nextstrain_clades_yam_ha.tsv: -------------------------------------------------------------------------------- 1 | clade gene site alt 2 | 2 HA1 48 K 3 | 2 HA1 108 A 4 | 2 nuc 1196 T 5 | 3 HA1 150 I 6 | 3 HA1 229 D 7 | 3 HA1 165 Y 8 | 3 nuc 1280 C 9 | 172Q HA1 172 Q 10 | 172Q nuc 848 G 11 | 172Q nuc 311 T 12 | -------------------------------------------------------------------------------- /config/references_to_include_in_titer_plots_h3n2.txt: -------------------------------------------------------------------------------- 1 | A/Cambodia/e0826360/2020 2 | A/Darwin/6/2021 3 | A/Darwin/6/2021-egg 4 | A/Darwin/9/2021 5 | A/Darwin/9/2021-egg 6 | A/Michigan/173/2020 7 | A/Michigan/173/2020-egg 8 | A/Kansas/14/2017 9 | -------------------------------------------------------------------------------- /config/yam/ha/clades.tsv: -------------------------------------------------------------------------------- 1 | clade gene site alt 2 | Y1 SigPep 1 M 3 | Y2 HA1 48 K 4 | Y2 HA1 108 A 5 | Y2 HA1 181 A 6 | Y3 HA1 116 K 7 | Y3 HA1 150 I 8 | Y3 HA1 165 Y 9 | Y3 HA1 202 S 10 | Y3 HA1 229 D 11 | Y3 HA1 298 E 12 | Y3 HA1 312 K 13 | -------------------------------------------------------------------------------- /config/yam/ha/subclades.tsv: -------------------------------------------------------------------------------- 1 | clade gene site alt 2 | Y1 SigPep 1 M 3 | Y2 HA1 48 K 4 | Y2 HA1 108 A 5 | Y2 HA1 181 A 6 | Y3 HA1 116 K 7 | Y3 HA1 150 I 8 | Y3 HA1 165 Y 9 | Y3 HA1 202 S 10 | Y3 HA1 229 
D 11 | Y3 HA1 298 E 12 | Y3 HA1 312 K 13 | -------------------------------------------------------------------------------- /config/h3n2/ha/genemap.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region CY163680.1 1 1737 3 | CY163680.1 feature gene 18 65 . + . gene_name="SigPep" 4 | CY163680.1 feature gene 66 1052 . + . gene_name="HA1" 5 | CY163680.1 feature gene 1053 1715 . + . gene_name="HA2" 6 | -------------------------------------------------------------------------------- /config/yam/ha/genemap.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region JN993010.1 1 1755 3 | JN993010.1 feature gene 1 45 . + . gene_name="SigPep" 4 | JN993010.1 feature gene 46 1083 . + . gene_name="HA1" 5 | JN993010.1 feature gene 1084 1755 . + . gene_name="HA2" 6 | -------------------------------------------------------------------------------- /config/h1n1pdm/ha/genemap.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region CY121680.1 1 1752 3 | CY121680.1 feature gene 21 71 . + . gene_name="SigPep" 4 | CY121680.1 feature gene 72 1052 . + . gene_name="HA1" 5 | CY121680.1 feature gene 1053 1718 . + . 
gene_name="HA2" 6 | -------------------------------------------------------------------------------- /workflow/envs/notebook.yaml: -------------------------------------------------------------------------------- 1 | name: notebook 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - altair=5.0.1 6 | - jupyter=1.0 7 | - jupyterlab_code_formatter=2.2.1 8 | - pandas=1.5.0 9 | - python=3.9* 10 | - vl-convert-python=0.11.1 11 | -------------------------------------------------------------------------------- /ingest/vendored/.shellcheckrc: -------------------------------------------------------------------------------- 1 | # Use of this file requires Shellcheck v0.7.0 or newer. 2 | # 3 | # SC2064 - We intentionally want variables to expand immediately within traps 4 | # so the trap can not fail due to variable interpolation later. 5 | # 6 | disable=SC2064 7 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/na/EPI1857215/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region EPI1857215 1 1439 3 | EPI1857215 annotation remark 1 1439 . . . accessions=EPI1857215; 4 | EPI1857215 feature gene 8 1417 . + . 
codon_start=1;gene=NA;gene_name=NA;product=neuraminidase; 5 | -------------------------------------------------------------------------------- /ingest/build-configs/manual-upload/config.yaml: -------------------------------------------------------------------------------- 1 | # TODO: remove `trials/ingest/` after we switch to the ingest workflow 2 | # AWS S3 destination for the downloaded GISAID files 3 | s3_dst: "s3://nextstrain-data-private/files/workflows/seasonal-flu/trials/ingest/gisaid-downloads/unprocessed" 4 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/ha/CY163680/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region CY163680.1 1 1737 3 | CY163680.1 feature gene 18 65 . + . gene_name="SigPep" 4 | CY163680.1 feature gene 66 1052 . + . gene_name="HA1" 5 | CY163680.1 feature gene 1053 1715 . + . gene_name="HA2" 6 | -------------------------------------------------------------------------------- /nextclade/dataset_config/vic/ha/KX058884/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region KX058884.1 1 1885 3 | KX058884.1 feature gene 34 78 . + . gene_name="SigPep" 4 | KX058884.1 feature gene 79 1119 . + . gene_name="HA1" 5 | KX058884.1 feature gene 1120 1791 . + . gene_name="HA2" 6 | -------------------------------------------------------------------------------- /nextclade/dataset_config/yam/ha/JN993010/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region JN993010.1 1 1755 3 | JN993010.1 feature gene 1 45 . + . gene_name="SigPep" 4 | JN993010.1 feature gene 46 1083 . + . gene_name="HA1" 5 | JN993010.1 feature gene 1084 1755 . + . 
gene_name="HA2" 6 | -------------------------------------------------------------------------------- /ingest/defaults/strain_name_fixes.tsv: -------------------------------------------------------------------------------- 1 | # GISAID strain name fixes 2 | label fix 3 | 4 | Influenza A Virus (A/Malaysia/228/2014(H7N9)) segment 6 neuraminidase (NA) gene A/Malaysia/228/2014 5 | Influenza A Virus (A/Malaysia/228/2014(H7N9)) segment 4 hemagglutinin (HA) gene A/Malaysia/228/2014 6 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/ha/CY121680/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region CY121680.1 1 1752 3 | CY121680.1 feature gene 21 71 . + . gene_name="SigPep" 4 | CY121680.1 feature gene 72 1052 . + . gene_name="HA1" 5 | CY121680.1 feature gene 1053 1718 . + . gene_name="HA2" 6 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/ha/MW626062/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region MW626062.1 1 1752 3 | MW626062.1 feature gene 21 71 . + . gene_name="SigPep" 4 | MW626062.1 feature gene 72 1052 . + . gene_name="HA1" 5 | MW626062.1 feature gene 1053 1718 . + . gene_name="HA2" 6 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/ha/EPI1857216/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region EPI1857216 1 1718 3 | EPI1857216 feature gene 1 48 . + . gene_name="SigPep" 4 | EPI1857216 feature gene 49 1035 . + . gene_name="HA1" 5 | EPI1857216 feature gene 1036 1698 . + . 
gene_name="HA2" 6 | -------------------------------------------------------------------------------- /nextclade/dataset_config/vic/ha/EPI1926632/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region EPI1926632 1 1847 3 | EPI1926632 feature gene 20 64 . + . gene_name="SigPep" 4 | EPI1926632 feature gene 65 1096 . + . gene_name="HA1" 5 | EPI1926632 feature gene 1097 1765 . + . gene_name="HA2" 6 | -------------------------------------------------------------------------------- /config/h1n1pdm/ha/emerging_haplotypes.tsv: -------------------------------------------------------------------------------- 1 | haplotype gene site alt 2 | C.1.9 clade C.1.9 3 | C.1.9.3 clade C.1.9.3 4 | D.3.1 clade D.3.1 5 | 6 | D.3.1:270A clade D.3.1 7 | D.3.1:270A HA1 270 A 8 | 9 | D.3.1.1 clade D.3.1.1 10 | 11 | D.3.1.1:205K clade D.3.1.1 12 | D.3.1.1:205K HA1 205 K 13 | -------------------------------------------------------------------------------- /ingest/defaults/h3n2/prioritized_strain_ids.tsv: -------------------------------------------------------------------------------- 1 | strain id 2 | A/Croatia/10136RV/2023 EPI_ISL_19085723 3 | A/Croatia/10136RV/2023-egg EPI_ISL_19185072 4 | A/DistrictOfColumbia/27/2023 EPI_ISL_18862356 5 | A/DistrictOfColumbia/27/2023-egg EPI_ISL_19209054 6 | A/Massachusetts/18/2022 EPI_ISL_13897082 7 | -------------------------------------------------------------------------------- /nextclade/dataset_config/vic/na/CY073894/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region CY073894.1 1 1401 3 | CY073894.1 annotation remark 1 1401 . . . accessions=CY073894; 4 | CY073894.1 feature gene 1 1401 . + . 
codon_start=1;gene=NA;gene_name=NA;product=neuraminidase;protein_id=ADN32819.1; 5 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/na/MW626056/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region MW626056.1 1 1433 3 | MW626056.1 annotation remark 1 1433 . . . accessions=MW626056; 4 | MW626056.1 feature gene 9 1418 . + . codon_start=1;gene=NA;gene_name=NA;product=neuraminidase;protein_id=QRV63257.1; 5 | -------------------------------------------------------------------------------- /profiles/ci/prepare_data.smk: -------------------------------------------------------------------------------- 1 | rule prepare_sequences: 2 | input: 3 | sequences="example_data/h3n2_{segment}.fasta", 4 | output: 5 | sequences="data/h3n2/raw_{segment}.fasta", 6 | shell: 7 | """ 8 | cp -f {input.sequences} {output.sequences} 9 | """ 10 | -------------------------------------------------------------------------------- /config/vic/ha/emerging_haplotypes.tsv: -------------------------------------------------------------------------------- 1 | haplotype gene site alt 2 | C.3 clade C.3 3 | C.3.1 clade C.3.1 4 | C.5.1 clade C.5.1 5 | C.5.6 clade C.5.6 6 | C.5.6.1 clade C.5.6.1 7 | C.5.7 clade C.5.7 8 | 9 | C.5.6:75E clade C.5.6 10 | C.5.6:75E HA1 75 E 11 | 12 | C.5.6:189A clade C.5.6 13 | C.5.6:189A HA1 189 A 14 | -------------------------------------------------------------------------------- /nextclade/dataset_config/vic/includes.txt: -------------------------------------------------------------------------------- 1 | B/Brazil/1417/2023 2 | B/Massachusetts/1/2022 3 | B/Amazonas/2022-014046-IEC/2022 4 | B/Iquitos/FPI20551/2022 5 | B/Pennsylvania/3/2022 6 | 7 | 8 | # C.3.2 9 | B/Lisboa/134/2024 10 | B/Wisconsin/25/2025 11 | A/SaoPaulo/0068-IBTEC/2024 12 | B/Colorado/46/2025 13 | 14 | 15 | 
-------------------------------------------------------------------------------- /nextstrain-pathogen.yaml: -------------------------------------------------------------------------------- 1 | # This is currently an empty file to indicate the top level pathogen repo. 2 | # The inclusion of this file allows the Nextstrain CLI to run the 3 | # `nextstrain build` from any directory regardless of runtime. 4 | # 5 | # See https://github.com/nextstrain/cli/releases/tag/8.2.0 for more details. 6 | -------------------------------------------------------------------------------- /profiles/allflu/h3n2_include.txt: -------------------------------------------------------------------------------- 1 | A/Albany/6/1968 2 | A/HongKong/33/1973 3 | A/Memphis/105/1976 4 | A/Netherlands/233/1982 5 | A/Singapore/35/1989 6 | A/Stockholm/20/1993 7 | A/NewYork/631/1996 8 | A/Wisconsin/67/2005 9 | A/Perth/16/2009 10 | A/HongKong/4801/2014 11 | A/Cambodia/e0826360/2020 12 | A/Darwin/9/2021 13 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | jobs: 11 | ci: 12 | uses: nextstrain/.github/.github/workflows/pathogen-repo-ci.yaml@v0 13 | with: 14 | build-args: --configfile profiles/ci/builds.yaml -p 15 | -------------------------------------------------------------------------------- /config/frequency_weights_by_region.json: -------------------------------------------------------------------------------- 1 | { 2 | "Africa": 1.02, 3 | "Europe": 0.74, 4 | "North America": 0.54, 5 | "China": 1.36, 6 | "South Asia": 1.45, 7 | "Japan Korea": 0.20, 8 | "Oceania": 0.04, 9 | "South America": 0.41, 10 | "Southeast Asia": 0.62, 11 | "West Asia": 0.75 12 | } 13 | -------------------------------------------------------------------------------- 
/config/distance_maps/h3n2/ha/koel.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "map": { 4 | "HA1": { 5 | "145": 1, 6 | "155": 1, 7 | "156": 1, 8 | "158": 1, 9 | "159": 1, 10 | "189": 1, 11 | "193": 1 12 | } 13 | }, 14 | "name": "koel" 15 | } -------------------------------------------------------------------------------- /ingest/vendored/.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | workflow_dispatch: 9 | 10 | jobs: 11 | shellcheck: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: nextstrain/.github/actions/shellcheck@master 16 | -------------------------------------------------------------------------------- /ingest/vendored/sha256sum: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Portable sha256sum utility. 
4 | """ 5 | from hashlib import sha256 6 | from sys import stdin 7 | 8 | chunk_size = 5 * 1024**2 # 5 MiB 9 | 10 | h = sha256() 11 | 12 | for chunk in iter(lambda: stdin.buffer.read(chunk_size), b""): 13 | h.update(chunk) 14 | 15 | print(h.hexdigest()) 16 | -------------------------------------------------------------------------------- /ingest/vendored/.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit 2 | 3 | on: 4 | - push 5 | 6 | jobs: 7 | pre-commit: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | - uses: actions/setup-python@v5 12 | with: 13 | python-version: "3.12" 14 | - uses: pre-commit/action@v3.0.1 15 | -------------------------------------------------------------------------------- /notebooks/README.md: -------------------------------------------------------------------------------- 1 | For 2025-09-16-plot-ga-by-antigenic-advance.py: 2 | - run `marimo run notebooks/2025-09-16-plot-ga-by-antigenic-advance.py` from top-level repo directory 3 | - make sure that `forecasts-flu` is a sister directory to `seasonal-flu` and `../forecasts-flu/results/h3n2/region/mlr/ga.tsv` exists 4 | - include `full-h3n2_ha.tsv` in the top-level repo directory 5 | -------------------------------------------------------------------------------- /config/yam/outliers.txt: -------------------------------------------------------------------------------- 1 | B/Catalonia/NSVH100562319/2017 2 | B/England/581/2012 3 | B/Gifu/62/2018 4 | B/HongKong/2196/2010 5 | B/Kisumu/7/2005 6 | B/Kolkata/2546/2009 7 | B/Kolkata/N-1272/2009 8 | B/Kolkata/N-2047/2009 9 | B/Nairobi/351/2005 10 | B/NewHampshire/1/2016 11 | B/Norway/2155/2017 12 | B/Palermo/2/2011 13 | B/Riyadh/3/2010 14 | B/Riyadh/4/2010 15 | B/Thailand/CU-B10303/2014 16 | -------------------------------------------------------------------------------- /profiles/scicore/submit.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | #SBATCH --output=log/%j.out # where to store the output ( %j is the JOBID ) 4 | #SBATCH --error=log/%j.err # where to store error messages 5 | 6 | # activate conda environment 7 | source ~/miniconda3/etc/profile.d/conda.sh 8 | conda activate nextstrain 9 | #Test 10 | export AUGUR_MINIFY_JSON=1 11 | 12 | {exec_job} 13 | -------------------------------------------------------------------------------- /scripts/sha256sum: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Originally copied from nextstrain/ncov-ingest repo 3 | """ 4 | Portable sha256sum utility. 5 | """ 6 | from hashlib import sha256 7 | from sys import stdin 8 | 9 | chunk_size = 5 * 1024**2 # 5 MiB 10 | 11 | h = sha256() 12 | 13 | for chunk in iter(lambda: stdin.buffer.read(chunk_size), b""): 14 | h.update(chunk) 15 | 16 | print(h.hexdigest()) 17 | -------------------------------------------------------------------------------- /profiles/nextstrain-public/deploy.smk: -------------------------------------------------------------------------------- 1 | """ 2 | This part of the workflow handles automatic deployments of public builds. 3 | Depends on the `all_public` rule from rename.smk 4 | """ 5 | 6 | rule deploy_all: 7 | input: rules.all_public.input 8 | params: 9 | s3_dst = config["deploy_url"] 10 | shell: 11 | """ 12 | nextstrain remote upload {params.s3_dst} {input} 13 | """ 14 | -------------------------------------------------------------------------------- /config/vic/ha/genemap.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | ##sequence-region KX058884.1 1 1885 3 | ## Coordinates based on PDB entry 4FQM (https://www.rcsb.org/structure/4FQM) 4 | ## from Dreyfus et al. 2012 (https://www.science.org/doi/full/10.1126/science.1222908). 5 | KX058884.1 feature gene 34 78 . + . 
gene_name="SigPep" 6 | KX058884.1 feature gene 79 1119 . + . gene_name="HA1" 7 | KX058884.1 feature gene 1120 1788 . + . gene_name="HA2" 8 | -------------------------------------------------------------------------------- /source-data/2018_South_America_flu_vaccination_coverage.tsv: -------------------------------------------------------------------------------- 1 | # South American data from http://ais.paho.org/imm/InfluenzaCoverageMap.asp 2 | Bolivia 70 3 | Colombia 50 4 | Ecuador 90 5 | Peru 50 6 | Venezuela 15 7 | Brazil 90 8 | Costa Rica 75 9 | Belize 3 10 | El Salvador 35 11 | Guatemala 75 12 | Honduras 85 13 | Panama 90 14 | Cuba 90 15 | Mexico 90 16 | Bermuda 15 17 | Argentina 60 18 | Chile 55 19 | Paraguay 35 20 | Uruguay 30 21 | -------------------------------------------------------------------------------- /config/distance_maps/h3n2/ha/welsh_epitope_sites.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Welsh et al. epitope sites", 3 | "default": 0, 4 | "map": { 5 | "HA1": { 6 | "50": 1, 7 | "82": 1, 8 | "124": 1, 9 | "135": 1, 10 | "137": 1, 11 | "143": 1, 12 | "144": 1, 13 | "145": 1, 14 | "157": 1, 15 | "159": 1, 16 | "189": 1, 17 | "193": 1, 18 | "275": 1, 19 | "276": 1 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /config/distance_maps.tsv: -------------------------------------------------------------------------------- 1 | lineage segment compare_to attribute distance_map 2 | h3n2 ha root ep wolf 3 | h3n2 ha root ne wolf_nonepitope 4 | h3n2 ha root rb koel 5 | h3n2 ha ancestor ne_star luksza_nonepitope 6 | h3n2 ha ancestor cell_entry Yu_et_al_2025_cell_entry 7 | h3n2 ha ancestor ph_stability Yu_et_al_2025_ph_stability 8 | h3n2 na root ep bhatt 9 | h3n2 na root ne bhatt_nonepitope 10 | h1n1pdm ha root ep canton 11 | h1n1pdm ha root ne canton_nonepitope 12 | -------------------------------------------------------------------------------- 
/ingest/vendored/.gitrepo: -------------------------------------------------------------------------------- 1 | ; DO NOT EDIT (unless you know what you are doing) 2 | ; 3 | ; This subdirectory is a git "subrepo", and this file is maintained by the 4 | ; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme 5 | ; 6 | [subrepo] 7 | remote = https://github.com/nextstrain/ingest 8 | branch = main 9 | commit = cd6d31a3b35cd1bb7eddf830c565be6d6e69f27a 10 | parent = eed11b63f5b662da6e7400bc65cc7c618d3ef4b8 11 | method = merge 12 | cmdver = 0.4.6 13 | -------------------------------------------------------------------------------- /workflow/envs/nextstrain.yaml: -------------------------------------------------------------------------------- 1 | name: nextstrain 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - augur=31.4.0 7 | - awscli=1.27.9 8 | - epiweeks=2.1.2 9 | - nextclade=3.15.3 10 | - python=3.11.* 11 | - seaborn>=0.11* 12 | - seqkit=2.2.0 13 | - csvtk=0.31.0 14 | - tsv-utils=2.2.3 15 | - tabulate=0.9.0 16 | - xlrd=1.* 17 | - pip=23.0.1 18 | - pathogen-embed=3.1.0 19 | - jinja2=3.1.2 20 | - pip: 21 | - rethinkdb==2.3.0.post6 22 | -------------------------------------------------------------------------------- /models/welsh_escape.json: -------------------------------------------------------------------------------- 1 | { 2 | "predictors": [ 3 | "welsh_escape" 4 | ], 5 | "cv_error_mean": 5.444043356439342, 6 | "cv_error_std": 1.7586532985230514, 7 | "coefficients_mean": [ 8 | 1.0 9 | ], 10 | "coefficients_std": [ 11 | 0.3 12 | ], 13 | "mean_stds_mean": [ 14 | 1.0 15 | ], 16 | "mean_stds_std": [ 17 | 0.02 18 | ], 19 | "cost_function": "diffsum", 20 | "l1_lambda": 0.1, 21 | "delta_months": 12, 22 | "training_window": 6, 23 | "pseudocount": null 24 | } 25 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/pb2/annotation.gff: 
-------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_007373.1 1 2341 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341 6 | NC_007373.1 RefSeq region 1 2341 . + . ID=NC_007373.1:1..2341 7 | NC_007373.1 RefSeq CDS 28 2307 . + 0 Name=PB2;gbkey=CDS;gene=PB2;protein_id=YP_308849.1;locus_tag=FLUAVH3N2_s1p1;product=polymerase PB2;ID=cds-YP_308849.1;Dbxref=GenBank:YP_308849.1,GeneID:3655161 8 | -------------------------------------------------------------------------------- /config/colors_for_titer_plots_vic.tsv: -------------------------------------------------------------------------------- 1 | clade_test V1A.3 #b8bc4a 2 | clade_test V1A.3/133R #8cbb69 3 | clade_test V1A.3/155A #b8bc4a 4 | clade_test V1A.3a.1 #e67932 5 | clade_test V1A.3a.2 #dc2f24 6 | subclade_test A.3.2 #3F52CD 7 | subclade_test C #4681CA 8 | subclade_test C.1 #57A1AD 9 | subclade_test C.2 #70B487 10 | subclade_test C.3 #90BC65 11 | subclade_test C.4 #B4BD4C 12 | subclade_test C.5 #D3B240 13 | subclade_test C.5.1 #E59638 14 | subclade_test C.5.2 #E4642E 15 | subclade_test C.5.3 #DB2823 16 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/np/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_007369.1 1 1566 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341 6 | NC_007369.1 RefSeq region 1 1566 . + . ID=NC_007369.1:1..1566 7 | NC_007369.1 RefSeq CDS 46 1542 . 
+ 0 Name=NP;gene=NP;gbkey=CDS;protein_id=YP_308843.1;locus_tag=FLUAVH3N2_s5p1;ID=cds-YP_308843.1;product=nucleocapsid protein;Dbxref=GenBank:YP_308843.1,GeneID:3655155 8 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/pb1/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_026435.1 1 2274 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809 6 | NC_026435.1 RefSeq region 1 2274 . + . ID=NC_026435.1:1..2274; 7 | NC_026435.1 RefSeq CDS 1 2274 . + 0 Name=PB1;gbkey=CDS;gene=PB1;locus_tag=UJ99_s2gp1;product=polymerase PB1;protein_id=YP_009118628.1;ID=cds-YP_009118628.1;Dbxref=GenBank:YP_009118628.1,GeneID:23308122 8 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/pb2/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_026438.1 1 2280 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809 6 | NC_026438.1 RefSeq region 1 2280 . + . ID=NC_026438.1:1..2280; 7 | NC_026438.1 RefSeq CDS 1 2280 . 
+ 0 Name=PB2;gbkey=CDS;gene=PB2;locus_tag=UJ99_s1gp1;product=polymerase PB2;protein_id=YP_009118631.1;ID=cds-YP_009118631.1;Dbxref=GenBank:YP_009118631.1,GeneID:23308131 8 | -------------------------------------------------------------------------------- /profiles/nextclade.yaml: -------------------------------------------------------------------------------- 1 | custom_rules: 2 | - workflow/snakemake_rules/download_from_s3.smk 3 | - profiles/nextclade/run-nextclade.smk 4 | 5 | s3_dst: "s3://nextstrain-data-private/files/workflows/seasonal-flu" 6 | 7 | segments: 8 | - ha 9 | - na 10 | - pb2 11 | - pb1 12 | - pa 13 | - np 14 | - mp 15 | - ns 16 | 17 | builds: 18 | h1n1pdm: 19 | lineage: h1n1pdm 20 | h3n2: 21 | lineage: h3n2 22 | vic: 23 | lineage: vic 24 | segments: 25 | - ha 26 | - na 27 | -------------------------------------------------------------------------------- /ingest/vendored/.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Description of proposed changes 2 | 3 | 4 | 5 | ### Related issue(s) 6 | 7 | 8 | 9 | ### Checklist 10 | 11 | 12 | 13 | - [ ] Checks pass 14 | - [ ] If adding a script, add an entry for it in the README. 15 | 16 | 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/np/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_026436.1 1 1497 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809 6 | NC_026436.1 RefSeq region 1 1497 . + . ID=NC_026436.1:1..1497; 7 | NC_026436.1 RefSeq CDS 1 1497 . 
+ 0 Name=NP;gene=NP;gbkey=CDS;locus_tag=UJ99_s5gp1;protein_id=YP_009118629.1;ID=cds-YP_009118629.1;product=nucleocapsid protein;Dbxref=GenBank:YP_009118629.1,GeneID:23308125 8 | -------------------------------------------------------------------------------- /config/references_for_titer_plots/vic/cell_hi.txt: -------------------------------------------------------------------------------- 1 | B/Austria/1359417/2021 # C, vaccine strain 2 | B/Victoria/16/2023 # C.3:208P 3 | B/Pennsylvania/14/2025 # C.3.1, CDC 4 | B/Tasmania/31/2025 # C.3.1 5 | B/Kanagawa/AC2414/2025 # C.3.1 6 | B/Utah/11/2023 # C.5.1:128K 7 | B/Kansas/5/2024 # C.5.1:128K,202V 8 | B/Ghana/3778/2024 # C.5.1:202V 9 | B/Missouri/3/2024 # C.5.6:199A, CDC 10 | B/Switzerland/329/2024 # C.5.6:199A, Crick 11 | B/Alabama/7/2023 # C.5.6 12 | B/Ranong/373/2023 # C.5.7 13 | B/Tokyo/23150/2023 # C.5.7 14 | B/Texas/19/2024 # C.5.7 15 | -------------------------------------------------------------------------------- /profiles/scicore/cluster.json: -------------------------------------------------------------------------------- 1 | { 2 | "__default__": { 3 | "time": "00:29:00", 4 | "qos": "30min", 5 | "n": 1, 6 | "mem": "8G", 7 | "conda_env": "nextstrain" 8 | }, 9 | "refine": { 10 | "time": "05:59:00", 11 | "qos": "6hours", 12 | "n": 1 13 | }, 14 | "tree": { 15 | "n": 4, 16 | "qos": "6hours", 17 | "time": "05:59:00" 18 | }, 19 | "align": { 20 | "n": 4, 21 | "time": "00:29:00", 22 | "mem": "16G", 23 | "qos": "30min" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /ingest/build-configs/nextstrain-automation/config.yaml: -------------------------------------------------------------------------------- 1 | # Custom rules to run as part of the Nextstrain automated workflow 2 | # The paths should be relative to the ingest directory. 
3 | custom_rules: 4 | - build-configs/nextstrain-automation/fetch_from_s3.smk 5 | - build-configs/nextstrain-automation/upload.smk 6 | 7 | # TODO: remove `/trials/ingest` after we switch to the ingest workflow 8 | s3_dst: "s3://nextstrain-data-private/files/workflows/seasonal-flu/trials/ingest" 9 | s3_src: "s3://nextstrain-data-private/files/workflows/seasonal-flu/trials/ingest" 10 | -------------------------------------------------------------------------------- /config/references_for_titer_plots/h3n2/cell_fra.txt: -------------------------------------------------------------------------------- 1 | A/DistrictOfColumbia/27/2023 # J.2:145N, vaccine strain 2 | A/Wisconsin/154/2024 # J.2:135K 3 | A/Catalonia/NSVH102423723/2024 # J.2:135K, Crick 4 | A/Michigan/32/2024 # J.2:145N 5 | A/Colorado/209/2024 # J.2:223I 6 | #A/Victoria/488/2024 # J.2:207R 7 | A/Colorado/6/2024 # J.2.1 8 | A/Minnesota/97/2024 # J.2.2:65K,145N 9 | A/Alaska/8/2025 # J.2.3, CDC 10 | A/Netherlands/10685/2024 # J.2.3 11 | A/Valladolid/1187/2025 # J.2.4 (189R + 135K), Crick 12 | A/Mississippi/37/2025 # J.2.4, CDC 13 | A/Sydney/1359/2024 # J.2.4 14 | A/Kentucky/29/2024 # J.2.5 15 | -------------------------------------------------------------------------------- /nextclade/dataset_config/vic/ha/EPI1926632/pathogen.json: -------------------------------------------------------------------------------- 1 | { 2 | "aaMotifs": [ 3 | { 4 | "name": "glycosylation", 5 | "nameShort": "Glyc.", 6 | "nameFriendly": "Glycosylation", 7 | "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)", 8 | "includeCdses": [ 9 | { 10 | "cds":"HA1", 11 | "ranges":[] 12 | }, 13 | { 14 | "cds":"HA2", 15 | "ranges":[{"begin":0, "end":187}] 16 | } 17 | ], 18 | "motifs": [ 19 | "N[^P][ST]" 20 | ] 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /profiles/scicore/config.yaml: 
-------------------------------------------------------------------------------- 1 | configfile: 2 | - profiles/europe/builds.yaml 3 | 4 | cluster-config: profiles/scicore/cluster.json 5 | cluster: "sbatch --time={cluster.time} --mem={cluster.mem} --cpus-per-task={cluster.n} --qos={cluster.qos}" 6 | 7 | jobs: 128 8 | 9 | jobscript: profiles/scicore/submit.sh 10 | 11 | # Set the name for the job as display in the cluster queue. 12 | jobname: "{rulename}.{jobid}.sh" 13 | 14 | 15 | cores: 4 16 | keep-going: True 17 | printshellcmds: True 18 | show-failed-logs: True 19 | reason: True 20 | stats: stats.json 21 | # Print log files of failed jobs 22 | show-failed-logs: True 23 | -------------------------------------------------------------------------------- /config/nextstrain_clades_vic_ha.tsv: -------------------------------------------------------------------------------- 1 | clade gene site alt 2 | 1A nuc 206 G 3 | 1A nuc 644 C 4 | 1A nuc 1340 T 5 | 1A nuc 1821 T 6 | 1B nuc 1034 G 7 | 1B nuc 1172 G 8 | V1A HA1 117 V 9 | V1A HA1 146 I 10 | V1A nuc 296 G 11 | V1A.1 HA2 152 K 12 | V1A.1 HA1 162 - 13 | V1A.1 HA1 163 - 14 | V1A.1 HA1 180 V 15 | V1A.1 nuc 1810 G 16 | V1A.2 HA1 163 - 17 | V1A.2 HA1 164 - 18 | V1A.2 HA1 180 T 19 | V1A.2 HA1 209 N 20 | V1A.3 HA1 136 E 21 | V1A.3 HA1 163 - 22 | V1A.3 HA1 164 - 23 | V1A.3 nuc 1646 C 24 | 3a HA1 150 K 25 | 3a HA1 184 E 26 | 3a HA1 197 D 27 | 3a HA1 279 K 28 | 3a2 HA1 150 K 29 | 3a2 HA1 144 L 30 | 3a1 HA1 150 K 31 | 3a1 HA1 220 M 32 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Dependabot configuration file 2 | # 3 | # 4 | # Each ecosystem is checked on a scheduled interval defined below. To trigger 5 | # a check manually, go to 6 | # 7 | # https://github.com/nextstrain/seasonal-flu/network/updates 8 | # 9 | # and look for a "Check for updates" button. 
You may need to click around a 10 | # bit first. 11 | --- 12 | version: 2 13 | updates: 14 | - package-ecosystem: "github-actions" 15 | directory: "/" 16 | schedule: 17 | interval: "weekly" 18 | -------------------------------------------------------------------------------- /config/references_for_titer_plots/h1n1pdm/egg_hi.txt: -------------------------------------------------------------------------------- 1 | A/Victoria/4897/2022-egg # D, vaccine strain 2 | A/France/BFC-IPP15052/2024-egg # D.3.1 3 | #A/Missouri/11/2025-egg # D.3.1 4 | A/Sydney/124/2025-egg # D.3.1 5 | A/Switzerland/6849/2025-egg # D.3.1:113K 6 | A/California/177/2024-egg # C.1.9.1 7 | A/Victoria/42/2025-egg # C.1.9.3 8 | A/Norway/10655/2024-egg # C.1.9.3:35N 9 | #A/Washington/310/2024-egg # C.1.9.3:166V 10 | A/Caerphilly/5142/2024-egg # C.1.9.3:120T,137S,166V 11 | # A/Singapore/SAR3644/2024-egg # Missing? 12 | A/Iceland/34025/2024-egg # D.5 13 | A/Tokyo/EIS10-381/2024-egg # Missing? 14 | A/Tokyo/EIS10-554/2024-egg # Missing? 15 | -------------------------------------------------------------------------------- /ingest/vendored/.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # Dependabot configuration file 2 | # 3 | # 4 | # Each ecosystem is checked on a scheduled interval defined below. To trigger 5 | # a check manually, go to 6 | # 7 | # https://github.com/nextstrain/ingest/network/updates 8 | # 9 | # and look for a "Check for updates" button. You may need to click around a 10 | # bit first. 
11 | --- 12 | version: 2 13 | updates: 14 | - package-ecosystem: "github-actions" 15 | directory: "/" 16 | schedule: 17 | interval: "weekly" 18 | -------------------------------------------------------------------------------- /config/yam/vaccine.json: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "B/Shanghai/361/2002": { 4 | "vaccine": { 5 | "selection_date": "2004-09-25" 6 | } 7 | }, 8 | "B/Florida/4/2006": { 9 | "vaccine": { 10 | "selection_date": "2008-09-25" 11 | } 12 | }, 13 | "B/Wisconsin/1/2010": { 14 | "vaccine": { 15 | "selection_date": "2012-02-25" 16 | } 17 | }, 18 | "B/Phuket/3073/2013": { 19 | "vaccine": { 20 | "selection_date": "2014-09-25" 21 | } 22 | } 23 | } 24 | } -------------------------------------------------------------------------------- /models/ne_star-lbi.json: -------------------------------------------------------------------------------- 1 | { 2 | "predictors": [ 3 | "ne_star", 4 | "lbi" 5 | ], 6 | "cv_error_mean": 5.444043356439342, 7 | "cv_error_std": 1.7586532985230514, 8 | "coefficients_mean": [ 9 | -0.6756465974025607, 10 | 1.033713901753553 11 | ], 12 | "coefficients_std": [ 13 | 0.3336818417473266, 14 | 0.3936161339928366 15 | ], 16 | "mean_stds_mean": [ 17 | 1.1405240069244975, 18 | 0.1290705493482985 19 | ], 20 | "mean_stds_std": [ 21 | 0.036754535005899855, 22 | 0.02083404312198264 23 | ], 24 | "cost_function": "diffsum", 25 | "l1_lambda": 0.1, 26 | "delta_months": 12, 27 | "training_window": 6, 28 | "pseudocount": null 29 | } -------------------------------------------------------------------------------- /models/cTiter_x-ne_star.json: -------------------------------------------------------------------------------- 1 | { 2 | "predictors": [ 3 | "cTiter_x", 4 | "ne_star" 5 | ], 6 | "cv_error_mean": 5.823634831801705, 7 | "cv_error_std": 1.4714831128264352, 8 | "coefficients_mean": [ 9 | 0.8880449909700426, 10 | -1.0074938116832592 11 | ], 12 | "coefficients_std": [ 
13 | 0.22289407223621063, 14 | 0.41396205335824204 15 | ], 16 | "mean_stds_mean": [ 17 | 0.22238984824857977, 18 | 1.1405240069244975 19 | ], 20 | "mean_stds_std": [ 21 | 0.051326848248942424, 22 | 0.036754535005899855 23 | ], 24 | "cost_function": "diffsum", 25 | "l1_lambda": 0.1, 26 | "delta_months": 12, 27 | "training_window": 6, 28 | "pseudocount": null 29 | } 30 | -------------------------------------------------------------------------------- /models/fra_cTiter_x-ne_star.json: -------------------------------------------------------------------------------- 1 | { 2 | "predictors": [ 3 | "fra_cTiter_x", 4 | "ne_star" 5 | ], 6 | "cv_error_mean": 8.288751542568207, 7 | "cv_error_std": 4.590061451047961, 8 | "coefficients_mean": [ 9 | 1.4022025868694787, 10 | -0.46227708276390217 11 | ], 12 | "coefficients_std": [ 13 | 0.22611682803638786, 14 | 0.1787549565201879 15 | ], 16 | "mean_stds_mean": [ 17 | 0.10027528334129619, 18 | 1.1356939490365228 19 | ], 20 | "mean_stds_std": [ 21 | 0.05129205379298049, 22 | 0.021887145941695038 23 | ], 24 | "cost_function": "diffsum", 25 | "l1_lambda": 0.1, 26 | "delta_months": 12, 27 | "training_window": 6, 28 | "pseudocount": null 29 | } -------------------------------------------------------------------------------- /models/cell_fra_cTiter_x-ne_star.json: -------------------------------------------------------------------------------- 1 | { 2 | "predictors": [ 3 | "cell_fra_cTiter_x", 4 | "ne_star" 5 | ], 6 | "cv_error_mean": 5.823634831801705, 7 | "cv_error_std": 1.4714831128264352, 8 | "coefficients_mean": [ 9 | 0.8880449909700426, 10 | -1.0074938116832592 11 | ], 12 | "coefficients_std": [ 13 | 0.22289407223621063, 14 | 0.41396205335824204 15 | ], 16 | "mean_stds_mean": [ 17 | 0.22238984824857977, 18 | 1.1405240069244975 19 | ], 20 | "mean_stds_std": [ 21 | 0.051326848248942424, 22 | 0.036754535005899855 23 | ], 24 | "cost_function": "diffsum", 25 | "l1_lambda": 0.1, 26 | "delta_months": 12, 27 | "training_window": 6, 28 | 
"pseudocount": null 29 | } 30 | -------------------------------------------------------------------------------- /models/human_cell_fra_cTiter_x-ne_star.json: -------------------------------------------------------------------------------- 1 | { 2 | "predictors": [ 3 | "human_cell_fra_cTiter_x", 4 | "ne_star" 5 | ], 6 | "cv_error_mean": 5.823634831801705, 7 | "cv_error_std": 1.4714831128264352, 8 | "coefficients_mean": [ 9 | 0.8880449909700426, 10 | -1.0074938116832592 11 | ], 12 | "coefficients_std": [ 13 | 0.22289407223621063, 14 | 0.41396205335824204 15 | ], 16 | "mean_stds_mean": [ 17 | 0.22238984824857977, 18 | 1.1405240069244975 19 | ], 20 | "mean_stds_std": [ 21 | 0.051326848248942424, 22 | 0.036754535005899855 23 | ], 24 | "cost_function": "diffsum", 25 | "l1_lambda": 0.1, 26 | "delta_months": 12, 27 | "training_window": 6, 28 | "pseudocount": null 29 | } 30 | -------------------------------------------------------------------------------- /models/human_cell_hi_cTiter_x-ne_star.json: -------------------------------------------------------------------------------- 1 | { 2 | "predictors": [ 3 | "human_cell_hi_cTiter_x", 4 | "ne_star" 5 | ], 6 | "cv_error_mean": 5.823634831801705, 7 | "cv_error_std": 1.4714831128264352, 8 | "coefficients_mean": [ 9 | 0.8880449909700426, 10 | -1.0074938116832592 11 | ], 12 | "coefficients_std": [ 13 | 0.22289407223621063, 14 | 0.41396205335824204 15 | ], 16 | "mean_stds_mean": [ 17 | 0.22238984824857977, 18 | 1.1405240069244975 19 | ], 20 | "mean_stds_std": [ 21 | 0.051326848248942424, 22 | 0.036754535005899855 23 | ], 24 | "cost_function": "diffsum", 25 | "l1_lambda": 0.1, 26 | "delta_months": 12, 27 | "training_window": 6, 28 | "pseudocount": null 29 | } 30 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/pb1/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | 
#!processor NCBI annotwriter 4 | ##sequence-region NC_007372.1 1 2341 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341 6 | NC_007372.1 RefSeq region 1 2341 . + . ID=NC_007372.1:1..2341 7 | NC_007372.1 RefSeq CDS 25 2298 . + 0 Name=PB1;gbkey=CDS;gene=PB1;protein_id=YP_308847.1;locus_tag=FLUAVH3N2_s2p1;product=polymerase PB1;ID=cds-YP_308847.1;Dbxref=GenBank:YP_308847.1,GeneID:3655159 8 | NC_007372.1 RefSeq CDS 119 391 . + 0 Name=PB1-F2;gbkey=CDS;gene=PB1-F2;protein_id=YP_308848.1;locus_tag=FLUAVH3N2_s2p2;product=PB1-F2 protein;ID=cds-YP_308848.1;Dbxref=GenBank:YP_308848.1,GeneID:3655160 9 | -------------------------------------------------------------------------------- /config/distance_maps/h3n2/ha/bush_epitope_A.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "output_type": "integer", 4 | "map": { 5 | "HA1": { 6 | "122": 1, 7 | "124": 1, 8 | "126": 1, 9 | "130": 1, 10 | "131": 1, 11 | "132": 1, 12 | "133": 1, 13 | "135": 1, 14 | "137": 1, 15 | "138": 1, 16 | "140": 1, 17 | "142": 1, 18 | "143": 1, 19 | "144": 1, 20 | "145": 1, 21 | "146": 1, 22 | "150": 1, 23 | "152": 1, 24 | "168": 1 25 | } 26 | }, 27 | "name": "bush_epitope_A" 28 | } 29 | -------------------------------------------------------------------------------- /config/references_to_include_in_titer_plots_vic.txt: -------------------------------------------------------------------------------- 1 | B/Austria/1359417/2021 2 | B/Sichuan-Jingyang/12048/2019 3 | B/Darwin/11/2021 4 | B/Netherlands/11263/2022 5 | B/Singapore/WUH4618/2021 6 | B/Victoria/2113/2019 7 | B/Victoria/16/2023 8 | B/Washington/2/2019 9 | B/Henan-Xigong/1118/2021 10 | B/Massachusetts/1/2021 11 | B/Kenya/186/2021 12 | B/NorthCarolina/1/2021 13 | B/Colorado/5/2022 14 | B/Netherlands/10900/2022 15 | B/Austria/1359417/2021-egg 16 | B/Sichuan-Jingyang/12048/2019-egg 17 | B/Singapore/WUH4618/2021-egg 18 | B/Washington/2/2019-egg 19 | 
B/Zhejiang-Xiacheng/11085/2021-egg 20 | B/Zhejiang-Nanhu/1854/2021-egg 21 | B/Netherlands/10894/2022-egg 22 | B/Henan-Xigong/1118/2021-egg 23 | B/Michigan/1/2021-egg 24 | -------------------------------------------------------------------------------- /config/colors_for_titer_plots_h1n1pdm.tsv: -------------------------------------------------------------------------------- 1 | clade_test 1 #5c22be 2 | clade_test 2 #4a3fdd 3 | clade_test 3 #66bbbe 4 | clade_test 5 #4c89e8 5 | clade_test 6B.1A.5a #57a5d7 6 | clade_test 5a.1 #3e59cf 7 | clade_test 5a.2 #705c94 8 | clade_test 5a.2a #e39b39 9 | clade_test 5a.2a.1 #dfd24e 10 | clade_test 5B #7fb975 11 | clade_test 6 #ff8e3a 12 | clade_test 6b #ff8e3a 13 | clade_test 6b1 #f5c546 14 | clade_test 6B.1A #dfd24e 15 | clade_test 7 #f93529 16 | subclade_test B #3E5DD0 17 | subclade_test C.1 #4A8CC2 18 | subclade_test C.1.1 #60AA9E 19 | subclade_test C.1.1.1 #80B974 20 | subclade_test C.1.2 #A6BE55 21 | subclade_test C.1.3 #CBB742 22 | subclade_test C.1.4 #E29D39 23 | subclade_test C.1.5 #E56A2F 24 | subclade_test C.1.6 #DB2823 25 | -------------------------------------------------------------------------------- /config/distance_maps/h3n2/ha/bush_epitope_E.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "output_type": "integer", 4 | "map": { 5 | "HA1": { 6 | "57": 1, 7 | "59": 1, 8 | "62": 1, 9 | "63": 1, 10 | "67": 1, 11 | "75": 1, 12 | "78": 1, 13 | "80": 1, 14 | "81": 1, 15 | "82": 1, 16 | "83": 1, 17 | "86": 1, 18 | "87": 1, 19 | "88": 1, 20 | "91": 1, 21 | "92": 1, 22 | "94": 1, 23 | "109": 1, 24 | "260": 1, 25 | "261": 1, 26 | "262": 1, 27 | "265": 1 28 | } 29 | }, 30 | "name": "bush_epitope_E" 31 | } 32 | -------------------------------------------------------------------------------- /profiles/nextflu-private/vic/ha/clades.tsv: -------------------------------------------------------------------------------- 1 | clade gene site alt 2 | V1A nuc 228 G 3 | 
V1A nuc 666 C 4 | V1A nuc 1362 T 5 | V1A nuc 1843 T 6 | V1B nuc 1056 G 7 | V1B nuc 1194 G 8 | V1A.1 HA1 129 G 9 | V1A.1 HA1 162 - 10 | V1A.1 HA1 163 - 11 | V1A.1 HA1 180 V 12 | V1A.1 HA2 152 K 13 | V1A.2 HA1 163 - 14 | V1A.2 HA1 164 - 15 | V1A.2 HA1 180 T 16 | V1A.2 HA1 209 N 17 | V1A.3 HA1 136 E 18 | V1A.3 HA1 163 - 19 | V1A.3 HA1 164 - 20 | V1A.3 nuc 484 G 21 | V1A.3/133R nuc 153 C 22 | V1A.3/133R HA1 133 R 23 | V1A.3/155A nuc 153 C 24 | V1A.3/155A HA1 155 A 25 | V1A.3a HA1 150 K 26 | V1A.3a HA1 184 E 27 | V1A.3a HA1 197 D 28 | V1A.3a HA1 279 K 29 | V1A.3a.1 HA1 150 K 30 | V1A.3a.1 HA1 220 M 31 | V1A.3a.1 HA1 241 Q 32 | V1A.3a.2 HA1 127 T 33 | V1A.3a.2 HA1 144 L 34 | V1A.3a.2 HA1 203 R 35 | -------------------------------------------------------------------------------- /config/distance_maps/h3n2/ha/bush_epitope_B.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "output_type": "integer", 4 | "map": { 5 | "HA1": { 6 | "128": 1, 7 | "129": 1, 8 | "155": 1, 9 | "156": 1, 10 | "157": 1, 11 | "158": 1, 12 | "159": 1, 13 | "160": 1, 14 | "163": 1, 15 | "164": 1, 16 | "165": 1, 17 | "186": 1, 18 | "187": 1, 19 | "188": 1, 20 | "189": 1, 21 | "190": 1, 22 | "192": 1, 23 | "193": 1, 24 | "194": 1, 25 | "196": 1, 26 | "197": 1, 27 | "198": 1 28 | } 29 | }, 30 | "name": "bush_epitope_B" 31 | } 32 | -------------------------------------------------------------------------------- /config/references_for_titer_plots/h3n2/egg_hi.txt: -------------------------------------------------------------------------------- 1 | A/Croatia/10136RV/2023-egg # J.2:145N, vaccine strain 2 | A/DistrictOfColumbia/27/2023-egg # J.2:145N, vaccine strain 3 | A/Idaho/69/2023-egg # J.2:145N 4 | A/Oregon/266/2024-egg # J.2:135K 5 | A/Catalonia/NSVH102423723/2024-egg # J.2:135K 6 | A/Oklahoma/5/2024-egg # J.2:158K 7 | A/Norway/423/2024-egg # J.2:158K 8 | A/Iowa/12/2024-egg # J.2:104N,145N 9 | A/Netherlands/2093/2024-egg # J.2.3 10 | 
A/Netherlands/2093/2025-egg # J.2.3:195Y 11 | A/Michigan/73/2025-egg # J.2.3:195Y 12 | A/Nepal/N042/2025-egg # J.2.4 13 | A/Valladolid/1187/2025-egg # J.2.4 14 | A/Singapore/GP20238/2024-egg # J.2.4 15 | A/NewJersey/16/2025-egg # J.2.5 16 | #A/Idaho/47/2023-egg # J.1.1:214T 17 | A/Minnesota/117/2024-egg # Missing 18 | A/Wisconsin/160/2024-egg # Missing 19 | -------------------------------------------------------------------------------- /ingest/defaults/final_annotations.tsv: -------------------------------------------------------------------------------- 1 | # Manually curated annotations TSV file 2 | # 3 | # This runs as the final step in the curate chain. If possible, please 4 | # put hardcoded annotations in an annotations file specific to a relevant 5 | # curation layer. For instance, lineage annotations should not go in this 6 | # file but rather `defaults/lineages.tsv` 7 | # 8 | # 9 | # This TSV should not have a header and should have exactly three columns: 10 | # COLUMN 1: (CURATED) STRAIN NAME 11 | # COLUMN 2: FIELD NAME (e.g. date) 12 | # COLUMN 3: NEW VALUE (e.g. 2025-01-01) 13 | # 14 | # If there are multiple annotations for the same id and field, then the last value is used 15 | # Lines starting with '#' are treated as comments 16 | # Any '#' after the field value are treated as comments. 
17 | 18 | -------------------------------------------------------------------------------- /config/colors_for_titer_plots_h3n2.tsv: -------------------------------------------------------------------------------- 1 | clade_test 1 #571EA2 2 | clade_test 1a #4334BF 3 | clade_test 1a.1 #3F55CE 4 | clade_test 2 #4376CD 5 | clade_test 2a #4C91C0 6 | clade_test 2a.1 #59A4A9 7 | clade_test 2a.1a #6AB18F 8 | clade_test 2a.1b #7FB975 9 | clade_test 2a.2 #97BD5F 10 | clade_test 2a.3 #AFBD4F 11 | clade_test 2a.3a #C7B944 12 | clade_test 2a.3a.1 #D9AD3D 13 | clade_test 2a.3b #E49838 14 | clade_test 2b #E67932 15 | clade_test 2c #E1512A 16 | clade_test 2d #DB2823 17 | subclade_test F.1.1 #3F52CD 18 | subclade_test G.1.1 #4681CA 19 | subclade_test G.1.1.2 #57A1AD 20 | subclade_test G.1.3 #70B487 21 | subclade_test G.1.3.1 #90BC65 22 | subclade_test G.1.3.1.1 #B4BD4C 23 | subclade_test G.1.3.2 #D3B240 24 | subclade_test G.2 #E59638 25 | subclade_test G.2.1 #E4642E 26 | subclade_test G.2.2 #DB2823 27 | -------------------------------------------------------------------------------- /config/references_for_titer_plots/h1n1pdm/cell_hi.txt: -------------------------------------------------------------------------------- 1 | A/Wisconsin/67/2022 # C.1.1, vaccine strain 2 | A/Victoria/4897/2022 # D 3 | #A/Massachusetts/76/2024 # D:152I,308K 4 | A/Missouri/11/2025 # D.3 5 | A/Perth/456/2025 # D.3.1, VIDRL 6 | A/Tokyo/EIS10-554/2024 # D.3.1, NIID 7 | #A/Kanagawa/IC2435/2025 # D.3.1:155R 8 | #A/Tokyo/EIS11-980/2025 # D.3.1:113K 9 | A/Colorado/218/2024 # D.3.1:113K, CDC 10 | A/Norway/8388/2024 # D.3.1:113K, Crick 11 | A/Bahrain/25220003050/2025 # D.3.1:113K 12 | A/Darwin/1015/2025 # D.3.1:113K,139D,283K 13 | A/Delaware/83/2024 # D.5 14 | #A/Tajikistan/2-1057/2024 # D.5, Crick 15 | A/Victoria/376/2024 # C.1.9 16 | A/Tennessee/77/2024 # C.1.9:P137S 17 | A/Canberra/651/2024 # C.1.9.3 18 | A/Iowa/110/2024 # C.1.9.3:166V 19 | A/Norway/7606/2024 # C.1.9.3:166V 20 | A/Victoria/42/2025 # 
C.1.9.3:137S,155E,166V 21 | -------------------------------------------------------------------------------- /config/yam/reference_strains.txt: -------------------------------------------------------------------------------- 1 | B/Arizona/10/2015 2 | B/Arizona/10/2015-egg 3 | B/Beijing/184/1993 4 | B/Brisbane/9/2014 5 | B/California/12/2015 6 | B/Canterbury/5/2017 7 | B/Darwin/58/2019 8 | B/Florida/4/2006 9 | B/Guangdong-Liwan/1133/2014 10 | B/Guyane/5/2018 11 | B/Guyane/5/2018-egg 12 | B/Hyogo/3210/2015 13 | B/Kanagawa/IC1649/2017 14 | B/Kuwait/7274/2017 15 | B/Kuwait/7274/2017-egg 16 | B/Massachusetts/2/2012 17 | B/Massachusetts/2/2012-egg 18 | B/Mauritius/1791/2017 19 | B/NewHampshire/1/2016 20 | B/NewHampshire/1/2016-egg 21 | B/Perth/4/2017 22 | B/Phuket/3073/2013 23 | B/Phuket/3073/2013-egg 24 | B/Sapporo/2/2015 25 | B/Shanghai/361/2002 26 | B/Sichuan/379/1999 27 | B/Sydney/10/2016 28 | B/Texas/81/2016 29 | B/Texas/81/2016-egg 30 | B/Utah/9/2014 31 | B/Wellington/40/2017 32 | B/Wisconsin/1/2010 33 | B/Yokohama/9/2019 34 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/ha/CY163680/pathogen.json: -------------------------------------------------------------------------------- 1 | { 2 | "nucMutLabelMap": {}, 3 | "nucMutLabelMapReverse": {}, 4 | "shortcuts": [ 5 | "flu_h3n2_ha_broad", 6 | "nextstrain/flu/h3n2/ha/wisconsin-67-2005" 7 | ], 8 | "aaMotifs": [ 9 | { 10 | "name": "glycosylation", 11 | "nameShort": "Glyc.", 12 | "nameFriendly": "Glycosylation", 13 | "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)", 14 | "includeCdses": [ 15 | { 16 | "cds":"HA1", 17 | "ranges":[] 18 | }, 19 | { 20 | "cds":"HA2", 21 | "ranges":[{"begin":0, "end":186}] 22 | } 23 | ], 24 | "motifs": [ 25 | "N[^P][ST]" 26 | ] 27 | } 28 | ] 29 | } -------------------------------------------------------------------------------- 
/nextclade/config/vic/ha/KX058884/founder_sequences_SigPep.fasta: -------------------------------------------------------------------------------- 1 | >A 2 | MKAIIVLLMVVTSNA 3 | >A.1 4 | MKAIIVLLMVVTSNA 5 | >A.2 6 | MKAIIVLLMVVTSNA 7 | >A.3 8 | MKAIIVLLMVVTSNA 9 | >A.3.1 10 | MKAIIVLLMVVTSNA 11 | >A.3.1.1 12 | MKAIIVLLMVVTSNA 13 | >A.3.2 14 | MKAIIVLLMVVTSNA 15 | >A.3.3 16 | MKAIIVLLMVVTSNA 17 | >B 18 | MKAIIVLLMVVTSNA 19 | >C 20 | MKAIIVLLMVVTSNA 21 | >C.1 22 | MKAIIVLLMVVTSNA 23 | >C.2 24 | MKAIIVLLMVVTSNA 25 | >C.3 26 | MKAIIVLLMVVTSNA 27 | >C.3.1 28 | MKAIIVLLMVVTSNA 29 | >C.3.2 30 | MKAIIVLLMVVTSNA 31 | >C.4 32 | MKAIIVLLMVVTSNA 33 | >C.5 34 | MKAIIVLLMVVTSNA 35 | >C.5.1 36 | MKAIIVLLMVVTSNA 37 | >C.5.2 38 | MKAIIVLLMVVTSNA 39 | >C.5.3 40 | MKAIIVLLMVVTSNA 41 | >C.5.4 42 | MKAIIVLLMVVTSNA 43 | >C.5.5 44 | MKAIIVLLMVVTSNA 45 | >C.5.6 46 | MKAIIVLLMVVTSNA 47 | >C.5.6.1 48 | MKAIIVLLMVVTSNA 49 | >C.5.7 50 | MKAIIVLLMVVTSNA 51 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/ha/CY121680/pathogen.json: -------------------------------------------------------------------------------- 1 | { 2 | "nucMutLabelMap": {}, 3 | "nucMutLabelMapReverse": {}, 4 | "shortcuts": [ 5 | "flu_h1n1pdm_ha_broad", 6 | "nextstrain/flu/h1n1pdm/ha/california-7-2009" 7 | ], 8 | "aaMotifs": [ 9 | { 10 | "name": "glycosylation", 11 | "nameShort": "Glyc.", 12 | "nameFriendly": "Glycosylation", 13 | "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)", 14 | "includeCdses": [ 15 | { 16 | "cds":"HA1", 17 | "ranges":[] 18 | }, 19 | { 20 | "cds":"HA2", 21 | "ranges":[{"begin":0, "end":186}] 22 | } 23 | ], 24 | "motifs": [ 25 | "N[^P][ST]" 26 | ] 27 | } 28 | ] 29 | } -------------------------------------------------------------------------------- /config/h3n2/ha/emerging_haplotypes.tsv: -------------------------------------------------------------------------------- 1 | haplotype gene site alt 2 
| J.2 clade J.2 3 | J.2.2 clade J.2.2 4 | J.2.3 clade J.2.3 5 | J.2.4 clade J.2.4 6 | 7 | J.2.4:160K clade J.2.4 8 | J.2.4:160K HA1 160 K 9 | 10 | J.2.4:79V-144N-158D-160K-328A clade J.2.4 11 | J.2.4:79V-144N-158D-160K-328A HA1 79 V 12 | J.2.4:79V-144N-158D-160K-328A HA1 144 N 13 | J.2.4:79V-144N-158D-160K-328A HA1 158 D 14 | J.2.4:79V-144N-158D-160K-328A HA1 160 K 15 | J.2.4:79V-144N-158D-160K-328A HA1 328 A 16 | 17 | J.2.4:62R-158D-160K-328A clade J.2.4 18 | J.2.4:62R-158D-160K-328A HA1 62 R 19 | J.2.4:62R-158D-160K-328A HA1 158 D 20 | J.2.4:62R-158D-160K-328A HA1 160 K 21 | J.2.4:62R-158D-160K-328A HA1 328 A 22 | 23 | K clade K 24 | 25 | K:80K clade K 26 | K:80K HA1 80 K 27 | 28 | K:88I clade K 29 | K:88I HA1 88 I 30 | 31 | K:145N clade K 32 | K:145N HA1 145 N 33 | 34 | K:272T clade K 35 | K:272T HA1 272 T 36 | -------------------------------------------------------------------------------- /nextclade/dataset_config/vic/ha/KX058884/pathogen.json: -------------------------------------------------------------------------------- 1 | { 2 | "nucMutLabelMap": {}, 3 | "nucMutLabelMapReverse": {}, 4 | "shortcuts": [ 5 | "flu_vic_ha", 6 | "nextstrain/flu/vic", 7 | "nextstrain/flu/vic/ha", 8 | "nextstrain/flu/vic/ha/brisbane-60-2008" 9 | ], 10 | "aaMotifs": [ 11 | { 12 | "name": "glycosylation", 13 | "nameShort": "Glyc.", 14 | "nameFriendly": "Glycosylation", 15 | "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)", 16 | "includeCdses": [ 17 | { 18 | "cds":"HA1", 19 | "ranges":[] 20 | }, 21 | { 22 | "cds":"HA2", 23 | "ranges":[{"begin":0, "end":186}] 24 | } 25 | ], 26 | "motifs": [ 27 | "N[^P][ST]" 28 | ] 29 | } 30 | ] 31 | } -------------------------------------------------------------------------------- /nextclade/dataset_config/yam/ha/JN993010/pathogen.json: -------------------------------------------------------------------------------- 1 | { 2 | "nucMutLabelMap": {}, 3 | "nucMutLabelMapReverse": {}, 4 | "shortcuts": [ 5 | 
"flu_yam_ha", 6 | "nextstrain/flu/yam", 7 | "nextstrain/flu/yam/ha", 8 | "nextstrain/flu/yam/ha/wisconsin-1-2010" 9 | ], 10 | "aaMotifs": [ 11 | { 12 | "name": "glycosylation", 13 | "nameShort": "Glyc.", 14 | "nameFriendly": "Glycosylation", 15 | "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)", 16 | "includeCdses": [ 17 | { 18 | "cds":"HA1", 19 | "ranges":[] 20 | }, 21 | { 22 | "cds":"HA2", 23 | "ranges":[{"begin":0, "end":186}] 24 | } 25 | ], 26 | "motifs": [ 27 | "N[^P][ST]" 28 | ] 29 | } 30 | ] 31 | } -------------------------------------------------------------------------------- /.github/workflows/run-public-builds.yaml: -------------------------------------------------------------------------------- 1 | name: Run the Nextstrain public builds 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | dockerImage: 7 | description: "Specific container image to use for build (will override the default of `nextstrain build`)" 8 | required: false 9 | type: string 10 | 11 | jobs: 12 | run-build: 13 | permissions: 14 | id-token: write 15 | uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master 16 | secrets: inherit 17 | with: 18 | runtime: aws-batch 19 | env: | 20 | NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }} 21 | run: | 22 | nextstrain build \ 23 | --detach \ 24 | --cpus 36 \ 25 | --memory 72gib \ 26 | . \ 27 | deploy_all \ 28 | -p \ 29 | --configfile profiles/nextstrain-public.yaml 30 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/mp/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_007367.1 1 1027 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341 6 | NC_007367.1 RefSeq region 1 1027 . + . ID=NC_007367.1:1..1027 7 | NC_007367.1 RefSeq CDS 26 51 . 
+ 0 Name=M2;gene=M2;gbkey=CDS;protein_id=YP_308840.1;locus_tag=FLUAVH3N2_s7p1;ID=cds-YP_308840.1;product=matrix protein 2;Dbxref=GenBank:YP_308840.1,GeneID:3655153 8 | NC_007367.1 RefSeq CDS 740 1007 . + 1 Name=M2;gene=M2;gbkey=CDS;protein_id=YP_308840.1;locus_tag=FLUAVH3N2_s7p1;ID=cds-YP_308840.1;product=matrix protein 2;Dbxref=GenBank:YP_308840.1,GeneID:3655153 9 | NC_007367.1 RefSeq CDS 26 784 . + 0 Name=M1;gene=M1;gbkey=CDS;protein_id=YP_308841.1;locus_tag=FLUAVH3N2_s7p2;ID=cds-YP_308841.1;product=matrix protein 1;Dbxref=GenBank:YP_308841.1,GeneID:3655152 10 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/mp/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H1N1pdm) MP based on reference "A/California/07/2009" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H1N1pdm) MP | 7 | | reference | A/California/07/2009 | 8 | | dataset path | flu/h1n1pdm/mp | 9 | | reference accession | NC_026431 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 
13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/np/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H1N1pdm) NP based on reference "A/California/07/2009" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H1N1pdm) NP | 7 | | reference | A/California/07/2009 | 8 | | dataset path | flu/h1n1pdm/np | 9 | | reference accession | NC_026436 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/ns/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H1N1pdm) NS based on reference "A/California/07/2009" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H1N1pdm) NS | 7 | | reference | A/California/07/2009 | 8 | | dataset path | flu/h1n1pdm/ns | 9 | | reference accession | NC_026432 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 
13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/pb1/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H1N1pdm) PB1 based on reference "A/California/07/2009" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H1N1pdm) PB1 | 7 | | reference | A/California/07/2009 | 8 | | dataset path | flu/h1n1pdm/pb1 | 9 | | reference accession | NC_026435 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/pb2/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H1N1pdm) PB2 based on reference "A/California/07/2009" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H1N1pdm) PB2 | 7 | | reference | A/California/07/2009 | 8 | | dataset path | flu/h1n1pdm/pb2 | 9 | | reference accession | NC_026438 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 
13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /config/distance_maps/h3n2/ha/bush_epitope_C.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "output_type": "integer", 4 | "map": { 5 | "HA1": { 6 | "44": 1, 7 | "45": 1, 8 | "46": 1, 9 | "47": 1, 10 | "48": 1, 11 | "50": 1, 12 | "51": 1, 13 | "53": 1, 14 | "54": 1, 15 | "273": 1, 16 | "275": 1, 17 | "276": 1, 18 | "278": 1, 19 | "279": 1, 20 | "280": 1, 21 | "294": 1, 22 | "297": 1, 23 | "299": 1, 24 | "300": 1, 25 | "304": 1, 26 | "305": 1, 27 | "307": 1, 28 | "308": 1, 29 | "309": 1, 30 | "310": 1, 31 | "311": 1, 32 | "312": 1 33 | } 34 | }, 35 | "name": "bush_epitope_C" 36 | } 37 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/mp/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_026431.1 1 982 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809 6 | NC_026431.1 RefSeq region 1 982 . + . ID=NC_026431.1:1..982; 7 | NC_026431.1 RefSeq CDS 1 26 . + 0 Name=M2;gene=M2;gbkey=CDS;locus_tag=UJ99_s7gp1;protein_id=YP_009118622.1;product=matrix protein 2;ID=cds-YP_009118622.1;Dbxref=GenBank:YP_009118622.1,GeneID:23308108 8 | NC_026431.1 RefSeq CDS 715 982 . + 1 Name=M2;gene=M2;gbkey=CDS;locus_tag=UJ99_s7gp1;protein_id=YP_009118622.1;product=matrix protein 2;ID=cds-YP_009118622.1;Dbxref=GenBank:YP_009118622.1,GeneID:23308108 9 | NC_026431.1 RefSeq CDS 1 759 . 
+ 0 Name=M1;gene=M1;gbkey=CDS;locus_tag=UJ99_s7gp2;protein_id=YP_009118623.1;product=matrix protein 1;ID=cds-YP_009118623.1;Dbxref=GenBank:YP_009118623.1,GeneID:23308107 10 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/mp/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H3N2) MP (segment 7) based on reference "A/New York/392/2004" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H3N2) MP (segment 7) | 7 | | reference | A/New York/392/2004 | 8 | | dataset path | flu/h3n2/mp | 9 | | reference accession | NC_007367 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/np/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H3N2) NP (segment 5) based on reference "A/New York/392/2004" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H3N2) NP (segment 5) | 7 | | reference | A/New York/392/2004 | 8 | | dataset path | flu/h3n2/np | 9 | | reference accession | NC_007369 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 
13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/ns/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H3N2) NS (segment 8) based on reference "A/New York/392/2004" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H3N2) NS (segment 8) | 7 | | reference | A/New York/392/2004 | 8 | | dataset path | flu/h3n2/ns | 9 | | reference accession | NC_007370 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/pb1/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H3N2) PB1 (segment 2) based on reference "A/New York/392/2004" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H3N2) PB1 (segment 2) | 7 | | reference | A/New York/392/2004 | 8 | | dataset path | flu/h3n2/pb1 | 9 | | reference accession | NC_007372 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 
13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/pb2/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H3N2) PB2 (segment 1) based on reference "A/New York/392/2004" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H3N2) PB2 (segment 1) | 7 | | reference | A/New York/392/2004 | 8 | | dataset path | flu/h3n2/pb2 | 9 | | reference accession | NC_007373 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 13 | 14 | ## What is Nextclade dataset 15 | 16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 17 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/ha/MW626062/pathogen.json: -------------------------------------------------------------------------------- 1 | { 2 | "nucMutLabelMap": {}, 3 | "nucMutLabelMapReverse": {}, 4 | "shortcuts": [ 5 | "flu_h1n1pdm_ha", 6 | "nextstrain/flu/h1n1pdm", 7 | "nextstrain/flu/h1n1pdm/ha", 8 | "nextstrain/flu/h1n1pdm/ha/wisconsin-588-2019" 9 | ], 10 | "aaMotifs": [ 11 | { 12 | "name": "glycosylation", 13 | "nameShort": "Glyc.", 14 | "nameFriendly": "Glycosylation", 15 | "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)", 16 | "includeCdses": [ 17 | { 18 | "cds":"HA1", 19 | "ranges":[] 20 | }, 21 | { 22 | "cds":"HA2", 23 | "ranges":[{"begin":0, "end":186}] 24 | } 25 | ], 26 | "motifs": [ 27 | "N[^P][ST]" 28 | ] 29 | } 30 | ] 
31 | } -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/ns/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_007370.1 1 890 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341 6 | NC_007370.1 RefSeq region 1 890 . + . ID=NC_007370.1:1..890 7 | NC_007370.1 RefSeq CDS 27 56 . + 0 Name=NEP;gbkey=CDS;gene=NEP;protein_id=YP_308844.1;locus_tag=FLUAVH3N2_s8p1;ID=cds-YP_308844.1;product=nonstructural protein 2;Dbxref=GenBank:YP_308844.1,GeneID:3655157 8 | NC_007370.1 RefSeq CDS 529 864 . + 0 Name=NEP;gbkey=CDS;gene=NEP;protein_id=YP_308844.1;locus_tag=FLUAVH3N2_s8p1;ID=cds-YP_308844.1;product=nonstructural protein 2;Dbxref=GenBank:YP_308844.1,GeneID:3655157 9 | NC_007370.1 RefSeq CDS 27 719 . + 0 Name=NS1;gbkey=CDS;gene=NS1;protein_id=YP_308845.1;locus_tag=FLUAVH3N2_s8p2;ID=cds-YP_308845.1;product=nonstructural protein 1;Dbxref=GenBank:YP_308845.1,GeneID:3655156 10 | -------------------------------------------------------------------------------- /nextclade/config/h1n1pdm/ha/CY121680/founder_sequences_SigPep.fasta: -------------------------------------------------------------------------------- 1 | >A 2 | MKAILVVLLYTFTTANA 3 | >B 4 | MKAILVVLLYTFTTANA 5 | >C 6 | MKAILVVMLYTFTTANA 7 | >C.1 8 | MKAILVVMLYTFTTANA 9 | >C.1.1 10 | MKAILVVMLYTFTTANA 11 | >C.1.2 12 | MKAILVVMLYTFTTANA 13 | >C.1.3 14 | MKAILVVMLYTFTTANA 15 | >C.1.4 16 | MKAILVVMLYTFTTANA 17 | >C.1.5 18 | MKAILVVMLYTFTTANA 19 | >C.1.6 20 | MKAILVVMLYTFTTANA 21 | >C.1.7 22 | MKAILVVMLYTFTTANA 23 | >C.1.7.1 24 | MKAILVVMLYTFTTANA 25 | >C.1.7.2 26 | MKAILVVILYTVTTANA 27 | >C.1.8 28 | MKAILVVMLYTLTTANA 29 | >C.1.9 30 | MKAILVVMLYTFTTANA 31 | >C.1.9.1 32 | MKAILVVMLYTFTTANA 33 | >C.1.9.2 34 | MKAILVVMLYTFTTANA 35 | >C.1.9.3 36 | MKAILVVMLYTFTTANA 37 | >C.1.9.4 38 | 
def shquotewords(s: str) -> str:
    """
    Re-quote a user-supplied string, word by word, for safe use in a shell.

    The string *s* is tokenised with POSIX shell rules, so quoting and
    escaping inside *s* are honoured (a quoted phrase containing spaces stays
    a single word).  Each resulting word is then quoted on its own and the
    words are joined back together with single spaces.

    The result keeps the word boundaries the user asked for while
    neutralising shell features such as variable interpolation, command
    substitution, redirection and piping.

    A single value that must stay one shell word (for example a query string
    passed to augur filter) can be quoted directly with
    f"--query {shquote(query)}".

    See usage in https://github.com/nextstrain/ncov for more examples.
    """
    words = shsplitwords(s)
    return " ".join(map(shquote, words))
+ 0 Name=PA-X;gbkey=CDS;gene=PA-X;locus_tag=UJ99_s3gp2;product=PA-X protein;protein_id=YP_009121769.1;ID=cds-YP_009121769.1;exception=ribosomal slippage;Dbxref=GenBank:YP_009121769.1,GeneID:23561398 10 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/pa/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_007371.1 1 2233 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341 6 | NC_007371.1 RefSeq region 1 2233 . + . ID=NC_007371.1:1..2233 7 | NC_007371.1 RefSeq CDS 25 2175 . + 0 Name=PA;gene=PA;gbkey=CDS;protein_id=YP_308846.1;product=polymerase PA;locus_tag=FLUAVH3N2_s3p1;ID=cds-YP_308846.1;Dbxref=GenBank:YP_308846.1,GeneID:3655158 8 | NC_007371.1 RefSeq CDS 25 597 . + 0 Name=PA-X;gbkey=CDS;gene=PA-X;product=PA-X protein;locus_tag=FLUAVH3N2_s3p2;protein_id=YP_006575868.1;ID=cds-YP_006575868.1;exception=ribosomal slippage;Dbxref=GenBank:YP_006575868.1,GeneID:13229459 9 | NC_007371.1 RefSeq CDS 599 784 . + 0 Name=PA-X;gbkey=CDS;gene=PA-X;product=PA-X protein;locus_tag=FLUAVH3N2_s3p2;protein_id=YP_006575868.1;ID=cds-YP_006575868.1;exception=ribosomal slippage;Dbxref=GenBank:YP_006575868.1,GeneID:13229459 10 | -------------------------------------------------------------------------------- /profiles/nextflu-private/deploy.smk: -------------------------------------------------------------------------------- 1 | """ 2 | This part of the workflow handles automatic deployments of nextflu-private builds. 
def lowercase_fields(records: Iterable, fields: Iterable[str] = ()) -> Iterable:
    """
    Yield every record from *records* with the values of *fields* lowercased.

    Records are modified in place and yielded one at a time, so the input can
    be streamed.

    Parameters
    ----------
    records
        Iterable of dict-like records (e.g. parsed NDJSON lines).
    fields
        Names of the record keys whose values should be lowercased.  An empty
        iterable (the default) or ``None`` (what argparse supplies when
        ``--fields`` is omitted) leaves every record unchanged.

    If a requested field is absent from a record, an error naming the field
    and the record's ``gisaid_epi_isl`` is reported via ``print_err`` and the
    process exits with status 2.
    """
    # The previous signature spelled the annotation as a default value
    # (``fields=list[str]``); treat "no fields" as a no-op instead.
    if fields is None:
        fields = ()
    # Materialise once so a one-shot iterator is usable for every record.
    keys = tuple(fields)

    for record in records:
        for key in keys:
            if key not in record:
                print_err(f"[FATAL] {key} not in record. EPI ISL: {record.get('gisaid_epi_isl', '[unknown]')}")
                exit(2)
            record[key] = record[key].lower()
        yield record
3 | data/ 4 | builds/ 5 | results/ 6 | tables/ 7 | auspice/ 8 | auspice-who/ 9 | auspice_renamed/ 10 | build/ 11 | logs/ 12 | figures/ 13 | targets/ 14 | benchmarks/ 15 | 16 | # Sensitive environment variables 17 | environment* 18 | 19 | # Snakemake state dir 20 | .snakemake 21 | 22 | # Local config overrides 23 | /config_local.yaml 24 | 25 | # For Python # 26 | ############## 27 | *.pyc 28 | .tox/ 29 | .cache/ 30 | 31 | # Compiled source # 32 | ################### 33 | *.com 34 | *.class 35 | *.dll 36 | *.exe 37 | *.o 38 | *.so 39 | 40 | # OS generated files # 41 | ###################### 42 | .DS_Store 43 | .DS_Store? 44 | ._* 45 | .Spotlight-V100 46 | .Trashes 47 | Icon? 48 | ehthumbs.db 49 | Thumbs.db 50 | *~ 51 | 52 | # nohup output 53 | nohup.out 54 | 55 | # cluster logs 56 | slurm-* 57 | 58 | # Jupyter/Altair droppings 59 | .ipynb_checkpoints 60 | geckodriver.log 61 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/ns/annotation.gff: -------------------------------------------------------------------------------- 1 | ##gff-version 3 2 | #!gff-spec-version 1.21 3 | #!processor NCBI annotwriter 4 | ##sequence-region NC_026432.1 1 863 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809 6 | NC_026432.1 RefSeq region 1 863 . + . ID=NC_026432.1:1..863; 7 | NC_026432.1 RefSeq CDS 1 30 . + 0 Name=NEP;gbkey=CDS;gene=NEP;locus_tag=UJ99_s8gp1;protein_id=YP_009118624.1;ID=cds-YP_009118624.1;product=nuclear export protein;Note=nonstructural protein 2;Dbxref=GenBank:YP_009118624.1,GeneID:23308112 8 | NC_026432.1 RefSeq CDS 503 838 . + 0 Name=NEP;gbkey=CDS;gene=NEP;locus_tag=UJ99_s8gp1;protein_id=YP_009118624.1;ID=cds-YP_009118624.1;product=nuclear export protein;Note=nonstructural protein 2;Dbxref=GenBank:YP_009118624.1,GeneID:23308112 9 | NC_026432.1 RefSeq CDS 1 660 . 
+ 0 Name=NS1;gbkey=CDS;gene=NS1;locus_tag=UJ99_s8gp2;protein_id=YP_009118625.1;ID=cds-YP_009118625.1;product=nonstructural protein 1;Dbxref=GenBank:YP_009118625.1,GeneID:23308111 10 | -------------------------------------------------------------------------------- /.github/workflows/run-nextflu-private-builds.yaml: -------------------------------------------------------------------------------- 1 | name: Run Nextstrain builds for the nextflu-private group 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | dockerImage: 7 | description: "Specific container image to use for build (will override the default of `nextstrain build`)" 8 | required: false 9 | type: string 10 | 11 | jobs: 12 | run-build: 13 | permissions: 14 | id-token: write 15 | uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master 16 | secrets: inherit 17 | with: 18 | runtime: aws-batch 19 | env: | 20 | NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }} 21 | run: | 22 | nextstrain build \ 23 | --detach \ 24 | --cpus 36 \ 25 | --memory 72gib \ 26 | --env NEXTSTRAIN_USERNAME \ 27 | --env NEXTSTRAIN_PASSWORD \ 28 | . 
\ 29 | deploy_all \ 30 | all_report_outputs \ 31 | -p \ 32 | --configfile profiles/nextflu-private.yaml 33 | -------------------------------------------------------------------------------- /profiles/full-trees/h1n1pdm_titer_strains.txt: -------------------------------------------------------------------------------- 1 | A/Bangkok/P176/2025 2 | A/Busan/277/2025 3 | A/Colorado/218/2024 4 | A/Hawaii/ISC-1140/2025 5 | A/Illinois/65/2024 6 | A/Iowa/123/2024 7 | A/Kanagawa/AC2408/2025 8 | A/Lisbon/188/2023 9 | A/Madagascar/3/2025 10 | A/Maldives/2132/2024 11 | A/Maryland/64/2024 12 | A/Massachusetts/ISC-1679/2025 13 | A/Minnesota/131/2024 14 | A/NovaScotia/ET1801CP00018S/2025 15 | A/Ohio/259/2024 16 | A/Oregon/11/2025 17 | A/Oregon/261/2024 18 | A/Pakistan/306/2024 19 | A/Qatar/83328/2024 20 | A/Qinghai-Chengzhong/SWL1410/2024 21 | A/Santiago/101713/2024 22 | A/Shanghai-Huangpu/SWL12109/2024 23 | A/Singapore/MOH0547/2024 24 | A/StPetersburg/RII-04/2025 25 | A/Tambov/160-1V/2024 26 | A/Tennessee/4/2025 27 | A/Tokyo/EIS11-277/2024 28 | A/Ufa/CRIE/47/2024 29 | A/Uganda/UVRI_KIS6850/2024 30 | A/Ulsan/492/2025 31 | A/Utah/39/2025 32 | A/Vermont/10/2025 33 | A/Vermont/5/2025 34 | A/Victoria/3599/2024 35 | A/Vladimir/RII-MH223382S/2024 36 | A/Wisconsin/30/2025 37 | A/Wisconsin/67/2022 38 | A/Wisconsin/NIRC-IS-1111/2025 39 | A/Zacapa/FLU-012/2025 40 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/ns/reference.fasta: -------------------------------------------------------------------------------- 1 | >NC_007370.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 8, complete sequence 2 | AGCAAAAGCAGGGTGACAAAGACATAATGGATTCCAACACTGTGTCAAGTTTCCAGGTAG 3 | ATTGCTTTCTTTGGCATATCCGGAAACAAGTTGTAGACCAAGAACTGAGTGATGCCCCAT 4 | TCCTTGATCGGCTTCGCCGAGATCAGAGGTCCCTAAGGGGAAGAGGCAATACTCTCGGTC 5 | TAGACATCAAAGCAGCCACCCATGTTGGAAAGCAAATTGTAGAAAAGATTCTGAAAGAAG 6 | AATCTGATGAGGCACTTAAAATGACCATGGTCTCCACACCTGCTTCGCGATACATAACTG 7 | 
#!/usr/bin/env bash
# Post a "job has FAILED" message to Slack via the sibling notify-slack script.
#
# Arguments:
#   $1  job name (required)
#   $2  GitHub repository as owner/name (required)
# Required environment: SLACK_TOKEN, SLACK_CHANNELS
# Optional environment: AWS_BATCH_JOB_ID, GITHUB_RUN_ID (select the detail text)
set -euo pipefail

# Abort early with a clear message if the Slack credentials are missing.
: "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}"
: "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}"

# Default the optional job identifiers to empty so `set -u` does not trip below.
: "${AWS_BATCH_JOB_ID:=}"
: "${GITHUB_RUN_ID:=}"

# Directory of this script; notify-slack is invoked from the same directory.
bin="$(dirname "$0")"
job_name="${1:?A job name is required as the first argument}"
# NOTE(review): github_repo is validated here but not referenced in the text
# visible below; presumably it was part of a link in the message. Confirm
# against the upstream vendored script.
github_repo="${2:?A GitHub repository with owner and repository name is required as the second argument}"

echo "Notifying Slack about failed ${job_name} job."
message="❌ ${job_name} job has FAILED 😞 "

# Append a pointer to wherever the job ran; AWS Batch takes precedence over
# GitHub Actions when both identifiers are set.
if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then
    message+="See AWS Batch job \`${AWS_BATCH_JOB_ID}\` () for error details. "
elif [[ -n "${GITHUB_RUN_ID}" ]]; then
    message+="See GitHub Action for error details. "
fi

"$bin"/notify-slack "$message"
if __name__ == "__main__":
    # Command-line interface: two tab-delimited inputs, the join type, the key
    # column(s) shared by both tables, and the tab-delimited output path.
    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument("--left", required=True, help="left table to join")
    cli.add_argument("--right", required=True, help="right table to join")
    cli.add_argument("--how", default="left", choices=["left", "inner", "outer"], help="how to join tables")
    cli.add_argument("--on", default=["strain"], nargs="+", help="columns to join tables on")
    cli.add_argument("--output", required=True, help="joined tables")
    options = cli.parse_args()

    # Load both inputs as tab-separated tables.
    left_table = pd.read_csv(options.left, sep="\t")
    right_table = pd.read_csv(options.right, sep="\t")

    # Join on the requested key columns using the requested join type.
    merged = left_table.merge(right_table, how=options.how, on=options.on)

    # Write the result without the index; missing values are spelled "N/A".
    merged.to_csv(options.output, sep="\t", index=False, header=True, na_rep="N/A")
31 | A/Albany/5/1958 32 | A/Albany/4/1958 33 | A/Albany/3/1958 34 | A/Albany/24/1958 35 | A/Albany/2/1958 36 | A/Albany/1/1958 37 | A/Ann Arbor/23/1957 38 | A/Rockville Illinois/5/1957 39 | A/Rockville Illinois/5-CA/1957 40 | A/Ann Arbor/23/1957 41 | A/Albany/22/1957 42 | A/Albany/20/1957 43 | A/Rotterdam/1957 44 | A/ITS/1/1957 45 | A/Okuda/1957 46 | A/Shanghai/202/1957 47 | A/Guiyang/1/1957 48 | A/Zhang/4/1957 49 | -------------------------------------------------------------------------------- /source-data/2018_Europe_flu_vaccination_coverage.tsv: -------------------------------------------------------------------------------- 1 | # extracted from Fig 3 https://www.sciencedirect.com/science/article/pii/S0264410X17317620#b0020 using https://apps.automeris.io/wpd/ 2 | #Scotland 76.31027253668763 3 | #Northern Ireland 72.95597484276729 4 | #England 72.32704402515722 5 | #Wales 68.55345911949685 6 | United Kingdom 72 7 | Belarus 73.79454926624737 8 | Netherlands 66.66666666666666 9 | Israel 63.94129979035639 10 | Ireland 59.74842767295597 11 | Spain 55.765199161425585 12 | Portugal 54.29769392033542 13 | Sweden 49.895178197064986 14 | Italy 48.00838574423481 15 | France 48.00838574423481 16 | Malta 42.76729559748427 17 | Iceland 40.88050314465409 18 | Finland 39.41299790356395 19 | Germany 35.84905660377358 20 | Russia 33.9622641509434 21 | Switzerland 29.35010482180293 22 | Norway 27.044025157232703 23 | Hungary 23.689727463312366 24 | Lithuania 21.59329140461216 25 | Croatia 19.077568134171905 26 | Montenegro 14.67505241090146 27 | Poland 13.626834381551356 28 | Slovakia 13.207547169811324 29 | Kazakhstan 10.69182389937106 30 | Romania 7.756813417190765 31 | Serbia 7.547169811320748 32 | Latvia 4.821802935010488 33 | Estonia 2.725366876310279 34 | -------------------------------------------------------------------------------- /nextclade/dataset_config/vic/pa/README.md: -------------------------------------------------------------------------------- 1 | # Influenza 
A(H1N1pdm) PA based on reference "A/California/07/2009" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H1N1pdm) PA | 7 | | reference | A/California/07/2009 | 8 | | dataset path | flu/h1n1pdm/pa | 9 | | reference accession | NC_026437 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 13 | 14 | ### Resistance mutations 15 | Resistance markers are taken from the [summary by the WHO/GISRS](https://www.who.int/teams/global-influenza-programme/laboratory-network/quality-assurance/antiviral-susceptibility-influenza/polymerase-acidic-protein-inhibitor). 16 | 17 | 18 | ## What is Nextclade dataset 19 | 20 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 21 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/pa/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H1N1pdm) PA based on reference "A/California/07/2009" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H1N1pdm) PA | 7 | | reference | A/California/07/2009 | 8 | | dataset path | flu/h1n1pdm/pa | 9 | | reference accession | NC_026437 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 13 | 14 | ### Resistance mutations 15 | Resistance markers are taken from the [summary by the WHO/GISRS](https://www.who.int/teams/global-influenza-programme/laboratory-network/quality-assurance/antiviral-susceptibility-influenza/polymerase-acidic-protein-inhibitor). 
16 | 17 | 18 | ## What is Nextclade dataset 19 | 20 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 21 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/pa/README.md: -------------------------------------------------------------------------------- 1 | # Influenza A(H3N2) PA (segment 3) based on reference "A/New York/392/2004" 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza A(H3N2) PA (segment 3) | 7 | | reference | A/New York/392/2004 | 8 | | dataset path | flu/h3n2/pa | 9 | | reference accession | NC_007371 | 10 | 11 | ## Features 12 | This dataset only provides a reference for alignment and an annotation for translation. 13 | 14 | ### Resistance mutations 15 | Resistance markers are taken from the [summary by the WHO/GISRS](https://www.who.int/teams/global-influenza-programme/laboratory-network/quality-assurance/antiviral-susceptibility-influenza/polymerase-acidic-protein-inhibitor). 
if __name__ == '__main__':
    # Command-line interface: one or more node data JSON inputs and one output.
    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument("--distances", nargs="+", required=True, help="node data JSON files of distances to be merged into a single output node data JSON")
    cli.add_argument("--output", required=True, help="merged node data JSON file")
    options = cli.parse_args()

    # The first file supplies the base node data document; every other file
    # contributes additional per-strain entries on top of it.
    first_path, *remaining_paths = options.distances
    with open(first_path, "r", encoding="utf-8") as handle:
        merged = json.load(handle)

    for path in remaining_paths:
        with open(path, "r", encoding="utf-8") as handle:
            extra = json.load(handle)

        # Each strain in a later file must already be present in the base
        # document; its distance entries are added to the existing record.
        for strain, distances in extra["nodes"].items():
            merged["nodes"][strain].update(distances)

    # Write the combined node data document.
    with open(options.output, "w", encoding="utf-8") as out_handle:
        json.dump(merged, out_handle)
35 | fi 36 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/mp/reference.fasta: -------------------------------------------------------------------------------- 1 | >NC_007367.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 7, complete sequence 2 | AGCAAAAGCAGGTAGATATTGAAAGATGAGCCTTCTAACCGAGGTCGAAACGTATGTTCT 3 | CTCTATCGTTCCATCAGGCCCCCTCAAAGCCGAGATCGCGCAGAGACTTGAAGATGTCTT 4 | TGCTGGGAAAAACACAGATCTTGAGGCTCTCATGGAATGGCTAAAGACAAGACCAATTCT 5 | GTCACCTCTGACTAAGGGGATTTTGGGGTTTGTGTTCACGCTCACCGTGCCCAGTGAGCG 6 | AGGACTGCAGCGTAGACGCTTTGTCCAAAATGCCCTCAATGGGAATGGAGATCCAAATAA 7 | CATGGACAAAGCAGTTAAACTGTATAGGAAACTTAAGAGGGAGATAACGTTCCATGGGGC 8 | CAAAGAAATAGCTCTCAGTTATTCTGCTGGTGCACTTGCCAGTTGCATGGGCCTCATATA 9 | CAATAGGATGGGGGCTGTAACCACTGAAGTGGCATTTGGCCTGGTATGTGCAACATGTGA 10 | ACAGATTGCTGACTCCCAGCACAGGTCTCATAGGCAAATGGTGGCAACAACCAATCCATT 11 | AATAAAACATGAGAACAGAATGGTTTTGGCCAGCACTACAGCTAAGGCTATGGAGCAAAT 12 | GGCTGGATCAAGTGAGCAGGCAGCGGAGGCCATGGAAATTGCTAGTCAGGCCAGGCAAAT 13 | GGTGCAGGCAATGAGAGCCGTTGGGACTCATCCTAGCTCCAGTACTGGTCTAAGAGATGA 14 | TCTTCTTGAAAATTTGCAGACCTATCAGAAACGAATGGGGGTGCAGATGCAACGATTCAA 15 | GTGACCCGCTTGTTGTTGCCGCGAGTATCATTGGGATCTTGCACTTGATATTGTGGATTC 16 | TTGATCGTCTTTTTTTCAAATGCGTCTATCGACTCTTCAAACACGGCCTTAAAAGAGGCC 17 | CTTCTACGGAAGGAGTACCTGAGTCTATGAGGGAAGAATATCGAAAGGAACAGCAGAATG 18 | CTGTGGATGCTGACGACAGTCATTTTGTCAGCATAGAGTTGGAGTAAAAAACTACCTTGT 19 | TTCTACT 20 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/mp/reference.fasta: -------------------------------------------------------------------------------- 1 | >NC_026431.1 Influenza A virus (A/California/07/2009(H1N1)) segment 7 matrix protein 2 (M2) and matrix protein 1 (M1) genes, complete cds 2 | ATGAGTCTTCTAACCGAGGTCGAAACGTACGTTCTTTCTATCATCCCGTCAGGCCCCCTC 3 | AAAGCCGAGATCGCGCAGAGACTGGAAAGTGTCTTTGCAGGAAAGAACACAGATCTTGAG 4 | 
GCTCTCATGGAATGGCTAAAGACAAGACCAATCTTGTCACCTCTGACTAAGGGAATTTTA 5 | GGATTTGTGTTCACGCTCACCGTGCCCAGTGAGCGAGGACTGCAGCGTAGACGCTTTGTC 6 | CAAAATGCCCTAAATGGGAATGGGGACCCGAACAACATGGATAGAGCAGTTAAACTATAC 7 | AAGAAGCTCAAAAGAGAAATAACGTTCCATGGGGCCAAGGAGGTGTCACTAAGCTATTCA 8 | ACTGGTGCACTTGCCAGTTGCATGGGCCTCATATACAACAGGATGGGAACAGTGACCACA 9 | GAAGCTGCTTTTGGTCTAGTGTGTGCCACTTGTGAACAGATTGCTGATTCACAGCATCGG 10 | TCTCACAGACAGATGGCTACTACCACCAATCCACTAATCAGGCATGAAAACAGAATGGTG 11 | CTGGCTAGCACTACGGCAAAGGCTATGGAACAGATGGCTGGATCGAGTGAACAGGCAGCG 12 | GAGGCCATGGAGGTTGCTAATCAGACTAGGCAGATGGTACATGCAATGAGAACTATTGGG 13 | ACTCATCCTAGCTCCAGTGCTGGTCTGAAAGATGACCTTCTTGAAAATTTGCAGGCCTAC 14 | CAGAAGCGAATGGGAGTGCAGATGCAGCGATTCAAGTGATCCTCTCGTCATTGCAGCAAA 15 | TATCATTGGGATCTTGCACCTGATATTGTGGATTACTGATCGTCTTTTTTTCAAATGTAT 16 | TTATCGTCGCTTTAAATACGGTTTGAAAAGAGGGCCTTCTACGGAAGGAGTGCCTGAGTC 17 | CATGAGGGAAGAATATCAACAGGAACAGCAGAGTGCTGTGGATGTTGACGATGGTCATTT 18 | TGTCAACATAGAGCTAGAGTAA 19 | -------------------------------------------------------------------------------- /profiles/gisaid/builds.yaml: -------------------------------------------------------------------------------- 1 | custom_rules: 2 | - profiles/gisaid/prepare_data.smk 3 | 4 | metadata_fields: 5 | - Isolate_Name 6 | - Isolate_Id 7 | - Passage_History 8 | - Location 9 | - Collection_Date 10 | - Submission_Date 11 | renamed_metadata_fields: 12 | - strain 13 | - accession 14 | - passage 15 | - full_location 16 | - date 17 | - date_submitted 18 | 19 | lat-longs: "config/lat_longs.tsv" 20 | 21 | segments: 22 | - ha 23 | 24 | submission_date_field: date_submitted 25 | recency: 26 | date_bins: [7, 30, 90] 27 | date_bin_labels: ["last week", "last month", "last quarter"] 28 | upper_bin_label: older 29 | 30 | builds: 31 | "h3n2": 32 | lineage: h3n2 33 | reference: "config/h3n2/{segment}/reference.fasta" 34 | annotation: "config/h3n2/{segment}/genemap.gff" 35 | tree_exclude_sites: "config/h3n2/{segment}/exclude-sites.txt" 36 | clades: "config/h3n2/ha/clades.tsv" 37 | 
subclades: "config/h3n2/{segment}/subclades.tsv" 38 | auspice_config: "config/h3n2/{segment}/auspice_config.json" 39 | enable_lbi: true 40 | enable_glycosylation: true 41 | subsamples: 42 | global: 43 | filters: "--group-by region year month --subsample-max-sequences 100" 44 | -------------------------------------------------------------------------------- /config/distance_maps/h3n2/ha/bush_epitope_D.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "output_type": "integer", 4 | "map": { 5 | "HA1": { 6 | "96": 1, 7 | "102": 1, 8 | "103": 1, 9 | "117": 1, 10 | "121": 1, 11 | "167": 1, 12 | "170": 1, 13 | "171": 1, 14 | "172": 1, 15 | "173": 1, 16 | "174": 1, 17 | "175": 1, 18 | "176": 1, 19 | "177": 1, 20 | "179": 1, 21 | "182": 1, 22 | "201": 1, 23 | "203": 1, 24 | "207": 1, 25 | "208": 1, 26 | "209": 1, 27 | "212": 1, 28 | "213": 1, 29 | "214": 1, 30 | "215": 1, 31 | "216": 1, 32 | "217": 1, 33 | "218": 1, 34 | "219": 1, 35 | "226": 1, 36 | "227": 1, 37 | "228": 1, 38 | "229": 1, 39 | "230": 1, 40 | "238": 1, 41 | "240": 1, 42 | "242": 1, 43 | "244": 1, 44 | "246": 1, 45 | "247": 1, 46 | "248": 1 47 | } 48 | }, 49 | "name": "bush_epitope_D" 50 | } 51 | -------------------------------------------------------------------------------- /ingest/vendored/notify-on-job-start: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}" 5 | : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}" 6 | 7 | : "${AWS_BATCH_JOB_ID:=}" 8 | : "${GITHUB_RUN_ID:=}" 9 | 10 | bin="$(dirname "$0")" 11 | job_name="${1:?A job name is required as the first argument}" 12 | github_repo="${2:?A GitHub repository with owner and repository name is required as the second argument}" 13 | build_dir="${3:-ingest}" 14 | 15 | echo "Notifying Slack about started ${job_name} 
job." 16 | message="${job_name} job has started." 17 | 18 | if [[ -n "${GITHUB_RUN_ID}" ]]; then 19 | message+=" The job was submitted by GitHub Action ." 20 | fi 21 | 22 | if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then 23 | message+=" The job was launched as AWS Batch job \`${AWS_BATCH_JOB_ID}\` ()." 24 | message+=" Follow along in your local clone of ${github_repo} with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ${build_dir}"'```' 25 | fi 26 | 27 | "$bin"/notify-slack "$message" 28 | -------------------------------------------------------------------------------- /scripts/flu_regions.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),'../config/frequency_weights_by_region.json')) as fh: 5 | population_sizes = json.load(fh) 6 | 7 | region_properties = { 8 | "global": {'label':"Global", "color":"#111111"}, 9 | 'Africa': {'abbr':'AF', 'label':'Africa', 'color':'#A0CCA5'}, 10 | 'Europe': {'abbr':'EU', 'label':'Europe', 'color':'#658447'}, 11 | 'North America':{'abbr':'NA', 'label':'N America', 'color':'#D6C568'}, 12 | 'China': {'abbr':'CN', 'label':'China', 'color':'#A76BB1'}, 13 | 'South Asia': {'abbr':'SAS', 'label':'South Asia', 'color':'#5199B7'}, 14 | 'Japan Korea': {'abbr':'JK', 'label':'Japan/Korea', 'color':'#2A4786'}, 15 | 'Oceania': {'abbr':'OC', 'label':'Oceania', 'color':'#8E1616'}, 16 | 'South America':{'abbr':'SA', 'label':'S America', 'color':'#EBA85F'}, 17 | 'Southeast Asia':{'abbr':'SEA', 'label':'SE Asia', 'color':'#8FBDD0'}, 18 | 'West Asia': {'abbr':'WA', 'label':'W Asia', 'color':'#76104B'}, 19 | } 20 | 21 | for region in region_properties: 22 | if region in population_sizes: 23 | region_properties[region]['popsize'] = population_sizes[region] 24 | 25 | region_names = [x for x in region_properties.keys() if x!='global'] 26 | 
-------------------------------------------------------------------------------- /zoltar/project.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Nextstrain seasonal influenza forecasts", 3 | "is_public": false, 4 | "description": "Forecasts for seasonal influenza developed by the Nextstrain team.", 5 | "home_url": "https://nextstrain.org/flu/seasonal/", 6 | "logo_url": "https://nextstrain.org/static/nextstrain-logo-small-ea8c3e13e8c17436264760d638ab970e.png", 7 | "core_data": "", 8 | "time_interval_type": "Month", 9 | "visualization_y_label": "Clade frequency", 10 | "units": [ 11 | {"name": "global:3C.2a1b.2a.2", "abbreviation": "global:3C.2a1b.2a.2"}, 12 | {"name": "global:3C.2a1b.2a.2/53G", "abbreviation": "global:3C.2a1b.2a.2/53G"}, 13 | {"name": "global:3C.2a1b.2a.2/53N", "abbreviation": "global:3C.2a1b.2a.2/53N"}, 14 | {"name": "global:3C.2a1b.2a.2/50K", "abbreviation": "global:3C.2a1b.2a.2/50K"}, 15 | {"name": "global:3C.2a1b.1a", "abbreviation": "global:3C.2a1b.1a"} 16 | ], 17 | "targets": [ 18 | { 19 | "type": "continuous", 20 | "name": "frequency in one year", 21 | "description": "clade frequency in 1 year", 22 | "outcome_variable": "frequency in one year", 23 | "is_step_ahead": false, 24 | "range": [0.0, 1.0] 25 | } 26 | ], 27 | "timezeros": [] 28 | } 29 | -------------------------------------------------------------------------------- /config/distance_maps/h3n2/na/munoz.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "map": { 4 | "NA": { 5 | "197": 1, 6 | "198": 1, 7 | "199": 1, 8 | "200": 1, 9 | "221": 1, 10 | "222": 1, 11 | "328": 1, 12 | "329": 1, 13 | "330": 1, 14 | "331": 1, 15 | "332": 1, 16 | "334": 1, 17 | "336": 1, 18 | "338": 1, 19 | "339": 1, 20 | "341": 1, 21 | "342": 1, 22 | "343": 1, 23 | "344": 1, 24 | "346": 1, 25 | "347": 1, 26 | "357": 1, 27 | "358": 1, 28 | "359": 1, 29 | "366": 1, 30 | "367": 1, 31 | "368": 1, 
32 | "369": 1, 33 | "370": 1, 34 | "383": 1, 35 | "384": 1, 36 | "385": 1, 37 | "386": 1, 38 | "387": 1, 39 | "389": 1, 40 | "390": 1, 41 | "391": 1, 42 | "392": 1, 43 | "393": 1, 44 | "394": 1, 45 | "396": 1, 46 | "399": 1, 47 | "400": 1, 48 | "401": 1, 49 | "403": 1 50 | } 51 | }, 52 | "name": "munoz" 53 | } -------------------------------------------------------------------------------- /ingest/vendored/.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | repos: 4 | - repo: https://github.com/pre-commit/sync-pre-commit-deps 5 | rev: v0.0.1 6 | hooks: 7 | - id: sync-pre-commit-deps 8 | - repo: https://github.com/shellcheck-py/shellcheck-py 9 | rev: v0.10.0.1 10 | hooks: 11 | - id: shellcheck 12 | - repo: https://github.com/rhysd/actionlint 13 | rev: v1.6.27 14 | hooks: 15 | - id: actionlint 16 | entry: env SHELLCHECK_OPTS='--exclude=SC2027' actionlint 17 | - repo: https://github.com/pre-commit/pre-commit-hooks 18 | rev: v4.6.0 19 | hooks: 20 | - id: trailing-whitespace 21 | - id: check-ast 22 | - id: check-case-conflict 23 | - id: check-docstring-first 24 | - id: check-json 25 | - id: check-executables-have-shebangs 26 | - id: check-merge-conflict 27 | - id: check-shebang-scripts-are-executable 28 | - id: check-symlinks 29 | - id: check-toml 30 | - id: check-yaml 31 | - id: destroyed-symlinks 32 | - id: detect-private-key 33 | - id: end-of-file-fixer 34 | - id: fix-byte-order-marker 35 | - repo: https://github.com/astral-sh/ruff-pre-commit 36 | # Ruff version. 37 | rev: v0.4.6 38 | hooks: 39 | # Run the linter. 
40 | - id: ruff 41 | -------------------------------------------------------------------------------- /profiles/nextstrain-public/rename.smk: -------------------------------------------------------------------------------- 1 | 2 | rule all_public: 3 | input: 4 | [ 5 | "auspice_renamed/" + build.get("auspice_name", f"{build_name}_{{segment}}").format(segment=segment) + suffix + ".json" 6 | for build_name, build in config["builds"].items() 7 | for segment in config["segments"] 8 | for suffix in ["", "_tip-frequencies"] 9 | ], 10 | 11 | def _get_file_by_auspice_name(wildcards): 12 | for build_name, build_params in config["builds"].items(): 13 | for segment in config["segments"]: 14 | if build_params.get("auspice_name", f"{build_name}_{{segment}}").format(segment=segment) == wildcards.auspice_name: 15 | return f"auspice/{build_name}_{segment}.json" 16 | 17 | return "" 18 | 19 | rule rename_auspice_main: 20 | input: 21 | _get_file_by_auspice_name, 22 | output: 23 | "auspice_renamed/{auspice_name}.json", 24 | shell: 25 | """ 26 | ln {input} {output} 27 | """ 28 | 29 | rule rename_auspice_tip_frequencies: 30 | input: 31 | lambda wildcards: _get_file_by_auspice_name(wildcards).replace(".json", "_tip-frequencies.json"), 32 | output: 33 | "auspice_renamed/{auspice_name}_tip-frequencies.json", 34 | shell: 35 | """ 36 | ln {input} {output} 37 | """ 38 | -------------------------------------------------------------------------------- /scripts/export_titers_for_auspice_v1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import json 4 | 5 | 6 | if __name__ == '__main__': 7 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 8 | parser.add_argument("--titers-sub") 9 | parser.add_argument("--titers-tree") 10 | parser.add_argument("--output-titers") 11 | parser.add_argument("--output-titers-sub") 12 | parser.add_argument("--output-titers-tree") 13 | 14 | args 
= parser.parse_args() 15 | 16 | with open(args.titers_sub) as fh: 17 | sub = json.load(fh) 18 | 19 | with open(args.output_titers_sub, 'wt') as sub_file: 20 | json.dump({'avidity': sub['avidity'], 21 | 'potency': sub['potency'], 22 | 'substitution': sub['substitution']}, 23 | sub_file, indent=1) 24 | 25 | with open(args.output_titers, 'wt') as raw_file: 26 | json.dump(sub['titers'], raw_file, indent=1) 27 | 28 | with open(args.titers_tree) as fh: 29 | tree = json.load(fh) 30 | 31 | with open(args.output_titers_tree, 'wt') as tree_file: 32 | json.dump({'avidity': tree['avidity'], 33 | 'potency': tree['potency'], 34 | 'dTiter': {k:v['dTiter'] for k,v in tree['nodes'].items()}}, 35 | tree_file, indent=1) 36 | -------------------------------------------------------------------------------- /ingest/Snakefile: -------------------------------------------------------------------------------- 1 | """ 2 | This is the main ingest Snakefile that orchestrates the full ingest workflow 3 | and defines its default outputs. 4 | """ 5 | # The workflow filepaths are written relative to this Snakefile's base directory 6 | workdir: workflow.current_basedir 7 | 8 | # Use default configuration values. Override with Snakemake's --configfile/--config options. 9 | configfile: "defaults/config.yaml" 10 | 11 | VALID_DATASETS = list(config['filtering'].keys()) 12 | 13 | wildcard_constraints: 14 | # Expected datasets should match the standardized outputs of the `filtering` block 15 | # (example datasets are "h3n2", "avian-flu") 16 | # in scripts/standardized-lineage 17 | dataset = r'|'.join(VALID_DATASETS), 18 | segment = r'pb2|pb1|pa|ha|np|na|mp|ns', 19 | # Constrain GISAID pair names to "gisaid_cache" or YYYY-MM-DD-N 20 | gisaid_pair = r'gisaid_cache|\d{4}-\d{2}-\d{2}(-\d+)?' 
21 | 22 | 23 | rule all: 24 | input: 25 | metadata = expand("results/{dataset}/metadata.tsv", dataset=VALID_DATASETS), 26 | sequences = expand("results/{dataset}/{segment}.fasta", dataset=VALID_DATASETS, segment=config["segments"]), 27 | 28 | 29 | include: "rules/prepare_ndjson.smk" 30 | include: "rules/curate.smk" 31 | 32 | 33 | if "custom_rules" in config: 34 | for rule_file in config["custom_rules"]: 35 | 36 | include: rule_file 37 | -------------------------------------------------------------------------------- /profiles/example/builds.yaml: -------------------------------------------------------------------------------- 1 | custom_rules: 2 | - profiles/ci/prepare_data.smk 3 | 4 | fasta_fields: 5 | - strain 6 | - virus 7 | - accession 8 | - date 9 | - date_submitted 10 | - region 11 | - country 12 | - division 13 | - location 14 | - passage_category 15 | - originating_lab 16 | - submitting_lab 17 | - age 18 | - gender 19 | prettify_fields: 20 | - region 21 | - country 22 | - division 23 | - location 24 | - originating_lab 25 | - submitting_lab 26 | 27 | lat-longs: "config/lat_longs.tsv" 28 | 29 | segments: 30 | - ha 31 | - na 32 | 33 | tree: 34 | tree-builder-args: "'-ninit 10 -n 4 -czb'" 35 | 36 | submission_date_field: date_submitted 37 | recency: 38 | date_bins: [7, 30, 90] 39 | date_bin_labels: ["last week", "last month", "last quarter"] 40 | upper_bin_label: older 41 | 42 | builds: 43 | "example_build": 44 | reference: "config/h3n2/{segment}/reference.fasta" 45 | annotation: "config/h3n2/{segment}/genemap.gff" 46 | tree_exclude_sites: "config/h3n2/{segment}/exclude-sites.txt" 47 | clades: "config/h3n2/ha/clades.tsv" 48 | emerging_haplotypes: "example_data/haplotypes.tsv" 49 | lineage: "h3n2" 50 | auspice_config: "config/h3n2/{segment}/auspice_config.json" 51 | subsamples: 52 | global: 53 | filters: "--exclude-where 'ha!=True' 'na!=True'" 54 | -------------------------------------------------------------------------------- /profiles/neut-library.yaml: 
-------------------------------------------------------------------------------- 1 | custom_rules: 2 | - workflow/snakemake_rules/download_from_s3.smk 3 | 4 | lat-longs: "config/lat_longs.tsv" 5 | 6 | segments: 7 | - ha 8 | 9 | tree: 10 | tree-builder-args: "'-ninit 10 -n 4 -czb'" 11 | 12 | submission_date_field: date_submitted 13 | recency: 14 | date_bins: [7, 30, 90] 15 | date_bin_labels: ["last week", "last month", "last quarter"] 16 | upper_bin_label: older 17 | 18 | builds: 19 | h3n2: 20 | lineage: h3n2 21 | reference: "config/h3n2/{segment}/reference.fasta" 22 | annotation: "config/h3n2/{segment}/genemap.gff" 23 | clades: "config/h3n2/ha/clades.tsv" 24 | subclades: "config/h3n2/{segment}/subclades.tsv" 25 | auspice_config: "config/h3n2/{segment}/auspice_config.json" 26 | subsamples: &subsampling 27 | global: 28 | filters: --min-date 2025-05-01 --exclude-ambiguous-dates-by month --exclude-where 'ha!=True' 'passage_category=egg' --query "\`qc.overallStatus\` == 'good'" 29 | h1n1pdm: 30 | lineage: h1n1pdm 31 | reference: "config/h1n1pdm/{segment}/reference.fasta" 32 | annotation: "config/h1n1pdm/{segment}/genemap.gff" 33 | clades: "config/h1n1pdm/ha/clades.tsv" 34 | subclades: "config/h1n1pdm/{segment}/subclades.tsv" 35 | auspice_config: "config/h1n1pdm/{segment}/auspice_config.json" 36 | subsamples: *subsampling 37 | -------------------------------------------------------------------------------- /config/h1n1/reference_strains.txt: -------------------------------------------------------------------------------- 1 | A/WSN/1933 2 | A/PuertoRico/8/1934 3 | A/Melbourne/1935 4 | A/Iowa/1943 5 | A/Melbourne/1/1946 6 | A/Liverpool/1951 7 | A/Denver/1957 8 | A/USSR/90/1977 9 | A/USSR/92/1977 10 | A/Brazil/11/1978 11 | A/Lackland/3/1978 12 | A/Arizona/14/1978 13 | A/Lackland/7/1978 14 | A/Fukushima/103/1978 15 | A/California/45/1978 16 | A/Kumamoto/103/1978 17 | A/Texas/23/1979 18 | A/USSR/50/1979 19 | A/England/333/1980 20 | A/India/6263/1980 21 | A/HongKong/2/1982 22 
| A/Texas/29/1982 23 | A/Chile/1/1983 24 | A/Dunedin/27/1983 25 | A/Victoria/7/1983 26 | A/Switzerland/79/1985 27 | A/Singapore/6/1986 28 | A/Taiwan/1/1986 29 | A/Victoria/36/1988 30 | A/Texas/36/1991 31 | A/Beijing/262/1995 32 | A/Bayern/7/1995 33 | A/Johannesburg/82/1996 34 | A/NewCaledonia/20/1999 35 | A/Madagascar/57794/2000 36 | A/HongKong/1252/2000 37 | A/Egypt/96/2002 38 | A/Chile/8885/2002 39 | A/Hungary/2/2003 40 | A/Netherlands/128/2004 41 | A/HongKong/2637/2004 42 | A/Thessaloniki/24/2005 43 | A/Egypt/39/2005 44 | A/SolomonIslands/3/2006 45 | A/HongKong/2652/2006 46 | A/Fukushima/141/2006 47 | A/Fukushima/97/2006 48 | A/Brisbane/59/2007 49 | A/Netherlands/345/2007 50 | A/Egypt/10/2007 51 | A/StPetersburg/10/2007 52 | A/SouthDakota/6/2007 53 | A/StPetersburg/12/2008 54 | A/HongKong/1870/2008 55 | A/HongKong/1856/2008 56 | A/Seychelles/2239/2008 57 | A/StPetersburg/5/2008 58 | A/Perth/200/2008 59 | A/HongKong/1988/2009 60 | -------------------------------------------------------------------------------- /scripts/table_to_node_data.py: -------------------------------------------------------------------------------- 1 | """Create Augur-compatible node data JSON from a pandas data frame. 
2 | """ 3 | import argparse 4 | import pandas as pd 5 | from augur.utils import write_json 6 | 7 | 8 | if __name__ == "__main__": 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument("--table", help="table to convert to a node data JSON") 11 | parser.add_argument("--index-column", default="strain", help="name of the column to use as an index") 12 | parser.add_argument("--delimiter", default=",", help="separator between columns in the given table") 13 | parser.add_argument("--node-name", default="nodes", help="name of the node data attribute in the JSON output") 14 | parser.add_argument("--output", help="node data JSON file") 15 | 16 | args = parser.parse_args() 17 | 18 | if args.output is not None: 19 | table = pd.read_csv( 20 | args.table, 21 | sep=args.delimiter, 22 | index_col=args.index_column, 23 | dtype=str, 24 | ) 25 | 26 | # # Convert columns that aren't strain names or labels to floats. 27 | # for column in table.columns: 28 | # if column != "strain" and not "label" in column: 29 | # table[column] = table[column].astype(float) 30 | 31 | table_dict = table.transpose().to_dict() 32 | write_json({args.node_name: table_dict}, args.output) 33 | -------------------------------------------------------------------------------- /config/distance_maps/h3n2/ha/luksza.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "map": { 4 | "HA1": { 5 | "50": 1, 6 | "53": 1, 7 | "54": 1, 8 | "121": 1, 9 | "122": 1, 10 | "124": 1, 11 | "126": 1, 12 | "131": 1, 13 | "133": 1, 14 | "135": 1, 15 | "137": 1, 16 | "142": 1, 17 | "143": 1, 18 | "144": 1, 19 | "145": 1, 20 | "146": 1, 21 | "155": 1, 22 | "156": 1, 23 | "157": 1, 24 | "158": 1, 25 | "159": 1, 26 | "160": 1, 27 | "163": 1, 28 | "164": 1, 29 | "172": 1, 30 | "173": 1, 31 | "174": 1, 32 | "186": 1, 33 | "188": 1, 34 | "189": 1, 35 | "190": 1, 36 | "192": 1, 37 | "193": 1, 38 | "196": 1, 39 | "197": 1, 40 | "201": 1, 41 | "207": 1, 42 | "213": 1, 43 | 
"217": 1, 44 | "226": 1, 45 | "227": 1, 46 | "242": 1, 47 | "244": 1, 48 | "248": 1, 49 | "275": 1, 50 | "276": 1, 51 | "278": 1, 52 | "299": 1, 53 | "307": 1 54 | } 55 | }, 56 | "name": "luksza" 57 | } -------------------------------------------------------------------------------- /scripts/xls2csv.py: -------------------------------------------------------------------------------- 1 | """Minimal script to convert Excel XLS format to CSV. 2 | """ 3 | import argparse 4 | import csv 5 | import xlrd 6 | 7 | 8 | if __name__ == '__main__': 9 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 10 | parser.add_argument("--xls", required=True, help="path to XLS file to convert") 11 | parser.add_argument("--output", required=True, help="path to CSV output") 12 | 13 | args = parser.parse_args() 14 | 15 | workbook = xlrd.open_workbook(args.xls) 16 | sheet = workbook.sheet_by_index(0) 17 | field_names = [field.value for field in sheet.row(0)] 18 | 19 | with open(args.output, "w", encoding="utf-8") as csvfile: 20 | writer = csv.writer(csvfile, dialect="unix") 21 | writer.writerow(field_names) 22 | 23 | for row_index in range(1, sheet.nrows): 24 | row = [] 25 | for field in sheet.row(row_index): 26 | value = field.value 27 | if isinstance(value, str): 28 | # Handle the case where cells can contain newline-delimited 29 | # values which will appear as new lines in our CSV output 30 | # unless we remove those characters. For example, see H3N2 31 | # record EPI_ISL_18856352. 
32 | value = value.replace("\n", "") 33 | 34 | row.append(value) 35 | 36 | writer.writerow(row) 37 | -------------------------------------------------------------------------------- /config/distance_maps/h1n1pdm/ha/canton.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "map": { 4 | "HA1": { 5 | "124": 1, 6 | "125": 1, 7 | "137": 1, 8 | "138": 1, 9 | "139": 1, 10 | "140": 1, 11 | "141": 1, 12 | "142": 1, 13 | "153": 1, 14 | "154": 1, 15 | "155": 1, 16 | "156": 1, 17 | "157": 1, 18 | "159": 1, 19 | "160": 1, 20 | "161": 1, 21 | "162": 1, 22 | "163": 1, 23 | "164": 1, 24 | "166": 1, 25 | "167": 1, 26 | "168": 1, 27 | "169": 1, 28 | "170": 1, 29 | "184": 1, 30 | "185": 1, 31 | "186": 1, 32 | "187": 1, 33 | "188": 1, 34 | "189": 1, 35 | "190": 1, 36 | "191": 1, 37 | "192": 1, 38 | "193": 1, 39 | "194": 1, 40 | "195": 1, 41 | "203": 1, 42 | "204": 1, 43 | "205": 1, 44 | "221": 1, 45 | "222": 1, 46 | "235": 1, 47 | "236": 1, 48 | "237": 1, 49 | "70": 1, 50 | "71": 1, 51 | "72": 1, 52 | "73": 1, 53 | "74": 1, 54 | "75": 1 55 | } 56 | }, 57 | "name": "canton" 58 | } -------------------------------------------------------------------------------- /config/nextstrain_clades_h1n1pdm_ha.tsv: -------------------------------------------------------------------------------- 1 | clade gene site alt 2 | 2 HA1 31 D 3 | 2 HA1 162 N 4 | 2 HA1 186 T 5 | 2 nuc 1100 A 6 | 3 HA1 183 P 7 | 3 HA1 134 T 8 | 3 nuc 1484 T 9 | 4 HA1 125 D 10 | 4 nuc 1301 A 11 | 4 HA2 47 K 12 | 4 HA1 283 K 13 | 6 HA1 97 N 14 | 6 HA2 124 N 15 | 6 HA1 185 T 16 | 7 HA1 143 G 17 | 7 nuc 164 T 18 | 7 nuc 1457 T 19 | 7 nuc 1676 T 20 | 8 SigPep 15 T 21 | 8 HA1 186 T 22 | 8 HA1 272 A 23 | 8 HA2 146 D 24 | 8 HA2 147 K 25 | 6b HA1 163 Q 26 | 6b HA1 256 T 27 | 6b nuc 1673 C 28 | 6c HA1 234 I 29 | 6c nuc 50 C 30 | 6c HA1 283 E 31 | 6c HA2 172 K 32 | 6b1 HA1 162 N 33 | 6b1 HA1 216 T 34 | 6b1 HA1 84 N 35 | 6b1 SigPep 13 T 36 | 6b2 SigPep 13 T 37 | 6b2 HA1 152 V 
38 | 6b2 HA2 174 E 39 | 6b2 HA1 173 I 40 | 6b1.A HA1 164 T 41 | 6b1.A nuc 1163 T 42 | 6b1.A nuc 1271 T 43 | A1 nuc 1703 A 44 | A1 nuc 384 C 45 | A1 nuc 618 C 46 | A1 nuc 1010 G 47 | A2 nuc 49 G 48 | A2 nuc 618 C 49 | A2 nuc 704 A 50 | A2 nuc 1460 A 51 | A3 nuc 425 A 52 | A3 nuc 618 C 53 | A3 nuc 1631 C 54 | A4 nuc 410 A 55 | A4 nuc 456 G 56 | A4 nuc 493 A 57 | A4 nuc 618 C 58 | A4 nuc 1205 G 59 | A4 nuc 1577 G 60 | A5 nuc 618 C 61 | A5 nuc 1364 C 62 | A5 nuc 1487 C 63 | A5a HA1 129 D 64 | A5a HA1 185 I 65 | A5b HA1 160 M 66 | A6 nuc 209 A 67 | A6 nuc 536 G 68 | A6 nuc 618 C 69 | A6 nuc 1403 T 70 | A7 nuc 317 G 71 | A7 nuc 976 C 72 | A7 nuc 1558 G 73 | A5a.1 HA1 187 A 74 | A5a.1 HA1 189 E 75 | A5a.2 HA1 156 K 76 | A5a.2 HA1 161 I -------------------------------------------------------------------------------- /nextclade/config/auspice_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "title": "Nextclade reference dataset for seasonal influenza viruses", 3 | "build_url": "https://github.com/nextstrain/seasonal-flu", 4 | "maintainers": [ 5 | { "name": "Nextstrain team", "url": "https://nextstrain.org" } 6 | ], 7 | "extensions": { 8 | "nextclade": { 9 | } 10 | }, 11 | "data_provenance": [ 12 | { 13 | "name": "GISAID" 14 | } 15 | ], 16 | "colorings": [ 17 | { 18 | "key": "country", 19 | "title": "Country", 20 | "type": "categorical" 21 | }, 22 | { 23 | "key": "legacy-clade", 24 | "title": "Legacy clade", 25 | "type": "categorical" 26 | }, 27 | { 28 | "key": "proposedSubclade", 29 | "title": "Subclade proposals", 30 | "type": "categorical" 31 | }, 32 | { 33 | "key": "region", 34 | "title": "Region", 35 | "type": "categorical" 36 | }, 37 | { 38 | "key": "date", 39 | "title": "Sample Date", 40 | "type": "ordinal" 41 | }, 42 | { 43 | "key": "EPI_ISL", 44 | "title": "EPI_ISL", 45 | "type": "categorical" 46 | } 47 | ], 48 | "filters": [ 49 | "region", 50 | "country", 51 | "clade_membership", 52 | "subclade" 53 | ], 54 | 
"display_defaults": { 55 | "color_by": "clade_membership", 56 | "distance_measure": "div", 57 | "branch_label": "clade" 58 | }, 59 | "panels": ["tree","entropy"] 60 | } 61 | -------------------------------------------------------------------------------- /ingest/vendored/cloudfront-invalidate: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Originally from @tsibley's gist: https://gist.github.com/tsibley/a66262d341dedbea39b02f27e2837ea8 3 | set -euo pipefail 4 | 5 | main() { 6 | local domain="$1" 7 | shift 8 | local paths=("$@") 9 | local distribution invalidation 10 | 11 | echo "-> Finding CloudFront distribution" 12 | distribution=$( 13 | aws cloudfront list-distributions \ 14 | --query "DistributionList.Items[?contains(Aliases.Items, \`$domain\`)] | [0].Id" \ 15 | --output text 16 | ) 17 | 18 | if [[ -z $distribution || $distribution == None ]]; then 19 | exec >&2 20 | echo "Unable to find CloudFront distribution id for $domain" 21 | echo 22 | echo "Are your AWS CLI credentials for the right account?" 23 | exit 1 24 | fi 25 | 26 | echo "-> Creating CloudFront invalidation for distribution $distribution" 27 | invalidation=$( 28 | aws cloudfront create-invalidation \ 29 | --distribution-id "$distribution" \ 30 | --paths "${paths[@]}" \ 31 | --query Invalidation.Id \ 32 | --output text 33 | ) 34 | 35 | echo "-> Waiting for CloudFront invalidation $invalidation to complete" 36 | echo " Ctrl-C to stop waiting." 
37 | aws cloudfront wait invalidation-completed \ 38 | --distribution-id "$distribution" \ 39 | --id "$invalidation" 40 | } 41 | 42 | main "$@" 43 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/na/EPI1857215/reference.fasta: -------------------------------------------------------------------------------- 1 | >EPI1857215 2 | AGTAAAGATGAATCCAAATCAAAAGATAATAACGATTGGCTCTGTTTCTCTCACAATTTCCACAATATGCTTCTTCATGCAAATTGCCATCCTGATAACTACTGTAACATTGCATTTCAAGCAATATGAATTCAACTCCCCCCCAAATAACCAAGTGATGCTGTGTGAACCAACAATAATAGAAAGAAACATAACAGAGATAGTGTATTTGACCAACACCACCATAGAGAAGGAAATATGCCCCAAACCAGCAGAATACAGAAATTGGTCAAAACCGCAATGTGGCATTACAGGATTTGCACCTTTCTCTAAGGACAATTCGATTAGGCTTTCCGCTGGTGGGGACATCTGGGTGACAAGAGAACCTTATGTGTCATGCGATCTTGACAAGTGTTATCAATTTGCCCTTGGACAGGGAACAACACTAAACAATGTGCATTCAAATAACACAGTACGTGATAGAACCCCTTATCGGACTCTATTGATGAATGAGTTGGGTGTTCCTTTCCATCTGGGGACCAAGCAAGTGTGCATAGCATGGTCCAGCTCAAGTTGTCACGATGGAAAAGCATGGCTGCATGTTTGTATAACGGGGGATGATAAAAATGCAACTGCTAGCTTCATTTACAATGGGAGGCTTGTAGATAGTGTTGTTTCATGGTCCAACGATATTCTCAGAACCCAGGAGTCAGAATGCGTTTGTATCAATGGAACTTGTACAGTAGTAATGACTGATGGAAATGCTACAGGAAAAGCTGATACTAAAATACTATTCATTGAGGAGGGGAAAATCGTTCATACTAGCAAATTGTCAGGAAGTGCTCAGCATGTCGAAGAGTGCTCTTGCTATCCTCGATATCCTGGTGTCAGATGTGTCTGCAGAGACAACTGGAAAGGATCCAACCGGCCCATCATAGATATAAACATAAAGGATCATAGCATTGTTTCCAGGTATGTGTGTTCTGGACTTGTTGGAGACACACCCAGAAAAAGCGACAGCTCCAGCAGTAGCCATTGTTTGAACCCTAACAATGAAAAAGGTGATCATGGAGTGAAAGGCTGGGCCTTTGATGATGGAAATGACGTGTGGATGGGGAGAACAATCAACGAGACGTCACGCTTAGGGTATGAAACCTTCAAAGTCGTTGAAGGCTGGTCCAACCCTAAGTCCAAATTGCAGATAAATAGGCAAGTCATAGTTGACAGAGGCGATAGGTCCGGTTATTCTGGTATTTTCTCTGTTGAAGGCAAAAGCTGCATCAATCGGTGCTTTTATGTGGAGTTGATTAGGGGAAGAAAAGAGGAAACTGAAGTCTTGTGGACTTCAAACAGTATTGTTGTGTTTTGTGGCACCTCAGGTACATATGGAACAGGCTCATGGCCTGATGGGGCGAACCTCAGTCTCATGCATATATAAGCTTTCGCAATTTTAGAAAAAA 3 | -------------------------------------------------------------------------------- /flu-forecasting/scripts/calculate_clade_frequency_forecasts.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import pandas as pd 4 | 5 | from augur.utils import read_node_data 6 | 7 | 8 | if __name__ == '__main__': 9 | parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) 10 | parser.add_argument("--forecasts", required=True, help="TSV of forecasts per strain") 11 | parser.add_argument("--clades", required=True, help="node data JSON of clades per strain") 12 | parser.add_argument("--output", required=True, help="TSV of forecasts per clade") 13 | 14 | args = parser.parse_args() 15 | 16 | # Load forecasts. 17 | forecasts = pd.read_csv( 18 | args.forecasts, 19 | sep="\t", 20 | usecols=("timepoint", "strain", "projected_frequency"), 21 | ) 22 | 23 | # Load clades. 24 | clades = read_node_data(args.clades) 25 | clade_by_strain = { 26 | name: data["clade_membership"] 27 | for name, data in clades["nodes"].items() 28 | } 29 | 30 | # Assign clades to strains in the forecasts table. 31 | forecasts["clade"] = forecasts["strain"].map(clade_by_strain) 32 | 33 | # Calculate projected frequency per clade. 34 | forecasts_by_clade = forecasts.groupby(["timepoint", "clade"])["projected_frequency"].sum().reset_index() 35 | 36 | # Save forecasts by clade. 
37 | forecasts_by_clade.to_csv( 38 | args.output, 39 | sep="\t", 40 | header=True, 41 | index=False, 42 | ) 43 | -------------------------------------------------------------------------------- /nextclade/dataset_config/yam/ha/JN993010/README.md: -------------------------------------------------------------------------------- 1 | # Nextclade dataset for "Influenza B Yam HA" based on reference "B/Wisconsin/01/2010" (flu/yam/ha/JN993010) 2 | 3 | | Key | Value | 4 | | -------------------- | -------------------- | 5 | | authors | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org) | 6 | | name | Influenza B(Yam) HA | 7 | | reference | B/Wisconsin/01/2010 | 8 | | dataset path | flu/yam/ha/JN993010 | 9 | | reference accession | JN993010 | 10 | 11 | ## Scope of this dataset 12 | B/Yamagata viruses have not been observed since 2020. This dataset is provided for analysis of old sequences or suspected Yamagata sequences. 13 | 14 | 15 | ## Features 16 | This dataset supports 17 | 18 | * Assignment to clades 19 | * Identification of glycosylation motifs 20 | * Sequence QC 21 | * Phylogenetic placement 22 | 23 | ## Clades of seasonal influenza viruses 24 | 25 | The WHO Collaborating centers define "clades" as genetic groups of viruses with signature mutations to facilitate discussion of circulating diversity of the viruses. 26 | Clade demarcations do not always coincide with significantly different antigenic properties of the viruses.
27 | 28 | ## What is a Nextclade dataset 29 | 30 | Read more about Nextclade datasets in the Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html 31 | -------------------------------------------------------------------------------- /ingest/vendored/trigger-on-new-data: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | : "${PAT_GITHUB_DISPATCH:?The PAT_GITHUB_DISPATCH environment variable is required.}" 5 | 6 | bin="$(dirname "$0")" 7 | 8 | github_repo="${1:?A GitHub repository with owner and repository name is required as the first argument.}" 9 | event_type="${2:?An event type is required as the second argument.}" 10 | metadata="${3:?A metadata upload output file is required as the third argument.}" 11 | sequences="${4:?An sequence FASTA upload output file is required as the fourth argument.}" 12 | identical_file_message="${5:-files are identical}" 13 | 14 | new_metadata=$(grep "$identical_file_message" "$metadata" >/dev/null; echo $?) 15 | new_sequences=$(grep "$identical_file_message" "$sequences" >/dev/null; echo $?) 16 | 17 | slack_message="" 18 | 19 | # grep exit status 0 for found match, 1 for no match, 2 if an error occurred 20 | if [[ $new_metadata -eq 1 || $new_sequences -eq 1 ]]; then 21 | slack_message="Triggering new builds due to updated metadata and/or sequences" 22 | "$bin"/trigger "$github_repo" "$event_type" 23 | elif [[ $new_metadata -eq 0 && $new_sequences -eq 0 ]]; then 24 | slack_message="Skipping trigger of rebuild: Both metadata TSV and sequences FASTA are identical to S3 files." 25 | else 26 | slack_message="Skipping trigger of rebuild: Unable to determine if data has been updated." 27 | fi 28 | 29 | 30 | if ! "$bin"/notify-slack "$slack_message"; then 31 | echo "Notifying Slack failed, but exiting with success anyway."
#!/usr/bin/env python3
"""
Prunes a reference strain from the provided tree.
"""
import argparse
from augur.io import read_sequences
from Bio import Phylo
import shutil
import sys


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # The tree and output paths are used unconditionally below, so let argparse
    # reject a missing value instead of failing later with an opaque error.
    parser.add_argument("--tree", required=True, help="Newick tree to prune")
    parser.add_argument("--reference", nargs="?", help="FASTA file for the reference used to root the tree and to prune from the input")
    parser.add_argument("--output", required=True, help="Output Newick tree file")

    args = parser.parse_args()

    # If reference is not provided, then just copy the input to output without modifications
    if not args.reference:
        # Diagnostics go to stderr so that stdout stays free of warnings.
        print("WARNING: No reference was provided, copying input tree to output tree", file=sys.stderr)
        shutil.copy(args.tree, args.output)
    else:
        # Open the reference sequence to get the name of the reference strain.
        # Only the first record of the FASTA is considered.
        reference = next(read_sequences(args.reference), None)
        if reference is None:
            print(f"ERROR: No sequences were found in the reference file {args.reference!r}", file=sys.stderr)
            sys.exit(1)

        reference_name = reference.id

        T = Phylo.read(args.tree, "newick")
        references = [c for c in T.find_clades(terminal=True) if c.name == reference_name]

        # When the reference is not a tip of the tree, the tree is written out
        # unchanged; otherwise it is rooted on the reference before that tip is
        # removed.
        if references:
            T.root_with_outgroup(references[0])
            T.prune(references[0])

        Phylo.write(T, args.output, "newick")
def deduplicate_records(records: Iterable[dict],
                        id_field: str) -> Iterable:
    """
    Deduplicate *records* by *id_field*, keeping only the first record seen
    for each id.

    This is a generator: records are consumed lazily and each record whose id
    has not been seen before is yielded in input order. Every later record
    with an already-seen id is dropped, and a message naming its index and id
    is reported through ``print_err``.

    Raises an ``Exception`` as soon as a record is reached whose *id_field* is
    missing or ``None``.
    """
    kept_ids = set()
    for position, entry in enumerate(records):
        entry_id = entry.get(id_field)

        # A record without an id cannot be compared against the others.
        if entry_id is None:
            raise Exception(f"Records must have the expected id field {id_field!r}")

        if entry_id not in kept_ids:
            kept_ids.add(entry_id)
            yield entry
        else:
            print_err(
                f"Dropping record (index {position!r}) with duplicate record id {entry_id!r}"
            )
") 43 | 44 | args = parser.parse_args() 45 | 46 | records = load_ndjson(stdin) 47 | deduped_records = deduplicate_records(records, args.id_field) 48 | dump_ndjson(deduped_records) 49 | -------------------------------------------------------------------------------- /nextclade/config/h3n2/ha/CY163680/founder_sequences_SigPep.fasta: -------------------------------------------------------------------------------- 1 | >A 2 | MKTIIALSYILCLVFA 3 | >A.2 4 | MKTIIALSYILCLVFA 5 | >A.3 6 | MKTIIALSYILCLVFA 7 | >A.3.2 8 | MKTIIALSYILCLVFA 9 | >B 10 | MKTIIALSYILCLVFA 11 | >B.1 12 | MKTIIALSYILCLVFA 13 | >B.1.1 14 | MKTIIALSYILCLVFA 15 | >B.1.2 16 | MKTIIALSYILCLVFA 17 | >B.1.2.1 18 | MKTIIALSYILCLVFA 19 | >B.1.2.1.1 20 | MKTIIALSYILCLVFA 21 | >B.2 22 | MKTIIALSYILCLVFA 23 | >B.3 24 | MKTIIALSYILCLVFA 25 | >B.4 26 | MKTIIALSYILCLVFA 27 | >C 28 | MKTIIALSYILCLVFA 29 | >C.1 30 | MKTIIALSCILCLVFA 31 | >D 32 | MKTIIALSYILCLVFT 33 | >E 34 | MKTIIALSYILCLVFA 35 | >E.1 36 | MKTIIALSYILCLVFA 37 | >E.2 38 | MKTIIALSYILCLVFA 39 | >F 40 | MKTIIALSYILCLVFA 41 | >F.1 42 | MKTIIALSYILCLVFA 43 | >F.1.1 44 | MKTIIALSYTLCLVFA 45 | >G 46 | MKTIIALSYILCLVFA 47 | >G.1 48 | MKTIIALSNILCLVFA 49 | >G.1.1 50 | MKTIIALSNILCLVFA 51 | >G.1.1.1 52 | MKTIIALSNILCLVFA 53 | >G.1.1.2 54 | MKTIIALSNILCLVFA 55 | >G.1.2 56 | MKTIIALSNILCLVFA 57 | >G.1.3 58 | MKTIIALSNILCLVFA 59 | >G.1.3.1 60 | MKTIIALSNILCLVFA 61 | >G.1.3.2 62 | MKTIIALSNILCLVFA 63 | >G.2 64 | MKTIIALSNILCLVFA 65 | >G.2.1 66 | MKTIIALSNILCLVFA 67 | >G.2.2 68 | MKTIIALSNILCLVFA 69 | >G.3 70 | MKTIIALSYILCLVFA 71 | >G.4 72 | MKTIIALSYILCLVFA 73 | >J 74 | MKAIIALSNILCLVFA 75 | >J.1 76 | MKAIIALSNILCLVFA 77 | >J.1.1 78 | MKAIIALSNILCLVFA 79 | >J.2 80 | MKAIIALSNILCLVFA 81 | >J.2.1 82 | MKAIIALSNILCLVFA 83 | >J.2.2 84 | MKAIIALSNILCLVFA 85 | >J.2.3 86 | MKAIIALSNILCLVFA 87 | >J.2.4 88 | MKAIIALSNILCLVFA 89 | >J.2.5 90 | MKAIIALSNILCLVFA 91 | >J.3 92 | MKAIIALSNILCLVFA 93 | >J.4 94 | MKTIIALSNILCLVFA 95 | >K 96 | MKAIIALSNILCLVFA 97 | 
-------------------------------------------------------------------------------- /nextclade/config/h3n2/ha/EPI1857216/founder_sequences_SigPep.fasta: -------------------------------------------------------------------------------- 1 | >A 2 | MKTIIALSYILCLVFA 3 | >A.2 4 | MKTIIALSYILCLVFA 5 | >A.3 6 | MKTIIALSYILCLVFA 7 | >A.3.2 8 | MKTIIALSYILCLVFA 9 | >B 10 | MKTIIALSYILCLVFA 11 | >B.1 12 | MKTIIALSYILCLVFA 13 | >B.1.1 14 | MKTIIALSYILCLVFA 15 | >B.1.2 16 | MKTIIALSYILCLVFA 17 | >B.1.2.1 18 | MKTIIALSYILCLVFA 19 | >B.1.2.1.1 20 | MKTIIALSYILCLVFA 21 | >B.2 22 | MKTIIALSYILCLVFA 23 | >B.3 24 | MKTIIALSYILCLVFA 25 | >B.4 26 | MKTIIALSYILCLVFA 27 | >C 28 | MKTIIALSYILCLVFA 29 | >C.1 30 | MKTIIALSCILCLVFA 31 | >D 32 | MKTIIALSYILCLVFT 33 | >E 34 | MKTIIALSYILCLVFA 35 | >E.1 36 | MKTIIALSYILCLVFA 37 | >E.2 38 | MKTIIALSYILCLVFA 39 | >F 40 | MKTIIALSYILCLVFA 41 | >F.1 42 | MKTIIALSYILCLVFA 43 | >F.1.1 44 | MKTIIALSYTLCLVFA 45 | >G 46 | MKTIIALSYILCLVFA 47 | >G.1 48 | MKTIIALSNILCLVFA 49 | >G.1.1 50 | MKTIIALSNILCLVFA 51 | >G.1.1.1 52 | MKTIIALSNILCLVFA 53 | >G.1.1.2 54 | MKTIIALSNILCLVFA 55 | >G.1.2 56 | MKTIIALSNILCLVFA 57 | >G.1.3 58 | MKTIIALSNILCLVFA 59 | >G.1.3.1 60 | MKTIIALSNILCLVFA 61 | >G.1.3.2 62 | MKTIIALSNILCLVFA 63 | >G.2 64 | MKTIIALSNILCLVFA 65 | >G.2.1 66 | MKTIIALSNILCLVFA 67 | >G.2.2 68 | MKTIIALSNILCLVFA 69 | >G.3 70 | MKTIIALSYILCLVFA 71 | >G.4 72 | MKTIIALSYILCLVFA 73 | >J 74 | MKAIIALSNILCLVFA 75 | >J.1 76 | MKAIIALSNILCLVFA 77 | >J.1.1 78 | MKAIIALSNILCLVFA 79 | >J.2 80 | MKAIIALSNILCLVFA 81 | >J.2.1 82 | MKAIIALSNILCLVFA 83 | >J.2.2 84 | MKAIIALSNILCLVFA 85 | >J.2.3 86 | MKAIIALSNILCLVFA 87 | >J.2.4 88 | MKAIIALSNILCLVFA 89 | >J.2.5 90 | MKAIIALSNILCLVFA 91 | >J.3 92 | MKAIIALSNILCLVFA 93 | >J.4 94 | MKTIIALSNILCLVFA 95 | >K 96 | MKAIIALSNILCLVFA 97 | -------------------------------------------------------------------------------- /nextclade/dataset_config/vic/pa/pathogen.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "shortcuts": [ 3 | "flu_vic_ha", 4 | "nextstrain/flu/vic/pa", 5 | "nextstrain/flu/vic/pa/brisbane-60-2008" 6 | ], 7 | "phenotypeData":[ 8 | { 9 | "name": "PAI", 10 | "nameFriendly": "PAI", 11 | "description": "This column displays a score associated with reduced susceptibility to polymerase inhibitors. The score is a weighted sum of known substitutions (x1) or major substitutions (x2), while substitutions at positions that confer resistance but are to amino acids not previously described to confer resistance count x0.5.", 12 | "cds": "PA", 13 | "aaRange": { 14 | "begin":0, 15 | "end": 460 16 | }, 17 | "data": [ 18 | { 19 | "name": "PAI_markers", 20 | "weight": 1, 21 | "locations": { 22 | "22": {"K":1, "default":0.5}, 23 | "33": {"I":1, "default":0.5}, 24 | "35": {"V":1, "default":0.5}, 25 | "37": {"F":1, "M":1, "T":2, "V":1, "default":0.5}, 26 | "119": {"D":1, "default":0.5}, 27 | "198": {"R":1, "default":0.5} 28 | } 29 | } 30 | ] 31 | } 32 | ], 33 | "mutLabels": { 34 | "aaMutLabelMap": { 35 | "PA:23K":["Baloxavir"], 36 | "PA:34I":["Baloxavir"], 37 | "PA:36V":["Baloxavir"], 38 | "PA:38F":["Baloxavir"], 39 | "PA:38M":["Baloxavir"], 40 | "PA:38T":["Baloxavir"], 41 | "PA:38V":["Baloxavir"], 42 | "PA:120D":["Baloxavir"], 43 | "PA:199R":["Baloxavir"] 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /nextclade/dataset_config/vic/na/CY073894/reference.fasta: -------------------------------------------------------------------------------- 1 | >CY073894.1 Influenza B virus (B/Brisbane/60/2008) segment 6 sequence 2 | ATGCTACCTTCAACTATACAAACGTTAACCCTATTTCTCACATCAGGGGGAGTATTATTA 3 | TCACTATATGTGTCAGCTTCATTATCATACTTACTATATTCGGATATATTGCTAAAATTC 4 | TCACCAACAGAAATAACTGCACCAACAATGCCATTGGATTGTGCAAACGCATCAAATGTT 5 | CAGGCTGTGAACCGTTCTGCAACAAAAGGGGTGACACTTCTTCTCCCAGAACCGGAGTGG 6 | 
ACATACCCGCGTTTATCTTGCCCGGGCTCAACCTTTCAGAAAGCACTCCTAATTAGCCCT 7 | CATAGATTCGGAGAAACCAAAGGAAACTCAGCTCCCTTGATAATAAGGGAACCTTTTATT 8 | GCTTGTGGACCAAATGAATGCAAACACTTTGCTCTAACCCATTATGCAGCCCAACCAGGG 9 | GGATACTACAATGGAACAAGAGGAGACAGAAACAAGCTGAGGCATCTAATTTCAGTCAAA 10 | TTGGGCAAAATCCCAACAGTAGAAAACTCCATTTTCCACATGGCAGCATGGAGCGGGTCC 11 | GCGTGCCATGATGGTAAGGAATGGACATATATCGGAGTTGATGGCCCTGACAATAATGCA 12 | TTGCTCAAAGTAAAATATGGAGAAGCATATACTGACACATACCATTCCTATGCAAACAAA 13 | ATCCTAAGAACACAAGAAAGTGCCTGCAATTGCATCGGGGGAAATTGTTATCTTATGATA 14 | ACTGATGGCTCAGCTTCAGGTGTTAGTGAATGCAGATTTCTTAAGATTCGAGAGGGCCGA 15 | ATAATAAAAGAAATATTTCCAACAGGAAGAGTAAAACACACTGAGGAATGCACATGCGGA 16 | TTTGCCAGCAATAAAACCATAGAATGTGCCTGTAGAGATAACAGTTACACAGCAAAAAGA 17 | CCTTTTGTCAAATTAAACGTGGAGACTGATACAGCAGAAATAAGATTGATGTGCACAGAT 18 | ACTTATTTGGACACCCCCAGACCAAACGATGGAAGCATAACAGGCCCTTGTGAATCTAAT 19 | GGGGACAAAGGGAGTGGAGGCATCAAGGGAGGATTTGTTCATCAAAGAATGGAATCCAAG 20 | ATTGGAAGGTGGTACTCTCGAACGATGTCTAAAACTGAAAGGATGGGGATGGGACTGTAT 21 | GTCAAGTATGATGGAGACCCATGGGCTGACAGTGATGCCCTAGCTTTTAGTGGAGTAATG 22 | GTTTCAATGAAAGAACCTGGTTGGTACTCCTTTGGCTTCGAAATAAAAGATAAGAAATGC 23 | GATGTCCCCTGTATTGGGATAGAGATGGTACATGATGGTGGAAAAGAGACTTGGCACTCA 24 | GCAGCAACAGCCATTTACTGTTTAATGGGCTCAGGACAGCTGCTGTGGGACACTGTCACA 25 | GGTGTTGACATGGCTCTGTAA 26 | -------------------------------------------------------------------------------- /config/h3n2/na/reference.fasta: -------------------------------------------------------------------------------- 1 | >CY114383 Organism:Influenza A virus|Strain Name:A/Wisconsin/67/2005|Segment:6|Subtype:H3N2 2 | AAGATGAATCCAAATCAAAAGATAATAACGATTGGCTCTGTTTCTCTCACCATTTCCACAATATGCTTCT 3 | TCATGCAAATTGCCATCTTGATAACTACTGTAACATTGCATTTCAAGCAATATGAATTCAACTCCCCCCC 4 | AAACAACCAAGTGATGCTGTGTGAACCAACAATAATAGAAAGAAACATAACAGAGATAGTGTATCTGACC 5 | AACACCACCATAGAGAAGGAAATATGCCCCAAACTAGCAGAATACAGAAATTGGTCAAAGCCGCAATGTA 6 | ACATTACAGGATTTGCACCTTTTTCTAAGGACAATTCGATTAGGCTTTCCGCTGGTGGGGACATCTGGGT 7 | 
GACAAGAGAACCTTATGTGTCATGCGATCCTGACAAGTGTTATCAATTTGCCCTTGGGCAGGGAACAACA 8 | CTAAACAACGTGCATTCAAATGACACAGTACATGATAGGACCCCTTATCGGACCCTATTGATGAATGAGT 9 | TAGGTGTTCCATTTCATCTGGGGACCAAGCAAGTGTGCATAGCATGGTCCAGCTCAAGTTGTCACGATGG 10 | AAAAGCATGGCTGCATGTTTGTGTAACGGGGGATGATAAAAATGCAACTGCTAGCTTCATTTACAATGGG 11 | AGGCTTGTAGATAGTATTGTTTCATGGTCCAAAGAAATCCTCAGGACCCAGGAGTCAGAATGCGTTTGTA 12 | TCAATGGAACTTGTACAGTAGTAATGACTGATGGGAGTGCTTCAGGAAAAGCTGATACTAAAATACTATT 13 | CATTGAGGAGGGGAAAATCGTTCATACTAGCACATTGTCAGGAAGTGCTCAGCATGTCGAGGAGTGCTCC 14 | TGCTATCCTCGATATCTTGGTGTCAGATGTGTCTGCAGAGACAACTGGAAAGGCTCCAATAGGCCCATAG 15 | TAGATATAAACATAAAGGATTATAGCATTGTTTCCAGTTATGTGTGCTCAGGACTTGTTGGAGACACACC 16 | CAGAAAAAACGACAGCTCCAGCAGTAGCCATTGCTTGGATCCTAACAATGAAGAAGGTGGTCATGGAGTG 17 | AAAGGCTGGGCCTTTGATGATGGAAATGACGTGTGGATGGGAAGAACGATCAGCGAGAAGTTACGCTCAG 18 | GATATGAAACCTTCAAAGTCATTGAAGGCTGGTCCAACCCTAATTCCAAATTGCAGATAAATAGGCAAGT 19 | CATAGTTGACAGAGGTAATAGGTCCGGTTATTCTGGTATTTTCTCTGTTGAAGGCAAAAGCTGCATCAAT 20 | CGGTGCTTTTATGTGGAGTTGATAAGGGGAAGAAAAGAGGAAACTGAAGTCTTGTGGACCTCAAACAGTA 21 | TTGTTGTGTTTTGTGGCACCTCAGGTACATATGGAACAGGCTCATGGCCTGATGGGGCGGACATCAATCT 22 | CATGCCTATATAAGCTTTCGCAATTTTAGAAAAAAC -------------------------------------------------------------------------------- /.github/workflows/run-private-nextflu-builds.yaml: -------------------------------------------------------------------------------- 1 | name: Run the private Nextflu builds 2 | 3 | on: 4 | schedule: 5 | # Scheduled to run at 5pm UTC (9am PST/10am PDT) on the first Friday of the month 6 | # cron hack based on 7 | - cron: '0 17 */100,1-7 * FRI' 8 | 9 | workflow_dispatch: 10 | inputs: 11 | dockerImage: 12 | description: "Specific container image to use for build (will override the default of `nextstrain build`)" 13 | required: false 14 | type: string 15 | 16 | jobs: 17 | run-build: 18 | permissions: 19 | id-token: write 20 | uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master 21 | secrets: inherit 22 | with: 23 | 
runtime: aws-batch 24 | env: | 25 | NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }} 26 | run: | 27 | nextstrain build \ 28 | --detach \ 29 | --cpus 36 \ 30 | --memory 72gib \ 31 | . \ 32 | all_who \ 33 | -p \ 34 | --configfile profiles/private.nextflu.org.yaml 35 | 36 | deploy-private-nextflu: 37 | needs: [run-build] 38 | runs-on: ubuntu-latest 39 | steps: 40 | - name: Trigger deploy-private-nextflu 41 | run: | 42 | gh workflow run \ 43 | deploy-private-nextflu.yaml \ 44 | --repo nextstrain/seasonal-flu \ 45 | -f aws_batch_job_id=${{ needs.run-build.outputs.aws-batch-job-id }} \ 46 | -f deploy_to_staging=false 47 | env: 48 | GITHUB_TOKEN: ${{ github.token }} 49 | -------------------------------------------------------------------------------- /config/vic/outliers.txt: -------------------------------------------------------------------------------- 1 | A/Malaysia/438/2016 2 | B/Alagoas/4386/2023 3 | B/Auckland/1/2008 4 | B/Bangkok/SI17/2012 5 | B/Bangkok/SI58/2012 6 | B/Bari/53/2023 7 | B/Brisbane/14/2016 8 | B/Brisbine/33/2008 9 | B/California/87/2017-egg 10 | B/Cambodia/26/2011 11 | B/Cambodia/30/2011 12 | B/Cambodia/62/2011 13 | B/Cambodia/89/2011 14 | B/Cambodia/V1005378/2011 15 | B/Darwin/14/2011 16 | B/Guangdong-Jinping/8139/2022 17 | B/Guizhou-Bijiang/1344/2022 18 | B/Hunan-Wuling/11182/2021 19 | B/Jiangsu-Suzhou/7-1123A11/2021 20 | B/Jiangsu-Suzhou/7-1123A14/2021 21 | B/Kol/2024/2008 22 | B/Kolkata/1373/2008 23 | B/Kolkata/2024/2008 24 | B/Kolkata/372/2010 25 | B/Krabi/FS002/2022 26 | B/Lisbon/niSU182_17-18/2018 27 | B/Malaysia/RP0995/2020 28 | B/Moscow/137-90V/2021 29 | B/NakhonPhanom/P3759/2022 30 | B/Netherlands/76/2014 31 | B/Netherlands/883/2016 32 | B/NewCaledonia/119/2015 33 | B/Novosibirsk/RII-26903S/2020 34 | B/Philippines/28/2019 35 | B/RheinlandPfalz/43/2016 36 | B/Shanghai-Qingpu/1539/2021 37 | B/SouthAustralia/81/2012 38 | B/Stockholm/7/2011 39 | B/SuratThani/P4519/2022 40 | B/Sydney/6/2016 41 | B/Thailand/CU-B11637/2015 42 | 
"""
Take a clades.json file that lists:
{
  "nodes": {
    "A/AbuDhabi/16/2017": {
      "clade_membership": "A1b/135N"
    },
    ...
and create a new file that keeps 'clade_membership' only for the tips of the
given tree. Internal nodes are left out of the output.
"""

import argparse
import Bio
import Bio.Phylo
import json


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Import clade membership",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--tree", required=True, help="Newick tree originally used to assign tips to clades")
    parser.add_argument("--clades", required=True, help="JSON file with clade memberships")
    parser.add_argument("--output", required=True, help="JSON file with scrubbed clade memberships")
    args = parser.parse_args()

    tree = Bio.Phylo.read(args.tree, 'newick')

    with open(args.clades) as infile:
        json_data = json.load(infile)

    # Keep only the clade membership of each tip. Every other annotation, and
    # every internal node of the tree that was originally used to assign tips
    # to clades, is dropped. A tip missing from the input JSON raises KeyError.
    tip_clades = {
        tip.name: {'clade_membership': json_data['nodes'][tip.name]['clade_membership']}
        for tip in tree.find_clades(terminal=True)
    }

    with open(args.output, 'w') as outfile:
        json.dump({'nodes': tip_clades}, outfile, indent=1, sort_keys=True)
AGTGGTCAGGATATAGCGGGAGTTTTGTTCAGCATCCAGAACTAACAGGGCTGGATTGTA 23 | TAAGACCTTGCTTCTGGGTTGAACTAATCAGAGGGCGACCCAAAGAGAACACAATCTGGA 24 | CTAGCGGGAGCAGCATATCCTTTTGTGGTGTAAACAGTGACACTGTGGGTTGGTCTTGGC 25 | CAGACGGTGCTGAGTTGCCATTTACCATTGACAAGTAATTTGTTCAAAAAAC 26 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/na/MW626056/reference.fasta: -------------------------------------------------------------------------------- 1 | >MW626056.1 Influenza A virus (A/Wisconsin/588/2019(H1N1)) segment 6 neuraminidase (NA) gene, complete cds 2 | AGTTTAAAATGAATCCAAACCAAAAGATAATAACCATTGGTTCTATCTGTATGACAATTG 3 | GAACGGCTAACTTAATATTACAAATTGGAAACATAATCTCAATATGGGTTAGCCACTCAA 4 | TTCAAATTGGAAATCAAAGCCAGATTGAAACATGCAATAAAAGCGTCATTACTTATGAAA 5 | ACAACACTTGGGTAAATCAGACATTTGTTAACATCAGCAACACTAACTCTGCTGCTAGAC 6 | AGTCAGTGGCTTCCGTGAAATTAGCGGGCAATTCCTCTCTCTGCCCTGTTAGTGGATGGG 7 | CTATATACAGTAAAGACAACAGTGTAAGAATCGGTTCCAAGGGGGATGTGTTTGTCATAA 8 | GGGAACCATTCATATCATGCTCTCCCTTGGAATGCAGAACCTTCTTCTTGACTCAAGGGG 9 | CTTTGCTAAATGACAAACATTCCAATGGAACCATTAAAGACAGAAGCCCATATCGAACCC 10 | TAATGAGCTGTCCTATTGGTGAAGTTCCCTCTCCATACAACTCAAGATTTGAGTCAGTCG 11 | CTTGGTCAGCAAGTGCTTGTCATGATGGCACCAATTGGCTAACAATTGGAATTTCTGGCC 12 | CAGACAGTGGGGCAGTGGCTGTGTTAAAATACAATGGCATAATAACAGACACTATCAAGA 13 | GTTGGAGGAACAAGATATTGAGAACACAAGAGTCTGAATGTGCATGTGTAAATGGTTCTT 14 | GCTTTACCATAATGACCGATGGACCAAGTGATGGACAGGCCTCATACAAAATCTTCAGAA 15 | TAGAAAAGGGAAAGATAATCAAATCAGTCGAAATGAAAGCCCCTAATTATCACTATGAAG 16 | AATGCTCCTGTTACCCTGATTCTAGTGAAATCACATGTGTGTGCAGGGATAACTGGCATG 17 | GCTCGAATCGACCGTGGGTGTCTTTCAACCAGAATCTGGAATATCAGATGGGATACATAT 18 | GCAGTGGGGTTTTCGGAGACAATCCACGCCCTAATGATAAGACAGGCAGTTGTGGTCCAG 19 | TATCGTCTAATGGAGCAAATGGGGTAAAAGGATTTTCATTCAAATACGGCAATGGTGTTT 20 | GGATAGGGAGAACTAAGAGCATTAGTTCAAGAAAAGGTTTTGAGATGATTTGGGATCCGA 21 | ATGGATGGACTGGGACTGACAATAAATTCTCAAAAAAGCAAGATATCGTAGGAATAAATG 22 | AGTGGTCAGGGTATAGCGGGAGTTTTGTTCAGCATCCAGAACTAACAGGGCTGAATTGTA 23 | 
def glycosylation_count(total_aa_seq, glyc_mask=None):
    """Count glycosylation motifs in an amino acid sequence.

    Parameters
    ----------
    total_aa_seq : str
        Amino acid sequence to scan.
    glyc_mask : iterable of bool, optional
        One flag per residue. Residues whose flag is falsy are replaced by
        'X' before scanning. Defaults to an all-True mask, so every residue is
        considered. If the mask and the sequence differ in length, only the
        common prefix is scanned.

    Returns
    -------
    int
        Number of non-overlapping matches of the pattern ``N[^P][ST][^P]``.
        The pattern needs a fourth residue, so a motif whose S/T is the last
        residue of the sequence is not counted. A masked 'X' still satisfies
        the ``[^P]`` positions.
    """
    if glyc_mask is None:
        glyc_mask = np.ones(len(total_aa_seq), dtype=bool)

    # TODO: need to restrict to surface residues.
    masked_residues = []
    for keep, residue in zip(glyc_mask, total_aa_seq):
        masked_residues.append(residue if keep else 'X')
    masked_seq = "".join(masked_residues)

    motif_hits = re.findall('N[^P][ST][^P]', masked_seq)
    return len(motif_hits)
def _get_file_by_auspice_name(wildcards):
    """Return the Auspice JSON path whose public name matches the wildcard.

    Each build in ``config["builds"]`` is combined with each segment in
    ``config["segments"]``. The build's name template is its ``auspice_name``
    entry, or ``"<build_name>_{segment}"`` when that entry is absent, and it is
    formatted with the segment. The first combination whose formatted name
    equals ``wildcards.auspice_name`` gives ``auspice/<build_name>_<segment>.json``.

    Returns an empty string when no combination matches.
    """
    matching_paths = (
        f"auspice/{build_name}_{segment}.json"
        for build_name, build_params in config["builds"].items()
        for segment in config["segments"]
        if build_params.get("auspice_name", f"{build_name}_{{segment}}").format(segment=segment) == wildcards.auspice_name
    )

    # Only the first match is used; the empty string signals "not found".
    return next(matching_paths, "")
CTATTTCTCACATCAGGGGGAGTGTTATTATCACTATATGTGTCAGCCTTACTGTCATAC 4 | TCACTGTATTCGGATATATTGCTAAAATTTTCACCAACAAAAACAATCGCACCAACAATG 5 | TCGCTGGACTGCGCGAACGCATCAAATGTTCAGGCTGTGAACCATTCTGCAACAAAAGGG 6 | ATGACACTTCTTCTCCCAGAACCGGAGTGGACATACCCTCGTTTATCTTGCCAGGGCTCA 7 | ACTTTCCAGAAAGCACTCCTAATTAGCCCTCATAGATTCGGAGAAACCAAAGGAAACTCA 8 | GCTCCCTTGATAATAAGGGAACCCTTTATTGCTTGTGGACCAAAGGAGTGCAAACACTTT 9 | GCTCTAACCCATTATGCAGCTCAACCAGGGGGATACTACAATGGAACAAGAGAGGACAGA 10 | AACAAGCTGAGGCATTTGATTTCAGTCAGCTTAGGCAAAATCCCAACTGTAGAAAACTCC 11 | ATTTTCCACATGGCAGCTTGGAGTGGATCCGCATGCCATGATGGTAGAGAATGGACATAT 12 | ATCGGAGTTGATGGCCCTGACAGTAATGCATTGATCAAAATAAAATATGGAGAAGCATAC 13 | ACTGACACATACCATTCCTATGCAAACAACATCCTAAGAACACAAGAAAGTGCCTGCAAT 14 | TGCATCGGGGGAGATTGTTATCTTATGATAACCGATGGCTCAGCTTCAGGAATTAGTAAA 15 | TGCAGATTTCTTAAGATTCGAGAGGGTCGAATAATAAAAGAAATATTTCCAACAGGAAGA 16 | GTAGATCATACTGAAGAATGCACATGCGGATTTGCCAGCAATAAAACCATAGAATGTGCC 17 | TGTAGAGATAACAGTTACACAGCAAAAAGACCCTTTGTCAAATTAAATGTGGAGACTGAT 18 | ACAGCTGAAATAAGATTGATGTGCACAGAGACTTATTTGGACACCCCCAGACCAGATGAT 19 | GGAAGCATAACAGGGCCTTGCGAATCCAATGGGGACAAAGGGCGTGGAGGTATCAAGGGA 20 | GGATTTGTCCATCAAAGAATGGCATCCAAGATTGGAAGATGGTACTCCCGAACGATGTCT 21 | AAAACTGAAAGACTGGGGATGGAACTGTATGTCAAGTATGATGGAGACCCATGGACTGAC 22 | AGTGACGCCCTTGCTCCTAGTGGAGTAATGGTCTCAGCGGAAGAACCTGGTTGGTATTCT 23 | TTCGGCTTCGAAATAAAAGATAAGAAATGTGATGTCCCCTGTATTGGGATAGAGATGGTA 24 | CACGATGGTGGAAAAAAGACTTGGCACTCAGCAGCAACAGCCATTTACTGTTTAATGGGC 25 | TCAGGACAGTTGCTATGGGACACTGTCACAGGCGTTGATATGGCTCTGTAATGGAGGAAT 26 | GGTTGAATCTGTTCTAAACCCTTTACTCCTATTTTGTTTGAACAATTGTCCTTACTGGAC 27 | TTAATTGTTTCTGAAA 28 | -------------------------------------------------------------------------------- /config/yam/na/reference.fasta: -------------------------------------------------------------------------------- 1 | >CY019709.1 Influenza B virus (B/Singapore/11/1994) segment 6, complete sequence 2 | AAACTGAGGCAAATAGGCCAAAAATGAACAATGCTACCTTCAACTATACAAACGTTAACC 3 | 
CTATTTCTCACATCAGGGGGAGTGCTATTATCACTATATGTGTCAGCTTCACTGTCATAC 4 | TTACTGTATTCGGGTATATTGCTAAAATTTTCACCAACAGAAATAACTGCACCAACAATG 5 | CCATTGGATTGTGCAAACGCATCAAATGTTCAGGCTGTGAACCGTTCTGCAACAAAAGGG 6 | GTGACACTTCTTCTCCCAGAACCGGAGTGGACATACCCTCGTTTATCTTGCCCGGGCTCA 7 | ACCTTTCAGAAAGCACTCCTAATTAGCCCTCATAGATTCGGAGAAACCAGAGGAAACTCA 8 | GCTCCCTTGATAATAAGGGAACCTTTTATTGCTTGTGGACCAAAGGAATGCAAACACTTT 9 | GCTCTAACCCATTATGCAGCTCAACCAGGGGGATACTACAATGGAACAAGAGAAGACAGA 10 | AACAAGCTGAGGCATCTAATTTCAGTCAAATTGGGCAAAATCCCAACAGTAGAAAACTCC 11 | ATTTTCCACATGGCAGCTTGGAGCGGGTCCGCATGCCATGATGGTAGAGAATGGACATAT 12 | ATCGGAGTTGATGGCCCTGACAGTAATGCATTGCTCAAAATAAAATATGGAGAAGCATAT 13 | ACTGACACATACCATTCCTATGCAAACAACATCCTAAGAACACAAGAAAGTGCCTGCAAT 14 | TGCATCGGGGGAGATTGTTATCTTATGATAACTGATGGCTCAGCTTCAGGGATTAGTAAA 15 | TGCAGATTTCTTAAGATTCGAGAGGGCCGAATAATAAAAGAAATATTCCCAACAGGAAGA 16 | GTAGAACATACTGAAGAATGCACATGCGGATTTGCCAGCAATAAAACCATAGAATGTGCC 17 | TGTAGAGATAACAGTTACACAGCAAAAAGACCCTTTGTCAAATTAAATGTGGAGACTGAT 18 | ACAGCGGAAATAAGATTGATGTGCACAGAGACTTATTTGGACACCCCCAGACCAGATGAT 19 | GGAAGCATAACAGGGCCTTGCGAATCTAATGGGGATAAAGGAAGTGGAGGCATCAAGGGA 20 | GGATTTGTTCATCAAAGAATGGCATCCAAGATTGGAAGGTGGTACTCTCGAACGATGTCT 21 | AAAACTAAAAGGATGGGGATGGGACTGTATGTCAAGTATGATGGAGACCCATGGACTGAC 22 | AGTGACGCCCTTGCTCTTAGTGGAGTAATGGTTTCAATGGAAGAACCTGGTTGGTATTCC 23 | TTTGGCTTCGAAATAAAAGATAAGAAATGTGATGTCCCCTGTATTGGGATAGAGATGGTA 24 | CATGATGGTGGAAAAAAGACTTGGCACTCAGCAGCAACAGCCATTTACTGTTTAATGGGC 25 | TCAGGACAACTGCTATGGGACACTGTCACAGGTGTTGATATGGCTCTGTAATGTAGGAAT 26 | GGTTGAGTCTGTTCTAAACCCTTTGTTCCTATTTTGTTTGAACAATTGTCCTTACTGAAC 27 | TTAATTGTTTCTGAAA 28 | -------------------------------------------------------------------------------- /.github/workflows/run-nextclade.yaml: -------------------------------------------------------------------------------- 1 | name: Run Nextclade on all sequences 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | dockerImage: 7 | description: "Specific container image to use for build (will override the default of 
`nextstrain build`)" 8 | required: false 9 | type: string 10 | nextcladeServer: 11 | description: "URL for a Nextclade server from which Nextclade datasets should be downloaded instead of the default public server" 12 | required: false 13 | type: string 14 | artifact-name: 15 | description: "Name to use for final artifact uploaded by this action" 16 | required: false 17 | type: string 18 | default: "build-outputs-nextclade" 19 | workflow_call: 20 | inputs: 21 | artifact-name: 22 | description: "Name to use for final artifact uploaded by this action" 23 | required: false 24 | type: string 25 | default: "build-outputs-nextclade" 26 | 27 | jobs: 28 | run-build: 29 | permissions: 30 | id-token: write 31 | uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master 32 | secrets: inherit 33 | with: 34 | artifact-name: ${{ inputs.artifact-name }} 35 | runtime: aws-batch 36 | env: | 37 | NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }} 38 | run: | 39 | nextstrain build \ 40 | --detach \ 41 | --cpus 36 \ 42 | --memory 72gib \ 43 | . 
\ 44 | upload_all_nextclade_files \ 45 | -p \ 46 | --configfile profiles/nextclade.yaml \ 47 | --config nextclade_server="${{ inputs.nextcladeServer }}" \ 48 | --set-threads run_nextclade=12 49 | -------------------------------------------------------------------------------- /profiles/ci/builds.yaml: -------------------------------------------------------------------------------- 1 | custom_rules: 2 | - profiles/ci/prepare_data.smk 3 | 4 | fasta_fields: 5 | - strain 6 | - virus 7 | - accession 8 | - date 9 | - date_submitted 10 | - region 11 | - country 12 | - division 13 | - location 14 | - passage_category 15 | - originating_lab 16 | - submitting_lab 17 | - age 18 | - gender 19 | prettify_fields: 20 | - region 21 | - country 22 | - division 23 | - location 24 | - originating_lab 25 | - submitting_lab 26 | 27 | lat-longs: "config/lat_longs.tsv" 28 | 29 | segments: 30 | - ha 31 | - na 32 | 33 | tree: 34 | tree-builder-args: "'-ninit 10 -n 4 -czb'" 35 | 36 | submission_date_field: date_submitted 37 | recency: 38 | date_bins: [7, 30, 90] 39 | date_bin_labels: ["last week", "last month", "last quarter"] 40 | upper_bin_label: older 41 | 42 | embedding: 43 | # Set lower value of perplexity when sample size is small. 
44 | perplexity: 9 45 | 46 | builds: 47 | "ci_build": 48 | lineage: h3n2 49 | reference: "config/h3n2/{segment}/reference.fasta" 50 | annotation: "config/h3n2/{segment}/genemap.gff" 51 | tree_exclude_sites: "config/h3n2/{segment}/exclude-sites.txt" 52 | clades: "config/h3n2/ha/clades.tsv" 53 | subclades: "config/h3n2/{segment}/subclades.tsv" 54 | min_date: "12Y" 55 | auspice_config: "config/h3n2/ha/auspice_config.json" 56 | enable_titer_models: true 57 | enable_lbi: true 58 | enable_glycosylation: true 59 | enable_embeddings: true 60 | titer_collections: 61 | - name: cdc_cell_fra 62 | data: "example_data/cdc_h3n2_cell_fra_titers.tsv" 63 | subsamples: 64 | global: 65 | filters: "--exclude-where 'ha!=True' 'na!=True'" 66 | -------------------------------------------------------------------------------- /config/distance_maps/h3n2/ha/shih.json: -------------------------------------------------------------------------------- 1 | { 2 | "default": 0, 3 | "map": { 4 | "HA1": { 5 | "105": 1, 6 | "106": 1, 7 | "108": 1, 8 | "110": 1, 9 | "115": 1, 10 | "117": 1, 11 | "119": 1, 12 | "121": 1, 13 | "126": 1, 14 | "127": 1, 15 | "128": 1, 16 | "129": 1, 17 | "130": 1, 18 | "139": 1, 19 | "140": 1, 20 | "141": 1, 21 | "142": 1, 22 | "143": 1, 23 | "144": 1, 24 | "147": 1, 25 | "148": 1, 26 | "156": 1, 27 | "157": 1, 28 | "158": 1, 29 | "170": 1, 30 | "172": 1, 31 | "173": 1, 32 | "174": 1, 33 | "176": 1, 34 | "177": 1, 35 | "180": 1, 36 | "181": 1, 37 | "186": 1, 38 | "191": 1, 39 | "197": 1, 40 | "201": 1, 41 | "210": 1, 42 | "211": 1, 43 | "226": 1, 44 | "228": 1, 45 | "232": 1, 46 | "244": 1, 47 | "246": 1, 48 | "259": 1, 49 | "260": 1, 50 | "262": 1, 51 | "283": 1, 52 | "291": 1, 53 | "34": 1, 54 | "37": 1, 55 | "38": 1, 56 | "41": 1, 57 | "46": 1, 58 | "47": 1, 59 | "51": 1, 60 | "59": 1, 61 | "62": 1, 62 | "65": 1, 63 | "66": 1, 64 | "67": 1, 65 | "76": 1, 66 | "78": 1 67 | } 68 | }, 69 | "name": "shih" 70 | } 
-------------------------------------------------------------------------------- /config/nextstrain_clades_h3n2_ha.tsv: -------------------------------------------------------------------------------- 1 | clade gene site alt 2 | 3b HA1 145 S 3 | 3b HA1 159 F 4 | 3b HA1 160 K 5 | 3b HA1 198 S 6 | 3b HA1 223 I 7 | 3b HA1 312 S 8 | 3b HA2 158 N 9 | 3b nuc 1195 C 10 | 3b nuc 1671 G 11 | 3c HA1 48 I 12 | 3c HA1 45 N 13 | 3c nuc 456 T 14 | 3c2 HA2 160 N 15 | 3c2 nuc 693 A 16 | 3c2 nuc 1518 G 17 | 3c3 HA1 128 A 18 | 3c3 HA1 142 G 19 | 3c3 nuc 1296 A 20 | 3c2.A HA1 159 Y 21 | 3c2.A nuc 1260 A 22 | 3c2.A HA1 3 I 23 | 3c2.A HA1 144 S 24 | 3c2.A HA1 160 T 25 | 3c3.A HA1 159 S 26 | 3c3.A HA1 225 D 27 | 3c3.A HA1 138 S 28 | 3c3.A HA1 326 R 29 | 3c3.B HA1 83 R 30 | 3c3.B HA1 261 Q 31 | 3c3.B HA1 62 K 32 | 3c3.B HA2 18 K 33 | A1 HA1 171 K 34 | A1 HA2 77 V 35 | A1 HA2 155 E 36 | A1a HA1 171 K 37 | A1a HA2 77 V 38 | A1a HA2 150 E 39 | A1a HA2 155 E 40 | A1a nuc 81 A 41 | A1a nuc 114 T 42 | A1a nuc 1484 A 43 | A1b HA1 92 R 44 | A1b HA1 311 Q 45 | A1b nuc 264 G 46 | A1b nuc 538 C 47 | A1b/131K HA1 131 K 48 | A1b/131K HA1 62 G 49 | A1b/131K HA1 142 G 50 | A1b/135K HA1 135 K 51 | A1b/135K HA1 62 G 52 | A1b/135K HA1 142 G 53 | A1b/135N HA1 135 N 54 | A1b/135N nuc 81 G 55 | A1b/137F HA1 135 K 56 | A1b/137F HA1 62 G 57 | A1b/137F HA1 142 G 58 | A1b/137F HA1 193 S 59 | A1b/186D HA1 135 K 60 | A1b/186D HA1 186 D 61 | A1b/186D HA1 190 N 62 | A1b/197R HA1 131 K 63 | A1b/197R HA1 62 G 64 | A1b/197R HA1 142 G 65 | A1b/197R HA1 197 R 66 | A1b/159N HA1 159 N 67 | A1b/159N HA1 160 I 68 | A1b/186S HA1 186 S 69 | A1b/186S HA1 198 P 70 | A1b/94N nuc 328 A 71 | A2 HA1 261 Q 72 | A2 HA1 142 K 73 | A2 nuc 1485 T 74 | A2/re nuc 1689 T 75 | A2/re nuc 1125 A 76 | A3 HA1 121 K 77 | A3 nuc 1134 G 78 | A3 nuc 1320 T 79 | A4 HA1 192 T 80 | A4 HA1 197 H 81 | A4 HA1 31 S 82 | A4 HA1 53 N 83 | A4 HA1 144 R 84 | -------------------------------------------------------------------------------- 
/nextclade/dataset_config/h1n1pdm/np/reference.fasta: -------------------------------------------------------------------------------- 1 | >NC_026436.1 Influenza A virus (A/California/07/2009(H1N1)) segment 5 nucleocapsid protein (NP) gene, complete cds 2 | ATGGCGTCTCAAGGCACCAAACGATCATATGAACAAATGGAGACTGGTGGGGAGCGCCAG 3 | GATGCCACAGAAATCAGAGCATCTGTCGGAAGAATGATTGGTGGAATCGGGAGATTCTAC 4 | ATCCAAATGTGCACTGAACTCAAACTCAGTGATTATGATGGACGACTAATCCAGAATAGC 5 | ATAACAATAGAGAGGATGGTGCTTTCTGCTTTTGATGAGAGAAGAAATAAATACCTAGAA 6 | GAGCATCCCAGTGCTGGGAAGGACCCTAAGAAAACAGGAGGACCCATATATAGAAGAGTA 7 | GACGGAAAGTGGATGAGAGAACTCATCCTTTATGACAAAGRAGAAATAAGGAGAGTTTGG 8 | CGCCTAGCAAACAATGGCGAAGATGCAACAGCAGGTCTTACTCATATCATGATTTGGCAT 9 | TCCAACCTGAATGATGCCACATATCAGAGAACAAGAGCGCTTGTTCGCACCGGAATGGAT 10 | CCCAGAATGTGCTCTCTAATGCAAGGTTCAACACTTCCCAGAAGGTCTGGTGCCGCAGGT 11 | GCTGCGGTGAAAGGAGTTGGAACAATAGCAATGGAGTTAATCAGAATGATCAAACGTGGA 12 | ATCAATGACCGAAATTTCTGGAGGGGTGAAAATGGACGAAGGACAAGGGTTGCTTATGAA 13 | AGAATGTGCAATATCCTCAAAGGAAAATTTCAAACAGCTGCCCAGAGGGCAATGATGGAT 14 | CAAGTAAGAGAAAGTCGAAACCCAGGAAACGCTGAGATTGAAGACCTCATTTTCCTGGCA 15 | CGGTCAGCACTCATTCTGAGGGGATCAGTTGCACATAAATCCTGCCTGCCTGCTTGTGTG 16 | TATGGGCTTGCAGTAGCAAGTGGGCATGACTTTGAAAGGGAAGGGTACTCACTGGTCGGG 17 | ATAGACCCATTCAAATTACTCCAAAACAGCCAAGTGGTCAGCCTGATGAGACCAAATGAA 18 | AACCCAGCTCACAAGAGTCAATTGGTGTGGATGGCATGCCACTCTGCTGCATTTGAAGAT 19 | TTAAGAGTATCAAGTTTCATAAGAGGAAAGAAAGTGATTCCAAGAGGAAAGCTTTCCACA 20 | AGAGGGGTCCAGATTGCTTCAAATGAGAATGTGGAAACCATGGACTCCAATACCCTGGAA 21 | CTGAGAAGCAGATACTGGGCCATAAGGACCAGGAGTGGAGGAAATACCAATCAACAAAAG 22 | GCATCCGCAGGCCAGATCAGTGTGCAGCCTACATTCTCAGTGCAGCGGAATCTCCCTTTT 23 | GAAAGAGCAACCGTTATGGCAGCATTCAGCGGGAACAATGAAGGACGGACATCCGACATG 24 | CGAACAGAAGTTATAAGAATGATGGAAAGTGCAAAGCCAGAAGATTTGTCCTTCCAGGGG 25 | CGGGGAGTCTTCGAGCTCTCGGACGAAAAGGCAACGAACCCGATCGTGCCTTCCTTTGAC 26 | ATGAGTAATGAAGGGTCTTATTTCTTCGGAGACAATGCAGAGGAGTATGACAGTTGA 27 | -------------------------------------------------------------------------------- 
/nextclade/dataset_config/h1n1pdm/pa/pathogen.json: -------------------------------------------------------------------------------- 1 | { 2 | "shortcuts": [ 3 | "flu_h1n1pdm_pa", 4 | "nextstrain/flu/h1n1pdm/pa" 5 | ], 6 | "phenotypeData":[ 7 | { 8 | "name": "PAI", 9 | "nameFriendly": "PAI", 10 | "description": "This column displays a score associated with reduced susceptibility to polymerase inhibitors. The score is a weighted sum of known substitutions (x1) or major substitutions (x2), while substitutions at positions that confer resistance but are to amino acids not previously described to confer resistance count x0.5.", 11 | "cds": "PA", 12 | "aaRange": { 13 | "begin":0, 14 | "end": 460 15 | }, 16 | "data": [ 17 | { 18 | "name": "PAI_markers", 19 | "weight": 1, 20 | "locations": { 21 | "22": {"G":1, "K":1, "R":1, "default":0.5}, 22 | "33": {"R":1, "default":0.5}, 23 | "36": {"T":1, "default":0.5}, 24 | "37": {"F":2, "L":2, "M":2, "S":2, "T":2, "V":1, "default":0.5}, 25 | "197": {"K":1, "default":0.5}, 26 | "198": {"D":1, "G":1, "default":0.5} 27 | } 28 | } 29 | ] 30 | } 31 | ], 32 | "mutLabels": { 33 | "aaMutLabelMap": { 34 | "PA:23G":["Baloxavir"], 35 | "PA:23K":["Baloxavir"], 36 | "PA:23R":["Baloxavir"], 37 | "PA:34R":["Baloxavir"], 38 | "PA:37T":["Baloxavir"], 39 | "PA:38F":["Baloxavir"], 40 | "PA:38L":["Baloxavir"], 41 | "PA:38M":["Baloxavir"], 42 | "PA:38S":["Baloxavir"], 43 | "PA:38T":["Baloxavir"], 44 | "PA:38V":["Baloxavir"], 45 | "PA:198K":["Baloxavir"], 46 | "PA:199D":["Baloxavir"], 47 | "PA:199G":["Baloxavir"] 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /.github/workflows/ingest.yaml: -------------------------------------------------------------------------------- 1 | name: Ingest 2 | 3 | defaults: 4 | run: 5 | # This is the same as GitHub Action's `bash` keyword as of 20 June 2023: 6 | # 
https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell 7 | # 8 | # Completely spelling it out here so that GitHub can't change it out from under us 9 | # and we don't have to refer to the docs to know the expected behavior. 10 | shell: bash --noprofile --norc -eo pipefail {0} 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | dockerImage: 16 | description: "Specific container image to use for build (will override the default of `nextstrain build`)" 17 | required: false 18 | type: string 19 | runtime: 20 | description: "Nextstrain runtime" 21 | type: choice 22 | default: "docker" 23 | options: 24 | - "docker" 25 | - "aws-batch" 26 | 27 | jobs: 28 | ingest: 29 | permissions: 30 | id-token: write 31 | uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master 32 | secrets: inherit 33 | with: 34 | runtime: ${{ inputs.runtime }} 35 | env: | 36 | NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }} 37 | run: | 38 | nextstrain build \ 39 | ingest \ 40 | upload_all \ 41 | --configfile build-configs/nextstrain-automation/config.yaml 42 | # Explicitly excluding `ingest/data` and `ingest/fauna/data` 43 | # since this is private data and should not available through the public artifacts 44 | artifact-paths: | 45 | !ingest/data/ 46 | !ingest/results/ 47 | ingest/build.log 48 | ingest/logs/ 49 | ingest/benchmarks/ 50 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/np/reference.fasta: -------------------------------------------------------------------------------- 1 | >NC_007369.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 5, complete sequence 2 | AGCAAAAGCAGGGTTAATAATCACTCACCGAGTGACATCAAAATCATGGCGTCCCAAGGC 3 | ACCAAACGGTCTTATGAACAGATGGAAACTGATGGGGATCGCCAGAATGCAACTGAGATT 4 | AGGGCATCCGTCGGGAAGATGATTGATGGAATTGGGAGATTCTACATCCAAATGTGCACT 5 | GAACTTAAACTCAGTGATCATGAAGGGCGGTTGATCCAGAACAGCTTGACAATAGAGAAA 6 | 
ATGGTGCTCTCTGCTTTTGATGAAAGAAGGAATAAATACCTGGAAGAACACCCCAGCGCG 7 | GGGAAAGATCCCAAGAAAACTGGGGGGCCCATATACAGGAGAGTAGATGGAAAATGGATG 8 | AGGGAACTCGTCCTTTATGACAAAGAAGAGATAAGGCGAATCTGGCGCCAAGCCAACAAT 9 | GGTGAGGATGCGACAGCTGGTCTAACTCACATAATGATCTGGCATTCCAATTTGAATGAT 10 | GCAACATACCAGAGGACAAGAGCTCTTGTTCGAACTGGAATGGATCCCAGAATGTGCTCT 11 | CTGATGCAGGGCTCGACTCTCCCTAGAAGGTCCGGAGCTGCAGGTGCTGCAGTCAAAGGA 12 | ATCGGGACAATGGTGATGGAACTGATCAGAATGGTCAAACGGGGGATCAACGATCGAAAT 13 | TTCTGGAGAGGTGAGAATGGGCGGAAAACAAGAAGTGCTTATGAGAGAATGTGCAACATT 14 | CTTAAAGGAAAATTTCAAACAGCTGCACAAAGAGCAATGGTGGATCAAGTGAGAGAAAGT 15 | CGGAACCCAGGAAATGCTGAGATCGAAGATCTCATATTTTTGGCAAGATCTGCATTGATA 16 | TTGAGAGGGTCAGTTGCTCACAAATCTTGCCTACCTGCCTGTGCGTATGGACCTGCAGTA 17 | TCCAGTGGGTACGACTTCGAAAAAGAGGGATATTCCTTGGTGGGAATAGACCCTTTCAAA 18 | CTACTTCAAAATAGCCAAATATACAGCCTAATCAGACCTAACGAGAATCCAGCACACAAG 19 | AGTCAGCTGGTGTGGATGGCATGCCATTCTGCTGCATTTGAAGATTTAAGATTGTTAAGC 20 | TTCATCAGAGGGACAAAAGTATCTCCGCGGGGGAAACTGTCAACTAGAGGAGTACAAATT 21 | GCTTCAAATGAGAACATGGATAATATGGGATCGAGCACTCTTGAACTGAGAAGCGGGTAC 22 | TGGGCCATAAGGACCAGGAGTGGAGGAAACACTAATCAACAGAGGGCCTCCGCAGGCCAA 23 | ACCAGTGTGCAACCTACGTTTTCTGTACAAAGAAACCTCCCATTTGAAAAGTCAACCATC 24 | ATGGCAGCATTCACTGGAAATACGGAGGGAAGGACTTCAGACATGAGGGCAGAAATCATA 25 | AGAATGATGGAAGGTGCAAAACCAGAAGAAGTGTCATTCCGGGGGAGGGGAGTTTTCGAG 26 | CTCTCAGACGAGAAGGCAACGAACCCGATCGTGCCCTCTTTTGATATGAGTAATGAAGGA 27 | TCTTATTTCTTCGGAGACAATGCAGAAGAGTACGACAATTAAGGAAAAAATACCCTTGTT 28 | TCTACT 29 | -------------------------------------------------------------------------------- /config/yam/ha/reference.fasta: -------------------------------------------------------------------------------- 1 | >JN993010.1 Influenza B virus (B/Wisconsin/01/2010) segment 4 hemagglutinin (HA) gene, complete cds 2 | 
ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA 3 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h1n1pdm/ha/CY121680/reference.fasta: -------------------------------------------------------------------------------- 1 | 
>CY121680.1 Influenza A virus (A/California/07/2009(H1N1)) hemagglutinin (HA) gene, complete cds 2 | GGAAAACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCGAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAAAACAC 3 | -------------------------------------------------------------------------------- /nextclade/dataset_config/yam/ha/JN993010/reference.fasta: 
-------------------------------------------------------------------------------- 1 | >JN993010.1 Influenza B virus (B/Wisconsin/01/2010) segment 4 hemagglutinin (HA) gene, complete cds 2 | ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA 3 | 
# nextclade/scripts/extract_founder_sequences.py
"""Write the inferred sequence of every clade-founding node to a FASTA file.

A "founder" node is a branch that carries a ``labels.clade`` entry in the
clades JSON; its sequence is looked up in the ancestral-reconstruction JSON.
"""
import argparse
import json
import sys


def read_founder_nodes(clades_json_file):
    """Return ``{node_id: clade}`` for every branch labelled with a clade.

    *clades_json_file* is the path to a JSON file whose optional top-level
    ``branches`` mapping holds per-node dicts; only nodes that have a
    ``labels`` dict containing ``clade`` are returned. A file without
    ``branches`` yields an empty dict.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(clades_json_file, encoding="utf-8") as fh:
        branches = json.load(fh).get('branches', {})

    return {
        node_id: node_data['labels']['clade']
        for node_id, node_data in branches.items()
        if 'clade' in node_data.get('labels', {})
    }


def extract_sequences_from_node_data(node_clade_map, ancestral_json):
    """Return ``{clade: SeqRecord}`` for the founder nodes in *node_clade_map*.

    *node_clade_map* maps node ids to clade names (see ``read_founder_nodes``).
    *ancestral_json* is the path to a JSON file with a top-level ``nodes``
    mapping whose entries hold a ``sequence`` string.

    Founder nodes that are absent from the ancestral JSON are reported on
    stderr and skipped. If several nodes share a clade name, the one that
    appears last in the ancestral JSON wins.

    Raises ``KeyError`` if the file has no ``nodes`` key or a founder node
    has no ``sequence``.
    """
    # Imported lazily so that read_founder_nodes works without Biopython.
    from Bio.Seq import Seq
    from Bio.SeqRecord import SeqRecord

    # Keep the path and the parsed content under separate names.
    with open(ancestral_json, encoding="utf-8") as fh:
        nodes = json.load(fh)['nodes']

    sequences = {}
    for node_id, node_data in nodes.items():
        if node_id not in node_clade_map:
            continue
        clade = node_clade_map[node_id]
        if 'sequence' not in node_data:
            raise KeyError(
                f"founder node {node_id!r} of clade {clade!r} has no "
                f"'sequence' entry in {ancestral_json!r}"
            )
        sequences[clade] = SeqRecord(Seq(node_data['sequence']), id=clade, description='')

    # A founder without a reconstructed sequence would otherwise vanish from
    # the output without any trace.
    for node_id, clade in node_clade_map.items():
        if node_id not in nodes:
            print(
                f"WARNING: founder node {node_id!r} of clade {clade!r} is missing "
                f"from {ancestral_json!r}; no sequence exported for it",
                file=sys.stderr,
            )

    return sequences


if __name__ == "__main__":
    from Bio import SeqIO

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--clades-json", required=True, help="JSON file with clade definitions")
    parser.add_argument("--ancestral-json", required=True, help="Ancestral sequence JSON file")
    parser.add_argument("--output-fasta", required=True, help="Output FASTA file with founder sequences")
    args = parser.parse_args()

    node_clade_map = read_founder_nodes(args.clades_json)
    sequences = extract_sequences_from_node_data(node_clade_map, args.ancestral_json)

    # Sort by clade name so the FASTA record order is deterministic.
    with open(args.output_fasta, 'w') as fh:
        SeqIO.write([sequences[clade] for clade in sorted(sequences)], fh, 'fasta')
-------------------------------------------------------------------------------- /ingest/vendored/download-from-s3: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | bin="$(dirname "$0")" 5 | 6 | main() { 7 | local src="${1:?A source s3:// URL is required as the first argument.}" 8 | local dst="${2:?A destination file path is required as the second argument.}" 9 | # How many lines to subsample to. 0 means no subsampling. Optional. 10 | # It is not advised to use this for actual subsampling! This is intended to be 11 | # used for debugging workflows with large datasets such as ncov-ingest as 12 | # described in https://github.com/nextstrain/ncov-ingest/pull/367 13 | 14 | # Uses `tsv-sample` to subsample, so it will not work as expected with files 15 | # that have a single record split across multiple lines (i.e. FASTA sequences) 16 | local n="${3:-0}" 17 | 18 | local s3path="${src#s3://}" 19 | local bucket="${s3path%%/*}" 20 | local key="${s3path#*/}" 21 | 22 | local src_hash dst_hash no_hash=0000000000000000000000000000000000000000000000000000000000000000 23 | dst_hash="$("$bin/sha256sum" < "$dst" || true)" 24 | src_hash="$(aws s3api head-object --bucket "$bucket" --key "$key" --query Metadata.sha256sum --output text 2>/dev/null || echo "$no_hash")" 25 | 26 | echo "[ INFO] Downloading $src → $dst" 27 | if [[ $src_hash != "$dst_hash" ]]; then 28 | aws s3 cp --no-progress "$src" - | 29 | if [[ "$src" == *.gz ]]; then 30 | gunzip -cfq 31 | elif [[ "$src" == *.xz ]]; then 32 | xz -T0 -dcq 33 | elif [[ "$src" == *.zst ]]; then 34 | zstd -T0 -dcq 35 | else 36 | cat 37 | fi | 38 | if [[ "$n" -gt 0 ]]; then 39 | tsv-sample -H -i -n "$n" 40 | else 41 | cat 42 | fi >"$dst" 43 | else 44 | echo "[ INFO] Files are identical, skipping download" 45 | fi 46 | } 47 | 48 | main "$@" 49 | -------------------------------------------------------------------------------- 
/nextclade/dataset_config/h3n2/ha/EPI1857216/reference.fasta: -------------------------------------------------------------------------------- 1 | >EPI_ISL_1563628 | A/Darwin/6/2021 | A / H3N2 | | 2021-03-16 2 | ATGAAGACTATCATTGCTTTGAGCAACATTCTATGTCTTGTTTTCGCTCAAAAAATACCTGGAAATGACAATAGCACGGC 3 | AACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACAAATGACCGAATTGAAGTTACTA 4 | ATGCTACTGAGTTGGTTCAGAATTCATCAATAGGTGAAATATGCGGCAGTCCTCATCAGATCCTTGATGGAGGGAACTGC 5 | ACACTAATAGATGCTCTATTGGGGGACCCTCAGTGTGACGGCTTTCAAAATAAGGAATGGGACCTTTTTGTTGAAAGAAG 6 | CAGAGCCAACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACAC 7 | TGGAGTTTAAAAATGAAAGCTTCAATTGGACTGGAGTCAAACAAAACGGAACAAGTTCTGCGTGCATAAGGGGATCTAGT 8 | AGTAGTTTTTTTAGTAGATTAAATTGGTTGACCAGCTTAAACAACATATATCCAGCACAGAACGTGACTATGCCAAACAA 9 | GGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGATACGGACAAGAACCAAATCTCCCTGTTTGCTCAAT 10 | CATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGG 11 | GATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCT 12 | AATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGTA 13 | AGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCGTTCCAAAATGTAAACAGGATCACATACGGGGCC 14 | TGTCCCAGATATGTTAAGCAAAGCACCCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACCAGAGGCAT 15 | ATTTGGCGCAATAGCGGGTTTCATAGAAAATGGATGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATT 16 | CTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTG 17 | ATCGGAAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGAAGAGTTCAAGACCTTGAGAA 18 | ATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACGATTG 19 | ACCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGAAAT 20 | GGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGAAACTTATGACCACAATGTGTA 21 | CAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGA 22 | 
TTTCCTTTGCCATGTCATGTTTTTTGCTTTGTATTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATT 23 | AGATGCAACATTTGCATTTGAGTGCATTAATTAAAAAC 24 | -------------------------------------------------------------------------------- /ingest/build-configs/manual-upload/Snakefile: -------------------------------------------------------------------------------- 1 | """ 2 | This handles uploads of files downloaded from GISAID to AWS S3. 3 | """ 4 | import os.path 5 | 6 | 7 | # Use default configuration values. Extend with Snakemake's --configfile/--config options. 8 | configfile: os.path.join(workflow.basedir, "config.yaml") 9 | 10 | # Use custom configuration from analysis directory (i.e. working dir), if any. 11 | if os.path.exists("config.yaml"): 12 | configfile: "config.yaml" 13 | 14 | 15 | wildcard_constraints: 16 | # Constrain GISAID pair names to YYYY-MM-DD-N 17 | gisaid_pair = r'\d{4}-\d{2}-\d{2}(-\d+)?' 18 | 19 | 20 | rule upload_gisaid_pairs: 21 | input: 22 | upload_flags=expand([ 23 | "data/{gisaid_pair}-metadata.upload", 24 | "data/{gisaid_pair}-sequences.upload", 25 | ], gisaid_pair=config["gisaid_pairs"]), 26 | 27 | 28 | rule upload_gisaid_metadata: 29 | input: 30 | metadata="data/{gisaid_pair}-metadata.xls", 31 | output: 32 | flag="data/{gisaid_pair}-metadata.upload", 33 | params: 34 | s3_dst=config["s3_dst"], 35 | shell: 36 | r""" 37 | {workflow.basedir}/../../vendored/upload-to-s3 \ 38 | --quiet \ 39 | {input.metadata:q} \ 40 | {params.s3_dst:q}/{wildcards.gisaid_pair}-metadata.xls.zst \ 41 | 2>&1 | tee {output.flag:q} 42 | """ 43 | 44 | 45 | rule upload_gisaid_sequences: 46 | input: 47 | sequences="data/{gisaid_pair}-sequences.fasta", 48 | output: 49 | flag="data/{gisaid_pair}-sequences.upload", 50 | params: 51 | s3_dst=config["s3_dst"], 52 | shell: 53 | r""" 54 | {workflow.basedir}/../../vendored/upload-to-s3 \ 55 | --quiet \ 56 | {input.sequences:q} \ 57 | {params.s3_dst:q}/{wildcards.gisaid_pair}-sequences.fasta.zst \ 58 | 2>&1 | tee {output.flag:q} 59 | """ 60 | 
# scripts/sequence_export.py
"""Export the sequence JSON needed by the old, deprecated auspice.

The output holds the full root sequences under the key ``root`` and, for every
tree node, only the positions at which that node differs from the root.
"""
import argparse
import json
from random import sample
import numpy as np


def diff_to_root(root_seq, seq):
    """Return ``{position: state}`` where *seq* differs from *root_seq*.

    Positions are 0-based indices into *root_seq*; the value is the character
    found in *seq*. The comparison stops at the end of the shorter sequence.
    """
    return {
        pos: derived
        for pos, (ancestral, derived) in enumerate(zip(root_seq, seq))
        if ancestral != derived
    }


def main(argv=None):
    """Parse the command line (*argv*, default ``sys.argv[1:]``) and write the JSON.

    Exits with an argparse usage error when a required option is missing or
    when ``--genes`` and ``--translations`` differ in length.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument('--tree', type=str, required=True,
                        help="newick file with the tree")
    # --alignment and --output are used unconditionally below, so require them
    # up front instead of failing later with a TypeError on open(None).
    parser.add_argument('--alignment', type=str, required=True,
                        help="json file with ancestral reconstructions. assumes full sequence")
    # Default to empty lists so a nucleotide-only export works when no
    # translations are supplied.
    parser.add_argument('--translations', nargs='+', default=[],
                        help="fasta files with ancestral translations")
    parser.add_argument('--genes', nargs='+', default=[],
                        help="names of the genes corresponding to the translations")
    parser.add_argument('--output', type=str, required=True,
                        help="json file to write the root sequences and per-node differences to")

    args = parser.parse_args(argv)

    # zip() would silently drop the surplus entries and pair the remaining
    # genes with whatever files line up with them.
    if len(args.genes) != len(args.translations):
        parser.error("--genes and --translations must have the same number of entries")

    # Imported here so that diff_to_root can be used without Biopython.
    from Bio import AlignIO, Phylo

    with open(args.alignment, encoding="utf-8") as fh:
        nuc = json.load(fh)["nodes"]

    T = Phylo.read(args.tree, 'newick')
    nodes = list(T.find_clades(order='preorder'))
    root_name = T.root.name

    root_seq = nuc[root_name]['sequence']
    # NOTE(review): a tree node literally named "root" would overwrite this
    # entry in the loops below -- confirm node names can never be "root".
    sequence_json = {'root': {'nuc': root_seq}}
    for n in nodes:
        sequence_json[n.name] = {'nuc': diff_to_root(root_seq, nuc[n.name]['sequence'])}

    for gene, fname in zip(args.genes, args.translations):
        aln = {s.name: str(s.seq) for s in AlignIO.read(fname, 'fasta')}
        root_seq = aln[root_name]
        sequence_json['root'][gene] = root_seq
        for n in nodes:
            sequence_json[n.name][gene] = diff_to_root(root_seq, aln[n.name])

    with open(args.output, 'wt') as fh:
        json.dump(sequence_json, fh)


if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- /ingest/vendored/trigger: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | : "${PAT_GITHUB_DISPATCH:=}" 5 | 6 | github_repo="${1:?A GitHub repository with owner and repository name is required as the first argument.}" 7 | event_type="${2:?An event type is required as the second argument.}" 8 | shift 2 9 | 10 | if [[ $# -eq 0 && -z $PAT_GITHUB_DISPATCH ]]; then 11 | cat >&2 <<. 12 | You must specify options to curl for your GitHub credentials. For example, you 13 | can specify your GitHub username, and will be prompted for your password: 14 | 15 | $0 $github_repo $event_type --user 16 | 17 | Be sure to enter a personal access token¹ as your password since GitHub has 18 | discontinued password authentication to the API starting on November 13, 2020². 19 | 20 | You can also store your credentials or a personal access token in a netrc 21 | file³: 22 | 23 | machine api.github.com 24 | login 25 | password 26 | 27 | and then tell curl to use it: 28 | 29 | $0 $github_repo $event_type --netrc 30 | 31 | which will then not require you to type your password every time. 32 | 33 | ¹ https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line 34 | ² https://docs.github.com/en/rest/overview/other-authentication-methods#via-username-and-password 35 | ³ https://ec.haxx.se/usingcurl/usingcurl-netrc 36 | . 
GIHSN_PATTERN = r"(GIHSN)"


def annotate_records_with_gihsn(records: Iterable, strain_field: str, gihsn_field: str) -> Iterable:
    """
    Flag each record according to whether its strain name mentions GIHSN.

    Every incoming record is shallow-copied, and the copy receives a new
    *gihsn_field* entry holding the string "True" when the value stored under
    *strain_field* matches GIHSN_PATTERN (case-sensitive), or the string
    "False" otherwise. The copies are yielded lazily, one per input record,
    in input order; the input records themselves are not modified.

    Raises an Exception as soon as a record has no value (or None) under
    *strain_field*.
    """
    for source_record in records:
        annotated = source_record.copy()
        strain_name = annotated.get(strain_field)

        # A record without a strain name cannot be classified at all.
        if strain_name is None:
            raise Exception(f"Records must have the expected strain field: {strain_field!r}")

        # The flag is stored as text ("True"/"False"), not as a boolean.
        has_gihsn = re.search(GIHSN_PATTERN, strain_name) is not None
        annotated[gihsn_field] = "True" if has_gihsn else "False"

        yield annotated
ATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAAC 12 | TGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACA 13 | ATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCA 14 | AAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCAT 15 | CACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGT 16 | CCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTA 17 | GAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGG 18 | TTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAAT 19 | CAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAAT 20 | TCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTC 21 | ATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAAC 22 | AAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAA 23 | TATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAG 24 | AGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATC 25 | CTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCT 26 | GCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGAGTGCATTAATTAAAAACAC 27 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/ha/CY163680/reference.fasta: -------------------------------------------------------------------------------- 1 | >CY163680.1 Influenza A virus (A/Wisconsin/67/2005(H3N2)) hemagglutinin (HA) gene, complete cds 2 | GGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAA 3 | ACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATA 4 | GTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAG 5 | GTGGAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGG 6 | AGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGC 7 | AACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGG 8 | 
AGTTTAACGATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAACAAGCTCTTCTTGCAAAAGGAG 9 | ATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAAC 10 | GTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACA 11 | ATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAAC 12 | TGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACA 13 | ATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCA 14 | AAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCAT 15 | CACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGT 16 | CCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTA 17 | GAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGG 18 | TTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAAT 19 | CAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAAT 20 | TCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTC 21 | ATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAAC 22 | AAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAA 23 | TATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAG 24 | AGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATC 25 | CTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCT 26 | GCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGAGTGCATTAATTAAAAACAC 27 | -------------------------------------------------------------------------------- /nextclade/dataset_config/h3n2/pa/pathogen.json: -------------------------------------------------------------------------------- 1 | { 2 | "shortcuts": [ 3 | "flu_h3n2_pa", 4 | "nextstrain/flu/h3n2/pa" 5 | ], 6 | "phenotypeData":[ 7 | { 8 | "name": "PAI", 9 | "nameFriendly": "PAI", 10 | "description": "This column displays a score associated with reduced susceptibility to polymerase inhibitors. 
The score is a weighted sum of known substitutions (x1) or major substitutions (x2), while substitutions at positions that confer resistance but are to amino acids not previously described to confer resistance count x0.5.", 11 | "cds": "PA", 12 | "aaRange": { 13 | "begin":0, 14 | "end": 460 15 | }, 16 | "data": [ 17 | { 18 | "name": "PAI_markers", 19 | "weight": 1, 20 | "locations": { 21 | "27": {"P":1, "default":0.5}, 22 | "22": {"G":1, "K":2, "R": 2, "default":0.5}, 23 | "33": {"R":1, "default":0.5}, 24 | "35": {"V":2, "default":0.5}, 25 | "36": {"T":2, "default":0.5}, 26 | "37": {"F":2, "L":1, "M":2, "N":2, "S":1, "T":2, "V":1, "default":0.5}, 27 | "118": {"D":1, "default":0.5}, 28 | "197": {"K":1, "default":0.5}, 29 | "198": {"G":1, "default":0.5} 30 | } 31 | } 32 | ] 33 | } 34 | ], 35 | "mutLabels": { 36 | "aaMutLabelMap": { 37 | "PA:23G":["Baloxavir"], 38 | "PA:23K":["Baloxavir"], 39 | "PA:23R":["Baloxavir"], 40 | "PA:28P":["Baloxavir"], 41 | "PA:34R":["Baloxavir"], 42 | "PA:36V":["Baloxavir"], 43 | "PA:37T":["Baloxavir"], 44 | "PA:38F":["Baloxavir"], 45 | "PA:38L":["Baloxavir"], 46 | "PA:38M":["Baloxavir"], 47 | "PA:38N":["Baloxavir"], 48 | "PA:38S":["Baloxavir"], 49 | "PA:38T":["Baloxavir"], 50 | "PA:38V":["Baloxavir"], 51 | "PA:119D":["Baloxavir"], 52 | "PA:198K":["Baloxavir"], 53 | "PA:199G":["Baloxavir"] 54 | } 55 | } 56 | } 57 | --------------------------------------------------------------------------------