├── scripts
    ├── __init__.py
    ├── sha256sum
    ├── intersect_items.py
    ├── join_tables.py
    ├── flu_regions.py
    ├── export_titers_for_auspice_v1.py
    ├── table_to_node_data.py
    ├── xls2csv.py
    ├── prune_reference.py
    ├── sanitize_trees.py
    ├── import_tip_clades.py
    ├── glyc.py
    └── sequence_export.py
├── config
    ├── h3n2
    │   ├── ha
    │   │   ├── exclude-sites.txt
    │   │   ├── prioritized_seqs_file.tsv
    │   │   ├── genemap.gff
    │   │   ├── emerging_haplotypes.tsv
    │   │   └── reference.fasta
    │   ├── na
    │   │   ├── genemap.gff
    │   │   ├── prioritized_seqs_file.tsv
    │   │   └── reference.fasta
    │   ├── mp
    │   │   └── prioritized_seqs_file.tsv
    │   ├── np
    │   │   └── prioritized_seqs_file.tsv
    │   ├── ns
    │   │   └── prioritized_seqs_file.tsv
    │   ├── pa
    │   │   └── prioritized_seqs_file.tsv
    │   ├── pb1
    │   │   └── prioritized_seqs_file.tsv
    │   └── pb2
    │   │   └── prioritized_seqs_file.tsv
    ├── vic
    │   ├── ha
    │   │   ├── exclude-sites.txt
    │   │   ├── prioritized_seqs_file.tsv
    │   │   ├── emerging_haplotypes.tsv
    │   │   └── genemap.gff
    │   ├── mp
    │   │   └── prioritized_seqs_file.tsv
    │   ├── na
    │   │   ├── prioritized_seqs_file.tsv
    │   │   ├── genemap.gff
    │   │   └── reference.fasta
    │   ├── np
    │   │   └── prioritized_seqs_file.tsv
    │   ├── ns
    │   │   └── prioritized_seqs_file.tsv
    │   ├── pa
    │   │   └── prioritized_seqs_file.tsv
    │   ├── pb1
    │   │   └── prioritized_seqs_file.tsv
    │   ├── pb2
    │   │   └── prioritized_seqs_file.tsv
    │   ├── vaccine.json
    │   └── outliers.txt
    ├── yam
    │   ├── ha
    │   │   ├── exclude-sites.txt
    │   │   ├── clades.tsv
    │   │   ├── subclades.tsv
    │   │   ├── genemap.gff
    │   │   └── reference.fasta
    │   ├── na
    │   │   ├── genemap.gff
    │   │   ├── subclades.tsv
    │   │   └── reference.fasta
    │   ├── outliers.txt
    │   ├── vaccine.json
    │   └── reference_strains.txt
    ├── h1n1pdm
    │   ├── ha
    │   │   ├── exclude-sites.txt
    │   │   ├── prioritized_seqs_file.tsv
    │   │   ├── genemap.gff
    │   │   └── emerging_haplotypes.tsv
    │   ├── mp
    │   │   └── prioritized_seqs_file.tsv
    │   ├── na
    │   │   ├── prioritized_seqs_file.tsv
    │   │   ├── genemap.gff
    │   │   └── reference.fasta
    │   ├── np
    │   │   └── prioritized_seqs_file.tsv
    │   ├── ns
    │   │   └── prioritized_seqs_file.tsv
    │   ├── pa
    │   │   └── prioritized_seqs_file.tsv
    │   ├── pb1
    │   │   └── prioritized_seqs_file.tsv
    │   └── pb2
    │   │   └── prioritized_seqs_file.tsv
    ├── clades_for_titer_plots_h1n1pdm.txt
    ├── clades_for_titer_plots_vic.txt
    ├── clades_for_titer_plots_h3n2.txt
    ├── subclades_for_titer_plots_vic.txt
    ├── references_to_exclude_in_titer_plots_h3n2.txt
    ├── colors.tsv
    ├── subclades_for_titer_plots_h1n1pdm.txt
    ├── references_to_exclude_in_titer_plots_h1n1pdm.txt
    ├── hi_titer_count_colors.tsv
    ├── subclades_for_titer_plots_h3n2.txt
    ├── references_for_titer_plots
    │   ├── h3n2
    │   │   ├── egg_fra.txt
    │   │   ├── cell_fra.txt
    │   │   ├── egg_hi.txt
    │   │   └── cell_hi.txt
    │   ├── vic
    │   │   ├── egg_hi.txt
    │   │   └── cell_hi.txt
    │   └── h1n1pdm
    │   │   ├── egg_hi.txt
    │   │   └── cell_hi.txt
    ├── references_to_exclude_in_titer_plots_vic.txt
    ├── references_to_include_in_titer_plots_h1n1pdm.txt
    ├── mask_config.tsv
    ├── nextstrain_clades_yam_ha.tsv
    ├── references_to_include_in_titer_plots_h3n2.txt
    ├── frequency_weights_by_region.json
    ├── distance_maps
    │   ├── h3n2
    │   │   ├── ha
    │   │   │   ├── koel.json
    │   │   │   ├── welsh_epitope_sites.json
    │   │   │   ├── bush_epitope_A.json
    │   │   │   ├── bush_epitope_E.json
    │   │   │   ├── bush_epitope_B.json
    │   │   │   ├── bush_epitope_C.json
    │   │   │   ├── bush_epitope_D.json
    │   │   │   ├── luksza.json
    │   │   │   └── shih.json
    │   │   └── na
    │   │   │   └── munoz.json
    │   └── h1n1pdm
    │   │   └── ha
    │   │       └── canton.json
    ├── distance_maps.tsv
    ├── colors_for_titer_plots_vic.tsv
    ├── nextstrain_clades_vic_ha.tsv
    ├── references_to_include_in_titer_plots_vic.txt
    ├── colors_for_titer_plots_h1n1pdm.tsv
    ├── colors_for_titer_plots_h3n2.tsv
    ├── h2n2
    │   └── reference_strains.txt
    ├── h1n1
    │   └── reference_strains.txt
    ├── nextstrain_clades_h1n1pdm_ha.tsv
    └── nextstrain_clades_h3n2_ha.tsv
├── profiles
    ├── allflu
    │   ├── vic_include.txt
    │   ├── yam_include.txt
    │   ├── h1n1pdm_include.txt
    │   ├── config.yaml
    │   └── h3n2_include.txt
    ├── gisaid
    │   ├── config.yaml
    │   └── builds.yaml
    ├── europe
    │   └── config.yaml
    ├── example
    │   ├── config.yaml
    │   └── builds.yaml
    ├── nextstrain
    │   └── config.yaml
    ├── nextflu-private
    │   ├── README.md
    │   ├── vic
    │   │   └── ha
    │   │   │   └── clades.tsv
    │   └── deploy.smk
    ├── ci
    │   ├── prepare_data.smk
    │   └── builds.yaml
    ├── scicore
    │   ├── submit.sh
    │   ├── cluster.json
    │   └── config.yaml
    ├── nextstrain-public
    │   ├── deploy.smk
    │   └── rename.smk
    ├── nextclade.yaml
    ├── full-trees
    │   └── h1n1pdm_titer_strains.txt
    ├── neut-library.yaml
    └── nextflu-private-forecasts
    │   └── rename.smk
├── nextclade
    ├── dataset_config
    │   ├── yam
    │   │   ├── includes.txt
    │   │   └── ha
    │   │   │   └── JN993010
    │   │   │       ├── annotation.gff
    │   │   │       ├── pathogen.json
    │   │   │       ├── README.md
    │   │   │       └── reference.fasta
    │   ├── h3n2
    │   │   ├── na
    │   │   │   └── EPI1857215
    │   │   │   │   ├── annotation.gff
    │   │   │   │   └── reference.fasta
    │   │   ├── ha
    │   │   │   ├── CY163680
    │   │   │   │   ├── annotation.gff
    │   │   │   │   ├── pathogen.json
    │   │   │   │   └── reference.fasta
    │   │   │   └── EPI1857216
    │   │   │   │   ├── annotation.gff
    │   │   │   │   └── reference.fasta
    │   │   ├── pb2
    │   │   │   ├── annotation.gff
    │   │   │   └── README.md
    │   │   ├── np
    │   │   │   ├── annotation.gff
    │   │   │   ├── README.md
    │   │   │   └── reference.fasta
    │   │   ├── pb1
    │   │   │   ├── annotation.gff
    │   │   │   └── README.md
    │   │   ├── mp
    │   │   │   ├── annotation.gff
    │   │   │   ├── README.md
    │   │   │   └── reference.fasta
    │   │   ├── ns
    │   │   │   ├── README.md
    │   │   │   ├── annotation.gff
    │   │   │   └── reference.fasta
    │   │   └── pa
    │   │   │   ├── annotation.gff
    │   │   │   ├── README.md
    │   │   │   └── pathogen.json
    │   ├── vic
    │   │   ├── ha
    │   │   │   ├── KX058884
    │   │   │   │   ├── annotation.gff
    │   │   │   │   └── pathogen.json
    │   │   │   └── EPI1926632
    │   │   │   │   ├── annotation.gff
    │   │   │   │   ├── pathogen.json
    │   │   │   │   └── README.md
    │   │   ├── na
    │   │   │   └── CY073894
    │   │   │   │   ├── annotation.gff
    │   │   │   │   └── reference.fasta
    │   │   ├── includes.txt
    │   │   └── pa
    │   │   │   ├── README.md
    │   │   │   └── pathogen.json
    │   └── h1n1pdm
    │   │   ├── ha
    │   │       ├── CY121680
    │   │       │   ├── annotation.gff
    │   │       │   ├── pathogen.json
    │   │       │   └── reference.fasta
    │   │       └── MW626062
    │   │       │   ├── annotation.gff
    │   │       │   └── pathogen.json
    │   │   ├── na
    │   │       └── MW626056
    │   │       │   ├── annotation.gff
    │   │       │   └── reference.fasta
    │   │   ├── pb1
    │   │       ├── annotation.gff
    │   │       └── README.md
    │   │   ├── pb2
    │   │       ├── annotation.gff
    │   │       └── README.md
    │   │   ├── np
    │   │       ├── annotation.gff
    │   │       ├── README.md
    │   │       └── reference.fasta
    │   │   ├── mp
    │   │       ├── README.md
    │   │       ├── annotation.gff
    │   │       └── reference.fasta
    │   │   ├── ns
    │   │       ├── README.md
    │   │       ├── annotation.gff
    │   │       └── reference.fasta
    │   │   └── pa
    │   │       ├── annotation.gff
    │   │       ├── README.md
    │   │       └── pathogen.json
    ├── config
    │   ├── human-nai-marker-table_for-publication_final_20240918.pdf
    │   ├── pa-marker-who-table_07-08-2024_updated_final-version.pdf
    │   ├── vic
    │   │   └── ha
    │   │   │   └── KX058884
    │   │   │       └── founder_sequences_SigPep.fasta
    │   ├── h1n1pdm
    │   │   └── ha
    │   │   │   ├── CY121680
    │   │   │       └── founder_sequences_SigPep.fasta
    │   │   │   └── MW626062
    │   │   │       └── founder_sequences_SigPep.fasta
    │   ├── auspice_config.json
    │   └── h3n2
    │   │   └── ha
    │   │       ├── CY163680
    │   │           └── founder_sequences_SigPep.fasta
    │   │       └── EPI1857216
    │   │           └── founder_sequences_SigPep.fasta
    └── scripts
    │   └── extract_founder_sequences.py
├── .gitattributes
├── images
    ├── 03-download-metadata.png
    ├── 04-download-sequences.png
    ├── 01-search-gisaid-for-h3n2.png
    └── 02-gisaid-search-results.png
├── ingest
    ├── vendored
    │   ├── s3-object-exists
    │   ├── .shellcheckrc
    │   ├── .github
    │   │   ├── workflows
    │   │   │   ├── ci.yaml
    │   │   │   └── pre-commit.yaml
    │   │   ├── pull_request_template.md
    │   │   └── dependabot.yml
    │   ├── sha256sum
    │   ├── .gitrepo
    │   ├── notify-on-job-fail
    │   ├── notify-on-diff
    │   ├── notify-on-job-start
    │   ├── .pre-commit-config.yaml
    │   ├── cloudfront-invalidate
    │   ├── trigger-on-new-data
    │   ├── download-from-s3
    │   └── trigger
    ├── build-configs
    │   ├── manual-upload
    │   │   ├── config.yaml
    │   │   └── Snakefile
    │   └── nextstrain-automation
    │   │   └── config.yaml
    ├── defaults
    │   ├── strain_name_fixes.tsv
    │   ├── h3n2
    │   │   └── prioritized_strain_ids.tsv
    │   └── final_annotations.tsv
    ├── scripts
    │   ├── lowercase-fields
    │   ├── dedup-by-gisaid-id
    │   └── annotate-with-gihsn
    └── Snakefile
├── example_data
    └── haplotypes.tsv
├── workflow
    ├── envs
    │   ├── notebook.yaml
    │   └── nextstrain.yaml
    └── snakemake_rules
    │   └── common.smk
├── nextstrain-pathogen.yaml
├── .github
    ├── workflows
    │   ├── ci.yaml
    │   ├── run-public-builds.yaml
    │   ├── run-nextflu-private-builds.yaml
    │   ├── run-private-nextflu-builds.yaml
    │   ├── run-nextclade.yaml
    │   └── ingest.yaml
    └── dependabot.yml
├── notebooks
    └── README.md
├── source-data
    ├── 2018_South_America_flu_vaccination_coverage.tsv
    └── 2018_Europe_flu_vaccination_coverage.tsv
├── models
    ├── welsh_escape.json
    ├── ne_star-lbi.json
    ├── cTiter_x-ne_star.json
    ├── fra_cTiter_x-ne_star.json
    ├── cell_fra_cTiter_x-ne_star.json
    ├── human_cell_fra_cTiter_x-ne_star.json
    └── human_cell_hi_cTiter_x-ne_star.json
├── .gitignore
├── flu-forecasting
    └── scripts
    │   ├── merge_weighted_distances_to_future.py
    │   └── calculate_clade_frequency_forecasts.py
└── zoltar
    └── project.json


/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/config/h3n2/ha/exclude-sites.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/config/vic/ha/exclude-sites.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/config/yam/ha/exclude-sites.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/profiles/allflu/vic_include.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/profiles/allflu/yam_include.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/profiles/allflu/h1n1pdm_include.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/ha/exclude-sites.txt:
--------------------------------------------------------------------------------
1 | 618
2 | 


--------------------------------------------------------------------------------
/config/vic/ha/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/vic/mp/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/vic/na/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/vic/np/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/vic/ns/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/vic/pa/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/vic/pb1/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/vic/pb2/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/ha/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/mp/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/na/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/np/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/ns/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/pa/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/pb1/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/pb2/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/yam/includes.txt:
--------------------------------------------------------------------------------
1 | B/Phuket/3073/2013
2 | 


--------------------------------------------------------------------------------
/config/clades_for_titer_plots_h1n1pdm.txt:
--------------------------------------------------------------------------------
1 | 5a.1
2 | 5a.2a
3 | 5a.2a.1
4 | 


--------------------------------------------------------------------------------
/config/clades_for_titer_plots_vic.txt:
--------------------------------------------------------------------------------
1 | V1A.3a.2
2 | V1A.3a.1
3 | V1A.3/133R
4 | V1A.3/155A
5 | 


--------------------------------------------------------------------------------
/config/clades_for_titer_plots_h3n2.txt:
--------------------------------------------------------------------------------
1 | 2a.1
2 | 2a.1b
3 | 2a.3
4 | 2a.3a.1
5 | 2a.3b
6 | 2b
7 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Always use LF line endings even on Windows.
2 | * text=auto eol=lf
3 | *.png binary
4 | 


--------------------------------------------------------------------------------
/config/subclades_for_titer_plots_vic.txt:
--------------------------------------------------------------------------------
1 | C.3
2 | C.3.1
3 | C.5
4 | C.5.1
5 | C.5.6
6 | C.5.6.1
7 | C.5.7
8 | 


--------------------------------------------------------------------------------
/images/03-download-metadata.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/images/03-download-metadata.png


--------------------------------------------------------------------------------
/images/04-download-sequences.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/images/04-download-sequences.png


--------------------------------------------------------------------------------
/config/references_to_exclude_in_titer_plots_h3n2.txt:
--------------------------------------------------------------------------------
1 | A/Darwin/726/2019
2 | A/NorthCarolina/4/2016
3 | A/Yokohama/68/2020
4 | 


--------------------------------------------------------------------------------
/images/01-search-gisaid-for-h3n2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/images/01-search-gisaid-for-h3n2.png


--------------------------------------------------------------------------------
/images/02-gisaid-search-results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/images/02-gisaid-search-results.png


--------------------------------------------------------------------------------
/profiles/gisaid/config.yaml:
--------------------------------------------------------------------------------
1 | cores: all
2 | use-conda: true
3 | conda-frontend: mamba
4 | configfile: profiles/gisaid/builds.yaml
5 | 


--------------------------------------------------------------------------------
/config/colors.tsv:
--------------------------------------------------------------------------------
1 | recency	last week	#d94701
2 | recency	last month	#fd8d3c
3 | recency	last quarter	#fdbe85
4 | recency	older	#feedde
5 | 


--------------------------------------------------------------------------------
/config/h3n2/na/genemap.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region CY114383 1 1436
3 | CY114383	feature	gene	4	1413	.	+	.	gene_name="NA"
4 | 


--------------------------------------------------------------------------------
/config/vic/na/genemap.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region CY018815.1 1 1516
3 | CY018815.1	feature	gene	31	1431	.	+	.	gene_name="NA"
4 | 


--------------------------------------------------------------------------------
/config/yam/na/genemap.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region CY019709.1 1 1516
3 | CY019709.1	feature	gene	31	1431	.	+	.	gene_name="NA"
4 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/na/genemap.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region CY121682.1 1 1432
3 | CY121682.1	feature	gene	9	1418	.	+	.	gene_name="NA"
4 | 


--------------------------------------------------------------------------------
/config/subclades_for_titer_plots_h1n1pdm.txt:
--------------------------------------------------------------------------------
1 | C.1.9
2 | C.1.9.3
3 | D.3.1
4 | D.3.1:113K
5 | D.3.1:113K-139D-283K
6 | D.3.1:157L
7 | D.3.1:185T
8 | 


--------------------------------------------------------------------------------
/config/references_to_exclude_in_titer_plots_h1n1pdm.txt:
--------------------------------------------------------------------------------
1 | A/Brisbane/2/2018
2 | A/Brisbane/2/2018-egg
3 | A/Indiana/2/2020-egg
4 | A/Togo/881/2020-egg
5 | 


--------------------------------------------------------------------------------
/config/hi_titer_count_colors.tsv:
--------------------------------------------------------------------------------
1 | hi_titer_count	0	#999999
2 | hi_titer_count	1-5	#bdc9e1
3 | hi_titer_count	6-10	#74a9cf
4 | hi_titer_count	>10	#0570b0
5 | 


--------------------------------------------------------------------------------
/profiles/europe/config.yaml:
--------------------------------------------------------------------------------
1 | configfile:
2 |   - profiles/europe/builds.yaml
3 | 
4 | keep-going: False
5 | printshellcmds: True
6 | show-failed-logs: True
7 | 


--------------------------------------------------------------------------------
/config/subclades_for_titer_plots_h3n2.txt:
--------------------------------------------------------------------------------
 1 | J.2
 2 | J.2.2
 3 | J.2.3
 4 | J.2.4
 5 | J.2.5
 6 | J.2:135A-145N
 7 | J.2:135K
 8 | J.2:145N-261Q
 9 | J.2:223I
10 | J.2:8D
11 | 


--------------------------------------------------------------------------------
/profiles/allflu/config.yaml:
--------------------------------------------------------------------------------
1 | configfile:
2 |   - profiles/allflu/builds.yaml
3 | 
4 | cores: 8
5 | keep-going: False
6 | printshellcmds: True
7 | show-failed-logs: True
8 | 


--------------------------------------------------------------------------------
/profiles/example/config.yaml:
--------------------------------------------------------------------------------
1 | configfile:
2 |   - profiles/example/builds.yaml
3 | 
4 | cores: 8
5 | keep-going: False
6 | printshellcmds: True
7 | show-failed-logs: True
8 | 


--------------------------------------------------------------------------------
/config/references_for_titer_plots/h3n2/egg_fra.txt:
--------------------------------------------------------------------------------
1 | A/Croatia/10136RV/2023-egg # J.2, vaccine strain
2 | A/Singapore/GP20238/2024-egg # J.2.4
3 | A/Nepal/N042/2025-egg # J.2.4
4 | 


--------------------------------------------------------------------------------
/config/references_for_titer_plots/vic/egg_hi.txt:
--------------------------------------------------------------------------------
1 | B/Austria/1359417/2021-egg # C, vaccine strain
2 | B/Switzerland/329/2024-egg # C.5.6:199A
3 | B/Victoria/41/2024-egg # C.5.7
4 | 


--------------------------------------------------------------------------------
/profiles/nextstrain/config.yaml:
--------------------------------------------------------------------------------
1 | configfile:
2 |   - profiles/nextstrain/builds.yaml
3 | 
4 | cores: 8
5 | keep-going: True
6 | printshellcmds: True
7 | show-failed-logs: True
8 | 


--------------------------------------------------------------------------------
/config/h3n2/mp/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | A/Croatia/10136RV/2023	EPI3250713
3 | A/Croatia/10136RV/2023-egg	EPI3356216
4 | A/DistrictOfColumbia/27/2023	EPI2990351
5 | 


--------------------------------------------------------------------------------
/config/h3n2/np/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | A/Croatia/10136RV/2023	EPI3250711
3 | A/Croatia/10136RV/2023-egg	EPI3356214
4 | A/DistrictOfColumbia/27/2023	EPI2990339
5 | 


--------------------------------------------------------------------------------
/config/h3n2/ns/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | A/Croatia/10136RV/2023	EPI3250712
3 | A/Croatia/10136RV/2023-egg	EPI3356215
4 | A/DistrictOfColumbia/27/2023	EPI2990345
5 | 


--------------------------------------------------------------------------------
/config/h3n2/pa/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | A/Croatia/10136RV/2023	EPI3250714
3 | A/Croatia/10136RV/2023-egg	EPI3356217
4 | A/DistrictOfColumbia/27/2023	EPI2990331
5 | 


--------------------------------------------------------------------------------
/config/h3n2/pb1/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | A/Croatia/10136RV/2023	EPI3250716
3 | A/Croatia/10136RV/2023-egg	EPI3356219
4 | A/DistrictOfColumbia/27/2023	EPI2990344
5 | 


--------------------------------------------------------------------------------
/config/h3n2/pb2/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | A/Croatia/10136RV/2023	EPI3250715
3 | A/Croatia/10136RV/2023-egg	EPI3356218
4 | A/DistrictOfColumbia/27/2023	EPI2990348
5 | 


--------------------------------------------------------------------------------
/nextclade/config/human-nai-marker-table_for-publication_final_20240918.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/nextclade/config/human-nai-marker-table_for-publication_final_20240918.pdf


--------------------------------------------------------------------------------
/nextclade/config/pa-marker-who-table_07-08-2024_updated_final-version.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nextstrain/seasonal-flu/HEAD/nextclade/config/pa-marker-who-table_07-08-2024_updated_final-version.pdf


--------------------------------------------------------------------------------
/config/yam/na/subclades.tsv:
--------------------------------------------------------------------------------
 1 | clade	gene	site	alt
 2 | 42Q	NA	42	Q
 3 | 42R	NA	42	R
 4 | 42R	NA	186	R
 5 | 42R	NA	340	N
 6 | 68A	NA	68	A
 7 | 68A	NA	125	T
 8 | 68A	NA	463	D
 9 | 68A	NA	465	A
10 | 125N	NA	125	N
11 | 


--------------------------------------------------------------------------------
/config/references_to_exclude_in_titer_plots_vic.txt:
--------------------------------------------------------------------------------
1 | B/Brisbane/35/2018
2 | B/Brisbane/35/2018-egg
3 | B/Colorado/6/2017
4 | B/Colorado/6/2017-egg
5 | B/Iowa/6/2017
6 | B/Maryland/15/2016-egg
7 | B/Nigeria/3352/2018
8 | 


--------------------------------------------------------------------------------
/profiles/nextflu-private/README.md:
--------------------------------------------------------------------------------
1 | # Monthly reports on seasonal influenza evolution
2 | 
3 | [See the Nextstrain wiki](https://wiki.nextstrain.org/t/Seasonal+Influenza) for details about preparing monthly reports.
4 | 


--------------------------------------------------------------------------------
/config/references_to_include_in_titer_plots_h1n1pdm.txt:
--------------------------------------------------------------------------------
1 | A/Iowa/22/2020-egg
2 | A/Iowa/23/2020
3 | A/NorthCarolina/1/2021
4 | A/NorthCarolina/1/2021-egg
5 | A/Louisiana/1/2020
6 | A/Togo/881/2020
7 | A/Wisconsin/588/2019
8 | 


--------------------------------------------------------------------------------
/ingest/vendored/s3-object-exists:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -euo pipefail
3 | # Usage: s3-object-exists s3://<bucket>/<key>  (the "s3://" prefix is optional)
4 | url="${1#s3://}"      # strip a leading "s3://" from the first argument, if present
5 | bucket="${url%%/*}"   # text before the first "/" is used as the bucket
6 | key="${url#*/}"       # text after the first "/" is used as the object key
7 | # All output is discarded; as the last command, head-object's status is the script's exit status.
8 | aws s3api head-object --bucket "$bucket" --key "$key" &>/dev/null
9 | 


--------------------------------------------------------------------------------
/example_data/haplotypes.tsv:
--------------------------------------------------------------------------------
 1 | haplotype	gene	site	alt
 2 | Brisbane10	nuc	63	A
 3 | Brisbane10	nuc	920	C
 4 | Brisbane10	nuc	1313	C
 5 | 
 6 | Perth16	HA1	144	K
 7 | 
 8 | clade	HA1	312	S
 9 | 
10 | recurrent_AA	HA1	94	H
11 | 


--------------------------------------------------------------------------------
/config/h3n2/ha/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | A/Croatia/10136RV/2023	EPI3250718
3 | A/Croatia/10136RV/2023-egg	EPI3356221
4 | A/DistrictOfColumbia/27/2023	EPI2990337
5 | A/DistrictOfColumbia/27/2023-egg	EPI3391167
6 | 


--------------------------------------------------------------------------------
/config/h3n2/na/prioritized_seqs_file.tsv:
--------------------------------------------------------------------------------
1 | strain	accession
2 | A/Croatia/10136RV/2023	EPI3250717
3 | A/Croatia/10136RV/2023-egg	EPI3356220
4 | A/DistrictOfColumbia/27/2023	EPI2990330
5 | A/DistrictOfColumbia/27/2023-egg	EPI3391166
6 | 


--------------------------------------------------------------------------------
/config/mask_config.tsv:
--------------------------------------------------------------------------------
1 | lineage	segment	attribute	mask
2 | h3n2	ha	ep	wolf
3 | h3n2	ha	ne	wolf_nonepitope
4 | h3n2	ha	rb	koel
5 | h3n2	na	ep	bhatt
6 | h3n2	na	ne	bhatt_nonepitope
7 | h1n1pdm	ha	ep	canton
8 | h1n1pdm	ha	ne	canton_nonepitope
9 | 


--------------------------------------------------------------------------------
/config/nextstrain_clades_yam_ha.tsv:
--------------------------------------------------------------------------------
 1 | clade	gene	site	alt
 2 | 2	HA1	48	K
 3 | 2	HA1	108	A
 4 | 2	nuc	1196	T
 5 | 3	HA1	150	I
 6 | 3	HA1	229	D
 7 | 3	HA1	165	Y
 8 | 3	nuc	1280	C
 9 | 172Q	HA1	172	Q
10 | 172Q	nuc	848	G
11 | 172Q	nuc	311	T
12 | 


--------------------------------------------------------------------------------
/config/references_to_include_in_titer_plots_h3n2.txt:
--------------------------------------------------------------------------------
1 | A/Cambodia/e0826360/2020
2 | A/Darwin/6/2021
3 | A/Darwin/6/2021-egg
4 | A/Darwin/9/2021
5 | A/Darwin/9/2021-egg
6 | A/Michigan/173/2020
7 | A/Michigan/173/2020-egg
8 | A/Kansas/14/2017
9 | 


--------------------------------------------------------------------------------
/config/yam/ha/clades.tsv:
--------------------------------------------------------------------------------
 1 | clade	gene	site	alt
 2 | Y1	SigPep	1	M
 3 | Y2	HA1	48	K
 4 | Y2	HA1	108	A
 5 | Y2	HA1	181	A
 6 | Y3	HA1	116	K
 7 | Y3	HA1	150	I
 8 | Y3	HA1	165	Y
 9 | Y3	HA1	202	S
10 | Y3	HA1	229	D
11 | Y3	HA1	298	E
12 | Y3	HA1	312	K
13 | 


--------------------------------------------------------------------------------
/config/yam/ha/subclades.tsv:
--------------------------------------------------------------------------------
 1 | clade	gene	site	alt
 2 | Y1	SigPep	1	M
 3 | Y2	HA1	48	K
 4 | Y2	HA1	108	A
 5 | Y2	HA1	181	A
 6 | Y3	HA1	116	K
 7 | Y3	HA1	150	I
 8 | Y3	HA1	165	Y
 9 | Y3	HA1	202	S
10 | Y3	HA1	229	D
11 | Y3	HA1	298	E
12 | Y3	HA1	312	K
13 | 


--------------------------------------------------------------------------------
/config/h3n2/ha/genemap.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region CY163680.1 1 1737
3 | CY163680.1	feature	gene	18	65	.	+	.	gene_name="SigPep"
4 | CY163680.1	feature	gene	66	1052	.	+	.	gene_name="HA1"
5 | CY163680.1	feature	gene	1053	1715	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/config/yam/ha/genemap.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region JN993010.1 1 1755
3 | JN993010.1	feature	gene	1	45	.	+	.	gene_name="SigPep"
4 | JN993010.1	feature	gene	46	1083	.	+	.	gene_name="HA1"
5 | JN993010.1	feature	gene	1084	1755	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/ha/genemap.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region CY121680.1 1 1752
3 | CY121680.1	feature	gene	21	71	.	+	.	gene_name="SigPep"
4 | CY121680.1	feature	gene	72	1052	.	+	.	gene_name="HA1"
5 | CY121680.1	feature	gene	1053	1718	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/workflow/envs/notebook.yaml:
--------------------------------------------------------------------------------
 1 | name: notebook
 2 | channels:
 3 |   - conda-forge
 4 | dependencies:
 5 |   - altair=5.0.1
 6 |   - jupyter=1.0
 7 |   - jupyterlab_code_formatter=2.2.1
 8 |   - pandas=1.5.0
 9 |   - python=3.9*
10 |   - vl-convert-python=0.11.1
11 | 


--------------------------------------------------------------------------------
/ingest/vendored/.shellcheckrc:
--------------------------------------------------------------------------------
1 | # Use of this file requires Shellcheck v0.7.0 or newer.
2 | #
3 | # SC2064 - We intentionally want variables to expand immediately within traps
4 | #          so the trap can not fail due to variable interpolation later.
5 | #
6 | disable=SC2064
7 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/na/EPI1857215/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region EPI1857215 1 1439
3 | EPI1857215	annotation	remark	1	1439	.	.	.	accessions=EPI1857215;
4 | EPI1857215	feature	gene	8	1417	.	+	.	codon_start=1;gene=NA;gene_name=NA;product=neuraminidase;
5 | 


--------------------------------------------------------------------------------
/ingest/build-configs/manual-upload/config.yaml:
--------------------------------------------------------------------------------
1 | # TODO: remove `trials/ingest/` after we switch to the ingest workflow
2 | # AWS S3 destination for the downloaded GISAID files
3 | s3_dst: "s3://nextstrain-data-private/files/workflows/seasonal-flu/trials/ingest/gisaid-downloads/unprocessed"
4 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/ha/CY163680/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region CY163680.1 1 1737
3 | CY163680.1	feature	gene	18	65	.	+	.	gene_name="SigPep"
4 | CY163680.1	feature	gene	66	1052	.	+	.	gene_name="HA1"
5 | CY163680.1	feature	gene	1053	1715	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/ha/KX058884/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region KX058884.1 1 1885
3 | KX058884.1	feature	gene	34	78	.	+	.	gene_name="SigPep"
4 | KX058884.1	feature	gene	79	1119	.	+	.	gene_name="HA1"
5 | KX058884.1	feature	gene	1120	1791	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/yam/ha/JN993010/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region JN993010.1 1 1755
3 | JN993010.1	feature	gene	1	45	.	+	.	gene_name="SigPep"
4 | JN993010.1	feature	gene	46	1083	.	+	.	gene_name="HA1"
5 | JN993010.1	feature	gene	1084	1755	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/ingest/defaults/strain_name_fixes.tsv:
--------------------------------------------------------------------------------
1 | # GISAID strain name fixes
2 | label	fix
3 | 
4 | Influenza A Virus (A/Malaysia/228/2014(H7N9)) segment 6 neuraminidase (NA) gene	A/Malaysia/228/2014
5 | Influenza A Virus (A/Malaysia/228/2014(H7N9)) segment 4 hemagglutinin (HA) gene	A/Malaysia/228/2014
6 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/ha/CY121680/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region CY121680.1 1 1752
3 | CY121680.1	feature	gene	21	71	.	+	.	gene_name="SigPep"
4 | CY121680.1	feature	gene	72	1052	.	+	.	gene_name="HA1"
5 | CY121680.1	feature	gene	1053	1718	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/ha/MW626062/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region MW626062.1 1 1752
3 | MW626062.1	feature	gene	21	71	.	+	.	gene_name="SigPep"
4 | MW626062.1	feature	gene	72	1052	.	+	.	gene_name="HA1"
5 | MW626062.1	feature	gene	1053	1718	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/ha/EPI1857216/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region EPI1857216 1 1718
3 | EPI1857216	feature	gene	1	48	.	+	.	gene_name="SigPep"
4 | EPI1857216	feature	gene	49	1035	.	+	.	gene_name="HA1"
5 | EPI1857216	feature	gene	1036	1698	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/ha/EPI1926632/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region EPI1926632 1 1847
3 | EPI1926632	feature	gene	20	64	.	+	.	gene_name="SigPep"
4 | EPI1926632	feature	gene	65	1096	.	+	.	gene_name="HA1"
5 | EPI1926632	feature	gene	1097	1765	.	+	.	gene_name="HA2"
6 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/ha/emerging_haplotypes.tsv:
--------------------------------------------------------------------------------
 1 | haplotype	gene	site	alt
 2 | C.1.9	clade	C.1.9
 3 | C.1.9.3	clade	C.1.9.3
 4 | D.3.1	clade	D.3.1
 5 | 
 6 | D.3.1:270A	clade	D.3.1
 7 | D.3.1:270A	HA1	270	A
 8 | 
 9 | D.3.1.1	clade	D.3.1.1
10 | 
11 | D.3.1.1:205K	clade	D.3.1.1
12 | D.3.1.1:205K	HA1	205	K
13 | 


--------------------------------------------------------------------------------
/ingest/defaults/h3n2/prioritized_strain_ids.tsv:
--------------------------------------------------------------------------------
1 | strain	id
2 | A/Croatia/10136RV/2023	EPI_ISL_19085723
3 | A/Croatia/10136RV/2023-egg	EPI_ISL_19185072
4 | A/DistrictOfColumbia/27/2023	EPI_ISL_18862356
5 | A/DistrictOfColumbia/27/2023-egg	EPI_ISL_19209054
6 | A/Massachusetts/18/2022	EPI_ISL_13897082
7 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/na/CY073894/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region CY073894.1 1 1401
3 | CY073894.1	annotation	remark	1	1401	.	.	.	accessions=CY073894;
4 | CY073894.1	feature	gene	1	1401	.	+	.	codon_start=1;gene=NA;gene_name=NA;product=neuraminidase;protein_id=ADN32819.1;
5 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/na/MW626056/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region MW626056.1 1 1433
3 | MW626056.1	annotation	remark	1	1433	.	.	.	accessions=MW626056;
4 | MW626056.1	feature	gene	9	1418	.	+	.	codon_start=1;gene=NA;gene_name=NA;product=neuraminidase;protein_id=QRV63257.1;
5 | 


--------------------------------------------------------------------------------
/profiles/ci/prepare_data.smk:
--------------------------------------------------------------------------------
 1 | rule prepare_sequences:  # copy the example H3N2 FASTA for one segment to the raw-sequence path
 2 |     input:
 3 |         sequences="example_data/h3n2_{segment}.fasta",  # example sequences, one file per {segment} wildcard
 4 |     output:
 5 |         sequences="data/h3n2/raw_{segment}.fasta",  # same content, written under data/h3n2/
 6 |     shell:
 7 |         """
 8 |         cp -f {input.sequences} {output.sequences}
 9 |         """
10 | 


--------------------------------------------------------------------------------
/config/vic/ha/emerging_haplotypes.tsv:
--------------------------------------------------------------------------------
 1 | haplotype	gene	site	alt
 2 | C.3	clade	C.3
 3 | C.3.1	clade	C.3.1
 4 | C.5.1	clade	C.5.1
 5 | C.5.6	clade	C.5.6
 6 | C.5.6.1	clade	C.5.6.1
 7 | C.5.7	clade	C.5.7
 8 | 
 9 | C.5.6:75E	clade	C.5.6
10 | C.5.6:75E	HA1	75	E
11 | 
12 | C.5.6:189A	clade	C.5.6
13 | C.5.6:189A	HA1	189	A
14 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/includes.txt:
--------------------------------------------------------------------------------
 1 | B/Brazil/1417/2023
 2 | B/Massachusetts/1/2022
 3 | B/Amazonas/2022-014046-IEC/2022
 4 | B/Iquitos/FPI20551/2022
 5 | B/Pennsylvania/3/2022
 6 | 
 7 | 
 8 | # C.3.2
 9 | B/Lisboa/134/2024
10 | B/Wisconsin/25/2025
11 | A/SaoPaulo/0068-IBTEC/2024
12 | B/Colorado/46/2025
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/nextstrain-pathogen.yaml:
--------------------------------------------------------------------------------
1 | # This is currently an empty file to indicate the top level pathogen repo.
2 | # The inclusion of this file allows the Nextstrain CLI to run the
3 | # `nextstrain build` from any directory regardless of runtime.
4 | #
5 | # See https://github.com/nextstrain/cli/releases/tag/8.2.0 for more details.
6 | 


--------------------------------------------------------------------------------
/profiles/allflu/h3n2_include.txt:
--------------------------------------------------------------------------------
 1 | A/Albany/6/1968
 2 | A/HongKong/33/1973
 3 | A/Memphis/105/1976
 4 | A/Netherlands/233/1982
 5 | A/Singapore/35/1989
 6 | A/Stockholm/20/1993
 7 | A/NewYork/631/1996
 8 | A/Wisconsin/67/2005
 9 | A/Perth/16/2009
10 | A/HongKong/4801/2014
11 | A/Cambodia/e0826360/2020
12 | A/Darwin/9/2021
13 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - master
 7 |   pull_request:
 8 |   workflow_dispatch:
 9 | 
10 | jobs:
11 |   ci:
12 |     uses: nextstrain/.github/.github/workflows/pathogen-repo-ci.yaml@v0
13 |     with:
14 |       build-args: --configfile profiles/ci/builds.yaml -p
15 | 


--------------------------------------------------------------------------------
/config/frequency_weights_by_region.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "Africa": 1.02,
 3 |     "Europe": 0.74,
 4 |     "North America": 0.54,
 5 |     "China": 1.36,
 6 |     "South Asia": 1.45,
 7 |     "Japan Korea": 0.20,
 8 |     "Oceania": 0.04,
 9 |     "South America": 0.41,
10 |     "Southeast Asia": 0.62,
11 |     "West Asia": 0.75
12 | }
13 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/ha/koel.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "map": {
 4 |         "HA1": {
 5 |             "145": 1,
 6 |             "155": 1,
 7 |             "156": 1,
 8 |             "158": 1,
 9 |             "159": 1,
10 |             "189": 1,
11 |             "193": 1
12 |         }
13 |     },
14 |     "name": "koel"
15 | }


--------------------------------------------------------------------------------
/ingest/vendored/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 |   workflow_dispatch:
 9 | 
10 | jobs:
11 |   shellcheck:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |       - uses: nextstrain/.github/actions/shellcheck@master
16 | 


--------------------------------------------------------------------------------
/ingest/vendored/sha256sum:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Portable sha256sum utility: prints the SHA-256 hex digest of the bytes read from stdin.
 4 | """
 5 | from hashlib import sha256
 6 | from sys import stdin
 7 | 
 8 | chunk_size = 5 * 1024**2 # 5 MiB per read, so the input is hashed piecewise instead of being loaded at once
 9 | 
10 | h = sha256()
11 | 
12 | for chunk in iter(lambda: stdin.buffer.read(chunk_size), b""):  # read() returns b"" at EOF, which ends the loop
13 |     h.update(chunk)
14 | 
15 | print(h.hexdigest())
16 | 


--------------------------------------------------------------------------------
/ingest/vendored/.github/workflows/pre-commit.yaml:
--------------------------------------------------------------------------------
 1 | name: pre-commit
 2 | 
 3 | on:
 4 |   - push
 5 | 
 6 | jobs:
 7 |   pre-commit:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/checkout@v4
11 |       - uses: actions/setup-python@v5
12 |         with:
13 |           python-version: "3.12"
14 |       - uses: pre-commit/action@v3.0.1
15 | 


--------------------------------------------------------------------------------
/notebooks/README.md:
--------------------------------------------------------------------------------
1 | To run the `2025-09-16-plot-ga-by-antigenic-advance.py` notebook:
2 | - Run `marimo run notebooks/2025-09-16-plot-ga-by-antigenic-advance.py` from the top-level repository directory.
3 | - Make sure that `forecasts-flu` is a sibling directory of `seasonal-flu` and that `../forecasts-flu/results/h3n2/region/mlr/ga.tsv` exists.
4 | - Place `full-h3n2_ha.tsv` in the top-level repository directory.
5 | 


--------------------------------------------------------------------------------
/config/yam/outliers.txt:
--------------------------------------------------------------------------------
 1 | B/Catalonia/NSVH100562319/2017
 2 | B/England/581/2012
 3 | B/Gifu/62/2018
 4 | B/HongKong/2196/2010
 5 | B/Kisumu/7/2005
 6 | B/Kolkata/2546/2009
 7 | B/Kolkata/N-1272/2009
 8 | B/Kolkata/N-2047/2009
 9 | B/Nairobi/351/2005
10 | B/NewHampshire/1/2016
11 | B/Norway/2155/2017
12 | B/Palermo/2/2011
13 | B/Riyadh/3/2010
14 | B/Riyadh/4/2010
15 | B/Thailand/CU-B10303/2014
16 | 


--------------------------------------------------------------------------------
/profiles/scicore/submit.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | #SBATCH --output=log/%j.out                 # where to store the output ( %j is the JOBID )
 4 | #SBATCH --error=log/%j.err                  # where to store error messages
 5 | 
 6 | # activate conda environment ("." rather than "source": this script runs under /bin/sh, where "source" is not guaranteed)
 7 | . ~/miniconda3/etc/profile.d/conda.sh
 8 | conda activate nextstrain
 9 | # ask augur to minify the JSON files it writes
10 | export AUGUR_MINIFY_JSON=1
11 | 
12 | {exec_job}
13 | 


--------------------------------------------------------------------------------
/scripts/sha256sum:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Originally copied from nextstrain/ncov-ingest repo
 3 | """
 4 | Portable sha256sum utility: prints the SHA-256 hex digest of the bytes read from stdin.
 5 | """
 6 | from hashlib import sha256
 7 | from sys import stdin
 8 | 
 9 | chunk_size = 5 * 1024**2 # 5 MiB per read, so the input is hashed piecewise instead of being loaded at once
10 | 
11 | h = sha256()
12 | 
13 | for chunk in iter(lambda: stdin.buffer.read(chunk_size), b""):  # read() returns b"" at EOF, which ends the loop
14 |     h.update(chunk)
15 | 
16 | print(h.hexdigest())
17 | 


--------------------------------------------------------------------------------
/profiles/nextstrain-public/deploy.smk:
--------------------------------------------------------------------------------
 1 | """
 2 | This part of the workflow handles automatic deployments of public builds.
 3 | Depends on the `all_public` rule from rename.smk
 4 | """
 5 | 
 6 | rule deploy_all:  # upload every input of the all_public rule to the configured deploy URL
 7 |     input: rules.all_public.input  # reuse all_public's input list as the set of files to upload
 8 |     params:
 9 |         s3_dst = config["deploy_url"]  # upload destination, read from the "deploy_url" config key
10 |     shell:
11 |         """
12 |         nextstrain remote upload {params.s3_dst} {input}
13 |         """
14 | 


--------------------------------------------------------------------------------
/config/vic/ha/genemap.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | ##sequence-region KX058884.1 1 1885
3 | ## Coordinates based on PDB entry 4FQM (https://www.rcsb.org/structure/4FQM)
4 | ## from Dreyfus et al. 2012 (https://www.science.org/doi/full/10.1126/science.1222908).
5 | KX058884.1	feature	gene	34	78	.	+	.	gene_name="SigPep"
6 | KX058884.1	feature	gene	79	1119	.	+	.	gene_name="HA1"
7 | KX058884.1	feature	gene	1120	1788	.	+	.	gene_name="HA2"
8 | 


--------------------------------------------------------------------------------
/source-data/2018_South_America_flu_vaccination_coverage.tsv:
--------------------------------------------------------------------------------
 1 | # South American data from http://ais.paho.org/imm/InfluenzaCoverageMap.asp
 2 | Bolivia	70
 3 | Colombia	50
 4 | Ecuador	90
 5 | Peru	50
 6 | Venezuela	15
 7 | Brazil	90
 8 | Costa Rica	75
 9 | Belize	3
10 | El Salvador	35
11 | Guatemala	75
12 | Honduras	85
13 | Panama	90
14 | Cuba	90
15 | Mexico	90
16 | Bermuda	15
17 | Argentina	60
18 | Chile	55
19 | Paraguay	35
20 | Uruguay	30
21 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/ha/welsh_epitope_sites.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "Welsh et al. epitope sites",
 3 |   "default": 0,
 4 |   "map": {
 5 |     "HA1": {
 6 |       "50": 1,
 7 |       "82": 1,
 8 |       "124": 1,
 9 |       "135": 1,
10 |       "137": 1,
11 |       "143": 1,
12 |       "144": 1,
13 |       "145": 1,
14 |       "157": 1,
15 |       "159": 1,
16 |       "189": 1,
17 |       "193": 1,
18 |       "275": 1,
19 |       "276": 1
20 |     }
21 |   }
22 | }


--------------------------------------------------------------------------------
/config/distance_maps.tsv:
--------------------------------------------------------------------------------
 1 | lineage	segment	compare_to	attribute	distance_map
 2 | h3n2	ha	root	ep	wolf
 3 | h3n2	ha	root	ne	wolf_nonepitope
 4 | h3n2	ha	root	rb	koel
 5 | h3n2	ha	ancestor	ne_star	luksza_nonepitope
 6 | h3n2	ha	ancestor	cell_entry	Yu_et_al_2025_cell_entry
 7 | h3n2	ha	ancestor	ph_stability	Yu_et_al_2025_ph_stability
 8 | h3n2	na	root	ep	bhatt
 9 | h3n2	na	root	ne	bhatt_nonepitope
10 | h1n1pdm	ha	root	ep	canton
11 | h1n1pdm	ha	root	ne	canton_nonepitope
12 | 


--------------------------------------------------------------------------------
/ingest/vendored/.gitrepo:
--------------------------------------------------------------------------------
 1 | ; DO NOT EDIT (unless you know what you are doing)
 2 | ;
 3 | ; This subdirectory is a git "subrepo", and this file is maintained by the
 4 | ; git-subrepo command. See https://github.com/ingydotnet/git-subrepo#readme
 5 | ;
 6 | [subrepo]
 7 | 	remote = https://github.com/nextstrain/ingest
 8 | 	branch = main
 9 | 	commit = cd6d31a3b35cd1bb7eddf830c565be6d6e69f27a
10 | 	parent = eed11b63f5b662da6e7400bc65cc7c618d3ef4b8
11 | 	method = merge
12 | 	cmdver = 0.4.6
13 | 


--------------------------------------------------------------------------------
/workflow/envs/nextstrain.yaml:
--------------------------------------------------------------------------------
 1 | name: nextstrain
 2 | channels:
 3 |   - conda-forge
 4 |   - bioconda
 5 | dependencies:
 6 |   - augur=31.4.0
 7 |   - awscli=1.27.9
 8 |   - epiweeks=2.1.2
 9 |   - nextclade=3.15.3
10 |   - python=3.11.*
11 |   - seaborn>=0.11*
12 |   - seqkit=2.2.0
13 |   - csvtk=0.31.0
14 |   - tsv-utils=2.2.3
15 |   - tabulate=0.9.0
16 |   - xlrd=1.*
17 |   - pip=23.0.1
18 |   - pathogen-embed=3.1.0
19 |   - jinja2=3.1.2
20 |   - pip:
21 |     - rethinkdb==2.3.0.post6
22 | 


--------------------------------------------------------------------------------
/models/welsh_escape.json:
--------------------------------------------------------------------------------
 1 | {
 2 |  "predictors": [
 3 |   "welsh_escape"
 4 |  ],
 5 |  "cv_error_mean": 5.444043356439342,
 6 |  "cv_error_std": 1.7586532985230514,
 7 |  "coefficients_mean": [
 8 |   1.0
 9 |  ],
10 |  "coefficients_std": [
11 |   0.3
12 |  ],
13 |  "mean_stds_mean": [
14 |   1.0
15 |  ],
16 |  "mean_stds_std": [
17 |   0.02
18 |  ],
19 |  "cost_function": "diffsum",
20 |  "l1_lambda": 0.1,
21 |  "delta_months": 12,
22 |  "training_window": 6,
23 |  "pseudocount": null
24 | }
25 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/pb2/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | #!gff-spec-version 1.21
3 | #!processor NCBI annotwriter
4 | ##sequence-region NC_007373.1 1 2341
5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341
6 | NC_007373.1	RefSeq	region	1	2341	.	+	.	ID=NC_007373.1:1..2341
7 | NC_007373.1	RefSeq	CDS	28	2307	.	+	0	Name=PB2;gbkey=CDS;gene=PB2;protein_id=YP_308849.1;locus_tag=FLUAVH3N2_s1p1;product=polymerase PB2;ID=cds-YP_308849.1;Dbxref=GenBank:YP_308849.1,GeneID:3655161
8 | 


--------------------------------------------------------------------------------
/config/colors_for_titer_plots_vic.tsv:
--------------------------------------------------------------------------------
 1 | clade_test	V1A.3	#b8bc4a
 2 | clade_test	V1A.3/133R	#8cbb69
 3 | clade_test	V1A.3/155A	#b8bc4a
 4 | clade_test	V1A.3a.1	#e67932
 5 | clade_test	V1A.3a.2	#dc2f24
 6 | subclade_test	A.3.2	#3F52CD
 7 | subclade_test	C	#4681CA
 8 | subclade_test	C.1	#57A1AD
 9 | subclade_test	C.2	#70B487
10 | subclade_test	C.3	#90BC65
11 | subclade_test	C.4	#B4BD4C
12 | subclade_test	C.5	#D3B240
13 | subclade_test	C.5.1	#E59638
14 | subclade_test	C.5.2	#E4642E
15 | subclade_test	C.5.3	#DB2823
16 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/np/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | #!gff-spec-version 1.21
3 | #!processor NCBI annotwriter
4 | ##sequence-region NC_007369.1 1 1566
5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341
6 | NC_007369.1	RefSeq	region	1	1566	.	+	.	ID=NC_007369.1:1..1566
7 | NC_007369.1	RefSeq	CDS	46	1542	.	+	0	Name=NP;gene=NP;gbkey=CDS;protein_id=YP_308843.1;locus_tag=FLUAVH3N2_s5p1;ID=cds-YP_308843.1;product=nucleocapsid protein;Dbxref=GenBank:YP_308843.1,GeneID:3655155
8 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/pb1/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | #!gff-spec-version 1.21
3 | #!processor NCBI annotwriter
4 | ##sequence-region NC_026435.1 1 2274
5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809
6 | NC_026435.1	RefSeq	region	1	2274	.	+	.	ID=NC_026435.1:1..2274;
7 | NC_026435.1	RefSeq	CDS	1	2274	.	+	0	Name=PB1;gbkey=CDS;gene=PB1;locus_tag=UJ99_s2gp1;product=polymerase PB1;protein_id=YP_009118628.1;ID=cds-YP_009118628.1;Dbxref=GenBank:YP_009118628.1,GeneID:23308122
8 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/pb2/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | #!gff-spec-version 1.21
3 | #!processor NCBI annotwriter
4 | ##sequence-region NC_026438.1 1 2280
5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809
6 | NC_026438.1	RefSeq	region	1	2280	.	+	.	ID=NC_026438.1:1..2280;
7 | NC_026438.1	RefSeq	CDS	1	2280	.	+	0	Name=PB2;gbkey=CDS;gene=PB2;locus_tag=UJ99_s1gp1;product=polymerase PB2;protein_id=YP_009118631.1;ID=cds-YP_009118631.1;Dbxref=GenBank:YP_009118631.1,GeneID:23308131
8 | 


--------------------------------------------------------------------------------
/profiles/nextclade.yaml:
--------------------------------------------------------------------------------
 1 | custom_rules:
 2 |   - workflow/snakemake_rules/download_from_s3.smk
 3 |   - profiles/nextclade/run-nextclade.smk
 4 | 
 5 | s3_dst: "s3://nextstrain-data-private/files/workflows/seasonal-flu"
 6 | 
 7 | segments:
 8 |   - ha
 9 |   - na
10 |   - pb2
11 |   - pb1
12 |   - pa
13 |   - np
14 |   - mp
15 |   - ns
16 | 
17 | builds:
18 |   h1n1pdm:
19 |     lineage: h1n1pdm
20 |   h3n2:
21 |     lineage: h3n2
22 |   vic:
23 |     lineage: vic
24 |     segments:
25 |       - ha
26 |       - na
27 | 


--------------------------------------------------------------------------------
/ingest/vendored/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | ### Description of proposed changes
 2 | 
 3 | <!-- What is the goal of this pull request? What does this pull request change? -->
 4 | 
 5 | ### Related issue(s)
 6 | 
 7 | <!-- Link any related issues here. -->
 8 | 
 9 | ### Checklist
10 | 
11 | <!-- Make sure checks are successful at the bottom of the PR. -->
12 | 
13 | - [ ] Checks pass
14 | - [ ] If adding a script, add an entry for it in the README.
15 | 
16 | <!-- 🙌 Thank you for contributing to Nextstrain! ✨ -->
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/np/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | #!gff-spec-version 1.21
3 | #!processor NCBI annotwriter
4 | ##sequence-region NC_026436.1 1 1497
5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809
6 | NC_026436.1	RefSeq	region	1	1497	.	+	.	ID=NC_026436.1:1..1497;
7 | NC_026436.1	RefSeq	CDS	1	1497	.	+	0	Name=NP;gene=NP;gbkey=CDS;locus_tag=UJ99_s5gp1;protein_id=YP_009118629.1;ID=cds-YP_009118629.1;product=nucleocapsid protein;Dbxref=GenBank:YP_009118629.1,GeneID:23308125
8 | 


--------------------------------------------------------------------------------
/config/references_for_titer_plots/vic/cell_hi.txt:
--------------------------------------------------------------------------------
 1 | B/Austria/1359417/2021 # C, vaccine strain
 2 | B/Victoria/16/2023 # C.3:208P
 3 | B/Pennsylvania/14/2025 # C.3.1, CDC
 4 | B/Tasmania/31/2025 # C.3.1
 5 | B/Kanagawa/AC2414/2025 # C.3.1
 6 | B/Utah/11/2023 # C.5.1:128K
 7 | B/Kansas/5/2024 # C.5.1:128K,202V
 8 | B/Ghana/3778/2024 # C.5.1:202V
 9 | B/Missouri/3/2024 # C.5.6:199A, CDC
10 | B/Switzerland/329/2024 # C.5.6:199A, Crick
11 | B/Alabama/7/2023 # C.5.6
12 | B/Ranong/373/2023 # C.5.7
13 | B/Tokyo/23150/2023 # C.5.7
14 | B/Texas/19/2024 # C.5.7
15 | 


--------------------------------------------------------------------------------
/profiles/scicore/cluster.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "__default__": {
 3 |       "time": "00:29:00",
 4 |       "qos": "30min",
 5 |       "n": 1,
 6 |       "mem": "8G",
 7 |       "conda_env": "nextstrain"
 8 |     },
 9 |     "refine": {
10 |       "time": "05:59:00",
11 |       "qos": "6hours",
12 |       "n": 1
13 |     },
14 |     "tree": {
15 |       "n": 4,
16 |       "qos": "6hours",
17 |       "time": "05:59:00"
18 |     },
19 |     "align": {
20 |       "n": 4,
21 |       "time": "00:29:00",
22 |       "mem": "16G",
23 |       "qos": "30min"
24 |     }
25 |   }
26 | 


--------------------------------------------------------------------------------
/ingest/build-configs/nextstrain-automation/config.yaml:
--------------------------------------------------------------------------------
 1 | # Custom rules to run as part of the Nextstrain automated workflow
 2 | # The paths should be relative to the ingest directory.
 3 | custom_rules:
 4 |   - build-configs/nextstrain-automation/fetch_from_s3.smk
 5 |   - build-configs/nextstrain-automation/upload.smk
 6 | 
 7 | # TODO: remove `/trials/ingest` after we switch to the ingest workflow
 8 | s3_dst: "s3://nextstrain-data-private/files/workflows/seasonal-flu/trials/ingest"
 9 | s3_src: "s3://nextstrain-data-private/files/workflows/seasonal-flu/trials/ingest"
10 | 


--------------------------------------------------------------------------------
/config/references_for_titer_plots/h3n2/cell_fra.txt:
--------------------------------------------------------------------------------
 1 | A/DistrictOfColumbia/27/2023 # J.2:145N, vaccine strain
 2 | A/Wisconsin/154/2024 # J.2:135K
 3 | A/Catalonia/NSVH102423723/2024 # J.2:135K, Crick
 4 | A/Michigan/32/2024 # J.2:145N
 5 | A/Colorado/209/2024 # J.2:223I
 6 | #A/Victoria/488/2024 # J.2:207R
 7 | A/Colorado/6/2024 # J.2.1
 8 | A/Minnesota/97/2024 # J.2.2:65K,145N
 9 | A/Alaska/8/2025 # J.2.3, CDC
10 | A/Netherlands/10685/2024 # J.2.3
11 | A/Valladolid/1187/2025 # J.2.4 (189R + 135K), Crick
12 | A/Mississippi/37/2025 # J.2.4, CDC
13 | A/Sydney/1359/2024 # J.2.4
14 | A/Kentucky/29/2024 # J.2.5
15 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/ha/EPI1926632/pathogen.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "aaMotifs": [
 3 |     {
 4 |       "name": "glycosylation",
 5 |       "nameShort": "Glyc.",
 6 |       "nameFriendly": "Glycosylation",
 7 |       "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)",
 8 |       "includeCdses": [
 9 |         {
10 |           "cds":"HA1",
11 |           "ranges":[]
12 |         },
13 |         {
14 |           "cds":"HA2",
15 |           "ranges":[{"begin":0, "end":187}]
16 |         }
17 |       ],
18 |       "motifs": [
19 |         "N[^P][ST]"
20 |       ]
21 |     }
22 |   ]
23 | }


--------------------------------------------------------------------------------
/profiles/scicore/config.yaml:
--------------------------------------------------------------------------------
 1 | configfile:
 2 |   - profiles/europe/builds.yaml
 3 | 
 4 | cluster-config: profiles/scicore/cluster.json
 5 | cluster: "sbatch --time={cluster.time} --mem={cluster.mem} --cpus-per-task={cluster.n} --qos={cluster.qos}"
 6 | 
 7 | jobs: 128
 8 | 
 9 | jobscript: profiles/scicore/submit.sh
10 | 
 11 | # Set the name for the job as displayed in the cluster queue.
12 | jobname: "{rulename}.{jobid}.sh"
13 | 
14 | 
15 | cores: 4
16 | keep-going: True
17 | printshellcmds: True
18 | show-failed-logs: True
19 | reason: True
20 | stats: stats.json
21 | # Print log files of failed jobs
22 | show-failed-logs: True
23 | 


--------------------------------------------------------------------------------
/config/nextstrain_clades_vic_ha.tsv:
--------------------------------------------------------------------------------
 1 | clade	gene	site	alt
 2 | 1A	nuc	206	G
 3 | 1A	nuc	644	C
 4 | 1A	nuc	1340	T
 5 | 1A	nuc	1821	T
 6 | 1B	nuc	1034	G
 7 | 1B	nuc	1172	G
 8 | V1A	HA1	117	V
 9 | V1A	HA1	146	I
10 | V1A	nuc	296	G
11 | V1A.1	HA2	152	K
12 | V1A.1	HA1	162	-
13 | V1A.1	HA1	163	-
14 | V1A.1	HA1	180	V
15 | V1A.1	nuc	1810	G
16 | V1A.2	HA1	163	-
17 | V1A.2	HA1	164	-
18 | V1A.2	HA1	180	T
19 | V1A.2	HA1	209	N
20 | V1A.3	HA1	136	E
21 | V1A.3	HA1	163	-
22 | V1A.3	HA1	164	-
23 | V1A.3	nuc	1646	C
24 | 3a	HA1	150	K
25 | 3a	HA1	184	E
26 | 3a	HA1	197	D
27 | 3a	HA1	279	K
28 | 3a2	HA1	150	K
29 | 3a2	HA1	144	L
30 | 3a1	HA1	150	K
31 | 3a1	HA1	220	M
32 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # Dependabot configuration file
 2 | # <https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file>
 3 | #
 4 | # Each ecosystem is checked on a scheduled interval defined below.  To trigger
 5 | # a check manually, go to
 6 | #
 7 | #   https://github.com/nextstrain/seasonal-flu/network/updates
 8 | #
 9 | # and look for a "Check for updates" button.  You may need to click around a
10 | # bit first.
11 | ---
12 | version: 2
13 | updates:
14 |   - package-ecosystem: "github-actions"
15 |     directory: "/"
16 |     schedule:
17 |       interval: "weekly"
18 | 


--------------------------------------------------------------------------------
/config/references_for_titer_plots/h1n1pdm/egg_hi.txt:
--------------------------------------------------------------------------------
 1 | A/Victoria/4897/2022-egg # D, vaccine strain
 2 | A/France/BFC-IPP15052/2024-egg # D.3.1
 3 | #A/Missouri/11/2025-egg # D.3.1
 4 | A/Sydney/124/2025-egg # D.3.1
 5 | A/Switzerland/6849/2025-egg # D.3.1:113K
 6 | A/California/177/2024-egg # C.1.9.1
 7 | A/Victoria/42/2025-egg # C.1.9.3
 8 | A/Norway/10655/2024-egg # C.1.9.3:35N
 9 | #A/Washington/310/2024-egg # C.1.9.3:166V
10 | A/Caerphilly/5142/2024-egg # C.1.9.3:120T,137S,166V
11 | # A/Singapore/SAR3644/2024-egg # Missing?
12 | A/Iceland/34025/2024-egg # D.5
13 | A/Tokyo/EIS10-381/2024-egg # Missing?
14 | A/Tokyo/EIS10-554/2024-egg # Missing?
15 | 


--------------------------------------------------------------------------------
/ingest/vendored/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # Dependabot configuration file
 2 | # <https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file>
 3 | #
 4 | # Each ecosystem is checked on a scheduled interval defined below.  To trigger
 5 | # a check manually, go to
 6 | #
 7 | #   https://github.com/nextstrain/ingest/network/updates
 8 | #
 9 | # and look for a "Check for updates" button.  You may need to click around a
10 | # bit first.
11 | ---
12 | version: 2
13 | updates:
14 |   - package-ecosystem: "github-actions"
15 |     directory: "/"
16 |     schedule:
17 |       interval: "weekly"
18 | 


--------------------------------------------------------------------------------
/config/yam/vaccine.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "nodes": {
 3 |         "B/Shanghai/361/2002": {
 4 |             "vaccine": {
 5 |                 "selection_date": "2004-09-25"
 6 |             }
 7 |         },
 8 |         "B/Florida/4/2006": {
 9 |             "vaccine": {
10 |                 "selection_date": "2008-09-25"
11 |             }
12 |         },
13 |         "B/Wisconsin/1/2010": {
14 |             "vaccine": {
15 |                 "selection_date": "2012-02-25"
16 |             }
17 |         },
18 |         "B/Phuket/3073/2013": {
19 |             "vaccine": {
20 |                 "selection_date": "2014-09-25"
21 |             }
22 |         }
23 |     }
24 | }


--------------------------------------------------------------------------------
/models/ne_star-lbi.json:
--------------------------------------------------------------------------------
 1 | {
 2 |  "predictors": [
 3 |   "ne_star",
 4 |   "lbi"
 5 |  ],
 6 |  "cv_error_mean": 5.444043356439342,
 7 |  "cv_error_std": 1.7586532985230514,
 8 |  "coefficients_mean": [
 9 |   -0.6756465974025607,
10 |   1.033713901753553
11 |  ],
12 |  "coefficients_std": [
13 |   0.3336818417473266,
14 |   0.3936161339928366
15 |  ],
16 |  "mean_stds_mean": [
17 |   1.1405240069244975,
18 |   0.1290705493482985
19 |  ],
20 |  "mean_stds_std": [
21 |   0.036754535005899855,
22 |   0.02083404312198264
23 |  ],
24 |  "cost_function": "diffsum",
25 |  "l1_lambda": 0.1,
26 |  "delta_months": 12,
27 |  "training_window": 6,
28 |  "pseudocount": null
29 | }


--------------------------------------------------------------------------------
/models/cTiter_x-ne_star.json:
--------------------------------------------------------------------------------
 1 | {
 2 |  "predictors": [
 3 |   "cTiter_x",
 4 |   "ne_star"
 5 |  ],
 6 |  "cv_error_mean": 5.823634831801705,
 7 |  "cv_error_std": 1.4714831128264352,
 8 |  "coefficients_mean": [
 9 |   0.8880449909700426,
10 |   -1.0074938116832592
11 |  ],
12 |  "coefficients_std": [
13 |   0.22289407223621063,
14 |   0.41396205335824204
15 |  ],
16 |  "mean_stds_mean": [
17 |   0.22238984824857977,
18 |   1.1405240069244975
19 |  ],
20 |  "mean_stds_std": [
21 |   0.051326848248942424,
22 |   0.036754535005899855
23 |  ],
24 |  "cost_function": "diffsum",
25 |  "l1_lambda": 0.1,
26 |  "delta_months": 12,
27 |  "training_window": 6,
28 |  "pseudocount": null
29 | }
30 | 


--------------------------------------------------------------------------------
/models/fra_cTiter_x-ne_star.json:
--------------------------------------------------------------------------------
 1 | {
 2 |  "predictors": [
 3 |   "fra_cTiter_x",
 4 |   "ne_star"
 5 |  ],
 6 |  "cv_error_mean": 8.288751542568207,
 7 |  "cv_error_std": 4.590061451047961,
 8 |  "coefficients_mean": [
 9 |   1.4022025868694787,
10 |   -0.46227708276390217
11 |  ],
12 |  "coefficients_std": [
13 |   0.22611682803638786,
14 |   0.1787549565201879
15 |  ],
16 |  "mean_stds_mean": [
17 |   0.10027528334129619,
18 |   1.1356939490365228
19 |  ],
20 |  "mean_stds_std": [
21 |   0.05129205379298049,
22 |   0.021887145941695038
23 |  ],
24 |  "cost_function": "diffsum",
25 |  "l1_lambda": 0.1,
26 |  "delta_months": 12,
27 |  "training_window": 6,
28 |  "pseudocount": null
29 | }


--------------------------------------------------------------------------------
/models/cell_fra_cTiter_x-ne_star.json:
--------------------------------------------------------------------------------
 1 | {
 2 |  "predictors": [
 3 |   "cell_fra_cTiter_x",
 4 |   "ne_star"
 5 |  ],
 6 |  "cv_error_mean": 5.823634831801705,
 7 |  "cv_error_std": 1.4714831128264352,
 8 |  "coefficients_mean": [
 9 |   0.8880449909700426,
10 |   -1.0074938116832592
11 |  ],
12 |  "coefficients_std": [
13 |   0.22289407223621063,
14 |   0.41396205335824204
15 |  ],
16 |  "mean_stds_mean": [
17 |   0.22238984824857977,
18 |   1.1405240069244975
19 |  ],
20 |  "mean_stds_std": [
21 |   0.051326848248942424,
22 |   0.036754535005899855
23 |  ],
24 |  "cost_function": "diffsum",
25 |  "l1_lambda": 0.1,
26 |  "delta_months": 12,
27 |  "training_window": 6,
28 |  "pseudocount": null
29 | }
30 | 


--------------------------------------------------------------------------------
/models/human_cell_fra_cTiter_x-ne_star.json:
--------------------------------------------------------------------------------
 1 | {
 2 |  "predictors": [
 3 |   "human_cell_fra_cTiter_x",
 4 |   "ne_star"
 5 |  ],
 6 |  "cv_error_mean": 5.823634831801705,
 7 |  "cv_error_std": 1.4714831128264352,
 8 |  "coefficients_mean": [
 9 |   0.8880449909700426,
10 |   -1.0074938116832592
11 |  ],
12 |  "coefficients_std": [
13 |   0.22289407223621063,
14 |   0.41396205335824204
15 |  ],
16 |  "mean_stds_mean": [
17 |   0.22238984824857977,
18 |   1.1405240069244975
19 |  ],
20 |  "mean_stds_std": [
21 |   0.051326848248942424,
22 |   0.036754535005899855
23 |  ],
24 |  "cost_function": "diffsum",
25 |  "l1_lambda": 0.1,
26 |  "delta_months": 12,
27 |  "training_window": 6,
28 |  "pseudocount": null
29 | }
30 | 


--------------------------------------------------------------------------------
/models/human_cell_hi_cTiter_x-ne_star.json:
--------------------------------------------------------------------------------
 1 | {
 2 |  "predictors": [
 3 |   "human_cell_hi_cTiter_x",
 4 |   "ne_star"
 5 |  ],
 6 |  "cv_error_mean": 5.823634831801705,
 7 |  "cv_error_std": 1.4714831128264352,
 8 |  "coefficients_mean": [
 9 |   0.8880449909700426,
10 |   -1.0074938116832592
11 |  ],
12 |  "coefficients_std": [
13 |   0.22289407223621063,
14 |   0.41396205335824204
15 |  ],
16 |  "mean_stds_mean": [
17 |   0.22238984824857977,
18 |   1.1405240069244975
19 |  ],
20 |  "mean_stds_std": [
21 |   0.051326848248942424,
22 |   0.036754535005899855
23 |  ],
24 |  "cost_function": "diffsum",
25 |  "l1_lambda": 0.1,
26 |  "delta_months": 12,
27 |  "training_window": 6,
28 |  "pseudocount": null
29 | }
30 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/pb1/annotation.gff:
--------------------------------------------------------------------------------
1 | ##gff-version 3
2 | #!gff-spec-version 1.21
3 | #!processor NCBI annotwriter
4 | ##sequence-region NC_007372.1 1 2341
5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341
6 | NC_007372.1	RefSeq	region	1	2341	.	+	.	ID=NC_007372.1:1..2341
7 | NC_007372.1	RefSeq	CDS	25	2298	.	+	0	Name=PB1;gbkey=CDS;gene=PB1;protein_id=YP_308847.1;locus_tag=FLUAVH3N2_s2p1;product=polymerase PB1;ID=cds-YP_308847.1;Dbxref=GenBank:YP_308847.1,GeneID:3655159
8 | NC_007372.1	RefSeq	CDS	119	391	.	+	0	Name=PB1-F2;gbkey=CDS;gene=PB1-F2;protein_id=YP_308848.1;locus_tag=FLUAVH3N2_s2p2;product=PB1-F2 protein;ID=cds-YP_308848.1;Dbxref=GenBank:YP_308848.1,GeneID:3655160
9 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/ha/bush_epitope_A.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "output_type": "integer",
 4 |     "map": {
 5 |         "HA1": {
 6 |             "122": 1,
 7 |             "124": 1,
 8 |             "126": 1,
 9 |             "130": 1,
10 |             "131": 1,
11 |             "132": 1,
12 |             "133": 1,
13 |             "135": 1,
14 |             "137": 1,
15 |             "138": 1,
16 |             "140": 1,
17 |             "142": 1,
18 |             "143": 1,
19 |             "144": 1,
20 |             "145": 1,
21 |             "146": 1,
22 |             "150": 1,
23 |             "152": 1,
24 |             "168": 1
25 |         }
26 |     },
27 |     "name": "bush_epitope_A"
28 | }
29 | 


--------------------------------------------------------------------------------
/config/references_to_include_in_titer_plots_vic.txt:
--------------------------------------------------------------------------------
 1 | B/Austria/1359417/2021
 2 | B/Sichuan-Jingyang/12048/2019
 3 | B/Darwin/11/2021
 4 | B/Netherlands/11263/2022
 5 | B/Singapore/WUH4618/2021
 6 | B/Victoria/2113/2019
 7 | B/Victoria/16/2023
 8 | B/Washington/2/2019
 9 | B/Henan-Xigong/1118/2021
10 | B/Massachusetts/1/2021
11 | B/Kenya/186/2021
12 | B/NorthCarolina/1/2021
13 | B/Colorado/5/2022
14 | B/Netherlands/10900/2022
15 | B/Austria/1359417/2021-egg
16 | B/Sichuan-Jingyang/12048/2019-egg
17 | B/Singapore/WUH4618/2021-egg
18 | B/Washington/2/2019-egg
19 | B/Zhejiang-Xiacheng/11085/2021-egg
20 | B/Zhejiang-Nanhu/1854/2021-egg
21 | B/Netherlands/10894/2022-egg
22 | B/Henan-Xigong/1118/2021-egg
23 | B/Michigan/1/2021-egg
24 | 


--------------------------------------------------------------------------------
/config/colors_for_titer_plots_h1n1pdm.tsv:
--------------------------------------------------------------------------------
 1 | clade_test	1	#5c22be
 2 | clade_test	2	#4a3fdd
 3 | clade_test	3	#66bbbe
 4 | clade_test	5	#4c89e8
 5 | clade_test	6B.1A.5a	#57a5d7
 6 | clade_test	5a.1	#3e59cf
 7 | clade_test	5a.2	#705c94
 8 | clade_test	5a.2a	#e39b39
 9 | clade_test	5a.2a.1	#dfd24e
10 | clade_test	5B	#7fb975
11 | clade_test	6	#ff8e3a
12 | clade_test	6b	#ff8e3a
13 | clade_test	6b1	#f5c546
14 | clade_test	6B.1A	#dfd24e
15 | clade_test	7	#f93529
16 | subclade_test	B	#3E5DD0
17 | subclade_test	C.1	#4A8CC2
18 | subclade_test	C.1.1	#60AA9E
19 | subclade_test	C.1.1.1	#80B974
20 | subclade_test	C.1.2	#A6BE55
21 | subclade_test	C.1.3	#CBB742
22 | subclade_test	C.1.4	#E29D39
23 | subclade_test	C.1.5	#E56A2F
24 | subclade_test	C.1.6	#DB2823
25 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/ha/bush_epitope_E.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "output_type": "integer",
 4 |     "map": {
 5 |         "HA1": {
 6 |             "57": 1,
 7 |             "59": 1,
 8 |             "62": 1,
 9 |             "63": 1,
10 |             "67": 1,
11 |             "75": 1,
12 |             "78": 1,
13 |             "80": 1,
14 |             "81": 1,
15 |             "82": 1,
16 |             "83": 1,
17 |             "86": 1,
18 |             "87": 1,
19 |             "88": 1,
20 |             "91": 1,
21 |             "92": 1,
22 |             "94": 1,
23 |             "109": 1,
24 |             "260": 1,
25 |             "261": 1,
26 |             "262": 1,
27 |             "265": 1
28 |         }
29 |     },
30 |     "name": "bush_epitope_E"
31 | }
32 | 


--------------------------------------------------------------------------------
/profiles/nextflu-private/vic/ha/clades.tsv:
--------------------------------------------------------------------------------
 1 | clade	gene	site	alt
 2 | V1A	nuc	228	G
 3 | V1A	nuc	666	C
 4 | V1A	nuc	1362	T
 5 | V1A	nuc	1843	T
 6 | V1B	nuc	1056	G
 7 | V1B	nuc	1194	G
 8 | V1A.1	HA1	129	G
 9 | V1A.1	HA1	162	-
10 | V1A.1	HA1	163	-
11 | V1A.1	HA1	180	V
12 | V1A.1	HA2	152	K
13 | V1A.2	HA1	163	-
14 | V1A.2	HA1	164	-
15 | V1A.2	HA1	180	T
16 | V1A.2	HA1	209	N
17 | V1A.3	HA1	136	E
18 | V1A.3	HA1	163	-
19 | V1A.3	HA1	164	-
20 | V1A.3	nuc	484	G
21 | V1A.3/133R	nuc	153	C
22 | V1A.3/133R	HA1	133	R
23 | V1A.3/155A	nuc	153	C
24 | V1A.3/155A	HA1	155	A
25 | V1A.3a	HA1	150	K
26 | V1A.3a	HA1	184	E
27 | V1A.3a	HA1	197	D
28 | V1A.3a	HA1	279	K
29 | V1A.3a.1	HA1	150	K
30 | V1A.3a.1	HA1	220	M
31 | V1A.3a.1	HA1	241	Q
32 | V1A.3a.2	HA1	127	T
33 | V1A.3a.2	HA1	144	L
34 | V1A.3a.2	HA1	203	R
35 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/ha/bush_epitope_B.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "output_type": "integer",
 4 |     "map": {
 5 |         "HA1": {
 6 |             "128": 1,
 7 |             "129": 1,
 8 |             "155": 1,
 9 |             "156": 1,
10 |             "157": 1,
11 |             "158": 1,
12 |             "159": 1,
13 |             "160": 1,
14 |             "163": 1,
15 |             "164": 1,
16 |             "165": 1,
17 |             "186": 1,
18 |             "187": 1,
19 |             "188": 1,
20 |             "189": 1,
21 |             "190": 1,
22 |             "192": 1,
23 |             "193": 1,
24 |             "194": 1,
25 |             "196": 1,
26 |             "197": 1,
27 |             "198": 1
28 |         }
29 |     },
30 |     "name": "bush_epitope_B"
31 | }
32 | 


--------------------------------------------------------------------------------
/config/references_for_titer_plots/h3n2/egg_hi.txt:
--------------------------------------------------------------------------------
 1 | A/Croatia/10136RV/2023-egg # J.2:145N, vaccine strain
 2 | A/DistrictOfColumbia/27/2023-egg # J.2:145N, vaccine strain
 3 | A/Idaho/69/2023-egg # J.2:145N
 4 | A/Oregon/266/2024-egg # J.2:135K
 5 | A/Catalonia/NSVH102423723/2024-egg # J.2:135K
 6 | A/Oklahoma/5/2024-egg # J.2:158K
 7 | A/Norway/423/2024-egg # J.2:158K
 8 | A/Iowa/12/2024-egg # J.2:104N,145N
 9 | A/Netherlands/2093/2024-egg # J.2.3
10 | A/Netherlands/2093/2025-egg # J.2.3:195Y
11 | A/Michigan/73/2025-egg # J.2.3:195Y
12 | A/Nepal/N042/2025-egg # J.2.4
13 | A/Valladolid/1187/2025-egg # J.2.4
14 | A/Singapore/GP20238/2024-egg # J.2.4
15 | A/NewJersey/16/2025-egg # J.2.5
16 | #A/Idaho/47/2023-egg # J.1.1:214T
17 | A/Minnesota/117/2024-egg # Missing
18 | A/Wisconsin/160/2024-egg # Missing
19 | 


--------------------------------------------------------------------------------
/ingest/defaults/final_annotations.tsv:
--------------------------------------------------------------------------------
 1 | # Manually curated annotations TSV file
 2 | #
 3 | # This runs as the final step in the curate chain. If possible, please
 4 | # put hardcoded annotations in an annotations file specific to a relevant
 5 | # curation layer. For instance, lineage annotations should not go in this
 6 | # file but rather `defaults/lineages.tsv`
 7 | #
 8 | #
 9 | # This TSV should not have a header and should have exactly three columns:
10 | # COLUMN 1: (CURATED) STRAIN NAME
11 | # COLUMN 2: FIELD NAME (e.g. date)
12 | # COLUMN 3: NEW VALUE (e.g. 2025-01-01)
13 | #
14 | # If there are multiple annotations for the same id and field, then the last value is used
15 | # Lines starting with '#' are treated as comments
 16 | # Anything following a '#' after the field value is treated as a comment.
17 | 
18 | 


--------------------------------------------------------------------------------
/config/colors_for_titer_plots_h3n2.tsv:
--------------------------------------------------------------------------------
 1 | clade_test	1	#571EA2
 2 | clade_test	1a	#4334BF
 3 | clade_test	1a.1	#3F55CE
 4 | clade_test	2	#4376CD
 5 | clade_test	2a	#4C91C0
 6 | clade_test	2a.1	#59A4A9
 7 | clade_test	2a.1a	#6AB18F
 8 | clade_test	2a.1b	#7FB975
 9 | clade_test	2a.2	#97BD5F
10 | clade_test	2a.3	#AFBD4F
11 | clade_test	2a.3a	#C7B944
12 | clade_test	2a.3a.1	#D9AD3D
13 | clade_test	2a.3b	#E49838
14 | clade_test	2b	#E67932
15 | clade_test	2c	#E1512A
16 | clade_test	2d	#DB2823
17 | subclade_test	F.1.1	#3F52CD
18 | subclade_test	G.1.1	#4681CA
19 | subclade_test	G.1.1.2	#57A1AD
20 | subclade_test	G.1.3	#70B487
21 | subclade_test	G.1.3.1	#90BC65
22 | subclade_test	G.1.3.1.1	#B4BD4C
23 | subclade_test	G.1.3.2	#D3B240
24 | subclade_test	G.2	#E59638
25 | subclade_test	G.2.1	#E4642E
26 | subclade_test	G.2.2	#DB2823
27 | 


--------------------------------------------------------------------------------
/config/references_for_titer_plots/h1n1pdm/cell_hi.txt:
--------------------------------------------------------------------------------
 1 | A/Wisconsin/67/2022 # C.1.1, vaccine strain
 2 | A/Victoria/4897/2022 # D
 3 | #A/Massachusetts/76/2024 # D:152I,308K
 4 | A/Missouri/11/2025 # D.3
 5 | A/Perth/456/2025 # D.3.1, VIDRL
 6 | A/Tokyo/EIS10-554/2024 # D.3.1, NIID
 7 | #A/Kanagawa/IC2435/2025 # D.3.1:155R
 8 | #A/Tokyo/EIS11-980/2025 # D.3.1:113K
 9 | A/Colorado/218/2024 # D.3.1:113K, CDC
10 | A/Norway/8388/2024 # D.3.1:113K, Crick
11 | A/Bahrain/25220003050/2025 # D.3.1:113K
12 | A/Darwin/1015/2025 # D.3.1:113K,139D,283K
13 | A/Delaware/83/2024 # D.5
14 | #A/Tajikistan/2-1057/2024 # D.5, Crick
15 | A/Victoria/376/2024 # C.1.9
16 | A/Tennessee/77/2024 # C.1.9:P137S
17 | A/Canberra/651/2024 # C.1.9.3
18 | A/Iowa/110/2024 # C.1.9.3:166V
19 | A/Norway/7606/2024 # C.1.9.3:166V
20 | A/Victoria/42/2025 # C.1.9.3:137S,155E,166V
21 | 


--------------------------------------------------------------------------------
/config/yam/reference_strains.txt:
--------------------------------------------------------------------------------
 1 | B/Arizona/10/2015
 2 | B/Arizona/10/2015-egg
 3 | B/Beijing/184/1993
 4 | B/Brisbane/9/2014
 5 | B/California/12/2015
 6 | B/Canterbury/5/2017
 7 | B/Darwin/58/2019
 8 | B/Florida/4/2006
 9 | B/Guangdong-Liwan/1133/2014
10 | B/Guyane/5/2018
11 | B/Guyane/5/2018-egg
12 | B/Hyogo/3210/2015
13 | B/Kanagawa/IC1649/2017
14 | B/Kuwait/7274/2017
15 | B/Kuwait/7274/2017-egg
16 | B/Massachusetts/2/2012
17 | B/Massachusetts/2/2012-egg
18 | B/Mauritius/1791/2017
19 | B/NewHampshire/1/2016
20 | B/NewHampshire/1/2016-egg
21 | B/Perth/4/2017
22 | B/Phuket/3073/2013
23 | B/Phuket/3073/2013-egg
24 | B/Sapporo/2/2015
25 | B/Shanghai/361/2002
26 | B/Sichuan/379/1999
27 | B/Sydney/10/2016
28 | B/Texas/81/2016
29 | B/Texas/81/2016-egg
30 | B/Utah/9/2014
31 | B/Wellington/40/2017
32 | B/Wisconsin/1/2010
33 | B/Yokohama/9/2019
34 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/ha/CY163680/pathogen.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "nucMutLabelMap": {},
 3 |     "nucMutLabelMapReverse": {},
 4 |     "shortcuts": [
 5 |       "flu_h3n2_ha_broad",
 6 |       "nextstrain/flu/h3n2/ha/wisconsin-67-2005"
 7 |     ],
 8 |     "aaMotifs": [
 9 |       {
10 |         "name": "glycosylation",
11 |         "nameShort": "Glyc.",
12 |         "nameFriendly": "Glycosylation",
13 |         "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)",
14 |         "includeCdses": [
15 |           {
16 |             "cds":"HA1",
17 |             "ranges":[]
18 |           },
19 |           {
20 |             "cds":"HA2",
21 |             "ranges":[{"begin":0, "end":186}]
22 |           }
23 |         ],
24 |         "motifs": [
25 |           "N[^P][ST]"
26 |         ]
27 |       }
28 |     ]
29 | }


--------------------------------------------------------------------------------
/nextclade/config/vic/ha/KX058884/founder_sequences_SigPep.fasta:
--------------------------------------------------------------------------------
 1 | >A
 2 | MKAIIVLLMVVTSNA
 3 | >A.1
 4 | MKAIIVLLMVVTSNA
 5 | >A.2
 6 | MKAIIVLLMVVTSNA
 7 | >A.3
 8 | MKAIIVLLMVVTSNA
 9 | >A.3.1
10 | MKAIIVLLMVVTSNA
11 | >A.3.1.1
12 | MKAIIVLLMVVTSNA
13 | >A.3.2
14 | MKAIIVLLMVVTSNA
15 | >A.3.3
16 | MKAIIVLLMVVTSNA
17 | >B
18 | MKAIIVLLMVVTSNA
19 | >C
20 | MKAIIVLLMVVTSNA
21 | >C.1
22 | MKAIIVLLMVVTSNA
23 | >C.2
24 | MKAIIVLLMVVTSNA
25 | >C.3
26 | MKAIIVLLMVVTSNA
27 | >C.3.1
28 | MKAIIVLLMVVTSNA
29 | >C.3.2
30 | MKAIIVLLMVVTSNA
31 | >C.4
32 | MKAIIVLLMVVTSNA
33 | >C.5
34 | MKAIIVLLMVVTSNA
35 | >C.5.1
36 | MKAIIVLLMVVTSNA
37 | >C.5.2
38 | MKAIIVLLMVVTSNA
39 | >C.5.3
40 | MKAIIVLLMVVTSNA
41 | >C.5.4
42 | MKAIIVLLMVVTSNA
43 | >C.5.5
44 | MKAIIVLLMVVTSNA
45 | >C.5.6
46 | MKAIIVLLMVVTSNA
47 | >C.5.6.1
48 | MKAIIVLLMVVTSNA
49 | >C.5.7
50 | MKAIIVLLMVVTSNA
51 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/ha/CY121680/pathogen.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "nucMutLabelMap": {},
 3 |     "nucMutLabelMapReverse": {},
 4 |     "shortcuts": [
 5 |       "flu_h1n1pdm_ha_broad",
 6 |       "nextstrain/flu/h1n1pdm/ha/california-7-2009"
 7 |     ],
 8 |     "aaMotifs": [
 9 |       {
10 |         "name": "glycosylation",
11 |         "nameShort": "Glyc.",
12 |         "nameFriendly": "Glycosylation",
13 |         "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)",
14 |         "includeCdses": [
15 |           {
16 |             "cds":"HA1",
17 |             "ranges":[]
18 |           },
19 |           {
20 |             "cds":"HA2",
21 |             "ranges":[{"begin":0, "end":186}]
22 |           }
23 |         ],
24 |         "motifs": [
25 |           "N[^P][ST]"
26 |         ]
27 |       }
28 |     ]
29 | }


--------------------------------------------------------------------------------
/config/h3n2/ha/emerging_haplotypes.tsv:
--------------------------------------------------------------------------------
 1 | haplotype	gene	site	alt
 2 | J.2	clade	J.2
 3 | J.2.2	clade	J.2.2
 4 | J.2.3	clade	J.2.3
 5 | J.2.4	clade	J.2.4
 6 | 
 7 | J.2.4:160K	clade	J.2.4
 8 | J.2.4:160K	HA1	160	K
 9 | 
10 | J.2.4:79V-144N-158D-160K-328A	clade	J.2.4
11 | J.2.4:79V-144N-158D-160K-328A	HA1	79	V
12 | J.2.4:79V-144N-158D-160K-328A	HA1	144	N
13 | J.2.4:79V-144N-158D-160K-328A	HA1	158	D
14 | J.2.4:79V-144N-158D-160K-328A	HA1	160	K
15 | J.2.4:79V-144N-158D-160K-328A	HA1	328	A
16 | 
17 | J.2.4:62R-158D-160K-328A	clade	J.2.4
18 | J.2.4:62R-158D-160K-328A	HA1	62	R
19 | J.2.4:62R-158D-160K-328A	HA1	158	D
20 | J.2.4:62R-158D-160K-328A	HA1	160	K
21 | J.2.4:62R-158D-160K-328A	HA1	328	A
22 | 
23 | K	clade	K
24 | 
25 | K:80K	clade	K
26 | K:80K	HA1	80	K
27 | 
28 | K:88I	clade	K
29 | K:88I	HA1	88	I
30 | 
31 | K:145N	clade	K
32 | K:145N	HA1	145	N
33 | 
34 | K:272T	clade	K
35 | K:272T	HA1	272	T
36 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/ha/KX058884/pathogen.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "nucMutLabelMap": {},
 3 |     "nucMutLabelMapReverse": {},
 4 |     "shortcuts": [
 5 |       "flu_vic_ha",
 6 |       "nextstrain/flu/vic",
 7 |       "nextstrain/flu/vic/ha",
 8 |       "nextstrain/flu/vic/ha/brisbane-60-2008"
 9 |     ],
10 |     "aaMotifs": [
11 |       {
12 |         "name": "glycosylation",
13 |         "nameShort": "Glyc.",
14 |         "nameFriendly": "Glycosylation",
15 |         "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)",
16 |         "includeCdses": [
17 |           {
18 |             "cds":"HA1",
19 |             "ranges":[]
20 |           },
21 |           {
22 |             "cds":"HA2",
23 |             "ranges":[{"begin":0, "end":186}]
24 |           }
25 |         ],
26 |         "motifs": [
27 |           "N[^P][ST]"
28 |         ]
29 |       }
30 |     ]
31 | }


--------------------------------------------------------------------------------
/nextclade/dataset_config/yam/ha/JN993010/pathogen.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "nucMutLabelMap": {},
 3 |     "nucMutLabelMapReverse": {},
 4 |     "shortcuts": [
 5 |       "flu_yam_ha",
 6 |       "nextstrain/flu/yam",
 7 |       "nextstrain/flu/yam/ha",
 8 |       "nextstrain/flu/yam/ha/wisconsin-1-2010"
 9 |     ],
10 |     "aaMotifs": [
11 |       {
12 |         "name": "glycosylation",
13 |         "nameShort": "Glyc.",
14 |         "nameFriendly": "Glycosylation",
15 |         "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)",
16 |         "includeCdses": [
17 |           {
18 |             "cds":"HA1",
19 |             "ranges":[]
20 |           },
21 |           {
22 |             "cds":"HA2",
23 |             "ranges":[{"begin":0, "end":186}]
24 |           }
25 |         ],
26 |         "motifs": [
27 |           "N[^P][ST]"
28 |         ]
29 |       }
30 |     ]
31 | }


--------------------------------------------------------------------------------
/.github/workflows/run-public-builds.yaml:
--------------------------------------------------------------------------------
 1 | name: Run the Nextstrain public builds
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |     inputs:
 6 |       dockerImage:
 7 |         description: "Specific container image to use for build (will override the default of `nextstrain build`)"
 8 |         required: false
 9 |         type: string
10 | 
11 | jobs:
12 |   run-build:
13 |     permissions:
14 |       id-token: write
15 |     uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
16 |     secrets: inherit
17 |     with:
18 |       runtime: aws-batch
19 |       env: |
20 |         NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }}
21 |       run: |
22 |         nextstrain build \
23 |           --detach \
24 |           --cpus 36 \
25 |           --memory 72gib \
26 |           . \
27 |           deploy_all \
28 |           -p \
29 |           --configfile profiles/nextstrain-public.yaml
30 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/mp/annotation.gff:
--------------------------------------------------------------------------------
 1 | ##gff-version 3
 2 | #!gff-spec-version 1.21
 3 | #!processor NCBI annotwriter
 4 | ##sequence-region NC_007367.1 1 1027
 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341
 6 | NC_007367.1	RefSeq	region	1	1027	.	+	.	ID=NC_007367.1:1..1027
 7 | NC_007367.1	RefSeq	CDS	26	51	.	+	0	Name=M2;gene=M2;gbkey=CDS;protein_id=YP_308840.1;locus_tag=FLUAVH3N2_s7p1;ID=cds-YP_308840.1;product=matrix protein 2;Dbxref=GenBank:YP_308840.1,GeneID:3655153
 8 | NC_007367.1	RefSeq	CDS	740	1007	.	+	1	Name=M2;gene=M2;gbkey=CDS;protein_id=YP_308840.1;locus_tag=FLUAVH3N2_s7p1;ID=cds-YP_308840.1;product=matrix protein 2;Dbxref=GenBank:YP_308840.1,GeneID:3655153
 9 | NC_007367.1	RefSeq	CDS	26	784	.	+	0	Name=M1;gene=M1;gbkey=CDS;protein_id=YP_308841.1;locus_tag=FLUAVH3N2_s7p2;ID=cds-YP_308841.1;product=matrix protein 1;Dbxref=GenBank:YP_308841.1,GeneID:3655152
10 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/mp/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H1N1pdm) MP based on reference "A/California/07/2009"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H1N1pdm) MP                      |
 7 | | reference            | A/California/07/2009                    |
 8 | | dataset path         | flu/h1n1pdm/mp                     |
 9 | | reference accession  | NC_026431   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/np/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H1N1pdm) NP based on reference "A/California/07/2009"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H1N1pdm) NP                      |
 7 | | reference            | A/California/07/2009                    |
 8 | | dataset path         | flu/h1n1pdm/np                     |
 9 | | reference accession  | NC_026436   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/ns/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H1N1pdm) NS based on reference "A/California/07/2009"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H1N1pdm) NS                      |
 7 | | reference            | A/California/07/2009                    |
 8 | | dataset path         | flu/h1n1pdm/ns                     |
 9 | | reference accession  | NC_026432   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/pb1/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H1N1pdm) PB1 based on reference "A/California/07/2009"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H1N1pdm) PB1                      |
 7 | | reference            | A/California/07/2009                    |
 8 | | dataset path         | flu/h1n1pdm/pb1                     |
 9 | | reference accession  | NC_026435   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/pb2/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H1N1pdm) PB2 based on reference "A/California/07/2009"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H1N1pdm) PB2                      |
 7 | | reference            | A/California/07/2009                    |
 8 | | dataset path         | flu/h1n1pdm/pb2                     |
 9 | | reference accession  | NC_026438   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/ha/bush_epitope_C.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "output_type": "integer",
 4 |     "map": {
 5 |         "HA1": {
 6 |             "44": 1,
 7 |             "45": 1,
 8 |             "46": 1,
 9 |             "47": 1,
10 |             "48": 1,
11 |             "50": 1,
12 |             "51": 1,
13 |             "53": 1,
14 |             "54": 1,
15 |             "273": 1,
16 |             "275": 1,
17 |             "276": 1,
18 |             "278": 1,
19 |             "279": 1,
20 |             "280": 1,
21 |             "294": 1,
22 |             "297": 1,
23 |             "299": 1,
24 |             "300": 1,
25 |             "304": 1,
26 |             "305": 1,
27 |             "307": 1,
28 |             "308": 1,
29 |             "309": 1,
30 |             "310": 1,
31 |             "311": 1,
32 |             "312": 1
33 |         }
34 |     },
35 |     "name": "bush_epitope_C"
36 | }
37 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/mp/annotation.gff:
--------------------------------------------------------------------------------
 1 | ##gff-version 3
 2 | #!gff-spec-version 1.21
 3 | #!processor NCBI annotwriter
 4 | ##sequence-region NC_026431.1 1 982
 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809
 6 | NC_026431.1	RefSeq	region	1	982	.	+	.	ID=NC_026431.1:1..982;
 7 | NC_026431.1	RefSeq	CDS	1	26	.	+	0	Name=M2;gene=M2;gbkey=CDS;locus_tag=UJ99_s7gp1;protein_id=YP_009118622.1;product=matrix protein 2;ID=cds-YP_009118622.1;Dbxref=GenBank:YP_009118622.1,GeneID:23308108
 8 | NC_026431.1	RefSeq	CDS	715	982	.	+	1	Name=M2;gene=M2;gbkey=CDS;locus_tag=UJ99_s7gp1;protein_id=YP_009118622.1;product=matrix protein 2;ID=cds-YP_009118622.1;Dbxref=GenBank:YP_009118622.1,GeneID:23308108
 9 | NC_026431.1	RefSeq	CDS	1	759	.	+	0	Name=M1;gene=M1;gbkey=CDS;locus_tag=UJ99_s7gp2;protein_id=YP_009118623.1;product=matrix protein 1;ID=cds-YP_009118623.1;Dbxref=GenBank:YP_009118623.1,GeneID:23308107
10 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/mp/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H3N2) MP (segment 7) based on reference "A/New York/392/2004"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H3N2) MP (segment 7)                    |
 7 | | reference            | A/New York/392/2004                    |
 8 | | dataset path         | flu/h3n2/mp                     |
 9 | | reference accession  | NC_007367   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/np/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H3N2) NP (segment 5) based on reference "A/New York/392/2004"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H3N2) NP (segment 5)                    |
 7 | | reference            | A/New York/392/2004                    |
 8 | | dataset path         | flu/h3n2/np                     |
 9 | | reference accession  | NC_007369   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/ns/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H3N2) NS (segment 8) based on reference "A/New York/392/2004"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H3N2) NS (segment 8)                    |
 7 | | reference            | A/New York/392/2004                    |
 8 | | dataset path         | flu/h3n2/ns                     |
 9 | | reference accession  | NC_007370   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/pb1/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H3N2) PB1 (segment 2) based on reference "A/New York/392/2004"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H3N2) PB1 (segment 2)                    |
 7 | | reference            | A/New York/392/2004                    |
 8 | | dataset path         | flu/h3n2/pb1                     |
 9 | | reference accession  | NC_007372   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/pb2/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H3N2) PB2 (segment 1) based on reference "A/New York/392/2004"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H3N2) PB2 (segment 1)                    |
 7 | | reference            | A/New York/392/2004                    |
 8 | | dataset path         | flu/h3n2/pb2                     |
 9 | | reference accession  | NC_007373   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ## What is Nextclade dataset
15 | 
16 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/ha/MW626062/pathogen.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "nucMutLabelMap": {},
 3 |     "nucMutLabelMapReverse": {},
 4 |     "shortcuts": [
 5 |       "flu_h1n1pdm_ha",
 6 |       "nextstrain/flu/h1n1pdm",
 7 |       "nextstrain/flu/h1n1pdm/ha",
 8 |       "nextstrain/flu/h1n1pdm/ha/wisconsin-588-2019"
 9 |     ],
10 |       "aaMotifs": [
11 |       {
12 |         "name": "glycosylation",
13 |         "nameShort": "Glyc.",
14 |         "nameFriendly": "Glycosylation",
15 |         "description": "N-linked glycosylation motifs (N-X-S/T with X any amino acid other than P)",
16 |         "includeCdses": [
17 |           {
18 |             "cds":"HA1",
19 |             "ranges":[]
20 |           },
21 |           {
22 |             "cds":"HA2",
23 |             "ranges":[{"begin":0, "end":186}]
24 |           }
25 |         ],
26 |         "motifs": [
27 |           "N[^P][ST]"
28 |         ]
29 |       }
30 |     ]
31 | }


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/ns/annotation.gff:
--------------------------------------------------------------------------------
 1 | ##gff-version 3
 2 | #!gff-spec-version 1.21
 3 | #!processor NCBI annotwriter
 4 | ##sequence-region NC_007370.1 1 890
 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341
 6 | NC_007370.1	RefSeq	region	1	890	.	+	.	ID=NC_007370.1:1..890
 7 | NC_007370.1	RefSeq	CDS	27	56	.	+	0	Name=NEP;gbkey=CDS;gene=NEP;protein_id=YP_308844.1;locus_tag=FLUAVH3N2_s8p1;ID=cds-YP_308844.1;product=nonstructural protein 2;Dbxref=GenBank:YP_308844.1,GeneID:3655157
 8 | NC_007370.1	RefSeq	CDS	529	864	.	+	0	Name=NEP;gbkey=CDS;gene=NEP;protein_id=YP_308844.1;locus_tag=FLUAVH3N2_s8p1;ID=cds-YP_308844.1;product=nonstructural protein 2;Dbxref=GenBank:YP_308844.1,GeneID:3655157
 9 | NC_007370.1	RefSeq	CDS	27	719	.	+	0	Name=NS1;gbkey=CDS;gene=NS1;protein_id=YP_308845.1;locus_tag=FLUAVH3N2_s8p2;ID=cds-YP_308845.1;product=nonstructural protein 1;Dbxref=GenBank:YP_308845.1,GeneID:3655156
10 | 


--------------------------------------------------------------------------------
/nextclade/config/h1n1pdm/ha/CY121680/founder_sequences_SigPep.fasta:
--------------------------------------------------------------------------------
 1 | >A
 2 | MKAILVVLLYTFTTANA
 3 | >B
 4 | MKAILVVLLYTFTTANA
 5 | >C
 6 | MKAILVVMLYTFTTANA
 7 | >C.1
 8 | MKAILVVMLYTFTTANA
 9 | >C.1.1
10 | MKAILVVMLYTFTTANA
11 | >C.1.2
12 | MKAILVVMLYTFTTANA
13 | >C.1.3
14 | MKAILVVMLYTFTTANA
15 | >C.1.4
16 | MKAILVVMLYTFTTANA
17 | >C.1.5
18 | MKAILVVMLYTFTTANA
19 | >C.1.6
20 | MKAILVVMLYTFTTANA
21 | >C.1.7
22 | MKAILVVMLYTFTTANA
23 | >C.1.7.1
24 | MKAILVVMLYTFTTANA
25 | >C.1.7.2
26 | MKAILVVILYTVTTANA
27 | >C.1.8
28 | MKAILVVMLYTLTTANA
29 | >C.1.9
30 | MKAILVVMLYTFTTANA
31 | >C.1.9.1
32 | MKAILVVMLYTFTTANA
33 | >C.1.9.2
34 | MKAILVVMLYTFTTANA
35 | >C.1.9.3
36 | MKAILVVMLYTFTTANA
37 | >C.1.9.4
38 | MKAILVVMLYTFTTANA
39 | >D
40 | MKAILVVMLYTFTTANA
41 | >D.1
42 | MKAILVVMLYTFTTANA
43 | >D.2
44 | MKAILVVMLYTFTTANA
45 | >D.3
46 | MKAILVVMLYTFTTANA
47 | >D.3.1
48 | MKAILVVMLYTFTTANA
49 | >D.4
50 | MKAILVVMLYTFTTANA
51 | >D.5
52 | MKAILVVMLYTFTTANA
53 | 


--------------------------------------------------------------------------------
/nextclade/config/h1n1pdm/ha/MW626062/founder_sequences_SigPep.fasta:
--------------------------------------------------------------------------------
 1 | >A
 2 | MKAILVVLLYTFTTANA
 3 | >B
 4 | MKAILVVLLYTFTTANA
 5 | >C
 6 | MKAILVVMLYTFTTANA
 7 | >C.1
 8 | MKAILVVMLYTFTTANA
 9 | >C.1.1
10 | MKAILVVMLYTFTTANA
11 | >C.1.2
12 | MKAILVVMLYTFTTANA
13 | >C.1.3
14 | MKAILVVMLYTFTTANA
15 | >C.1.4
16 | MKAILVVMLYTFTTANA
17 | >C.1.5
18 | MKAILVVMLYTFTTANA
19 | >C.1.6
20 | MKAILVVMLYTFTTANA
21 | >C.1.7
22 | MKAILVVMLYTFTTANA
23 | >C.1.7.1
24 | MKAILVVMLYTFTTANA
25 | >C.1.7.2
26 | MKAILVVILYTVTTANA
27 | >C.1.8
28 | MKAILVVMLYTLTTANA
29 | >C.1.9
30 | MKAILVVMLYTFTTANA
31 | >C.1.9.1
32 | MKAILVVMLYTFTTANA
33 | >C.1.9.2
34 | MKAILVVMLYTFTTANA
35 | >C.1.9.3
36 | MKAILVVMLYTFTTANA
37 | >C.1.9.4
38 | MKAILVVMLYTFTTANA
39 | >D
40 | MKAILVVMLYTFTTANA
41 | >D.1
42 | MKAILVVMLYTFTTANA
43 | >D.2
44 | MKAILVVMLYTFTTANA
45 | >D.3
46 | MKAILVVMLYTFTTANA
47 | >D.3.1
48 | MKAILVVMLYTFTTANA
49 | >D.4
50 | MKAILVVMLYTFTTANA
51 | >D.5
52 | MKAILVVMLYTFTTANA
53 | 


--------------------------------------------------------------------------------
/workflow/snakemake_rules/common.smk:
--------------------------------------------------------------------------------
 1 | from shlex import (
 2 |     quote as shquote,       # shquote() is used in this file and also other workflow files
 3 |     split as shsplitwords,
 4 | )
 5 | 
 6 | def shquotewords(s: str) -> str:
 7 |     """
 8 |     Split string *s* into (POSIX) shell words, quote each word, and join them
 9 |     back into a string.
10 | 
11 |     This is suitable for properly quoting multi-word, user-defined values which
12 |     should follow shell quoting and escaping semantics (e.g. to allow spaces in
13 |     single words) but not allow shell features like variable interpolation,
14 |     command substitution, redirection, piping, etc.
15 | 
16 |     For example, quote a query string used as input to augur filter like this:
17 |     f"--query {shquote(query)}".
18 | 
19 |     See usage in https://github.com/nextstrain/ncov for more examples.
20 |     """
21 |     return " ".join(shquote(word) for word in shsplitwords(s))
22 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/pa/annotation.gff:
--------------------------------------------------------------------------------
 1 | ##gff-version 3
 2 | #!gff-spec-version 1.21
 3 | #!processor NCBI annotwriter
 4 | ##sequence-region NC_026437.1 1 2151
 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809
 6 | NC_026437.1	RefSeq	region	1	2151	.	+	.	ID=NC_026437.1:1..2151;
 7 | NC_026437.1	RefSeq	CDS	1	2151	.	+	0	Name=PA;gene=PA;gbkey=CDS;locus_tag=UJ99_s3gp1;product=polymerase PA;protein_id=YP_009118630.1;ID=cds-YP_009118630.1;Dbxref=GenBank:YP_009118630.1,GeneID:23308128
 8 | NC_026437.1	RefSeq	CDS	1	570	.	+	0	Name=PA-X;gbkey=CDS;gene=PA-X;locus_tag=UJ99_s3gp2;product=PA-X protein;protein_id=YP_009121769.1;ID=cds-YP_009121769.1;exception=ribosomal slippage;Dbxref=GenBank:YP_009121769.1,GeneID:23561398
 9 | NC_026437.1	RefSeq	CDS	572	700	.	+	0	Name=PA-X;gbkey=CDS;gene=PA-X;locus_tag=UJ99_s3gp2;product=PA-X protein;protein_id=YP_009121769.1;ID=cds-YP_009121769.1;exception=ribosomal slippage;Dbxref=GenBank:YP_009121769.1,GeneID:23561398
10 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/pa/annotation.gff:
--------------------------------------------------------------------------------
 1 | ##gff-version 3
 2 | #!gff-spec-version 1.21
 3 | #!processor NCBI annotwriter
 4 | ##sequence-region NC_007371.1 1 2233
 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=335341
 6 | NC_007371.1	RefSeq	region	1	2233	.	+	.	ID=NC_007371.1:1..2233
 7 | NC_007371.1	RefSeq	CDS	25	2175	.	+	0	Name=PA;gene=PA;gbkey=CDS;protein_id=YP_308846.1;product=polymerase PA;locus_tag=FLUAVH3N2_s3p1;ID=cds-YP_308846.1;Dbxref=GenBank:YP_308846.1,GeneID:3655158
 8 | NC_007371.1	RefSeq	CDS	25	597	.	+	0	Name=PA-X;gbkey=CDS;gene=PA-X;product=PA-X protein;locus_tag=FLUAVH3N2_s3p2;protein_id=YP_006575868.1;ID=cds-YP_006575868.1;exception=ribosomal slippage;Dbxref=GenBank:YP_006575868.1,GeneID:13229459
 9 | NC_007371.1	RefSeq	CDS	599	784	.	+	0	Name=PA-X;gbkey=CDS;gene=PA-X;product=PA-X protein;locus_tag=FLUAVH3N2_s3p2;protein_id=YP_006575868.1;ID=cds-YP_006575868.1;exception=ribosomal slippage;Dbxref=GenBank:YP_006575868.1,GeneID:13229459
10 | 


--------------------------------------------------------------------------------
/profiles/nextflu-private/deploy.smk:
--------------------------------------------------------------------------------
 1 | """
 2 | This part of the workflow handles automatic deployments of nextflu-private builds.
 3 | """
 4 | 
 5 | rule all_private:
 6 |     input:
 7 |         jsons=_get_build_outputs(),
 8 |     output:
 9 |         json_dir=directory("auspice_renamed"),
10 |     params:
11 |         build_date=config.get("build_date", datetime.date.today().strftime("%Y-%m-%d")),
12 |     shell:
13 |         """
14 |         mkdir -p {output.json_dir};
15 |         for file in {input.jsons}
16 |         do
17 |             ln ${{file}} {output.json_dir}/"flu_seasonal_{params.build_date}_`basename ${{file}}`"
18 |         done
19 |         """
20 | 
21 | rule deploy_all:
22 |     input:
23 |         json_dir="auspice_renamed",
24 |     params:
25 |         deploy_url = config["deploy_url"]
26 |     shell:
27 |         """
28 |         nextstrain login --no-prompt;
29 |         nextstrain remote upload {params.deploy_url} {input.json_dir}/*.json
30 |         """
31 | 


--------------------------------------------------------------------------------
/config/vic/vaccine.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "nodes": {
 3 |         "B/Malaysia/2506/2004": {
 4 |             "vaccine": {
 5 |                 "selection_date": "2006-09-25"
 6 |             }
 7 |         },
 8 |         "B/Brisbane/60/2008": {
 9 |             "vaccine": {
10 |                 "selection_date": "2009-09-25"
11 |             }
12 |         },
13 |         "B/Colorado/6/2017": {
14 |             "vaccine": {
15 |                 "selection_date": "2018-02-22"
16 |             }
17 |         },
18 |         "B/Washington/2/2019": {
19 |             "vaccine": {
20 |                 "selection_date": "2019-09-27"
21 |             }
22 |         },
23 |         "B/Austria/1359417/2021": {
24 |             "vaccine": {
25 |                 "selection_date": "2021-09-24"
26 |             }
27 |         },
28 |         "B/Austria/1359417/2021-egg": {
29 |             "vaccine": {
30 |                 "selection_date": "2021-09-24"
31 |             }
32 |         }
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/ingest/scripts/lowercase-fields:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Lowercases values in an NDJSON
 4 | """
 5 | import argparse
 6 | from sys import stdin, exit
 7 | from typing import Iterable
 8 | from augur.io.json import dump_ndjson, load_ndjson
 9 | from augur.io.print import print_err
10 | 
11 | def lowercase_fields(records: Iterable, fields: list[str]) -> Iterable:
12 |     for record in records:
13 |         for key in fields:
14 |             if key not in record:
15 |                 print_err(f"[FATAL] {key} not in record. EPI ISL: {record.get('gisaid_epi_isl', '[unknown]')}")
16 |                 exit(2)
17 |             record[key] = record[key].lower()
18 |         yield record
19 | 
20 | if __name__ == '__main__':
21 |     parser = argparse.ArgumentParser(description=__doc__)
22 |     parser.add_argument("--fields", nargs="+", help="record fields to turn into lowercase")
23 |     args = parser.parse_args()
24 |     dump_ndjson(lowercase_fields(load_ndjson(stdin), args.fields))


--------------------------------------------------------------------------------
/scripts/intersect_items.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import argparse
 3 | 
 4 | 
 5 | if __name__ == '__main__':
 6 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 7 |     parser.add_argument("--items", nargs="+", required=True, help="one or more files containing a list of items")
 8 |     parser.add_argument("--output", required=True, help="list of items shared by all input files (the intersection)")
 9 | 
10 |     args = parser.parse_args()
11 | 
12 |     with open(args.items[0], "r", encoding="utf-8") as fh:
13 |         shared_items = {line.strip() for line in fh}
14 | 
15 |     for item_file in args.items[1:]:
16 |         with open(item_file, "r", encoding="utf-8") as fh:
17 |             items = {line.strip() for line in fh}
18 | 
19 |         shared_items = shared_items & items
20 | 
21 |     with open(args.output, "w", encoding="utf-8") as oh:
22 |         for item in sorted(shared_items):
23 |             print(item, file=oh)
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Files created by the pipeline, which we want to keep out of git
 2 | # (or at least out of _this_ git repo).
 3 | data/
 4 | builds/
 5 | results/
 6 | tables/
 7 | auspice/
 8 | auspice-who/
 9 | auspice_renamed/
10 | build/
11 | logs/
12 | figures/
13 | targets/
14 | benchmarks/
15 | 
16 | # Sensitive environment variables
17 | environment*
18 | 
19 | # Snakemake state dir
20 | .snakemake
21 | 
22 | # Local config overrides
23 | /config_local.yaml
24 | 
25 | # For Python #
26 | ##############
27 | *.pyc
28 | .tox/
29 | .cache/
30 | 
31 | # Compiled source #
32 | ###################
33 | *.com
34 | *.class
35 | *.dll
36 | *.exe
37 | *.o
38 | *.so
39 | 
40 | # OS generated files #
41 | ######################
42 | .DS_Store
43 | .DS_Store?
44 | ._*
45 | .Spotlight-V100
46 | .Trashes
47 | Icon?
48 | ehthumbs.db
49 | Thumbs.db
50 | *~
51 | 
52 | # nohup output
53 | nohup.out
54 | 
55 | # cluster logs
56 | slurm-*
57 | 
58 | # Jupyter/Altair droppings
59 | .ipynb_checkpoints
60 | geckodriver.log
61 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/ns/annotation.gff:
--------------------------------------------------------------------------------
 1 | ##gff-version 3
 2 | #!gff-spec-version 1.21
 3 | #!processor NCBI annotwriter
 4 | ##sequence-region NC_026432.1 1 863
 5 | ##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=641809
 6 | NC_026432.1	RefSeq	region	1	863	.	+	.	ID=NC_026432.1:1..863;
 7 | NC_026432.1	RefSeq	CDS	1	30	.	+	0	Name=NEP;gbkey=CDS;gene=NEP;locus_tag=UJ99_s8gp1;protein_id=YP_009118624.1;ID=cds-YP_009118624.1;product=nuclear export protein;Note=nonstructural protein 2;Dbxref=GenBank:YP_009118624.1,GeneID:23308112
 8 | NC_026432.1	RefSeq	CDS	503	838	.	+	0	Name=NEP;gbkey=CDS;gene=NEP;locus_tag=UJ99_s8gp1;protein_id=YP_009118624.1;ID=cds-YP_009118624.1;product=nuclear export protein;Note=nonstructural protein 2;Dbxref=GenBank:YP_009118624.1,GeneID:23308112
 9 | NC_026432.1	RefSeq	CDS	1	660	.	+	0	Name=NS1;gbkey=CDS;gene=NS1;locus_tag=UJ99_s8gp2;protein_id=YP_009118625.1;ID=cds-YP_009118625.1;product=nonstructural protein 1;Dbxref=GenBank:YP_009118625.1,GeneID:23308111
10 | 


--------------------------------------------------------------------------------
/.github/workflows/run-nextflu-private-builds.yaml:
--------------------------------------------------------------------------------
 1 | name: Run Nextstrain builds for the nextflu-private group
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |     inputs:
 6 |       dockerImage:
 7 |         description: "Specific container image to use for build (will override the default of `nextstrain build`)"
 8 |         required: false
 9 |         type: string
10 | 
11 | jobs:
12 |   run-build:
13 |     permissions:
14 |       id-token: write
15 |     uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
16 |     secrets: inherit
17 |     with:
18 |       runtime: aws-batch
19 |       env: |
20 |         NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }}
21 |       run: |
22 |         nextstrain build \
23 |           --detach \
24 |           --cpus 36 \
25 |           --memory 72gib \
26 |           --env NEXTSTRAIN_USERNAME \
27 |           --env NEXTSTRAIN_PASSWORD \
28 |           . \
29 |           deploy_all \
30 |           all_report_outputs \
31 |           -p \
32 |           --configfile profiles/nextflu-private.yaml
33 | 


--------------------------------------------------------------------------------
/profiles/full-trees/h1n1pdm_titer_strains.txt:
--------------------------------------------------------------------------------
 1 | A/Bangkok/P176/2025
 2 | A/Busan/277/2025
 3 | A/Colorado/218/2024
 4 | A/Hawaii/ISC-1140/2025
 5 | A/Illinois/65/2024
 6 | A/Iowa/123/2024
 7 | A/Kanagawa/AC2408/2025
 8 | A/Lisbon/188/2023
 9 | A/Madagascar/3/2025
10 | A/Maldives/2132/2024
11 | A/Maryland/64/2024
12 | A/Massachusetts/ISC-1679/2025
13 | A/Minnesota/131/2024
14 | A/NovaScotia/ET1801CP00018S/2025
15 | A/Ohio/259/2024
16 | A/Oregon/11/2025
17 | A/Oregon/261/2024
18 | A/Pakistan/306/2024
19 | A/Qatar/83328/2024
20 | A/Qinghai-Chengzhong/SWL1410/2024
21 | A/Santiago/101713/2024
22 | A/Shanghai-Huangpu/SWL12109/2024
23 | A/Singapore/MOH0547/2024
24 | A/StPetersburg/RII-04/2025
25 | A/Tambov/160-1V/2024
26 | A/Tennessee/4/2025
27 | A/Tokyo/EIS11-277/2024
28 | A/Ufa/CRIE/47/2024
29 | A/Uganda/UVRI_KIS6850/2024
30 | A/Ulsan/492/2025
31 | A/Utah/39/2025
32 | A/Vermont/10/2025
33 | A/Vermont/5/2025
34 | A/Victoria/3599/2024
35 | A/Vladimir/RII-MH223382S/2024
36 | A/Wisconsin/30/2025
37 | A/Wisconsin/67/2022
38 | A/Wisconsin/NIRC-IS-1111/2025
39 | A/Zacapa/FLU-012/2025
40 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/ns/reference.fasta:
--------------------------------------------------------------------------------
 1 | >NC_007370.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 8, complete sequence
 2 | AGCAAAAGCAGGGTGACAAAGACATAATGGATTCCAACACTGTGTCAAGTTTCCAGGTAG
 3 | ATTGCTTTCTTTGGCATATCCGGAAACAAGTTGTAGACCAAGAACTGAGTGATGCCCCAT
 4 | TCCTTGATCGGCTTCGCCGAGATCAGAGGTCCCTAAGGGGAAGAGGCAATACTCTCGGTC
 5 | TAGACATCAAAGCAGCCACCCATGTTGGAAAGCAAATTGTAGAAAAGATTCTGAAAGAAG
 6 | AATCTGATGAGGCACTTAAAATGACCATGGTCTCCACACCTGCTTCGCGATACATAACTG
 7 | ACATGACTATTGAGGAATTGTCAAGAAACTGGTTCATGCTAATGCCCAAGCAGAAAGTGG
 8 | AAGGACCTCTTTGCATCAGAATGGACCAGGCAATCATGGAGAAAAACATCATGTTGAAAG
 9 | CGAATTTCAGTGTGATTTTTGACCGACTAGAGACCATAGTATTACTAAGGGCTTTCACCG
10 | AAGAGGGAGCAATTGTTGGCGAAATCTCACCATTGCCTTCTTTTCCAGGACATACTATTG
11 | AGGATGTCAAAAATGCAATTGGGGTCCTCATCGGAGGACTTGAATGGAATGATAACACAG
12 | TTCGAGTCTCTAAAAATCTACAGAGATTCGCTTGGAGAAGCAGTAATGAGAATGGGGGAC
13 | CTCCACTTACTCCAAAACAGAAACGGAAAATGGCGAGAACAGCTAGGTCAAAAGTTTGAA
14 | GAGATAAGATGGCTGATTGAAGAAGTGAGACACAGACTAAAAACAACTGAAAATAGCTTT
15 | GAACAAATAACATTCATGCAAGCATTACAACTGCTGTTTGAAGTGGAACAGGAGATAAGA
16 | ACTTTCTCATTTCAGCTTATTTAATGATAAAAAACACCCTTGTTTCTACT
17 | 


--------------------------------------------------------------------------------
/ingest/vendored/notify-on-job-fail:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -euo pipefail
 3 | 
 4 | : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}"
 5 | : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}"
 6 | 
 7 | : "${AWS_BATCH_JOB_ID:=}"
 8 | : "${GITHUB_RUN_ID:=}"
 9 | 
10 | bin="$(dirname "$0")"
11 | job_name="${1:?A job name is required as the first argument}"
12 | github_repo="${2:?A GitHub repository with owner and repository name is required as the second argument}"
13 | 
14 | echo "Notifying Slack about failed ${job_name} job."
15 | message="❌ ${job_name} job has FAILED 😞 "
16 | 
17 | if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then
18 |     message+="See AWS Batch job \`${AWS_BATCH_JOB_ID}\` (<https://console.aws.amazon.com/batch/v2/home?region=us-east-1#jobs/detail/${AWS_BATCH_JOB_ID}|link>) for error details. "
19 | elif [[ -n "${GITHUB_RUN_ID}" ]]; then
20 |     message+="See GitHub Action <https://github.com/${github_repo}/actions/runs/${GITHUB_RUN_ID}?check_suite_focus=true|${GITHUB_RUN_ID}> for error details. "
21 | fi
22 | 
23 | "$bin"/notify-slack "$message"
24 | 


--------------------------------------------------------------------------------
/config/references_for_titer_plots/h3n2/cell_hi.txt:
--------------------------------------------------------------------------------
 1 | A/DistrictOfColumbia/27/2023 # J.2:145N, vaccine strain
 2 | A/Croatia/10136RV/2023 # J.2:145N, vaccine strain
 3 | A/Idaho/69/2023 # J.2:145N,189R
 4 | #A/Wisconsin/154/2024 # J.2:135K
 5 | A/Tokyo/EIS11-171/2024 # J.2:135K
 6 | A/Catalonia/NSVH102423723/2024 # J.2:135K
 7 | #A/Slovenia/49/2024 # J.2:158K
 8 | #A/Oklahoma/5/2024 # J.2:158K
 9 | A/Michigan/32/2024 # J.2:104N,145N
10 | A/California/166/2024 # J.2:124R,145N
11 | A/Perth/815/2024 # J.2:63D,189R,309I
12 | A/Colorado/209/2024 # J.2:160M,204I,223I
13 | A/Switzerland/47775/2024 # J.2.1
14 | #A/Pennsylvania/234/2024 # J.2.2:65K
15 | #A/Minnesota/97/2024 # J.2.2:65K,145N
16 | A/Perth/836/2024 # J.2.2:65K,145N
17 | A/Victoria/979/2024 # J.2.2:5E,65K,145N,262N
18 | A/Kanagawa/IC2405/2024 # J.2.2:65K,160K
19 | A/Alaska/8/2025 # J.2.3
20 | A/Netherlands/10685/2024 # J.2.3
21 | A/Tokyo/EIS13-029/2025 # J.2.3
22 | #A/Mississippi/37/2025 # J.2.4
23 | A/Sydney/1359/2024 # J.2.4
24 | A/Valladolid/1187/2025 # J.2.4, Crick
25 | A/Kanagawa/AC2413/2025 # J.2.4, NIID
26 | #A/Kentucky/29/2024 # J.2.5
27 | #A/NewJersey/16/2025 # J.2.5
28 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/ns/reference.fasta:
--------------------------------------------------------------------------------
 1 | >NC_026432.1 Influenza A virus (A/California/07/2009(H1N1)) segment 8 nuclear export protein (NEP) and nonstructural protein 1 (NS1) genes, complete cds
 2 | ATGGACTCCAACACCATGTCAAGCTTTCAGGTAGACTGTTTCCTTTGGCATATCCGCAAG
 3 | CGATTTGCAGACAATGGATTGGGTGATGCCCCATTCCTTGATCGGCTCCGCCGAGATCAA
 4 | AAGTCCTTAAAAGGAAGAGGCAACACCCTTGGCCTCGATATCGAAACAGCCACTCTTGTT
 5 | GGGAAACAAATCGTGGAATGGATCTTGAAAGAGGAATCCAGCGAGACACTTAGAATGACA
 6 | ATTGCATCTGTACCTACTTCGCGCTACCTTTCTGACATGACCCTCGAGGAAATGTCACGA
 7 | GACTGGTTCATGCTCATGCCTAGGCAAAAGATAATAGGCCCTCTTTGCGTGCGATTGGAC
 8 | CAGGCGATCATGGAAAAGAACATAGTACTGAAAGCGAACTTCAGTGTAATCTTTAACCGA
 9 | TTAGAGACCTTGATACTACTAAGGGCTTTCACTGAGGAGGGAGCAATAGTTGGAGAAATT
10 | TCACCATTACCTTCTCTTCCAGGACATACTTATGAGGATGTCAAAAATGCAGTTGGGGTC
11 | CTCATCGGAGGACTTGAATGGAATGGTAACACGGTTCGAGTCTCTGAAAATATACAGAGA
12 | TTCGCTTGGAGAAACTGTGATGAGAATGGGAGACCTTCACTACCTCCAGAGCAGAAATGA
13 | AAAGTGGCGAGAGCAATTGGGACAGAAATTTGAGGAAATAAGGTGGTTAATTGAAGAAAT
14 | GCGGCACAGATTGAAAGCGACAGAGAATAGTTTCGAACAAATAACATTTATGCAAGCCTT
15 | ACAACTACTGCTTGAAGTAGAACAAGAGATAAGAGCTTTCTCGTTTCAGCTTATTTAATG
16 | ATAAAAAACACCCTTGTTTCTAC
17 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/ha/EPI1926632/README.md:
--------------------------------------------------------------------------------
 1 | # Nextclade dataset for "Influenza B Vic HA" based on reference "B/Austria/1359417/2021" (flu/vic/ha/EPI1926632)
 2 | 
 3 | 
 4 | ## Dataset attributes
 5 | 
 6 | | attribute            | value                | value friendly                           |
 7 | | -------------------- | -------------------- | ---------------------------------------- |
 8 | | name                 | flu/vic/ha           | Influenza B Vic HA                       |
 9 | | reference            | EPI1926632           | B/Austria/1359417/2021                   |
10 | 
11 | 
12 | ## Features
13 | This dataset supports
14 | 
15 |  * Assignment to clades and subclades based on the nomenclature defined in [github.com/influenza-clade-nomenclature/seasonal_B-Vic_HA/](https://github.com/influenza-clade-nomenclature/seasonal_B-Vic_HA/)
16 |  * Identification of glycosylation motifs
17 |  * Sequence QC
18 |  * Phylogenetic placement
19 | 
20 | ## What is a Nextclade dataset
21 | 
22 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
23 | 


--------------------------------------------------------------------------------
/scripts/join_tables.py:
--------------------------------------------------------------------------------
 1 | """Join two tables.
 2 | """
 3 | import argparse
 4 | 
 5 | import pandas as pd
 6 | 
 7 | 
 8 | if __name__ == "__main__":
 9 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
10 |     parser.add_argument("--left", required=True, help="left table to join")
11 |     parser.add_argument("--right", required=True, help="right table to join")
12 |     parser.add_argument("--how", default="left", choices=["left", "inner", "outer"], help="how to join tables")
13 |     parser.add_argument("--on", default=["strain"], nargs="+", help="columns to join tables on")
14 |     parser.add_argument("--output", required=True, help="joined tables")
15 | 
16 |     args = parser.parse_args()
17 | 
18 |     left = pd.read_csv(args.left, sep="\t")
19 |     right = pd.read_csv(args.right, sep="\t")
20 | 
21 |     joined_table = left.merge(
22 |         right,
23 |         how=args.how,
24 |         on=args.on,
25 |     )
26 | 
27 |     joined_table.to_csv(
28 |         args.output,
29 |         sep="\t",
30 |         index=False,
31 |         header=True,
32 |         na_rep="N/A",
33 |     )
34 | 


--------------------------------------------------------------------------------
/config/h2n2/reference_strains.txt:
--------------------------------------------------------------------------------
 1 | A/Tashkent/1046/1967
 2 | A/Montevideo/2208/1967
 3 | A/Cornell/1001/1967
 4 | A/Ann Arbor/7/1967
 5 | A/Georgia/1/1967
 6 | A/Albany/9/1967
 7 | A/Albany/8/1967
 8 | A/Albany/7/1967
 9 | A/Albany/6/1967
10 | A/Albany/4/1967
11 | A/Albany/3/1967
12 | A/Johannesburg/617/1967
13 | A/Czech Republic/1/1966
14 | A/Moscow/1019/1965
15 | A/Potsdam/2/1965
16 | A/Cottbus/1/1964
17 | A/Berlin/3/1964
18 | A/Beijing/12/1964
19 | A/Georgia/1/1963
20 | A/Leningrad/29/1963
21 | A/Netherlands/56/1963
22 | A/Netherlands/001K1/1963
23 | A/Netherlands/65/1963
24 | A/England/12/1962
25 | A/England/1/1961
26 | A/Ann Arbor/6/1960
27 | A/Albany/1/1960
28 | A/Netherlands/056H1/1960
29 | A/Albany/1/1959
30 | A/Krasnodar/101/1959
31 | A/Albany/5/1958
32 | A/Albany/4/1958
33 | A/Albany/3/1958
34 | A/Albany/24/1958
35 | A/Albany/2/1958
36 | A/Albany/1/1958
37 | A/Ann Arbor/23/1957
38 | A/Rockville Illinois/5/1957
39 | A/Rockville Illinois/5-CA/1957
40 | A/Ann Arbor/23/1957
41 | A/Albany/22/1957
42 | A/Albany/20/1957
43 | A/Rotterdam/1957
44 | A/ITS/1/1957
45 | A/Okuda/1957
46 | A/Shanghai/202/1957
47 | A/Guiyang/1/1957
48 | A/Zhang/4/1957
49 | 


--------------------------------------------------------------------------------
/source-data/2018_Europe_flu_vaccination_coverage.tsv:
--------------------------------------------------------------------------------
 1 | # extracted from Fig 3 https://www.sciencedirect.com/science/article/pii/S0264410X17317620#b0020 using https://apps.automeris.io/wpd/
 2 | #Scotland	76.31027253668763
 3 | #Northern Ireland	72.95597484276729
 4 | #England	72.32704402515722
 5 | #Wales	68.55345911949685
 6 | United Kingdom	72
 7 | Belarus	73.79454926624737
 8 | Netherlands	66.66666666666666
 9 | Israel	63.94129979035639
10 | Ireland	59.74842767295597
11 | Spain	55.765199161425585
12 | Portugal	54.29769392033542
13 | Sweden	49.895178197064986
14 | Italy	48.00838574423481
15 | France	48.00838574423481
16 | Malta	42.76729559748427
17 | Iceland	40.88050314465409
18 | Finland	39.41299790356395
19 | Germany	35.84905660377358
20 | Russia	33.9622641509434
21 | Switzerland	29.35010482180293
22 | Norway	27.044025157232703
23 | Hungary	23.689727463312366
24 | Lithuania	21.59329140461216
25 | Croatia	19.077568134171905
26 | Montenegro	14.67505241090146
27 | Poland	13.626834381551356
28 | Slovakia	13.207547169811324
29 | Kazakhstan	10.69182389937106
30 | Romania	7.756813417190765
31 | Serbia	7.547169811320748
32 | Latvia	4.821802935010488
33 | Estonia	2.725366876310279
34 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/pa/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H1N1pdm) PA based on reference "A/California/07/2009"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H1N1pdm) PA                      |
 7 | | reference            | A/California/07/2009                    |
 8 | | dataset path         | flu/h1n1pdm/pa                     |
 9 | | reference accession  | NC_026437   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ### Resistance mutations
15 | Resistance markers are taken from the [summary by the WHO/GISRS](https://www.who.int/teams/global-influenza-programme/laboratory-network/quality-assurance/antiviral-susceptibility-influenza/polymerase-acidic-protein-inhibitor).
16 | 
17 | 
18 | ## What is a Nextclade dataset
19 | 
20 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
21 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/pa/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H1N1pdm) PA based on reference "A/California/07/2009"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H1N1pdm) PA                      |
 7 | | reference            | A/California/07/2009                    |
 8 | | dataset path         | flu/h1n1pdm/pa                     |
 9 | | reference accession  | NC_026437   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ### Resistance mutations
15 | Resistance markers are taken from the [summary by the WHO/GISRS](https://www.who.int/teams/global-influenza-programme/laboratory-network/quality-assurance/antiviral-susceptibility-influenza/polymerase-acidic-protein-inhibitor).
16 | 
17 | 
18 | ## What is a Nextclade dataset
19 | 
20 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
21 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/pa/README.md:
--------------------------------------------------------------------------------
 1 | # Influenza A(H3N2) PA (segment 3) based on reference "A/New York/392/2004"
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza A(H3N2) PA (segment 3)                       |
 7 | | reference            | A/New York/392/2004                    |
 8 | | dataset path         | flu/h3n2/pa                     |
 9 | | reference accession  | NC_007371   |
10 | 
11 | ## Features
12 | This dataset only provides a reference for alignment and an annotation for translation.
13 | 
14 | ### Resistance mutations
15 | Resistance markers are taken from the [summary by the WHO/GISRS](https://www.who.int/teams/global-influenza-programme/laboratory-network/quality-assurance/antiviral-susceptibility-influenza/polymerase-acidic-protein-inhibitor).
16 | 
17 | 
18 | 
19 | ## What is a Nextclade dataset
20 | 
21 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
22 | 


--------------------------------------------------------------------------------
/flu-forecasting/scripts/merge_weighted_distances_to_future.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import argparse
 4 | import json
 5 | 
 6 | 
 7 | if __name__ == '__main__':
 8 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 9 |     parser.add_argument("--distances", nargs="+", required=True, help="node data JSON files of distances to be merged into a single output node data JSON")
10 |     parser.add_argument("--output", required=True, help="merged node data JSON file")
11 | 
12 |     args = parser.parse_args()
13 | 
14 |     # Start with a single base JSON in node data format.
15 |     with open(args.distances[0], "r", encoding="utf-8") as fh:
16 |         base_json = json.load(fh)
17 | 
18 |     # Update the base JSON with each subsequent model's distances to the future.
19 |     for json_file in args.distances[1:]:
20 |         with open(json_file, "r", encoding="utf-8") as fh:
21 |             model_json = json.load(fh)
22 |             for strain, distances in model_json["nodes"].items():
23 |                 base_json["nodes"][strain].update(distances)
24 | 
25 |     # Save merged data.
26 |     with open(args.output, "w", encoding="utf-8") as oh:
27 |         json.dump(base_json, oh)
28 | 


--------------------------------------------------------------------------------
/ingest/vendored/notify-on-diff:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -euo pipefail
 4 | 
 5 | : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}"
 6 | : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}"
 7 | 
 8 | bin="$(dirname "$0")"
 9 | 
10 | src="${1:?A source file is required as the first argument.}"
11 | dst="${2:?A destination s3:// URL is required as the second argument.}"
12 | 
13 | dst_local="$(mktemp -t s3-file-XXXXXX)"
14 | diff="$(mktemp -t diff-XXXXXX)"
15 | 
16 | trap "rm -f '$dst_local' '$diff'" EXIT
17 | 
18 | # if the file is not already present, just exit
19 | "$bin"/s3-object-exists "$dst" || exit 0
20 | 
21 | "$bin"/download-from-s3 "$dst" "$dst_local"
22 | 
23 | # diff's exit code is 0 for no differences, 1 for differences found, and >1 for errors
24 | diff_exit_code=0
25 | diff "$dst_local" "$src" > "$diff" || diff_exit_code=$?
26 | 
27 | if [[ "$diff_exit_code" -eq 1 ]]; then
28 |     echo "Notifying Slack about diff."
29 |     "$bin"/notify-slack --upload "$src.diff" < "$diff"
30 | elif [[ "$diff_exit_code" -gt 1 ]]; then
31 |     echo "Notifying Slack about diff failure"
32 |     "$bin"/notify-slack "Diff failed for $src"
33 | else
34 |     echo "No change in $src."
35 | fi
36 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/mp/reference.fasta:
--------------------------------------------------------------------------------
 1 | >NC_007367.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 7, complete sequence
 2 | AGCAAAAGCAGGTAGATATTGAAAGATGAGCCTTCTAACCGAGGTCGAAACGTATGTTCT
 3 | CTCTATCGTTCCATCAGGCCCCCTCAAAGCCGAGATCGCGCAGAGACTTGAAGATGTCTT
 4 | TGCTGGGAAAAACACAGATCTTGAGGCTCTCATGGAATGGCTAAAGACAAGACCAATTCT
 5 | GTCACCTCTGACTAAGGGGATTTTGGGGTTTGTGTTCACGCTCACCGTGCCCAGTGAGCG
 6 | AGGACTGCAGCGTAGACGCTTTGTCCAAAATGCCCTCAATGGGAATGGAGATCCAAATAA
 7 | CATGGACAAAGCAGTTAAACTGTATAGGAAACTTAAGAGGGAGATAACGTTCCATGGGGC
 8 | CAAAGAAATAGCTCTCAGTTATTCTGCTGGTGCACTTGCCAGTTGCATGGGCCTCATATA
 9 | CAATAGGATGGGGGCTGTAACCACTGAAGTGGCATTTGGCCTGGTATGTGCAACATGTGA
10 | ACAGATTGCTGACTCCCAGCACAGGTCTCATAGGCAAATGGTGGCAACAACCAATCCATT
11 | AATAAAACATGAGAACAGAATGGTTTTGGCCAGCACTACAGCTAAGGCTATGGAGCAAAT
12 | GGCTGGATCAAGTGAGCAGGCAGCGGAGGCCATGGAAATTGCTAGTCAGGCCAGGCAAAT
13 | GGTGCAGGCAATGAGAGCCGTTGGGACTCATCCTAGCTCCAGTACTGGTCTAAGAGATGA
14 | TCTTCTTGAAAATTTGCAGACCTATCAGAAACGAATGGGGGTGCAGATGCAACGATTCAA
15 | GTGACCCGCTTGTTGTTGCCGCGAGTATCATTGGGATCTTGCACTTGATATTGTGGATTC
16 | TTGATCGTCTTTTTTTCAAATGCGTCTATCGACTCTTCAAACACGGCCTTAAAAGAGGCC
17 | CTTCTACGGAAGGAGTACCTGAGTCTATGAGGGAAGAATATCGAAAGGAACAGCAGAATG
18 | CTGTGGATGCTGACGACAGTCATTTTGTCAGCATAGAGTTGGAGTAAAAAACTACCTTGT
19 | TTCTACT
20 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/mp/reference.fasta:
--------------------------------------------------------------------------------
 1 | >NC_026431.1 Influenza A virus (A/California/07/2009(H1N1)) segment 7 matrix protein 2 (M2) and matrix protein 1 (M1) genes, complete cds
 2 | ATGAGTCTTCTAACCGAGGTCGAAACGTACGTTCTTTCTATCATCCCGTCAGGCCCCCTC
 3 | AAAGCCGAGATCGCGCAGAGACTGGAAAGTGTCTTTGCAGGAAAGAACACAGATCTTGAG
 4 | GCTCTCATGGAATGGCTAAAGACAAGACCAATCTTGTCACCTCTGACTAAGGGAATTTTA
 5 | GGATTTGTGTTCACGCTCACCGTGCCCAGTGAGCGAGGACTGCAGCGTAGACGCTTTGTC
 6 | CAAAATGCCCTAAATGGGAATGGGGACCCGAACAACATGGATAGAGCAGTTAAACTATAC
 7 | AAGAAGCTCAAAAGAGAAATAACGTTCCATGGGGCCAAGGAGGTGTCACTAAGCTATTCA
 8 | ACTGGTGCACTTGCCAGTTGCATGGGCCTCATATACAACAGGATGGGAACAGTGACCACA
 9 | GAAGCTGCTTTTGGTCTAGTGTGTGCCACTTGTGAACAGATTGCTGATTCACAGCATCGG
10 | TCTCACAGACAGATGGCTACTACCACCAATCCACTAATCAGGCATGAAAACAGAATGGTG
11 | CTGGCTAGCACTACGGCAAAGGCTATGGAACAGATGGCTGGATCGAGTGAACAGGCAGCG
12 | GAGGCCATGGAGGTTGCTAATCAGACTAGGCAGATGGTACATGCAATGAGAACTATTGGG
13 | ACTCATCCTAGCTCCAGTGCTGGTCTGAAAGATGACCTTCTTGAAAATTTGCAGGCCTAC
14 | CAGAAGCGAATGGGAGTGCAGATGCAGCGATTCAAGTGATCCTCTCGTCATTGCAGCAAA
15 | TATCATTGGGATCTTGCACCTGATATTGTGGATTACTGATCGTCTTTTTTTCAAATGTAT
16 | TTATCGTCGCTTTAAATACGGTTTGAAAAGAGGGCCTTCTACGGAAGGAGTGCCTGAGTC
17 | CATGAGGGAAGAATATCAACAGGAACAGCAGAGTGCTGTGGATGTTGACGATGGTCATTT
18 | TGTCAACATAGAGCTAGAGTAA
19 | 


--------------------------------------------------------------------------------
/profiles/gisaid/builds.yaml:
--------------------------------------------------------------------------------
 1 | custom_rules:
 2 |   - profiles/gisaid/prepare_data.smk
 3 | 
 4 | metadata_fields:
 5 |   - Isolate_Name
 6 |   - Isolate_Id
 7 |   - Passage_History
 8 |   - Location
 9 |   - Collection_Date
10 |   - Submission_Date
11 | renamed_metadata_fields:
12 |   - strain
13 |   - accession
14 |   - passage
15 |   - full_location
16 |   - date
17 |   - date_submitted
18 | 
19 | lat-longs: "config/lat_longs.tsv"
20 | 
21 | segments:
22 |   - ha
23 | 
24 | submission_date_field: date_submitted
25 | recency:
26 |   date_bins: [7, 30, 90]
27 |   date_bin_labels: ["last week", "last month", "last quarter"]
28 |   upper_bin_label: older
29 | 
30 | builds:
31 |     "h3n2":
32 |       lineage: h3n2
33 |       reference: "config/h3n2/{segment}/reference.fasta"
34 |       annotation: "config/h3n2/{segment}/genemap.gff"
35 |       tree_exclude_sites: "config/h3n2/{segment}/exclude-sites.txt"
36 |       clades: "config/h3n2/ha/clades.tsv"
37 |       subclades: "config/h3n2/{segment}/subclades.tsv"
38 |       auspice_config: "config/h3n2/{segment}/auspice_config.json"
39 |       enable_lbi: true
40 |       enable_glycosylation: true
41 |       subsamples:
42 |         global:
43 |             filters: "--group-by region year month --subsample-max-sequences 100"
44 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/ha/bush_epitope_D.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "output_type": "integer",
 4 |     "map": {
 5 |         "HA1": {
 6 |             "96": 1,
 7 |             "102": 1,
 8 |             "103": 1,
 9 |             "117": 1,
10 |             "121": 1,
11 |             "167": 1,
12 |             "170": 1,
13 |             "171": 1,
14 |             "172": 1,
15 |             "173": 1,
16 |             "174": 1,
17 |             "175": 1,
18 |             "176": 1,
19 |             "177": 1,
20 |             "179": 1,
21 |             "182": 1,
22 |             "201": 1,
23 |             "203": 1,
24 |             "207": 1,
25 |             "208": 1,
26 |             "209": 1,
27 |             "212": 1,
28 |             "213": 1,
29 |             "214": 1,
30 |             "215": 1,
31 |             "216": 1,
32 |             "217": 1,
33 |             "218": 1,
34 |             "219": 1,
35 |             "226": 1,
36 |             "227": 1,
37 |             "228": 1,
38 |             "229": 1,
39 |             "230": 1,
40 |             "238": 1,
41 |             "240": 1,
42 |             "242": 1,
43 |             "244": 1,
44 |             "246": 1,
45 |             "247": 1,
46 |             "248": 1
47 |         }
48 |     },
49 |     "name": "bush_epitope_D"
50 | }
51 | 


--------------------------------------------------------------------------------
/ingest/vendored/notify-on-job-start:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -euo pipefail
 3 | 
 4 | : "${SLACK_TOKEN:?The SLACK_TOKEN environment variable is required.}"
 5 | : "${SLACK_CHANNELS:?The SLACK_CHANNELS environment variable is required.}"
 6 | 
 7 | : "${AWS_BATCH_JOB_ID:=}"
 8 | : "${GITHUB_RUN_ID:=}"
 9 | 
10 | bin="$(dirname "$0")"
11 | job_name="${1:?A job name is required as the first argument}"
12 | github_repo="${2:?A GitHub repository with owner and repository name is required as the second argument}"
13 | build_dir="${3:-ingest}"
14 | 
15 | echo "Notifying Slack about started ${job_name} job."
16 | message="${job_name} job has started."
17 | 
18 | if [[ -n "${GITHUB_RUN_ID}" ]]; then
19 |   message+=" The job was submitted by GitHub Action <https://github.com/${github_repo}/actions/runs/${GITHUB_RUN_ID}?check_suite_focus=true|${GITHUB_RUN_ID}>."
20 | fi
21 | 
22 | if [[ -n "${AWS_BATCH_JOB_ID}" ]]; then
23 |   message+=" The job was launched as AWS Batch job \`${AWS_BATCH_JOB_ID}\` (<https://console.aws.amazon.com/batch/v2/home?region=us-east-1#jobs/detail/${AWS_BATCH_JOB_ID}|link>)."
24 |   message+=" Follow along in your local clone of ${github_repo} with: "'```'"nextstrain build --aws-batch --no-download --attach ${AWS_BATCH_JOB_ID} ${build_dir}"'```'
25 | fi
26 | 
27 | "$bin"/notify-slack "$message"
28 | 


--------------------------------------------------------------------------------
/scripts/flu_regions.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import os
 3 | 
 4 | with open(os.path.join(os.path.dirname(os.path.realpath(__file__)),'../config/frequency_weights_by_region.json')) as fh:
 5 |     population_sizes = json.load(fh)
 6 | 
 7 | region_properties = {
 8 |     "global":       {'label':"Global", "color":"#111111"},
 9 |     'Africa':       {'abbr':'AF', 'label':'Africa', 'color':'#A0CCA5'},
10 |     'Europe':       {'abbr':'EU', 'label':'Europe', 'color':'#658447'},
11 |     'North America':{'abbr':'NA', 'label':'N America', 'color':'#D6C568'},
12 |     'China':        {'abbr':'CN', 'label':'China', 'color':'#A76BB1'},
13 |     'South Asia':   {'abbr':'SAS', 'label':'South Asia', 'color':'#5199B7'},
14 |     'Japan Korea':  {'abbr':'JK', 'label':'Japan/Korea', 'color':'#2A4786'},
15 |     'Oceania':      {'abbr':'OC', 'label':'Oceania', 'color':'#8E1616'},
16 |     'South America':{'abbr':'SA', 'label':'S America', 'color':'#EBA85F'},
17 |     'Southeast Asia':{'abbr':'SEA', 'label':'SE Asia', 'color':'#8FBDD0'},
18 |     'West Asia':    {'abbr':'WA', 'label':'W Asia', 'color':'#76104B'},
19 | }
20 | 
21 | for region in region_properties:
22 |     if region in population_sizes:
23 |         region_properties[region]['popsize'] = population_sizes[region]
24 | 
25 | region_names = [x for x in region_properties.keys() if x!='global']
26 | 


--------------------------------------------------------------------------------
/zoltar/project.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "Nextstrain seasonal influenza forecasts",
 3 |     "is_public": false,
 4 |     "description": "Forecasts for seasonal influenza developed by the Nextstrain team.",
 5 |     "home_url": "https://nextstrain.org/flu/seasonal/",
 6 |     "logo_url": "https://nextstrain.org/static/nextstrain-logo-small-ea8c3e13e8c17436264760d638ab970e.png",
 7 |     "core_data": "",
 8 |     "time_interval_type": "Month",
 9 |     "visualization_y_label": "Clade frequency",
10 |     "units": [
11 |         {"name": "global:3C.2a1b.2a.2", "abbreviation": "global:3C.2a1b.2a.2"},
12 |         {"name": "global:3C.2a1b.2a.2/53G", "abbreviation": "global:3C.2a1b.2a.2/53G"},
13 |         {"name": "global:3C.2a1b.2a.2/53N", "abbreviation": "global:3C.2a1b.2a.2/53N"},
14 |         {"name": "global:3C.2a1b.2a.2/50K", "abbreviation": "global:3C.2a1b.2a.2/50K"},
15 |         {"name": "global:3C.2a1b.1a", "abbreviation": "global:3C.2a1b.1a"}
16 |     ],
17 |     "targets": [
18 |         {
19 |             "type": "continuous",
20 |             "name": "frequency in one year",
21 |             "description": "clade frequency in 1 year",
22 |             "outcome_variable": "frequency in one year",
23 |             "is_step_ahead": false,
24 |             "range": [0.0, 1.0]
25 |         }
26 |     ],
27 |     "timezeros": []
28 | }
29 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/na/munoz.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "map": {
 4 |         "NA": {
 5 |             "197": 1,
 6 |             "198": 1,
 7 |             "199": 1,
 8 |             "200": 1,
 9 |             "221": 1,
10 |             "222": 1,
11 |             "328": 1,
12 |             "329": 1,
13 |             "330": 1,
14 |             "331": 1,
15 |             "332": 1,
16 |             "334": 1,
17 |             "336": 1,
18 |             "338": 1,
19 |             "339": 1,
20 |             "341": 1,
21 |             "342": 1,
22 |             "343": 1,
23 |             "344": 1,
24 |             "346": 1,
25 |             "347": 1,
26 |             "357": 1,
27 |             "358": 1,
28 |             "359": 1,
29 |             "366": 1,
30 |             "367": 1,
31 |             "368": 1,
32 |             "369": 1,
33 |             "370": 1,
34 |             "383": 1,
35 |             "384": 1,
36 |             "385": 1,
37 |             "386": 1,
38 |             "387": 1,
39 |             "389": 1,
40 |             "390": 1,
41 |             "391": 1,
42 |             "392": 1,
43 |             "393": 1,
44 |             "394": 1,
45 |             "396": 1,
46 |             "399": 1,
47 |             "400": 1,
48 |             "401": 1,
49 |             "403": 1
50 |         }
51 |     },
52 |     "name": "munoz"
53 | }


--------------------------------------------------------------------------------
/ingest/vendored/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | default_language_version:
 2 |   python: python3
 3 | repos:
 4 |   - repo: https://github.com/pre-commit/sync-pre-commit-deps
 5 |     rev: v0.0.1
 6 |     hooks:
 7 |       - id: sync-pre-commit-deps
 8 |   - repo: https://github.com/shellcheck-py/shellcheck-py
 9 |     rev: v0.10.0.1
10 |     hooks:
11 |       - id: shellcheck
12 |   - repo: https://github.com/rhysd/actionlint
13 |     rev: v1.6.27
14 |     hooks:
15 |       - id: actionlint
16 |         entry: env SHELLCHECK_OPTS='--exclude=SC2027' actionlint
17 |   - repo: https://github.com/pre-commit/pre-commit-hooks
18 |     rev: v4.6.0
19 |     hooks:
20 |       - id: trailing-whitespace
21 |       - id: check-ast
22 |       - id: check-case-conflict
23 |       - id: check-docstring-first
24 |       - id: check-json
25 |       - id: check-executables-have-shebangs
26 |       - id: check-merge-conflict
27 |       - id: check-shebang-scripts-are-executable
28 |       - id: check-symlinks
29 |       - id: check-toml
30 |       - id: check-yaml
31 |       - id: destroyed-symlinks
32 |       - id: detect-private-key
33 |       - id: end-of-file-fixer
34 |       - id: fix-byte-order-marker
35 |   - repo: https://github.com/astral-sh/ruff-pre-commit
36 |     # Ruff version.
37 |     rev: v0.4.6
38 |     hooks:
39 |       # Run the linter.
40 |       - id: ruff
41 | 


--------------------------------------------------------------------------------
/profiles/nextstrain-public/rename.smk:
--------------------------------------------------------------------------------
 1 | 
# Aggregate target: request the renamed main JSON and its "_tip-frequencies"
# companion for every build/segment combination in the config. A build's public
# name comes from its optional "auspice_name" template (formatted with the
# segment) and falls back to "<build_name>_<segment>".
rule all_public:
    input:
        [
            "auspice_renamed/" + build.get("auspice_name", f"{build_name}_{{segment}}").format(segment=segment) + suffix + ".json"
            for build_name, build in config["builds"].items()
            for segment in config["segments"]
            for suffix in ["", "_tip-frequencies"]
        ],
10 | 
11 | def _get_file_by_auspice_name(wildcards):
12 |     for build_name, build_params in config["builds"].items():
13 |         for segment in config["segments"]:
14 |             if build_params.get("auspice_name", f"{build_name}_{{segment}}").format(segment=segment) == wildcards.auspice_name:
15 |                 return f"auspice/{build_name}_{segment}.json"
16 | 
17 |     return ""
18 | 
# Expose a build's main Auspice JSON under its public name. The input is
# resolved from the requested {auspice_name} by _get_file_by_auspice_name, and
# the output is created with `ln` (no -s), i.e. as a hard link.
rule rename_auspice_main:
    input:
        _get_file_by_auspice_name,
    output:
        "auspice_renamed/{auspice_name}.json",
    shell:
        """
        ln {input} {output}
        """
28 | 
# Same as the main rename, but for the tip-frequencies sidecar: the input path
# is the main JSON path with ".json" replaced by "_tip-frequencies.json", and
# the output is again a hard link created with `ln`.
rule rename_auspice_tip_frequencies:
    input:
        lambda wildcards: _get_file_by_auspice_name(wildcards).replace(".json", "_tip-frequencies.json"),
    output:
        "auspice_renamed/{auspice_name}_tip-frequencies.json",
    shell:
        """
        ln {input} {output}
        """
38 | 


--------------------------------------------------------------------------------
/scripts/export_titers_for_auspice_v1.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import argparse
 3 | import json
 4 | 
 5 | 
 6 | if __name__ == '__main__':
 7 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 8 |     parser.add_argument("--titers-sub")
 9 |     parser.add_argument("--titers-tree")
10 |     parser.add_argument("--output-titers")
11 |     parser.add_argument("--output-titers-sub")
12 |     parser.add_argument("--output-titers-tree")
13 | 
14 |     args = parser.parse_args()
15 | 
16 |     with open(args.titers_sub) as fh:
17 |         sub = json.load(fh)
18 | 
19 |     with open(args.output_titers_sub, 'wt') as sub_file:
20 |         json.dump({'avidity': sub['avidity'],
21 |                     'potency': sub['potency'],
22 |                     'substitution': sub['substitution']},
23 |                     sub_file, indent=1)
24 | 
25 |     with open(args.output_titers, 'wt') as raw_file:
26 |         json.dump(sub['titers'], raw_file, indent=1)
27 | 
28 |     with open(args.titers_tree) as fh:
29 |         tree = json.load(fh)
30 | 
31 |     with open(args.output_titers_tree, 'wt') as tree_file:
32 |         json.dump({'avidity': tree['avidity'],
33 |                     'potency': tree['potency'],
34 |                     'dTiter': {k:v['dTiter'] for k,v in tree['nodes'].items()}},
35 |                     tree_file, indent=1)
36 | 


--------------------------------------------------------------------------------
/ingest/Snakefile:
--------------------------------------------------------------------------------
 1 | """
 2 | This is the main ingest Snakefile that orchestrates the full ingest workflow
 3 | and defines its default outputs.
 4 | """
 5 | # The workflow filepaths are written relative to this Snakefile's base directory
 6 | workdir: workflow.current_basedir
 7 | 
 8 | # Use default configuration values. Override with Snakemake's --configfile/--config options.
 9 | configfile: "defaults/config.yaml"
10 | # Dataset names are the keys of the `filtering` config block.
11 | VALID_DATASETS = list(config['filtering'].keys())
12 | 
13 | wildcard_constraints:
14 |     # Expected datasets should match the standardized outputs of the `filtering` block
15 |     # (example datasets are "h3n2", "avian-flu")
16 |     # in scripts/standardized-lineage
17 |     dataset = r'|'.join(VALID_DATASETS),
18 |     segment = r'pb2|pb1|pa|ha|np|na|mp|ns',
19 |     # Constrain GISAID pair names to "gisaid_cache" or YYYY-MM-DD with an optional -N suffix
20 |     gisaid_pair = r'gisaid_cache|\d{4}-\d{2}-\d{2}(-\d+)?'
21 | 
22 | # Default target: one metadata TSV per dataset and one FASTA per dataset and configured segment.
23 | rule all:
24 |     input:
25 |         metadata = expand("results/{dataset}/metadata.tsv", dataset=VALID_DATASETS),
26 |         sequences = expand("results/{dataset}/{segment}.fasta", dataset=VALID_DATASETS, segment=config["segments"]),
27 | 
28 | 
29 | include: "rules/prepare_ndjson.smk"
30 | include: "rules/curate.smk"
31 | 
32 | # Optionally include every extra rule file listed under the `custom_rules` config key.
33 | if "custom_rules" in config:
34 |     for rule_file in config["custom_rules"]:
35 | 
36 |         include: rule_file
37 | 


--------------------------------------------------------------------------------
/profiles/example/builds.yaml:
--------------------------------------------------------------------------------
 1 | custom_rules:
 2 |   - profiles/ci/prepare_data.smk
 3 | 
 4 | fasta_fields:
 5 |   - strain
 6 |   - virus
 7 |   - accession
 8 |   - date
 9 |   - date_submitted
10 |   - region
11 |   - country
12 |   - division
13 |   - location
14 |   - passage_category
15 |   - originating_lab
16 |   - submitting_lab
17 |   - age
18 |   - gender
19 | prettify_fields:
20 |   - region
21 |   - country
22 |   - division
23 |   - location
24 |   - originating_lab
25 |   - submitting_lab
26 | 
27 | lat-longs: "config/lat_longs.tsv"
28 | 
29 | segments:
30 |   - ha
31 |   - na
32 | 
33 | tree:
34 |   tree-builder-args: "'-ninit 10 -n 4 -czb'"
35 | 
36 | submission_date_field: date_submitted
37 | recency:
38 |   date_bins: [7, 30, 90]
39 |   date_bin_labels: ["last week", "last month", "last quarter"]
40 |   upper_bin_label: older
41 | 
42 | builds:
43 |     "example_build":
44 |       reference: "config/h3n2/{segment}/reference.fasta"
45 |       annotation: "config/h3n2/{segment}/genemap.gff"
46 |       tree_exclude_sites: "config/h3n2/{segment}/exclude-sites.txt"
47 |       clades: "config/h3n2/ha/clades.tsv"
48 |       emerging_haplotypes: "example_data/haplotypes.tsv"
49 |       lineage: "h3n2"
50 |       auspice_config: "config/h3n2/{segment}/auspice_config.json"
51 |       subsamples:
52 |         global:
53 |             filters: "--exclude-where 'ha!=True' 'na!=True'"
54 | 


--------------------------------------------------------------------------------
/profiles/neut-library.yaml:
--------------------------------------------------------------------------------
 1 | custom_rules:
 2 |   - workflow/snakemake_rules/download_from_s3.smk
 3 | 
 4 | lat-longs: "config/lat_longs.tsv"
 5 | 
 6 | segments:
 7 |   - ha
 8 | 
 9 | tree:
10 |   tree-builder-args: "'-ninit 10 -n 4 -czb'"
11 | 
12 | submission_date_field: date_submitted
13 | recency:
14 |   date_bins: [7, 30, 90]
15 |   date_bin_labels: ["last week", "last month", "last quarter"]
16 |   upper_bin_label: older
17 | 
18 | builds:
19 |     h3n2:
20 |       lineage: h3n2
21 |       reference: "config/h3n2/{segment}/reference.fasta"
22 |       annotation: "config/h3n2/{segment}/genemap.gff"
23 |       clades: "config/h3n2/ha/clades.tsv"
24 |       subclades: "config/h3n2/{segment}/subclades.tsv"
25 |       auspice_config: "config/h3n2/{segment}/auspice_config.json"
26 |       subsamples: &subsampling
27 |         global:
28 |             filters: --min-date 2025-05-01 --exclude-ambiguous-dates-by month --exclude-where 'ha!=True' 'passage_category=egg' --query "\`qc.overallStatus\` == 'good'"
29 |     h1n1pdm:
30 |       lineage: h1n1pdm
31 |       reference: "config/h1n1pdm/{segment}/reference.fasta"
32 |       annotation: "config/h1n1pdm/{segment}/genemap.gff"
33 |       clades: "config/h1n1pdm/ha/clades.tsv"
34 |       subclades: "config/h1n1pdm/{segment}/subclades.tsv"
35 |       auspice_config: "config/h1n1pdm/{segment}/auspice_config.json"
36 |       subsamples: *subsampling
37 | 


--------------------------------------------------------------------------------
/config/h1n1/reference_strains.txt:
--------------------------------------------------------------------------------
 1 | A/WSN/1933
 2 | A/PuertoRico/8/1934
 3 | A/Melbourne/1935
 4 | A/Iowa/1943
 5 | A/Melbourne/1/1946
 6 | A/Liverpool/1951
 7 | A/Denver/1957
 8 | A/USSR/90/1977
 9 | A/USSR/92/1977
10 | A/Brazil/11/1978
11 | A/Lackland/3/1978
12 | A/Arizona/14/1978
13 | A/Lackland/7/1978
14 | A/Fukushima/103/1978
15 | A/California/45/1978
16 | A/Kumamoto/103/1978
17 | A/Texas/23/1979
18 | A/USSR/50/1979
19 | A/England/333/1980
20 | A/India/6263/1980
21 | A/HongKong/2/1982
22 | A/Texas/29/1982
23 | A/Chile/1/1983
24 | A/Dunedin/27/1983
25 | A/Victoria/7/1983
26 | A/Switzerland/79/1985
27 | A/Singapore/6/1986
28 | A/Taiwan/1/1986
29 | A/Victoria/36/1988
30 | A/Texas/36/1991
31 | A/Beijing/262/1995
32 | A/Bayern/7/1995
33 | A/Johannesburg/82/1996
34 | A/NewCaledonia/20/1999
35 | A/Madagascar/57794/2000
36 | A/HongKong/1252/2000
37 | A/Egypt/96/2002
38 | A/Chile/8885/2002
39 | A/Hungary/2/2003
40 | A/Netherlands/128/2004
41 | A/HongKong/2637/2004
42 | A/Thessaloniki/24/2005
43 | A/Egypt/39/2005
44 | A/SolomonIslands/3/2006
45 | A/HongKong/2652/2006
46 | A/Fukushima/141/2006
47 | A/Fukushima/97/2006
48 | A/Brisbane/59/2007
49 | A/Netherlands/345/2007
50 | A/Egypt/10/2007
51 | A/StPetersburg/10/2007
52 | A/SouthDakota/6/2007
53 | A/StPetersburg/12/2008
54 | A/HongKong/1870/2008
55 | A/HongKong/1856/2008
56 | A/Seychelles/2239/2008
57 | A/StPetersburg/5/2008
58 | A/Perth/200/2008
59 | A/HongKong/1988/2009
60 | 


--------------------------------------------------------------------------------
/scripts/table_to_node_data.py:
--------------------------------------------------------------------------------
 1 | """Create Augur-compatible node data JSON from a pandas data frame.
 2 | """
 3 | import argparse
 4 | import pandas as pd
 5 | from augur.utils import write_json
 6 | 
 7 | 
 8 | if __name__ == "__main__":
 9 |     parser = argparse.ArgumentParser()
10 |     parser.add_argument("--table", help="table to convert to a node data JSON")
11 |     parser.add_argument("--index-column", default="strain", help="name of the column to use as an index")
12 |     parser.add_argument("--delimiter", default=",", help="separator between columns in the given table")
13 |     parser.add_argument("--node-name", default="nodes", help="name of the node data attribute in the JSON output")
14 |     parser.add_argument("--output", help="node data JSON file")
15 | 
16 |     args = parser.parse_args()
17 |     # Without --output nothing is read or written.
18 |     if args.output is not None:
19 |         table = pd.read_csv(
20 |             args.table,
21 |             sep=args.delimiter,
22 |             index_col=args.index_column,
23 |             dtype=str,
24 |         )
25 |         # The float conversion below is commented out, so every value stays a string (dtype=str).
26 |         # # Convert columns that aren't strain names or labels to floats.
27 |         # for column in table.columns:
28 |         #     if column != "strain" and not "label" in column:
29 |         #         table[column] = table[column].astype(float)
30 |         # Transposing first makes each index value a top-level key mapping column name to value.
31 |         table_dict = table.transpose().to_dict()
32 |         write_json({args.node_name: table_dict}, args.output)
33 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/ha/luksza.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "map": {
 4 |         "HA1": {
 5 |             "50": 1,
 6 |             "53": 1,
 7 |             "54": 1,
 8 |             "121": 1,
 9 |             "122": 1,
10 |             "124": 1,
11 |             "126": 1,
12 |             "131": 1,
13 |             "133": 1,
14 |             "135": 1,
15 |             "137": 1,
16 |             "142": 1,
17 |             "143": 1,
18 |             "144": 1,
19 |             "145": 1,
20 |             "146": 1,
21 |             "155": 1,
22 |             "156": 1,
23 |             "157": 1,
24 |             "158": 1,
25 |             "159": 1,
26 |             "160": 1,
27 |             "163": 1,
28 |             "164": 1,
29 |             "172": 1,
30 |             "173": 1,
31 |             "174": 1,
32 |             "186": 1,
33 |             "188": 1,
34 |             "189": 1,
35 |             "190": 1,
36 |             "192": 1,
37 |             "193": 1,
38 |             "196": 1,
39 |             "197": 1,
40 |             "201": 1,
41 |             "207": 1,
42 |             "213": 1,
43 |             "217": 1,
44 |             "226": 1,
45 |             "227": 1,
46 |             "242": 1,
47 |             "244": 1,
48 |             "248": 1,
49 |             "275": 1,
50 |             "276": 1,
51 |             "278": 1,
52 |             "299": 1,
53 |             "307": 1
54 |         }
55 |     },
56 |     "name": "luksza"
57 | }


--------------------------------------------------------------------------------
/scripts/xls2csv.py:
--------------------------------------------------------------------------------
 1 | """Minimal script to convert Excel XLS format to CSV.
 2 | """
 3 | import argparse
 4 | import csv
 5 | import xlrd
 6 | 
 7 | 
 8 | if __name__ == '__main__':
 9 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
10 |     parser.add_argument("--xls", required=True, help="path to XLS file to convert")
11 |     parser.add_argument("--output", required=True, help="path to CSV output")
12 | 
13 |     args = parser.parse_args()
14 | 
15 |     workbook = xlrd.open_workbook(args.xls)
16 |     sheet = workbook.sheet_by_index(0)
17 |     field_names = [field.value for field in sheet.row(0)]
18 | 
19 |     with open(args.output, "w", encoding="utf-8") as csvfile:
20 |         writer = csv.writer(csvfile, dialect="unix")
21 |         writer.writerow(field_names)
22 | 
23 |         for row_index in range(1, sheet.nrows):
24 |             row = []
25 |             for field in sheet.row(row_index):
26 |                 value = field.value
27 |                 if isinstance(value, str):
28 |                     # Handle the case where cells can contain newline-delimited
29 |                     # values which will appear as new lines in our CSV output
30 |                     # unless we remove those characters. For example, see H3N2
31 |                     # record EPI_ISL_18856352.
32 |                     value = value.replace("\n", "")
33 | 
34 |                 row.append(value)
35 | 
36 |             writer.writerow(row)
37 | 


--------------------------------------------------------------------------------
/config/distance_maps/h1n1pdm/ha/canton.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "map": {
 4 |         "HA1": {
 5 |             "124": 1,
 6 |             "125": 1,
 7 |             "137": 1,
 8 |             "138": 1,
 9 |             "139": 1,
10 |             "140": 1,
11 |             "141": 1,
12 |             "142": 1,
13 |             "153": 1,
14 |             "154": 1,
15 |             "155": 1,
16 |             "156": 1,
17 |             "157": 1,
18 |             "159": 1,
19 |             "160": 1,
20 |             "161": 1,
21 |             "162": 1,
22 |             "163": 1,
23 |             "164": 1,
24 |             "166": 1,
25 |             "167": 1,
26 |             "168": 1,
27 |             "169": 1,
28 |             "170": 1,
29 |             "184": 1,
30 |             "185": 1,
31 |             "186": 1,
32 |             "187": 1,
33 |             "188": 1,
34 |             "189": 1,
35 |             "190": 1,
36 |             "191": 1,
37 |             "192": 1,
38 |             "193": 1,
39 |             "194": 1,
40 |             "195": 1,
41 |             "203": 1,
42 |             "204": 1,
43 |             "205": 1,
44 |             "221": 1,
45 |             "222": 1,
46 |             "235": 1,
47 |             "236": 1,
48 |             "237": 1,
49 |             "70": 1,
50 |             "71": 1,
51 |             "72": 1,
52 |             "73": 1,
53 |             "74": 1,
54 |             "75": 1
55 |         }
56 |     },
57 |     "name": "canton"
58 | }


--------------------------------------------------------------------------------
/config/nextstrain_clades_h1n1pdm_ha.tsv:
--------------------------------------------------------------------------------
 1 | clade	gene	site	alt
 2 | 2	HA1	31	D
 3 | 2	HA1	162	N
 4 | 2	HA1	186	T
 5 | 2	nuc	1100	A
 6 | 3	HA1	183	P
 7 | 3	HA1	134	T
 8 | 3	nuc	1484	T
 9 | 4	HA1	125	D
10 | 4	nuc	1301	A
11 | 4	HA2	47	K
12 | 4	HA1	283	K
13 | 6	HA1	97	N
14 | 6	HA2	124	N
15 | 6	HA1	185	T
16 | 7	HA1	143	G
17 | 7	nuc	164	T
18 | 7	nuc	1457	T
19 | 7	nuc	1676	T
20 | 8	SigPep	15	T
21 | 8	HA1	186	T
22 | 8	HA1	272	A
23 | 8	HA2	146	D
24 | 8	HA2	147	K
25 | 6b	HA1	163	Q
26 | 6b	HA1	256	T
27 | 6b	nuc	1673	C
28 | 6c	HA1	234	I
29 | 6c	nuc	50	C
30 | 6c	HA1	283	E
31 | 6c	HA2	172	K
32 | 6b1	HA1	162	N
33 | 6b1	HA1	216	T
34 | 6b1	HA1	84	N
35 | 6b1	SigPep	13	T
36 | 6b2	SigPep	13	T
37 | 6b2	HA1	152	V
38 | 6b2	HA2	174	E
39 | 6b2	HA1	173	I
40 | 6b1.A	HA1	164	T
41 | 6b1.A	nuc	1163	T
42 | 6b1.A	nuc	1271	T
43 | A1	nuc	1703	A
44 | A1	nuc	384	C
45 | A1	nuc	618	C
46 | A1	nuc	1010	G
47 | A2	nuc	49	G
48 | A2	nuc	618	C
49 | A2	nuc	704	A
50 | A2	nuc	1460	A
51 | A3	nuc	425	A
52 | A3	nuc	618	C
53 | A3	nuc	1631	C
54 | A4	nuc	410	A
55 | A4	nuc	456	G
56 | A4	nuc	493	A
57 | A4	nuc	618	C
58 | A4	nuc	1205	G
59 | A4	nuc	1577	G
60 | A5	nuc	618	C
61 | A5	nuc	1364	C
62 | A5	nuc	1487	C
63 | A5a	HA1	129	D
64 | A5a	HA1	185	I
65 | A5b	HA1	160	M
66 | A6	nuc	209	A
67 | A6	nuc	536	G
68 | A6	nuc	618	C
69 | A6	nuc	1403	T
70 | A7	nuc	317	G
71 | A7	nuc	976	C
72 | A7	nuc	1558	G
73 | A5a.1	HA1	187	A
74 | A5a.1	HA1	189	E
75 | A5a.2	HA1	156	K
76 | A5a.2	HA1	161	I


--------------------------------------------------------------------------------
/nextclade/config/auspice_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "title": "Nextclade reference dataset for seasonal influenza viruses",
 3 |   "build_url": "https://github.com/nextstrain/seasonal-flu",
 4 |   "maintainers": [
 5 |     { "name": "Nextstrain team", "url": "https://nextstrain.org" }
 6 |   ],
 7 |   "extensions": {
 8 |     "nextclade": {
 9 |     }
10 |   },
11 |   "data_provenance": [
12 |     {
13 |       "name": "GISAID"
14 |     }
15 |   ],
16 |   "colorings": [
17 |     {
18 |       "key": "country",
19 |       "title": "Country",
20 |       "type": "categorical"
21 |     },
22 |     {
23 |       "key": "legacy-clade",
24 |       "title": "Legacy clade",
25 |       "type": "categorical"
26 |     },
27 |     {
28 |       "key": "proposedSubclade",
29 |       "title": "Subclade proposals",
30 |       "type": "categorical"
31 |     },
32 |     {
33 |       "key": "region",
34 |       "title": "Region",
35 |       "type": "categorical"
36 |     },
37 |     {
38 |       "key": "date",
39 |       "title": "Sample Date",
40 |       "type": "ordinal"
41 |     },
42 |     {
43 |       "key": "EPI_ISL",
44 |       "title": "EPI_ISL",
45 |       "type": "categorical"
46 |     }
47 |   ],
48 |   "filters": [
49 |     "region",
50 |     "country",
51 |     "clade_membership",
52 |     "subclade"
53 |   ],
54 |   "display_defaults": {
55 |     "color_by": "clade_membership",
56 |     "distance_measure": "div",
57 |     "branch_label": "clade"
58 |   },
59 |   "panels": ["tree","entropy"]
60 | }
61 | 


--------------------------------------------------------------------------------
/ingest/vendored/cloudfront-invalidate:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Originally from @tsibley's gist: https://gist.github.com/tsibley/a66262d341dedbea39b02f27e2837ea8
 3 | set -euo pipefail
 4 | 
 5 | main() {  # usage: <domain> <path>... ; invalidates the given paths and waits for completion
 6 |     local domain="$1"
 7 |     shift
 8 |     local paths=("$@")
 9 |     local distribution invalidation
10 |     # Take the id of the first distribution whose aliases contain the given domain.
11 |     echo "-> Finding CloudFront distribution"
12 |     distribution=$(
13 |         aws cloudfront list-distributions \
14 |             --query "DistributionList.Items[?contains(Aliases.Items, \`$domain\`)] | [0].Id" \
15 |             --output text
16 |     )
17 |     # An empty result or the literal text "None" is treated as "no distribution found".
18 |     if [[ -z $distribution || $distribution == None ]]; then
19 |         exec >&2
20 |         echo "Unable to find CloudFront distribution id for $domain"
21 |         echo
22 |         echo "Are your AWS CLI credentials for the right account?"
23 |         exit 1
24 |     fi
25 |     # Request the invalidation and keep its id so it can be waited on below.
26 |     echo "-> Creating CloudFront invalidation for distribution $distribution"
27 |     invalidation=$(
28 |         aws cloudfront create-invalidation \
29 |             --distribution-id "$distribution" \
30 |             --paths "${paths[@]}" \
31 |             --query Invalidation.Id \
32 |             --output text
33 |     )
34 |     # Block until the invalidation completes (or the user interrupts).
35 |     echo "-> Waiting for CloudFront invalidation $invalidation to complete"
36 |     echo "   Ctrl-C to stop waiting."
37 |     aws cloudfront wait invalidation-completed \
38 |         --distribution-id "$distribution" \
39 |         --id "$invalidation"
40 | }
41 | 
42 | main "$@"
43 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/na/EPI1857215/reference.fasta:
--------------------------------------------------------------------------------
1 | >EPI1857215
2 | AGTAAAGATGAATCCAAATCAAAAGATAATAACGATTGGCTCTGTTTCTCTCACAATTTCCACAATATGCTTCTTCATGCAAATTGCCATCCTGATAACTACTGTAACATTGCATTTCAAGCAATATGAATTCAACTCCCCCCCAAATAACCAAGTGATGCTGTGTGAACCAACAATAATAGAAAGAAACATAACAGAGATAGTGTATTTGACCAACACCACCATAGAGAAGGAAATATGCCCCAAACCAGCAGAATACAGAAATTGGTCAAAACCGCAATGTGGCATTACAGGATTTGCACCTTTCTCTAAGGACAATTCGATTAGGCTTTCCGCTGGTGGGGACATCTGGGTGACAAGAGAACCTTATGTGTCATGCGATCTTGACAAGTGTTATCAATTTGCCCTTGGACAGGGAACAACACTAAACAATGTGCATTCAAATAACACAGTACGTGATAGAACCCCTTATCGGACTCTATTGATGAATGAGTTGGGTGTTCCTTTCCATCTGGGGACCAAGCAAGTGTGCATAGCATGGTCCAGCTCAAGTTGTCACGATGGAAAAGCATGGCTGCATGTTTGTATAACGGGGGATGATAAAAATGCAACTGCTAGCTTCATTTACAATGGGAGGCTTGTAGATAGTGTTGTTTCATGGTCCAACGATATTCTCAGAACCCAGGAGTCAGAATGCGTTTGTATCAATGGAACTTGTACAGTAGTAATGACTGATGGAAATGCTACAGGAAAAGCTGATACTAAAATACTATTCATTGAGGAGGGGAAAATCGTTCATACTAGCAAATTGTCAGGAAGTGCTCAGCATGTCGAAGAGTGCTCTTGCTATCCTCGATATCCTGGTGTCAGATGTGTCTGCAGAGACAACTGGAAAGGATCCAACCGGCCCATCATAGATATAAACATAAAGGATCATAGCATTGTTTCCAGGTATGTGTGTTCTGGACTTGTTGGAGACACACCCAGAAAAAGCGACAGCTCCAGCAGTAGCCATTGTTTGAACCCTAACAATGAAAAAGGTGATCATGGAGTGAAAGGCTGGGCCTTTGATGATGGAAATGACGTGTGGATGGGGAGAACAATCAACGAGACGTCACGCTTAGGGTATGAAACCTTCAAAGTCGTTGAAGGCTGGTCCAACCCTAAGTCCAAATTGCAGATAAATAGGCAAGTCATAGTTGACAGAGGCGATAGGTCCGGTTATTCTGGTATTTTCTCTGTTGAAGGCAAAAGCTGCATCAATCGGTGCTTTTATGTGGAGTTGATTAGGGGAAGAAAAGAGGAAACTGAAGTCTTGTGGACTTCAAACAGTATTGTTGTGTTTTGTGGCACCTCAGGTACATATGGAACAGGCTCATGGCCTGATGGGGCGAACCTCAGTCTCATGCATATATAAGCTTTCGCAATTTTAGAAAAAA
3 | 


--------------------------------------------------------------------------------
/flu-forecasting/scripts/calculate_clade_frequency_forecasts.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import argparse
 3 | import pandas as pd
 4 | 
 5 | from augur.utils import read_node_data
 6 | 
 7 | 
 8 | if __name__ == '__main__':
 9 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
10 |     parser.add_argument("--forecasts", required=True, help="TSV of forecasts per strain")
11 |     parser.add_argument("--clades", required=True, help="node data JSON of clades per strain")
12 |     parser.add_argument("--output", required=True, help="TSV of forecasts per clade")
13 | 
14 |     args = parser.parse_args()
15 | 
16 |     # Load forecasts, reading only the three columns used below.
17 |     forecasts = pd.read_csv(
18 |         args.forecasts,
19 |         sep="\t",
20 |         usecols=("timepoint", "strain", "projected_frequency"),
21 |     )
22 | 
23 |     # Load clades.
24 |     clades = read_node_data(args.clades)
25 |     clade_by_strain = {
26 |         name: data["clade_membership"]
27 |         for name, data in clades["nodes"].items()
28 |     }
29 | 
30 |     # Assign clades to strains in the forecasts table.
31 |     forecasts["clade"] = forecasts["strain"].map(clade_by_strain)
32 |     # NOTE(review): strains absent from the clades JSON map to NaN and are presumably dropped by groupby's default dropna - confirm.
33 |     # Sum projected frequency per timepoint and clade.
34 |     forecasts_by_clade = forecasts.groupby(["timepoint", "clade"])["projected_frequency"].sum().reset_index()
35 | 
36 |     # Save forecasts by clade.
37 |     forecasts_by_clade.to_csv(
38 |         args.output,
39 |         sep="\t",
40 |         header=True,
41 |         index=False,
42 |     )


--------------------------------------------------------------------------------
/nextclade/dataset_config/yam/ha/JN993010/README.md:
--------------------------------------------------------------------------------
 1 | # Nextclade dataset for "Influenza B Yam HA" based on reference "B/Wisconsin/01/2010" (flu/yam/ha/JN993010)
 2 | 
 3 | | Key                  | Value                |
 4 | | -------------------- | -------------------- |
 5 | | authors                | [Richard Neher](https://neherlab.org), [Nextstrain](https://nextstrain.org)                         |
 6 | | name                 | Influenza B(Yam) HA                      |
 7 | | reference            | B/Wisconsin/01/2010                      |
 8 | | dataset path         | flu/yam/ha/JN993010                     |
 9 | | reference accession  | JN993010   |
10 | 
11 | ## Scope of this dataset
12 | B/Yamagata viruses have not been observed since 2020. This dataset is provided for analysis of old sequences or suspected Yamagata sequences.
13 | 
14 | 
15 | ## Features
16 | This dataset supports
17 | 
18 |  * Assignment to clades
 19 |  * Identification of glycosylation motifs
20 |  * Sequence QC
21 |  * Phylogenetic placement
22 | 
23 | ## Clades of seasonal influenza viruses
24 | 
25 | The WHO Collaborating centers define "clades" as genetic groups of viruses with signature mutations to facilitate discussion of circulating diversity of the viruses.
 26 | Clade demarcations do not always coincide with significantly different antigenic properties of the viruses.
27 | 
 28 | ## What is a Nextclade dataset
29 | 
30 | Read more about Nextclade datasets in Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
31 | 


--------------------------------------------------------------------------------
/ingest/vendored/trigger-on-new-data:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -euo pipefail
 3 | 
 4 | : "${PAT_GITHUB_DISPATCH:?The PAT_GITHUB_DISPATCH environment variable is required.}"
 5 | # The helper scripts (trigger, notify-slack) are run from this script's own directory.
 6 | bin="$(dirname "$0")"
 7 | # Positional arguments; only the fifth has a default.
 8 | github_repo="${1:?A GitHub repository with owner and repository name is required as the first argument.}"
 9 | event_type="${2:?An event type is required as the second argument.}"
10 | metadata="${3:?A metadata upload output file is required as the third argument.}"
11 | sequences="${4:?An sequence FASTA upload output file is required as the fourth argument.}"
12 | identical_file_message="${5:-files are identical}"
13 | # Store grep's exit status rather than its output: 0 means the "identical" message was found.
14 | new_metadata=$(grep "$identical_file_message" "$metadata" >/dev/null; echo $?)
15 | new_sequences=$(grep "$identical_file_message" "$sequences" >/dev/null; echo $?)
16 | 
17 | slack_message=""
18 | 
19 | # grep exit status 0 for found match, 1 for no match, 2 if an error occurred
20 | if [[ $new_metadata -eq 1 || $new_sequences -eq 1 ]]; then
21 |     slack_message="Triggering new builds due to updated metadata and/or sequences"
22 |     "$bin"/trigger "$github_repo" "$event_type"
23 | elif [[ $new_metadata -eq 0 && $new_sequences -eq 0 ]]; then
24 |     slack_message="Skipping trigger of rebuild: Both metadata TSV and sequences FASTA are identical to S3 files."
25 | else
26 |     slack_message="Skipping trigger of rebuild: Unable to determine if data has been updated."
27 | fi
28 | 
29 | # The outcome is always sent to Slack; a failed notification is reported but does not fail the script.
30 | if ! "$bin"/notify-slack "$slack_message"; then
31 |     echo "Notifying Slack failed, but exiting with success anyway."
32 | fi
33 | 


--------------------------------------------------------------------------------
/scripts/prune_reference.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Prunes a reference strain from the provided tree.
 4 | """
 5 | import argparse
 6 | from augur.io import read_sequences
 7 | from Bio import Phylo
 8 | import shutil
 9 | import sys
10 | 
11 | 
12 | if __name__ == '__main__':
13 |     parser = argparse.ArgumentParser(
14 |         description=__doc__,
15 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter
16 |     )
17 |     parser.add_argument("--tree", help="Newick tree to prune")
18 |     parser.add_argument("--reference", nargs="?", help="FASTA file for the reference used to root the tree and to prune from the input")
19 |     parser.add_argument("--output", help="Output Newick tree file")
20 | 
21 |     args = parser.parse_args()
22 |     # NOTE(review): the warning below is written to stdout, not stderr - confirm that is intended.
23 |     # If reference is not provided, then just copy the input to output without modifications
24 |     if not args.reference:
25 |         print("WARNING: No reference was provided, copying input tree to output tree", file=sys.stdout)
26 |         shutil.copy(args.tree, args.output)
27 |     else:
28 |         # Open the reference sequence to get the name of the reference strain.
29 |         reference = next(read_sequences(args.reference))
30 |         reference_name = reference.id
31 |         # Root on the tip named after the reference, then remove that tip.
32 |         T = Phylo.read(args.tree, "newick")
33 |         references = [c for c in T.find_clades(terminal=True) if c.name == reference_name]
34 |         if references:
35 |             T.root_with_outgroup(references[0])
36 |             T.prune(references[0])
37 |         # The tree is written even when no tip matched the reference name.
38 |         Phylo.write(T, args.output, "newick")
39 | 


--------------------------------------------------------------------------------
/scripts/sanitize_trees.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import argparse
 3 | import sys
 4 | from treetime import TreeAnc
 5 | from augur.utils import read_tree, InvalidTreeError
 6 | import Bio.Phylo
 7 | 
 8 | 
 9 | if __name__ == '__main__':
10 |     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
11 |     parser.add_argument("--trees", nargs="+", help="trees to sanitize by pruning leaves that do not appear in all trees.")
12 |     parser.add_argument("--alignments", nargs="+", help="corresponding sequence alignments to remove short branches.")
13 |     parser.add_argument("--output-trees", nargs="+", help="sanitized trees, one for each input tree.")
14 |     args = parser.parse_args()
15 |     # Read every tree up front; an invalid tree prints the error and exits with status 1.
16 |     trees = []
17 |     try:
18 |         for tree_file in args.trees:
19 |             tree = read_tree(tree_file)
20 |             trees.append(tree)
21 |     except InvalidTreeError as error:
22 |         print(error, file=sys.stderr)
23 |         sys.exit(1)
24 |     # Keep only tips named in every tree. NOTE(review): zip() below silently truncates if the three argument lists differ in length.
25 |     common_leaves = set.intersection(*[set(x.name for x in tree.find_clades(terminal=True)) for tree in trees])
26 |     for output_tree_file, tree, aln in zip(args.output_trees, trees, args.alignments):
27 |         for leaf in set(x.name for x in tree.find_clades(terminal=True)).difference(common_leaves):
28 |             tree.prune(leaf)
29 |         # Infer ancestral sequences against the matching alignment, prune short branches, ladderize, and write Newick.
30 |         tt = TreeAnc(tree=tree, aln=aln)
31 |         tt.infer_ancestral_sequences(infer_gtr=True)
32 |         tt.prune_short_branches()
33 |         tt.tree.ladderize()
34 |         Bio.Phylo.write(tt.tree, output_tree_file, 'newick')
35 | 


--------------------------------------------------------------------------------
/ingest/scripts/dedup-by-gisaid-id:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Deduplicate records by GISAID EPI ISL.
 4 | Only keeps the first record of duplicates.
 5 | """
 6 | import argparse
 7 | from sys import stdin
 8 | from typing import Iterable
 9 | from augur.io.json import dump_ndjson, load_ndjson
10 | from augur.io.print import print_err
11 | 
12 | 
13 | def deduplicate_records(records: Iterable[dict],
14 |                         id_field: str) -> Iterable:
15 |     """
16 |     Deduplicate *records* by *id_field*, keeping only the first record of
17 |     each duplicated id; dropped duplicates are reported via print_err.
18 | 
19 |     Yields records with unique ids. Raises Exception when *id_field* is missing or None.
20 |     """
21 |     seen_ids = set()
22 |     for index, record in enumerate(records):
23 |         record_id = record.get(id_field)
24 |         # .get() returns None for a missing key, so an explicit None value is rejected too.
25 |         if record_id is None:
26 |             raise Exception(f"Records must have the expected id field {id_field!r}")
27 | 
28 |         if record_id in seen_ids:
29 |             print_err(
30 |                 f"Dropping record (index {index!r}) with duplicate record id {record_id!r}"
31 |             )
32 |             continue
33 | 
34 |         seen_ids.add(record_id)
35 |         yield record
36 | 
37 | 
38 | if __name__ == "__main__":
39 |     parser = argparse.ArgumentParser(description=__doc__)
40 | 
41 |     parser.add_argument("--id-field", default="gisaid_epi_isl",
42 |         help="The record field containing a record id. ")
43 | 
44 |     args = parser.parse_args()
45 |     # Read NDJSON records from stdin, deduplicate them, and hand the result to dump_ndjson.
46 |     records = load_ndjson(stdin)
47 |     deduped_records = deduplicate_records(records, args.id_field)
48 |     dump_ndjson(deduped_records)
49 | 


--------------------------------------------------------------------------------
/nextclade/config/h3n2/ha/CY163680/founder_sequences_SigPep.fasta:
--------------------------------------------------------------------------------
 1 | >A
 2 | MKTIIALSYILCLVFA
 3 | >A.2
 4 | MKTIIALSYILCLVFA
 5 | >A.3
 6 | MKTIIALSYILCLVFA
 7 | >A.3.2
 8 | MKTIIALSYILCLVFA
 9 | >B
10 | MKTIIALSYILCLVFA
11 | >B.1
12 | MKTIIALSYILCLVFA
13 | >B.1.1
14 | MKTIIALSYILCLVFA
15 | >B.1.2
16 | MKTIIALSYILCLVFA
17 | >B.1.2.1
18 | MKTIIALSYILCLVFA
19 | >B.1.2.1.1
20 | MKTIIALSYILCLVFA
21 | >B.2
22 | MKTIIALSYILCLVFA
23 | >B.3
24 | MKTIIALSYILCLVFA
25 | >B.4
26 | MKTIIALSYILCLVFA
27 | >C
28 | MKTIIALSYILCLVFA
29 | >C.1
30 | MKTIIALSCILCLVFA
31 | >D
32 | MKTIIALSYILCLVFT
33 | >E
34 | MKTIIALSYILCLVFA
35 | >E.1
36 | MKTIIALSYILCLVFA
37 | >E.2
38 | MKTIIALSYILCLVFA
39 | >F
40 | MKTIIALSYILCLVFA
41 | >F.1
42 | MKTIIALSYILCLVFA
43 | >F.1.1
44 | MKTIIALSYTLCLVFA
45 | >G
46 | MKTIIALSYILCLVFA
47 | >G.1
48 | MKTIIALSNILCLVFA
49 | >G.1.1
50 | MKTIIALSNILCLVFA
51 | >G.1.1.1
52 | MKTIIALSNILCLVFA
53 | >G.1.1.2
54 | MKTIIALSNILCLVFA
55 | >G.1.2
56 | MKTIIALSNILCLVFA
57 | >G.1.3
58 | MKTIIALSNILCLVFA
59 | >G.1.3.1
60 | MKTIIALSNILCLVFA
61 | >G.1.3.2
62 | MKTIIALSNILCLVFA
63 | >G.2
64 | MKTIIALSNILCLVFA
65 | >G.2.1
66 | MKTIIALSNILCLVFA
67 | >G.2.2
68 | MKTIIALSNILCLVFA
69 | >G.3
70 | MKTIIALSYILCLVFA
71 | >G.4
72 | MKTIIALSYILCLVFA
73 | >J
74 | MKAIIALSNILCLVFA
75 | >J.1
76 | MKAIIALSNILCLVFA
77 | >J.1.1
78 | MKAIIALSNILCLVFA
79 | >J.2
80 | MKAIIALSNILCLVFA
81 | >J.2.1
82 | MKAIIALSNILCLVFA
83 | >J.2.2
84 | MKAIIALSNILCLVFA
85 | >J.2.3
86 | MKAIIALSNILCLVFA
87 | >J.2.4
88 | MKAIIALSNILCLVFA
89 | >J.2.5
90 | MKAIIALSNILCLVFA
91 | >J.3
92 | MKAIIALSNILCLVFA
93 | >J.4
94 | MKTIIALSNILCLVFA
95 | >K
96 | MKAIIALSNILCLVFA
97 | 


--------------------------------------------------------------------------------
/nextclade/config/h3n2/ha/EPI1857216/founder_sequences_SigPep.fasta:
--------------------------------------------------------------------------------
 1 | >A
 2 | MKTIIALSYILCLVFA
 3 | >A.2
 4 | MKTIIALSYILCLVFA
 5 | >A.3
 6 | MKTIIALSYILCLVFA
 7 | >A.3.2
 8 | MKTIIALSYILCLVFA
 9 | >B
10 | MKTIIALSYILCLVFA
11 | >B.1
12 | MKTIIALSYILCLVFA
13 | >B.1.1
14 | MKTIIALSYILCLVFA
15 | >B.1.2
16 | MKTIIALSYILCLVFA
17 | >B.1.2.1
18 | MKTIIALSYILCLVFA
19 | >B.1.2.1.1
20 | MKTIIALSYILCLVFA
21 | >B.2
22 | MKTIIALSYILCLVFA
23 | >B.3
24 | MKTIIALSYILCLVFA
25 | >B.4
26 | MKTIIALSYILCLVFA
27 | >C
28 | MKTIIALSYILCLVFA
29 | >C.1
30 | MKTIIALSCILCLVFA
31 | >D
32 | MKTIIALSYILCLVFT
33 | >E
34 | MKTIIALSYILCLVFA
35 | >E.1
36 | MKTIIALSYILCLVFA
37 | >E.2
38 | MKTIIALSYILCLVFA
39 | >F
40 | MKTIIALSYILCLVFA
41 | >F.1
42 | MKTIIALSYILCLVFA
43 | >F.1.1
44 | MKTIIALSYTLCLVFA
45 | >G
46 | MKTIIALSYILCLVFA
47 | >G.1
48 | MKTIIALSNILCLVFA
49 | >G.1.1
50 | MKTIIALSNILCLVFA
51 | >G.1.1.1
52 | MKTIIALSNILCLVFA
53 | >G.1.1.2
54 | MKTIIALSNILCLVFA
55 | >G.1.2
56 | MKTIIALSNILCLVFA
57 | >G.1.3
58 | MKTIIALSNILCLVFA
59 | >G.1.3.1
60 | MKTIIALSNILCLVFA
61 | >G.1.3.2
62 | MKTIIALSNILCLVFA
63 | >G.2
64 | MKTIIALSNILCLVFA
65 | >G.2.1
66 | MKTIIALSNILCLVFA
67 | >G.2.2
68 | MKTIIALSNILCLVFA
69 | >G.3
70 | MKTIIALSYILCLVFA
71 | >G.4
72 | MKTIIALSYILCLVFA
73 | >J
74 | MKAIIALSNILCLVFA
75 | >J.1
76 | MKAIIALSNILCLVFA
77 | >J.1.1
78 | MKAIIALSNILCLVFA
79 | >J.2
80 | MKAIIALSNILCLVFA
81 | >J.2.1
82 | MKAIIALSNILCLVFA
83 | >J.2.2
84 | MKAIIALSNILCLVFA
85 | >J.2.3
86 | MKAIIALSNILCLVFA
87 | >J.2.4
88 | MKAIIALSNILCLVFA
89 | >J.2.5
90 | MKAIIALSNILCLVFA
91 | >J.3
92 | MKAIIALSNILCLVFA
93 | >J.4
94 | MKTIIALSNILCLVFA
95 | >K
96 | MKAIIALSNILCLVFA
97 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/pa/pathogen.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "shortcuts": [
 3 |         "flu_vic_pa",
 4 |         "nextstrain/flu/vic/pa",
 5 |         "nextstrain/flu/vic/pa/brisbane-60-2008"
 6 |       ],
 7 |   "phenotypeData":[
 8 |     {
 9 |       "name": "PAI",
10 |       "nameFriendly": "PAI",
11 |       "description": "This column displays a score associated with reduced susceptibility to polymerase inhibitors. The score is a weighted sum of known substitutions (x1) or major substitutions (x2), while substitutions at positions that confer resistance but are to amino acids not previously described to confer resistance count x0.5.",
12 |       "cds": "PA",
13 |       "aaRange": {
14 |         "begin":0,
15 |         "end": 460
16 |       },
17 |       "data": [
18 |         {
19 |           "name": "PAI_markers",
20 |           "weight": 1,
21 |           "locations": {
22 |             "22": {"K":1, "default":0.5},
23 |             "33": {"I":1, "default":0.5},
24 |             "35": {"V":1, "default":0.5},
25 |             "37": {"F":1, "M":1, "T":2, "V":1, "default":0.5},
26 |             "119": {"D":1, "default":0.5},
27 |             "198": {"R":1, "default":0.5}
28 |           }
29 |         }
30 |       ]
31 |     }
32 |   ],
33 |   "mutLabels": {
34 |     "aaMutLabelMap": {
35 |       "PA:23K":["Baloxavir"],
36 |       "PA:34I":["Baloxavir"],
37 |       "PA:36V":["Baloxavir"],
38 |       "PA:38F":["Baloxavir"],
39 |       "PA:38M":["Baloxavir"],
40 |       "PA:38T":["Baloxavir"],
41 |       "PA:38V":["Baloxavir"],
42 |       "PA:120D":["Baloxavir"],
43 |       "PA:199R":["Baloxavir"]
44 |     }
45 |   }
46 | }
47 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/vic/na/CY073894/reference.fasta:
--------------------------------------------------------------------------------
 1 | >CY073894.1 Influenza B virus (B/Brisbane/60/2008) segment 6 sequence
 2 | ATGCTACCTTCAACTATACAAACGTTAACCCTATTTCTCACATCAGGGGGAGTATTATTA
 3 | TCACTATATGTGTCAGCTTCATTATCATACTTACTATATTCGGATATATTGCTAAAATTC
 4 | TCACCAACAGAAATAACTGCACCAACAATGCCATTGGATTGTGCAAACGCATCAAATGTT
 5 | CAGGCTGTGAACCGTTCTGCAACAAAAGGGGTGACACTTCTTCTCCCAGAACCGGAGTGG
 6 | ACATACCCGCGTTTATCTTGCCCGGGCTCAACCTTTCAGAAAGCACTCCTAATTAGCCCT
 7 | CATAGATTCGGAGAAACCAAAGGAAACTCAGCTCCCTTGATAATAAGGGAACCTTTTATT
 8 | GCTTGTGGACCAAATGAATGCAAACACTTTGCTCTAACCCATTATGCAGCCCAACCAGGG
 9 | GGATACTACAATGGAACAAGAGGAGACAGAAACAAGCTGAGGCATCTAATTTCAGTCAAA
10 | TTGGGCAAAATCCCAACAGTAGAAAACTCCATTTTCCACATGGCAGCATGGAGCGGGTCC
11 | GCGTGCCATGATGGTAAGGAATGGACATATATCGGAGTTGATGGCCCTGACAATAATGCA
12 | TTGCTCAAAGTAAAATATGGAGAAGCATATACTGACACATACCATTCCTATGCAAACAAA
13 | ATCCTAAGAACACAAGAAAGTGCCTGCAATTGCATCGGGGGAAATTGTTATCTTATGATA
14 | ACTGATGGCTCAGCTTCAGGTGTTAGTGAATGCAGATTTCTTAAGATTCGAGAGGGCCGA
15 | ATAATAAAAGAAATATTTCCAACAGGAAGAGTAAAACACACTGAGGAATGCACATGCGGA
16 | TTTGCCAGCAATAAAACCATAGAATGTGCCTGTAGAGATAACAGTTACACAGCAAAAAGA
17 | CCTTTTGTCAAATTAAACGTGGAGACTGATACAGCAGAAATAAGATTGATGTGCACAGAT
18 | ACTTATTTGGACACCCCCAGACCAAACGATGGAAGCATAACAGGCCCTTGTGAATCTAAT
19 | GGGGACAAAGGGAGTGGAGGCATCAAGGGAGGATTTGTTCATCAAAGAATGGAATCCAAG
20 | ATTGGAAGGTGGTACTCTCGAACGATGTCTAAAACTGAAAGGATGGGGATGGGACTGTAT
21 | GTCAAGTATGATGGAGACCCATGGGCTGACAGTGATGCCCTAGCTTTTAGTGGAGTAATG
22 | GTTTCAATGAAAGAACCTGGTTGGTACTCCTTTGGCTTCGAAATAAAAGATAAGAAATGC
23 | GATGTCCCCTGTATTGGGATAGAGATGGTACATGATGGTGGAAAAGAGACTTGGCACTCA
24 | GCAGCAACAGCCATTTACTGTTTAATGGGCTCAGGACAGCTGCTGTGGGACACTGTCACA
25 | GGTGTTGACATGGCTCTGTAA
26 | 


--------------------------------------------------------------------------------
/config/h3n2/na/reference.fasta:
--------------------------------------------------------------------------------
 1 | >CY114383 Organism:Influenza A virus|Strain Name:A/Wisconsin/67/2005|Segment:6|Subtype:H3N2
 2 | AAGATGAATCCAAATCAAAAGATAATAACGATTGGCTCTGTTTCTCTCACCATTTCCACAATATGCTTCT
 3 | TCATGCAAATTGCCATCTTGATAACTACTGTAACATTGCATTTCAAGCAATATGAATTCAACTCCCCCCC
 4 | AAACAACCAAGTGATGCTGTGTGAACCAACAATAATAGAAAGAAACATAACAGAGATAGTGTATCTGACC
 5 | AACACCACCATAGAGAAGGAAATATGCCCCAAACTAGCAGAATACAGAAATTGGTCAAAGCCGCAATGTA
 6 | ACATTACAGGATTTGCACCTTTTTCTAAGGACAATTCGATTAGGCTTTCCGCTGGTGGGGACATCTGGGT
 7 | GACAAGAGAACCTTATGTGTCATGCGATCCTGACAAGTGTTATCAATTTGCCCTTGGGCAGGGAACAACA
 8 | CTAAACAACGTGCATTCAAATGACACAGTACATGATAGGACCCCTTATCGGACCCTATTGATGAATGAGT
 9 | TAGGTGTTCCATTTCATCTGGGGACCAAGCAAGTGTGCATAGCATGGTCCAGCTCAAGTTGTCACGATGG
10 | AAAAGCATGGCTGCATGTTTGTGTAACGGGGGATGATAAAAATGCAACTGCTAGCTTCATTTACAATGGG
11 | AGGCTTGTAGATAGTATTGTTTCATGGTCCAAAGAAATCCTCAGGACCCAGGAGTCAGAATGCGTTTGTA
12 | TCAATGGAACTTGTACAGTAGTAATGACTGATGGGAGTGCTTCAGGAAAAGCTGATACTAAAATACTATT
13 | CATTGAGGAGGGGAAAATCGTTCATACTAGCACATTGTCAGGAAGTGCTCAGCATGTCGAGGAGTGCTCC
14 | TGCTATCCTCGATATCTTGGTGTCAGATGTGTCTGCAGAGACAACTGGAAAGGCTCCAATAGGCCCATAG
15 | TAGATATAAACATAAAGGATTATAGCATTGTTTCCAGTTATGTGTGCTCAGGACTTGTTGGAGACACACC
16 | CAGAAAAAACGACAGCTCCAGCAGTAGCCATTGCTTGGATCCTAACAATGAAGAAGGTGGTCATGGAGTG
17 | AAAGGCTGGGCCTTTGATGATGGAAATGACGTGTGGATGGGAAGAACGATCAGCGAGAAGTTACGCTCAG
18 | GATATGAAACCTTCAAAGTCATTGAAGGCTGGTCCAACCCTAATTCCAAATTGCAGATAAATAGGCAAGT
19 | CATAGTTGACAGAGGTAATAGGTCCGGTTATTCTGGTATTTTCTCTGTTGAAGGCAAAAGCTGCATCAAT
20 | CGGTGCTTTTATGTGGAGTTGATAAGGGGAAGAAAAGAGGAAACTGAAGTCTTGTGGACCTCAAACAGTA
21 | TTGTTGTGTTTTGTGGCACCTCAGGTACATATGGAACAGGCTCATGGCCTGATGGGGCGGACATCAATCT
22 | CATGCCTATATAAGCTTTCGCAATTTTAGAAAAAAC


--------------------------------------------------------------------------------
/.github/workflows/run-private-nextflu-builds.yaml:
--------------------------------------------------------------------------------
 1 | name: Run the private Nextflu builds
 2 | 
 3 | on:
 4 |   schedule:
 5 |     # Scheduled to run at 5pm UTC (9am PST/10am PDT) on the first Friday of the month
 6 |     # cron hack based on <https://blog.healthchecks.io/2022/09/schedule-cron-job-the-funky-way/>
 7 |     - cron: '0 17 */100,1-7 * FRI'
 8 | 
 9 |   workflow_dispatch:
10 |     inputs:
11 |       dockerImage:
12 |         description: "Specific container image to use for build (will override the default of `nextstrain build`)"
13 |         required: false
14 |         type: string
15 | 
16 | jobs:
17 |   run-build:
18 |     permissions:
19 |       id-token: write
20 |     uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
21 |     secrets: inherit
22 |     with:
23 |       runtime: aws-batch
24 |       env: |
25 |         NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }}
26 |       run: |
27 |         nextstrain build \
28 |           --detach \
29 |           --cpus 36 \
30 |           --memory 72gib \
31 |           . \
32 |           all_who \
33 |           -p \
34 |           --configfile profiles/private.nextflu.org.yaml
35 | 
36 |   deploy-private-nextflu:
37 |     needs: [run-build]
38 |     runs-on: ubuntu-latest
39 |     steps:
40 |       - name: Trigger deploy-private-nextflu
41 |         run: |
42 |           gh workflow run \
43 |             deploy-private-nextflu.yaml \
44 |             --repo nextstrain/seasonal-flu \
45 |             -f aws_batch_job_id=${{ needs.run-build.outputs.aws-batch-job-id }} \
46 |             -f deploy_to_staging=false
47 |         env:
48 |           GITHUB_TOKEN: ${{ github.token }}
49 | 


--------------------------------------------------------------------------------
/config/vic/outliers.txt:
--------------------------------------------------------------------------------
 1 | A/Malaysia/438/2016
 2 | B/Alagoas/4386/2023
 3 | B/Auckland/1/2008
 4 | B/Bangkok/SI17/2012
 5 | B/Bangkok/SI58/2012
 6 | B/Bari/53/2023
 7 | B/Brisbane/14/2016
 8 | B/Brisbine/33/2008
 9 | B/California/87/2017-egg
10 | B/Cambodia/26/2011
11 | B/Cambodia/30/2011
12 | B/Cambodia/62/2011
13 | B/Cambodia/89/2011
14 | B/Cambodia/V1005378/2011
15 | B/Darwin/14/2011
16 | B/Guangdong-Jinping/8139/2022
17 | B/Guizhou-Bijiang/1344/2022
18 | B/Hunan-Wuling/11182/2021
19 | B/Jiangsu-Suzhou/7-1123A11/2021
20 | B/Jiangsu-Suzhou/7-1123A14/2021
21 | B/Kol/2024/2008
22 | B/Kolkata/1373/2008
23 | B/Kolkata/2024/2008
24 | B/Kolkata/372/2010
25 | B/Krabi/FS002/2022
26 | B/Lisbon/niSU182_17-18/2018
27 | B/Malaysia/RP0995/2020
28 | B/Moscow/137-90V/2021
29 | B/NakhonPhanom/P3759/2022
30 | B/Netherlands/76/2014
31 | B/Netherlands/883/2016
32 | B/NewCaledonia/119/2015
33 | B/Novosibirsk/RII-26903S/2020
34 | B/Philippines/28/2019
35 | B/RheinlandPfalz/43/2016
36 | B/Shanghai-Qingpu/1539/2021
37 | B/SouthAustralia/81/2012
38 | B/Stockholm/7/2011
39 | B/SuratThani/P4519/2022
40 | B/Sydney/6/2016
41 | B/Thailand/CU-B11637/2015
42 | B/Togo/LNG/419/2013
43 | B/Virginia/20/2018
44 | B/Xinjiang-Tianshan/31/2021
45 | B/Zhejiang-Lanxi/320/2019
46 | B/Zhejiang-Linhai/11097/2019
47 | B/Zhejiang-Wuxin/1211/2021
48 | B/Zhejiang-Yiwu/1101/2021
49 | B/Zhejiang-Yiwu/1104/2021
50 | B/Zhejiang-Yiwu/1168/2020
51 | B/Zhejiang-Yiwu/1226/2021
52 | B/Zhejiang-Yongkang/1274/2021
53 | B/Ukraine/2194/2024 
54 | B/Ukraine/2308/2024 
55 | B/Ukraine/2309/2024 
56 | B/Ukraine/2270/2024 
57 | B/Ukraine/2276/2024 
58 | B/Ukraine/2417/2024 
59 | B/Ukraine/2318/2024 
60 | B/Ukraine/2319/2024 
61 | 


--------------------------------------------------------------------------------
/scripts/import_tip_clades.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Take clades.json file that lists:
 3 | {
 4 |  "nodes": {
 5 |   "A/AbuDhabi/16/2017": {
 6 |    "clade_membership": "A1b/135N"
 7 |   },
 8 | ...
 9 | and creates a new file that keeps the 'clade_membership' of tips only and omits internal nodes.
10 | """
11 | 
12 | import argparse
13 | import Bio
14 | import Bio.Phylo
15 | import json
16 | 
17 | 
if __name__ == '__main__':
    # Command-line interface: the tree, the clade assignments and the
    # destination file are all mandatory.
    cli = argparse.ArgumentParser(
        description="Import clade membership",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    for flag, help_text in (
        ("--tree", "Newick tree originally used to assign tips to clades"),
        ("--clades", "JSON file with clade memberships"),
        ("--output", "JSON file with scrubbed clade memberships"),
    ):
        cli.add_argument(flag, required=True, help=help_text)
    args = cli.parse_args()

    tree = Bio.Phylo.read(args.tree, 'newick')

    with open(args.clades) as clades_handle:
        json_data = json.load(clades_handle)

    # Keep the clade membership of tips only. Internal nodes of the tree that
    # was originally used for the assignment are deliberately left out of the
    # output. A tip that is missing from the clades JSON raises a KeyError.
    tip_memberships = {
        tip.name: {
            'clade_membership': json_data['nodes'][tip.name]['clade_membership']
        }
        for tip in tree.find_clades()
        if tip.is_terminal()
    }

    with open(args.output, 'w') as out_handle:
        json.dump({'nodes': tip_memberships}, out_handle, indent=1, sort_keys=True)
44 | 


--------------------------------------------------------------------------------
/config/h1n1pdm/na/reference.fasta:
--------------------------------------------------------------------------------
 1 | >CY121682.1 Influenza A virus (A/California/07/2009(H1N1)) neuraminidase (NA) gene, complete cds
 2 | AGTTTAAAATGAATCCAAACCAAAAGATAATAACCATTGGTTCGGTCTGTATGACAATTG
 3 | GAATGGCTAACTTAATATTACAAATTGGAAACATAATCTCAATATGGATTAGCCACTCAA
 4 | TTCAACTTGGGAATCAAAATCAGATTGAAACATGCAATCAAAGCGTCATTACTTATGAAA
 5 | ACAACACTTGGGTAAATCAGACATATGTTAACATCAGCAACACCAACTTTGCTGCTGGAC
 6 | AGTCAGTGGTTTCCGTGAAATTAGCGGGCAATTCCTCTCTCTGCCCTGTTAGTGGATGGG
 7 | CTATATACAGTAAAGACAACAGTGTAAGAATCGGTTCCAAGGGGGATGTGTTTGTCATAA
 8 | GGGAACCATTCATATCATGCTCCCCCTTGGAATGCAGAACCTTCTTCTTGACTCAAGGGG
 9 | CCTTGCTAAATGACAAACATTCCAATGGAACCATTAAAGACAGGAGCCCATATCGAACCC
10 | TAATGAGCTGTCCTATTGGTGAAGTTCCCTCTCCATACAACTCAAGATTTGAGTCAGTCG
11 | CTTGGTCAGCAAGTGCTTGTCATGATGGCATCAATTGGCTAACAATTGGAATTTCTGGCC
12 | CAGACAATGGGGCAGTGGCTGTGTTAAAGTACAACGGCATAATAACAGACACTATCAAGA
13 | GTTGGAGAAACAATATATTGAGAACACAAGAGTCTGAATGTGCATGTGTAAATGGTTCTT
14 | GCTTTACTGTAATGACCGATGGACCAAGTAATGGACAGGCCTCATACAAGATCTTCAGAA
15 | TAGAAAAGGGAAAGATAGTCAAATCAGTCGAAATGAATGCCCCTAATTATCACTATGAGG
16 | AATGCTCCTGTTATCCTGATTCTAGTGAAATCACATGTGTGTGCAGGGATAACTGGCATG
17 | GCTCGAATCGACCGTGGGTGTCTTTCAACCAGAATCTGGAATATCAGATAGGATACATAT
18 | GCAGTGGGATTTTCGGAGACAATCCACGCCCTAATGATAAGACAGGCAGTTGTGGTCCAG
19 | TATCGTCTAATGGAGCAAATGGAGTAAAAGGGTTTTCATTCAAATACGGCAATGGTGTTT
20 | GGATAGGGAGAACTAAAAGCATTAGTTCAAGAAACGGTTTTGAGATGATTTGGGATCCGA
21 | ACGGATGGACTGGGACAGACAATAACTTCTCAATAAAGCAAGATATCGTAGGAATAAATG
22 | AGTGGTCAGGATATAGCGGGAGTTTTGTTCAGCATCCAGAACTAACAGGGCTGGATTGTA
23 | TAAGACCTTGCTTCTGGGTTGAACTAATCAGAGGGCGACCCAAAGAGAACACAATCTGGA
24 | CTAGCGGGAGCAGCATATCCTTTTGTGGTGTAAACAGTGACACTGTGGGTTGGTCTTGGC
25 | CAGACGGTGCTGAGTTGCCATTTACCATTGACAAGTAATTTGTTCAAAAAAC
26 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/na/MW626056/reference.fasta:
--------------------------------------------------------------------------------
 1 | >MW626056.1 Influenza A virus (A/Wisconsin/588/2019(H1N1)) segment 6 neuraminidase (NA) gene, complete cds
 2 | AGTTTAAAATGAATCCAAACCAAAAGATAATAACCATTGGTTCTATCTGTATGACAATTG
 3 | GAACGGCTAACTTAATATTACAAATTGGAAACATAATCTCAATATGGGTTAGCCACTCAA
 4 | TTCAAATTGGAAATCAAAGCCAGATTGAAACATGCAATAAAAGCGTCATTACTTATGAAA
 5 | ACAACACTTGGGTAAATCAGACATTTGTTAACATCAGCAACACTAACTCTGCTGCTAGAC
 6 | AGTCAGTGGCTTCCGTGAAATTAGCGGGCAATTCCTCTCTCTGCCCTGTTAGTGGATGGG
 7 | CTATATACAGTAAAGACAACAGTGTAAGAATCGGTTCCAAGGGGGATGTGTTTGTCATAA
 8 | GGGAACCATTCATATCATGCTCTCCCTTGGAATGCAGAACCTTCTTCTTGACTCAAGGGG
 9 | CTTTGCTAAATGACAAACATTCCAATGGAACCATTAAAGACAGAAGCCCATATCGAACCC
10 | TAATGAGCTGTCCTATTGGTGAAGTTCCCTCTCCATACAACTCAAGATTTGAGTCAGTCG
11 | CTTGGTCAGCAAGTGCTTGTCATGATGGCACCAATTGGCTAACAATTGGAATTTCTGGCC
12 | CAGACAGTGGGGCAGTGGCTGTGTTAAAATACAATGGCATAATAACAGACACTATCAAGA
13 | GTTGGAGGAACAAGATATTGAGAACACAAGAGTCTGAATGTGCATGTGTAAATGGTTCTT
14 | GCTTTACCATAATGACCGATGGACCAAGTGATGGACAGGCCTCATACAAAATCTTCAGAA
15 | TAGAAAAGGGAAAGATAATCAAATCAGTCGAAATGAAAGCCCCTAATTATCACTATGAAG
16 | AATGCTCCTGTTACCCTGATTCTAGTGAAATCACATGTGTGTGCAGGGATAACTGGCATG
17 | GCTCGAATCGACCGTGGGTGTCTTTCAACCAGAATCTGGAATATCAGATGGGATACATAT
18 | GCAGTGGGGTTTTCGGAGACAATCCACGCCCTAATGATAAGACAGGCAGTTGTGGTCCAG
19 | TATCGTCTAATGGAGCAAATGGGGTAAAAGGATTTTCATTCAAATACGGCAATGGTGTTT
20 | GGATAGGGAGAACTAAGAGCATTAGTTCAAGAAAAGGTTTTGAGATGATTTGGGATCCGA
21 | ATGGATGGACTGGGACTGACAATAAATTCTCAAAAAAGCAAGATATCGTAGGAATAAATG
22 | AGTGGTCAGGGTATAGCGGGAGTTTTGTTCAGCATCCAGAACTAACAGGGCTGAATTGTA
23 | TAAGACCTTGCTTCTGGGTTGAACTAATAAGAGGACGACCCGAAGAGAACACAATCTGGA
24 | CTAGCGGGAGCAGCATATCCTTTTGTGGTGTAGACAGTGACATTGTGGGTTGGTCTTGGC
25 | CAGACGGTGCTGAGTTGCCATTTACCATTGACAAGTAATTTGTTCAAAAAACT
26 | 


--------------------------------------------------------------------------------
/scripts/glyc.py:
--------------------------------------------------------------------------------
 1 | import argparse, json
 2 | from random import sample
 3 | import numpy as np
 4 | from Bio import Phylo, AlignIO
 5 | import re
 6 | 
 7 | def glycosylation_count(total_aa_seq, glyc_mask=None):
 8 |     if glyc_mask is None:
 9 |         glyc_mask = np.ones(len(total_aa_seq), dtype=bool)
10 | 
11 |     # TODO: need to restrict to surface residues.
12 |     total_aa_seq_masked = "".join([aa if mask else 'X'
13 |                                    for (mask, aa) in zip(glyc_mask, total_aa_seq)])
14 | 
15 |     return len(re.findall('N[^P][ST][^P]', total_aa_seq_masked))
16 | 
17 | 
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Annotate every tree node with its glycosylation motif count relative to the root sequence.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument('--tree', type=str, required=True,
                        help="newick file with the tree")
    # Both of these are used unconditionally below, so a missing value should
    # be reported by argparse rather than surface as an error from inside
    # AlignIO.read() or open().
    parser.add_argument('--alignment', type=str, required=True,
                        help="fasta file with ancestral translations")
    parser.add_argument('--output', type=str, required=True,
                        help="JSON file to write the per-node glycosylation counts to")

    args = parser.parse_args()

    T = Phylo.read(args.tree, 'newick')

    # The alignment must contain a record for every node name in the tree
    # (root, internal nodes and tips); a missing name raises a KeyError.
    aln = {s.name:str(s.seq) for s in AlignIO.read(args.alignment, 'fasta')}
    root_glyc = glycosylation_count(aln[T.root.name])

    # Store each node's motif count as a difference from the root's count.
    glyc_json = {
        n.name: {'glyc': glycosylation_count(aln[n.name]) - root_glyc}
        for n in T.find_clades(order='preorder')
    }

    with open(args.output, 'wt') as fh:
        json.dump({'nodes':glyc_json, 'comment':"glycosylation motif count in HA1/NA relative to root sequence."}, fh)
42 | 
43 | 


--------------------------------------------------------------------------------
/profiles/nextflu-private-forecasts/rename.smk:
--------------------------------------------------------------------------------
 1 | 
 2 | rule all_public:
 3 |     input:
 4 |         [
 5 |             "auspice_renamed/" + build.get("auspice_name", f"{build_name}_{{segment}}").format(segment=segment) + suffix + ".json"
 6 |             for build_name, build in config["builds"].items()
 7 |             for segment in config["segments"]
 8 |             for suffix in ["", "_tip-frequencies", "_measurements"]
 9 |         ],
10 | 
def _get_file_by_auspice_name(wildcards):
    """Return the original Auspice JSON path for a renamed dataset.

    Each build in ``config["builds"]`` may define an ``auspice_name`` template
    containing a ``{segment}`` placeholder; builds without one fall back to
    ``<build_name>_{segment}``.  The first build/segment pair whose formatted
    name equals ``wildcards.auspice_name`` determines the result.

    Returns ``auspice/<build_name>_<segment>.json`` for that pair, or an empty
    string when no pair matches.
    """
    candidates = (
        f"auspice/{build_name}_{segment}.json"
        for build_name, build_params in config["builds"].items()
        for segment in config["segments"]
        if wildcards.auspice_name == build_params.get(
            "auspice_name", f"{build_name}_{{segment}}"
        ).format(segment=segment)
    )
    # The generator is lazy, so the scan stops at the first match.
    return next(candidates, "")
18 | 
19 | rule rename_auspice_main:
20 |     input:
21 |         _get_file_by_auspice_name,
22 |     output:
23 |         "auspice_renamed/{auspice_name}.json",
24 |     shell:
25 |         """
26 |         ln {input} {output}
27 |         """
28 | 
29 | rule rename_auspice_tip_frequencies:
30 |     input:
31 |         lambda wildcards: _get_file_by_auspice_name(wildcards).replace(".json", "_tip-frequencies.json"),
32 |     output:
33 |         "auspice_renamed/{auspice_name}_tip-frequencies.json",
34 |     shell:
35 |         """
36 |         ln {input} {output}
37 |         """
38 | 
39 | rule rename_measurements:
40 |     input:
41 |         lambda wildcards: _get_file_by_auspice_name(wildcards).replace(".json", "_measurements.json"),
42 |     output:
43 |         "auspice_renamed/{auspice_name}_measurements.json",
44 |     shell:
45 |         """
46 |         ln {input} {output}
47 |         """
48 | 


--------------------------------------------------------------------------------
/config/vic/na/reference.fasta:
--------------------------------------------------------------------------------
 1 | >CY018815.1 Influenza B virus (B/Hong Kong/02/1993) segment 6, complete sequence
 2 | AAACTGAAGCAAATAAGCCAAAAATGAACAATGCTACCTTCAACTATACAAACGTTAACC
 3 | CTATTTCTCACATCAGGGGGAGTGTTATTATCACTATATGTGTCAGCCTTACTGTCATAC
 4 | TCACTGTATTCGGATATATTGCTAAAATTTTCACCAACAAAAACAATCGCACCAACAATG
 5 | TCGCTGGACTGCGCGAACGCATCAAATGTTCAGGCTGTGAACCATTCTGCAACAAAAGGG
 6 | ATGACACTTCTTCTCCCAGAACCGGAGTGGACATACCCTCGTTTATCTTGCCAGGGCTCA
 7 | ACTTTCCAGAAAGCACTCCTAATTAGCCCTCATAGATTCGGAGAAACCAAAGGAAACTCA
 8 | GCTCCCTTGATAATAAGGGAACCCTTTATTGCTTGTGGACCAAAGGAGTGCAAACACTTT
 9 | GCTCTAACCCATTATGCAGCTCAACCAGGGGGATACTACAATGGAACAAGAGAGGACAGA
10 | AACAAGCTGAGGCATTTGATTTCAGTCAGCTTAGGCAAAATCCCAACTGTAGAAAACTCC
11 | ATTTTCCACATGGCAGCTTGGAGTGGATCCGCATGCCATGATGGTAGAGAATGGACATAT
12 | ATCGGAGTTGATGGCCCTGACAGTAATGCATTGATCAAAATAAAATATGGAGAAGCATAC
13 | ACTGACACATACCATTCCTATGCAAACAACATCCTAAGAACACAAGAAAGTGCCTGCAAT
14 | TGCATCGGGGGAGATTGTTATCTTATGATAACCGATGGCTCAGCTTCAGGAATTAGTAAA
15 | TGCAGATTTCTTAAGATTCGAGAGGGTCGAATAATAAAAGAAATATTTCCAACAGGAAGA
16 | GTAGATCATACTGAAGAATGCACATGCGGATTTGCCAGCAATAAAACCATAGAATGTGCC
17 | TGTAGAGATAACAGTTACACAGCAAAAAGACCCTTTGTCAAATTAAATGTGGAGACTGAT
18 | ACAGCTGAAATAAGATTGATGTGCACAGAGACTTATTTGGACACCCCCAGACCAGATGAT
19 | GGAAGCATAACAGGGCCTTGCGAATCCAATGGGGACAAAGGGCGTGGAGGTATCAAGGGA
20 | GGATTTGTCCATCAAAGAATGGCATCCAAGATTGGAAGATGGTACTCCCGAACGATGTCT
21 | AAAACTGAAAGACTGGGGATGGAACTGTATGTCAAGTATGATGGAGACCCATGGACTGAC
22 | AGTGACGCCCTTGCTCCTAGTGGAGTAATGGTCTCAGCGGAAGAACCTGGTTGGTATTCT
23 | TTCGGCTTCGAAATAAAAGATAAGAAATGTGATGTCCCCTGTATTGGGATAGAGATGGTA
24 | CACGATGGTGGAAAAAAGACTTGGCACTCAGCAGCAACAGCCATTTACTGTTTAATGGGC
25 | TCAGGACAGTTGCTATGGGACACTGTCACAGGCGTTGATATGGCTCTGTAATGGAGGAAT
26 | GGTTGAATCTGTTCTAAACCCTTTACTCCTATTTTGTTTGAACAATTGTCCTTACTGGAC
27 | TTAATTGTTTCTGAAA
28 | 


--------------------------------------------------------------------------------
/config/yam/na/reference.fasta:
--------------------------------------------------------------------------------
 1 | >CY019709.1 Influenza B virus (B/Singapore/11/1994) segment 6, complete sequence
 2 | AAACTGAGGCAAATAGGCCAAAAATGAACAATGCTACCTTCAACTATACAAACGTTAACC
 3 | CTATTTCTCACATCAGGGGGAGTGCTATTATCACTATATGTGTCAGCTTCACTGTCATAC
 4 | TTACTGTATTCGGGTATATTGCTAAAATTTTCACCAACAGAAATAACTGCACCAACAATG
 5 | CCATTGGATTGTGCAAACGCATCAAATGTTCAGGCTGTGAACCGTTCTGCAACAAAAGGG
 6 | GTGACACTTCTTCTCCCAGAACCGGAGTGGACATACCCTCGTTTATCTTGCCCGGGCTCA
 7 | ACCTTTCAGAAAGCACTCCTAATTAGCCCTCATAGATTCGGAGAAACCAGAGGAAACTCA
 8 | GCTCCCTTGATAATAAGGGAACCTTTTATTGCTTGTGGACCAAAGGAATGCAAACACTTT
 9 | GCTCTAACCCATTATGCAGCTCAACCAGGGGGATACTACAATGGAACAAGAGAAGACAGA
10 | AACAAGCTGAGGCATCTAATTTCAGTCAAATTGGGCAAAATCCCAACAGTAGAAAACTCC
11 | ATTTTCCACATGGCAGCTTGGAGCGGGTCCGCATGCCATGATGGTAGAGAATGGACATAT
12 | ATCGGAGTTGATGGCCCTGACAGTAATGCATTGCTCAAAATAAAATATGGAGAAGCATAT
13 | ACTGACACATACCATTCCTATGCAAACAACATCCTAAGAACACAAGAAAGTGCCTGCAAT
14 | TGCATCGGGGGAGATTGTTATCTTATGATAACTGATGGCTCAGCTTCAGGGATTAGTAAA
15 | TGCAGATTTCTTAAGATTCGAGAGGGCCGAATAATAAAAGAAATATTCCCAACAGGAAGA
16 | GTAGAACATACTGAAGAATGCACATGCGGATTTGCCAGCAATAAAACCATAGAATGTGCC
17 | TGTAGAGATAACAGTTACACAGCAAAAAGACCCTTTGTCAAATTAAATGTGGAGACTGAT
18 | ACAGCGGAAATAAGATTGATGTGCACAGAGACTTATTTGGACACCCCCAGACCAGATGAT
19 | GGAAGCATAACAGGGCCTTGCGAATCTAATGGGGATAAAGGAAGTGGAGGCATCAAGGGA
20 | GGATTTGTTCATCAAAGAATGGCATCCAAGATTGGAAGGTGGTACTCTCGAACGATGTCT
21 | AAAACTAAAAGGATGGGGATGGGACTGTATGTCAAGTATGATGGAGACCCATGGACTGAC
22 | AGTGACGCCCTTGCTCTTAGTGGAGTAATGGTTTCAATGGAAGAACCTGGTTGGTATTCC
23 | TTTGGCTTCGAAATAAAAGATAAGAAATGTGATGTCCCCTGTATTGGGATAGAGATGGTA
24 | CATGATGGTGGAAAAAAGACTTGGCACTCAGCAGCAACAGCCATTTACTGTTTAATGGGC
25 | TCAGGACAACTGCTATGGGACACTGTCACAGGTGTTGATATGGCTCTGTAATGTAGGAAT
26 | GGTTGAGTCTGTTCTAAACCCTTTGTTCCTATTTTGTTTGAACAATTGTCCTTACTGAAC
27 | TTAATTGTTTCTGAAA
28 | 


--------------------------------------------------------------------------------
/.github/workflows/run-nextclade.yaml:
--------------------------------------------------------------------------------
 1 | name: Run Nextclade on all sequences
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |     inputs:
 6 |       dockerImage:
 7 |         description: "Specific container image to use for build (will override the default of `nextstrain build`)"
 8 |         required: false
 9 |         type: string
10 |       nextcladeServer:
11 |         description: "URL for a Nextclade server from which Nextclade datasets should be downloaded instead of the default public server"
12 |         required: false
13 |         type: string
14 |       artifact-name:
15 |         description: "Name to use for final artifact uploaded by this action"
16 |         required: false
17 |         type: string
18 |         default: "build-outputs-nextclade"
19 |   workflow_call:
20 |     inputs:
21 |       artifact-name:
22 |         description: "Name to use for final artifact uploaded by this action"
23 |         required: false
24 |         type: string
25 |         default: "build-outputs-nextclade"
26 | 
27 | jobs:
28 |   run-build:
29 |     permissions:
30 |       id-token: write
31 |     uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
32 |     secrets: inherit
33 |     with:
34 |       artifact-name: ${{ inputs.artifact-name }}
35 |       runtime: aws-batch
36 |       env: |
37 |         NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }}
38 |       run: |
39 |         nextstrain build \
40 |           --detach \
41 |           --cpus 36 \
42 |           --memory 72gib \
43 |           . \
44 |           upload_all_nextclade_files \
45 |           -p \
46 |           --configfile profiles/nextclade.yaml \
47 |           --config nextclade_server="${{ inputs.nextcladeServer }}" \
48 |           --set-threads run_nextclade=12
49 | 


--------------------------------------------------------------------------------
/profiles/ci/builds.yaml:
--------------------------------------------------------------------------------
 1 | custom_rules:
 2 |   - profiles/ci/prepare_data.smk
 3 | 
 4 | fasta_fields:
 5 |   - strain
 6 |   - virus
 7 |   - accession
 8 |   - date
 9 |   - date_submitted
10 |   - region
11 |   - country
12 |   - division
13 |   - location
14 |   - passage_category
15 |   - originating_lab
16 |   - submitting_lab
17 |   - age
18 |   - gender
19 | prettify_fields:
20 |   - region
21 |   - country
22 |   - division
23 |   - location
24 |   - originating_lab
25 |   - submitting_lab
26 | 
27 | lat-longs: "config/lat_longs.tsv"
28 | 
29 | segments:
30 |   - ha
31 |   - na
32 | 
33 | tree:
34 |   tree-builder-args: "'-ninit 10 -n 4 -czb'"
35 | 
36 | submission_date_field: date_submitted
37 | recency:
38 |   date_bins: [7, 30, 90]
39 |   date_bin_labels: ["last week", "last month", "last quarter"]
40 |   upper_bin_label: older
41 | 
42 | embedding:
43 |   # Set lower value of perplexity when sample size is small.
44 |   perplexity: 9
45 | 
46 | builds:
47 |     "ci_build":
48 |       lineage: h3n2
49 |       reference: "config/h3n2/{segment}/reference.fasta"
50 |       annotation: "config/h3n2/{segment}/genemap.gff"
51 |       tree_exclude_sites: "config/h3n2/{segment}/exclude-sites.txt"
52 |       clades: "config/h3n2/ha/clades.tsv"
53 |       subclades: "config/h3n2/{segment}/subclades.tsv"
54 |       min_date: "12Y"
55 |       auspice_config: "config/h3n2/ha/auspice_config.json"
56 |       enable_titer_models: true
57 |       enable_lbi: true
58 |       enable_glycosylation: true
59 |       enable_embeddings: true
60 |       titer_collections:
61 |         - name: cdc_cell_fra
62 |           data: "example_data/cdc_h3n2_cell_fra_titers.tsv"
63 |       subsamples:
64 |         global:
65 |             filters: "--exclude-where 'ha!=True' 'na!=True'"
66 | 


--------------------------------------------------------------------------------
/config/distance_maps/h3n2/ha/shih.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "default": 0,
 3 |     "map": {
 4 |         "HA1": {
 5 |             "105": 1,
 6 |             "106": 1,
 7 |             "108": 1,
 8 |             "110": 1,
 9 |             "115": 1,
10 |             "117": 1,
11 |             "119": 1,
12 |             "121": 1,
13 |             "126": 1,
14 |             "127": 1,
15 |             "128": 1,
16 |             "129": 1,
17 |             "130": 1,
18 |             "139": 1,
19 |             "140": 1,
20 |             "141": 1,
21 |             "142": 1,
22 |             "143": 1,
23 |             "144": 1,
24 |             "147": 1,
25 |             "148": 1,
26 |             "156": 1,
27 |             "157": 1,
28 |             "158": 1,
29 |             "170": 1,
30 |             "172": 1,
31 |             "173": 1,
32 |             "174": 1,
33 |             "176": 1,
34 |             "177": 1,
35 |             "180": 1,
36 |             "181": 1,
37 |             "186": 1,
38 |             "191": 1,
39 |             "197": 1,
40 |             "201": 1,
41 |             "210": 1,
42 |             "211": 1,
43 |             "226": 1,
44 |             "228": 1,
45 |             "232": 1,
46 |             "244": 1,
47 |             "246": 1,
48 |             "259": 1,
49 |             "260": 1,
50 |             "262": 1,
51 |             "283": 1,
52 |             "291": 1,
53 |             "34": 1,
54 |             "37": 1,
55 |             "38": 1,
56 |             "41": 1,
57 |             "46": 1,
58 |             "47": 1,
59 |             "51": 1,
60 |             "59": 1,
61 |             "62": 1,
62 |             "65": 1,
63 |             "66": 1,
64 |             "67": 1,
65 |             "76": 1,
66 |             "78": 1
67 |         }
68 |     },
69 |     "name": "shih"
70 | }


--------------------------------------------------------------------------------
/config/nextstrain_clades_h3n2_ha.tsv:
--------------------------------------------------------------------------------
 1 | clade	gene	site	alt
 2 | 3b	HA1	145	S
 3 | 3b	HA1	159	F
 4 | 3b	HA1	160	K
 5 | 3b	HA1	198	S
 6 | 3b	HA1	223	I
 7 | 3b	HA1	312	S
 8 | 3b	HA2	158	N
 9 | 3b	nuc	1195	C
10 | 3b	nuc	1671	G
11 | 3c	HA1	48	I
12 | 3c	HA1	45	N
13 | 3c	nuc	456	T
14 | 3c2	HA2	160	N
15 | 3c2	nuc	693	A
16 | 3c2	nuc	1518	G
17 | 3c3	HA1	128	A
18 | 3c3	HA1	142	G
19 | 3c3	nuc	1296	A
20 | 3c2.A	HA1	159	Y
21 | 3c2.A	nuc	1260	A
22 | 3c2.A	HA1	3	I
23 | 3c2.A	HA1	144	S
24 | 3c2.A	HA1	160	T
25 | 3c3.A	HA1	159	S
26 | 3c3.A	HA1	225	D
27 | 3c3.A	HA1	138	S
28 | 3c3.A	HA1	326	R
29 | 3c3.B	HA1	83	R
30 | 3c3.B	HA1	261	Q
31 | 3c3.B	HA1	62	K
32 | 3c3.B	HA2	18	K
33 | A1	HA1	171	K
34 | A1	HA2	77	V
35 | A1	HA2	155	E
36 | A1a	HA1	171	K
37 | A1a	HA2	77	V
38 | A1a	HA2	150	E
39 | A1a	HA2	155	E
40 | A1a	nuc	81	A
41 | A1a	nuc	114	T
42 | A1a	nuc	1484	A
43 | A1b	HA1	92	R
44 | A1b	HA1	311	Q
45 | A1b	nuc	264	G
46 | A1b	nuc	538	C
47 | A1b/131K	HA1	131	K
48 | A1b/131K	HA1	62	G
49 | A1b/131K	HA1	142	G
50 | A1b/135K	HA1	135	K
51 | A1b/135K	HA1	62	G
52 | A1b/135K	HA1	142	G
53 | A1b/135N	HA1	135	N
54 | A1b/135N	nuc	81	G
55 | A1b/137F	HA1	135	K
56 | A1b/137F	HA1	62	G
57 | A1b/137F	HA1	142	G
58 | A1b/137F	HA1	193	S
59 | A1b/186D	HA1	135	K
60 | A1b/186D	HA1	186	D
61 | A1b/186D	HA1	190	N
62 | A1b/197R	HA1	131	K
63 | A1b/197R	HA1	62	G
64 | A1b/197R	HA1	142	G
65 | A1b/197R	HA1	197	R
66 | A1b/159N	HA1	159	N
67 | A1b/159N	HA1	160	I
68 | A1b/186S	HA1	186	S
69 | A1b/186S	HA1	198	P
70 | A1b/94N	nuc	328	A
71 | A2	HA1	261	Q
72 | A2	HA1	142	K
73 | A2	nuc	1485	T
74 | A2/re	nuc	1689	T
75 | A2/re	nuc	1125	A
76 | A3	HA1	121	K
77 | A3	nuc	1134	G
78 | A3	nuc	1320	T
79 | A4	HA1	192	T
80 | A4	HA1	197	H
81 | A4	HA1	31	S
82 | A4	HA1	53	N
83 | A4	HA1	144	R
84 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/np/reference.fasta:
--------------------------------------------------------------------------------
 1 | >NC_026436.1 Influenza A virus (A/California/07/2009(H1N1)) segment 5 nucleocapsid protein (NP) gene, complete cds
 2 | ATGGCGTCTCAAGGCACCAAACGATCATATGAACAAATGGAGACTGGTGGGGAGCGCCAG
 3 | GATGCCACAGAAATCAGAGCATCTGTCGGAAGAATGATTGGTGGAATCGGGAGATTCTAC
 4 | ATCCAAATGTGCACTGAACTCAAACTCAGTGATTATGATGGACGACTAATCCAGAATAGC
 5 | ATAACAATAGAGAGGATGGTGCTTTCTGCTTTTGATGAGAGAAGAAATAAATACCTAGAA
 6 | GAGCATCCCAGTGCTGGGAAGGACCCTAAGAAAACAGGAGGACCCATATATAGAAGAGTA
 7 | GACGGAAAGTGGATGAGAGAACTCATCCTTTATGACAAAGRAGAAATAAGGAGAGTTTGG
 8 | CGCCTAGCAAACAATGGCGAAGATGCAACAGCAGGTCTTACTCATATCATGATTTGGCAT
 9 | TCCAACCTGAATGATGCCACATATCAGAGAACAAGAGCGCTTGTTCGCACCGGAATGGAT
10 | CCCAGAATGTGCTCTCTAATGCAAGGTTCAACACTTCCCAGAAGGTCTGGTGCCGCAGGT
11 | GCTGCGGTGAAAGGAGTTGGAACAATAGCAATGGAGTTAATCAGAATGATCAAACGTGGA
12 | ATCAATGACCGAAATTTCTGGAGGGGTGAAAATGGACGAAGGACAAGGGTTGCTTATGAA
13 | AGAATGTGCAATATCCTCAAAGGAAAATTTCAAACAGCTGCCCAGAGGGCAATGATGGAT
14 | CAAGTAAGAGAAAGTCGAAACCCAGGAAACGCTGAGATTGAAGACCTCATTTTCCTGGCA
15 | CGGTCAGCACTCATTCTGAGGGGATCAGTTGCACATAAATCCTGCCTGCCTGCTTGTGTG
16 | TATGGGCTTGCAGTAGCAAGTGGGCATGACTTTGAAAGGGAAGGGTACTCACTGGTCGGG
17 | ATAGACCCATTCAAATTACTCCAAAACAGCCAAGTGGTCAGCCTGATGAGACCAAATGAA
18 | AACCCAGCTCACAAGAGTCAATTGGTGTGGATGGCATGCCACTCTGCTGCATTTGAAGAT
19 | TTAAGAGTATCAAGTTTCATAAGAGGAAAGAAAGTGATTCCAAGAGGAAAGCTTTCCACA
20 | AGAGGGGTCCAGATTGCTTCAAATGAGAATGTGGAAACCATGGACTCCAATACCCTGGAA
21 | CTGAGAAGCAGATACTGGGCCATAAGGACCAGGAGTGGAGGAAATACCAATCAACAAAAG
22 | GCATCCGCAGGCCAGATCAGTGTGCAGCCTACATTCTCAGTGCAGCGGAATCTCCCTTTT
23 | GAAAGAGCAACCGTTATGGCAGCATTCAGCGGGAACAATGAAGGACGGACATCCGACATG
24 | CGAACAGAAGTTATAAGAATGATGGAAAGTGCAAAGCCAGAAGATTTGTCCTTCCAGGGG
25 | CGGGGAGTCTTCGAGCTCTCGGACGAAAAGGCAACGAACCCGATCGTGCCTTCCTTTGAC
26 | ATGAGTAATGAAGGGTCTTATTTCTTCGGAGACAATGCAGAGGAGTATGACAGTTGA
27 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/pa/pathogen.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "shortcuts": [
 3 |       "flu_h1n1pdm_pa",
 4 |       "nextstrain/flu/h1n1pdm/pa"
 5 |     ],
 6 | "phenotypeData":[
 7 |     {
 8 |       "name": "PAI",
 9 |       "nameFriendly": "PAI",
10 |       "description": "This column displays a score associated with reduced susceptibility to polymerase inhibitors. The score is a weighted sum of known substitutions (x1) or major substitutions (x2), while substitutions at positions that confer resistance but are to amino acids not previously described to confer resistance count x0.5.",
11 |       "cds": "PA",
12 |       "aaRange": {
13 |         "begin":0,
14 |         "end": 460
15 |       },
16 |       "data": [
17 |         {
18 |           "name": "PAI_markers",
19 |           "weight": 1,
20 |           "locations": {
21 |             "22":  {"G":1, "K":1, "R":1, "default":0.5},
22 |             "33":  {"R":1, "default":0.5},
23 |             "36":  {"T":1, "default":0.5},
24 |             "37":  {"F":2, "L":2, "M":2, "S":2, "T":2, "V":1, "default":0.5},
25 |             "197": {"K":1, "default":0.5},
26 |             "198": {"D":1, "G":1, "default":0.5}
27 |           }
28 |         }
29 |       ]
30 |     }
31 |   ],
32 |   "mutLabels": {
33 |     "aaMutLabelMap": {
34 |       "PA:23G":["Baloxavir"],
35 |       "PA:23K":["Baloxavir"],
36 |       "PA:23R":["Baloxavir"],
37 |       "PA:34R":["Baloxavir"],
38 |       "PA:37T":["Baloxavir"],
39 |       "PA:38F":["Baloxavir"],
40 |       "PA:38L":["Baloxavir"],
41 |       "PA:38M":["Baloxavir"],
42 |       "PA:38S":["Baloxavir"],
43 |       "PA:38T":["Baloxavir"],
44 |       "PA:38V":["Baloxavir"],
45 |       "PA:198K":["Baloxavir"],
46 |       "PA:199D":["Baloxavir"],
47 |       "PA:199G":["Baloxavir"]
48 |     }
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/.github/workflows/ingest.yaml:
--------------------------------------------------------------------------------
 1 | name: Ingest
 2 | 
 3 | defaults:
 4 |   run:
 5 |     # This is the same as GitHub Action's `bash` keyword as of 20 June 2023:
 6 |     # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsshell
 7 |     #
 8 |     # Completely spelling it out here so that GitHub can't change it out from under us
 9 |     # and we don't have to refer to the docs to know the expected behavior.
10 |     shell: bash --noprofile --norc -eo pipefail {0}
11 | 
12 | on:
13 |   workflow_dispatch:
14 |     inputs:
15 |       dockerImage:
16 |         description: "Specific container image to use for build (will override the default of `nextstrain build`)"
17 |         required: false
18 |         type: string
19 |       runtime:
20 |         description: "Nextstrain runtime"
21 |         type: choice
22 |         default: "docker"
23 |         options:
24 |           - "docker"
25 |           - "aws-batch"
26 | 
27 | jobs:
28 |   ingest:
29 |     permissions:
30 |       id-token: write
31 |     uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
32 |     secrets: inherit
33 |     with:
34 |       runtime: ${{ inputs.runtime }}
35 |       env: |
36 |         NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.dockerImage }}
37 |       run: |
38 |         nextstrain build \
39 |           ingest \
40 |           upload_all \
41 |           --configfile build-configs/nextstrain-automation/config.yaml
42 |       # Explicitly excluding `ingest/data/` and `ingest/results/`
43 |       # since this is private data and should not be available through the public artifacts
44 |       artifact-paths: |
45 |         !ingest/data/
46 |         !ingest/results/
47 |         ingest/build.log
48 |         ingest/logs/
49 |         ingest/benchmarks/
50 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/np/reference.fasta:
--------------------------------------------------------------------------------
 1 | >NC_007369.1 Influenza A virus (A/New York/392/2004(H3N2)) segment 5, complete sequence
 2 | AGCAAAAGCAGGGTTAATAATCACTCACCGAGTGACATCAAAATCATGGCGTCCCAAGGC
 3 | ACCAAACGGTCTTATGAACAGATGGAAACTGATGGGGATCGCCAGAATGCAACTGAGATT
 4 | AGGGCATCCGTCGGGAAGATGATTGATGGAATTGGGAGATTCTACATCCAAATGTGCACT
 5 | GAACTTAAACTCAGTGATCATGAAGGGCGGTTGATCCAGAACAGCTTGACAATAGAGAAA
 6 | ATGGTGCTCTCTGCTTTTGATGAAAGAAGGAATAAATACCTGGAAGAACACCCCAGCGCG
 7 | GGGAAAGATCCCAAGAAAACTGGGGGGCCCATATACAGGAGAGTAGATGGAAAATGGATG
 8 | AGGGAACTCGTCCTTTATGACAAAGAAGAGATAAGGCGAATCTGGCGCCAAGCCAACAAT
 9 | GGTGAGGATGCGACAGCTGGTCTAACTCACATAATGATCTGGCATTCCAATTTGAATGAT
10 | GCAACATACCAGAGGACAAGAGCTCTTGTTCGAACTGGAATGGATCCCAGAATGTGCTCT
11 | CTGATGCAGGGCTCGACTCTCCCTAGAAGGTCCGGAGCTGCAGGTGCTGCAGTCAAAGGA
12 | ATCGGGACAATGGTGATGGAACTGATCAGAATGGTCAAACGGGGGATCAACGATCGAAAT
13 | TTCTGGAGAGGTGAGAATGGGCGGAAAACAAGAAGTGCTTATGAGAGAATGTGCAACATT
14 | CTTAAAGGAAAATTTCAAACAGCTGCACAAAGAGCAATGGTGGATCAAGTGAGAGAAAGT
15 | CGGAACCCAGGAAATGCTGAGATCGAAGATCTCATATTTTTGGCAAGATCTGCATTGATA
16 | TTGAGAGGGTCAGTTGCTCACAAATCTTGCCTACCTGCCTGTGCGTATGGACCTGCAGTA
17 | TCCAGTGGGTACGACTTCGAAAAAGAGGGATATTCCTTGGTGGGAATAGACCCTTTCAAA
18 | CTACTTCAAAATAGCCAAATATACAGCCTAATCAGACCTAACGAGAATCCAGCACACAAG
19 | AGTCAGCTGGTGTGGATGGCATGCCATTCTGCTGCATTTGAAGATTTAAGATTGTTAAGC
20 | TTCATCAGAGGGACAAAAGTATCTCCGCGGGGGAAACTGTCAACTAGAGGAGTACAAATT
21 | GCTTCAAATGAGAACATGGATAATATGGGATCGAGCACTCTTGAACTGAGAAGCGGGTAC
22 | TGGGCCATAAGGACCAGGAGTGGAGGAAACACTAATCAACAGAGGGCCTCCGCAGGCCAA
23 | ACCAGTGTGCAACCTACGTTTTCTGTACAAAGAAACCTCCCATTTGAAAAGTCAACCATC
24 | ATGGCAGCATTCACTGGAAATACGGAGGGAAGGACTTCAGACATGAGGGCAGAAATCATA
25 | AGAATGATGGAAGGTGCAAAACCAGAAGAAGTGTCATTCCGGGGGAGGGGAGTTTTCGAG
26 | CTCTCAGACGAGAAGGCAACGAACCCGATCGTGCCCTCTTTTGATATGAGTAATGAAGGA
27 | TCTTATTTCTTCGGAGACAATGCAGAAGAGTACGACAATTAAGGAAAAAATACCCTTGTT
28 | TCTACT
29 | 


--------------------------------------------------------------------------------
/config/yam/ha/reference.fasta:
--------------------------------------------------------------------------------
1 | >JN993010.1 Influenza B virus (B/Wisconsin/01/2010) segment 4 hemagglutinin (HA) gene, complete cds
2 | ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA
3 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h1n1pdm/ha/CY121680/reference.fasta:
--------------------------------------------------------------------------------
1 | >CY121680.1 Influenza A virus (A/California/07/2009(H1N1)) hemagglutinin (HA) gene, complete cds
2 | GGAAAACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACACCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCGAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAAACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATATCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAAAACAC
3 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/yam/ha/JN993010/reference.fasta:
--------------------------------------------------------------------------------
1 | >JN993010.1 Influenza B virus (B/Wisconsin/01/2010) segment 4 hemagglutinin (HA) gene, complete cds
2 | ATGAAGGCAATAATTGTACTACTCATGGTAGTAACATCCAATGCAGATCGAATCTGCACTGGGATAACATCTTCAAACTCACCTCATGTGGTCAAAACAGCTACTCAAGGGGAGGTCAATGTGACTGGCGTGATACCACTGACAACAACACCAACAAAATCTTATTTTGCAAATCTCAAAGGAACAAGGACCAGAGGGAAACTATGCCCGGACTGTCTCAACTGTACAGATCTGGATGTGGCCTTGGGCAGGCCAATGTGTGTGGGGACCACACCTTCTGCTAAAGCTTCAATACTCCACGAGGTCAGACCTGTTACATCCGGGTGCTTTCCTATAATGCACGACAGAACAAAAATCAGGCAACTACCCAATCTTCTCAGAGGATATGAAAATATCAGGTTATCAACCCAAAACGTTATCGATGCAGAAAAAGCACCAGGAGGACCCTACAGACTTGGAACCTCAGGATCTTGCCCTAACGCTACCAGTAAAATCGGATTTTTTGCAACAATGGCTTGGGCTGTCCCAAAGGACAACTACAAAAATGCAACGAACCCACTAACAGTAGAAGTACCATACATTTGTACAGAAGGGGAAGACCAAATTACTGTTTGGGGGTTCCATTCAGATAACAAAACCCAAATGAAGAGCCTCTATGGAGACTCAAATCCTCAAAAGTTCACCTCATCTGCTAATGGAGTAACCACACATTATGTTTCTCAGATTGGCGACTTCCCAGATCAAACAGAAGACGGAGGACTACCACAAAGCGGCAGAATTGTTGTTGATTACATGATGCAAAAACCTGGGAAAACAGGAACAATTGTCTATCAAAGAGGTGTTTTGTTGCCTCAAAAGGTGTGGTGCGCGAGTGGCAGGAGCAAAGTAATAAAAGGGTCATTGCCTTTAATTGGTGAAGCAGATTGCCTTCATGAAAAATACGGTGGATTAAACAAAAGCAAGCCTTACTACACAGGAGAACATGCAAAAGCCATAGGAAATTGCCCAATATGGGTAAAAACACCTTTGAAGCTTGCCAATGGAACCAAATATAGACCTCCTGCAAAACTATTGAAGGAAAGGGGTTTCTTCGGAGCTATTGCTGGTTTCCTAGAAGGAGGATGGGAAGGAATGATTGCAGGTTGGCACGGATACACATCTCACGGAGCACATGGAGTGGCAGTGGCGGCAGACCTTAAGAGTACACAAGAAGCTATAAATAAGATAACAAAAAATCTCAATTCTTTGAGTGAGCTAGAAGTAAAGAACCTTCAAAGACTAAGTGGTGCCATGGATGAACTCCACAACGAAATACTCGAGCTGGATGAGAAAGTGGATGATCTCAGAGCTGACACTATAAGCTCACAAATAGAACTTGCAGTCTTGCTTTCCAACGAAGGAATAATAAACAGTGAAGACGAGCATCTATTGGCACTTGAGAGAAAACTAAAGAAAATGCTGGGTCCCTCTGCTGTAGACATAGGAAACGGATGCTTCGAAACCAAACACAAATGCAACCAGACCTGCTTAGACAGGATAGCTGCTGGCACCTTTAATGCAGGAGAATTTTCTCTCCCCACTTTTGATTCATTGAACATTACTGCTGCATCTTTAAATGATGATGGATTGGATAACCATACTATACTGCTCTATTACTCAACTGCTGCTTCTAGTTTGGCTGTAACATTAATGCTAGCTATTTTTATTGTTTATATGGTCTCCAGAGACAACGTTTCATGCTCCATCTGTCTATAA
3 | 


--------------------------------------------------------------------------------
/nextclade/scripts/extract_founder_sequences.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | def read_founder_nodes(clades_json_file):
 4 |     import json
 5 |     with open(clades_json_file) as fh:
 6 |         clades_data = json.load(fh)
 7 |     founder_nodes = dict()
 8 |     for node_id, node_data in clades_data.get('branches', {}).items():
 9 |         if 'labels' in node_data and 'clade' in node_data['labels']:
10 |             founder_nodes[node_id] = node_data['labels']['clade']
11 | 
12 |     return founder_nodes
13 | 
14 | 
def extract_sequences_from_node_data(node_clade_map, ancestral_json):
    """Build one sequence record per clade from an ancestral-sequence JSON.

    *node_clade_map* maps node names to clade names; *ancestral_json* is the
    path to a JSON file whose ``nodes`` mapping holds a ``sequence`` string
    for each node. Returns a dict keyed by clade name whose values are
    ``SeqRecord`` objects with the clade as ``id`` and an empty description.
    If several nodes map to the same clade, the last one encountered in the
    JSON's node order is kept.
    """
    import json
    from Bio import SeqRecord, Seq

    with open(ancestral_json) as handle:
        nodes = json.load(handle)['nodes']

    founder_records = {}
    for node_name, node in nodes.items():
        if node_name not in node_clade_map:
            continue
        clade_name = node_clade_map[node_name]
        founder_records[clade_name] = SeqRecord.SeqRecord(
            Seq.Seq(node['sequence']), id=clade_name, description=''
        )

    return founder_records
29 | 
30 | 
if __name__=="__main__":
    # Command-line entry point: look up the clade-labelled nodes, pull their
    # sequences from the ancestral JSON and write them as FASTA, ordered by
    # clade name.
    import argparse
    from Bio import SeqIO

    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument("--clades-json", required=True, help="JSON file with clade definitions")
    cli.add_argument("--ancestral-json", required=True, help="Ancestral sequence JSON file")
    cli.add_argument("--output-fasta", required=True, help="Output FASTA file with founder sequences")
    args = cli.parse_args()

    founders = read_founder_nodes(args.clades_json)
    records_by_clade = extract_sequences_from_node_data(founders, args.ancestral_json)

    with open(args.output_fasta, 'w') as out_handle:
        ordered_records = [records_by_clade[name] for name in sorted(records_by_clade)]
        SeqIO.write(ordered_records, out_handle, 'fasta')
46 | 


--------------------------------------------------------------------------------
/ingest/vendored/download-from-s3:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -euo pipefail
 3 | 
 4 | bin="$(dirname "$0")"
 5 | 
# Download an S3 object to a local file, decompressing it on the fly based on
# the source URL's extension, and skip the download when the local file's hash
# already matches the hash recorded in the object's S3 metadata.
#
# Arguments:
#   $1  source s3:// URL (required)
#   $2  destination file path (required)
#   $3  number of lines to subsample to; 0 (default) disables subsampling
main() {
    local src="${1:?A source s3:// URL is required as the first argument.}"
    local dst="${2:?A destination file path is required as the second argument.}"
    # How many lines to subsample to. 0 means no subsampling. Optional.
    # It is not advised to use this for actual subsampling! This is intended to be
    # used for debugging workflows with large datasets such as ncov-ingest as
    # described in https://github.com/nextstrain/ncov-ingest/pull/367

    # Uses `tsv-sample` to subsample, so it will not work as expected with files
    # that have a single record split across multiple lines (i.e. FASTA sequences)
    local n="${3:-0}"

    # Split "bucket/key..." (the URL without its s3:// prefix) at the first slash.
    local s3path="${src#s3://}"
    local bucket="${s3path%%/*}"
    local key="${s3path#*/}"

    # dst_hash: output of the sibling `sha256sum` helper fed the existing
    # destination file; `|| true` keeps `set -e` from aborting when that fails
    # (e.g. the destination does not exist yet).
    # src_hash: the `sha256sum` entry of the S3 object's metadata; falls back to
    # the all-zero placeholder when the head-object call fails.
    local src_hash dst_hash no_hash=0000000000000000000000000000000000000000000000000000000000000000
    dst_hash="$("$bin/sha256sum" < "$dst" || true)"
    src_hash="$(aws s3api head-object --bucket "$bucket" --key "$key" --query Metadata.sha256sum --output text 2>/dev/null || echo "$no_hash")"

    echo "[ INFO] Downloading $src → $dst"
    # Pipeline: stream the object to stdout, pick a decompressor from the
    # source extension (plain `cat` otherwise), then optionally subsample
    # before writing to the destination.
    if [[ $src_hash != "$dst_hash" ]]; then
        aws s3 cp --no-progress "$src" - |
        if [[ "$src" == *.gz ]]; then
            gunzip -cfq
        elif  [[ "$src" == *.xz ]]; then
            xz -T0 -dcq
        elif [[ "$src" == *.zst ]]; then
            zstd -T0 -dcq
        else
            cat
        fi |
        if [[ "$n" -gt 0 ]]; then
            tsv-sample -H -i -n "$n"
        else
            cat
        fi >"$dst"
    else
        echo "[ INFO] Files are identical, skipping download"
    fi
}
47 | 
48 | main "$@"
49 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/ha/EPI1857216/reference.fasta:
--------------------------------------------------------------------------------
 1 | >EPI_ISL_1563628 | A/Darwin/6/2021 | A / H3N2 |  | 2021-03-16
 2 | ATGAAGACTATCATTGCTTTGAGCAACATTCTATGTCTTGTTTTCGCTCAAAAAATACCTGGAAATGACAATAGCACGGC
 3 | AACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATAGTGAAAACAATCACAAATGACCGAATTGAAGTTACTA
 4 | ATGCTACTGAGTTGGTTCAGAATTCATCAATAGGTGAAATATGCGGCAGTCCTCATCAGATCCTTGATGGAGGGAACTGC
 5 | ACACTAATAGATGCTCTATTGGGGGACCCTCAGTGTGACGGCTTTCAAAATAAGGAATGGGACCTTTTTGTTGAAAGAAG
 6 | CAGAGCCAACAGCAACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACAC
 7 | TGGAGTTTAAAAATGAAAGCTTCAATTGGACTGGAGTCAAACAAAACGGAACAAGTTCTGCGTGCATAAGGGGATCTAGT
 8 | AGTAGTTTTTTTAGTAGATTAAATTGGTTGACCAGCTTAAACAACATATATCCAGCACAGAACGTGACTATGCCAAACAA
 9 | GGAACAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGATACGGACAAGAACCAAATCTCCCTGTTTGCTCAAT
10 | CATCAGGAAGAATCACAGTATCTACCAAAAGAAGCCAACAAGCTGTAATCCCAAATATCGGATCTAGACCCAGAATAAGG
11 | GATATCCCTAGCAGAATAAGCATCTATTGGACAATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCT
12 | AATTGCTCCTAGGGGTTACTTCAAAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGTA
13 | AGTCTGAATGCATCACTCCAAATGGAAGCATTCCCAATGACAAACCGTTCCAAAATGTAAACAGGATCACATACGGGGCC
14 | TGTCCCAGATATGTTAAGCAAAGCACCCTGAAATTGGCAACAGGAATGCGAAATGTACCAGAGAAACAAACCAGAGGCAT
15 | ATTTGGCGCAATAGCGGGTTTCATAGAAAATGGATGGGAGGGAATGGTGGATGGTTGGTACGGTTTCAGGCATCAAAATT
16 | CTGAGGGAAGAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCGATCAAATCAATGGGAAGCTGAATCGATTG
17 | ATCGGAAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAATTCTCAGAAGTAGAAGGAAGAGTTCAAGACCTTGAGAA
18 | ATATGTTGAGGACACTAAAATAGATCTCTGGTCATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACGATTG
19 | ACCTAACTGACTCAGAAATGAACAAACTGTTTGAAAAAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGAAAT
20 | GGTTGTTTCAAAATATACCACAAATGTGACAATGCCTGCATAGGATCAATAAGAAATGAAACTTATGACCACAATGTGTA
21 | CAGGGATGAAGCATTAAACAACCGGTTCCAGATCAAGGGAGTTGAGCTGAAGTCAGGGTACAAAGATTGGATCCTATGGA
22 | TTTCCTTTGCCATGTCATGTTTTTTGCTTTGTATTGCTTTGTTGGGGTTCATCATGTGGGCCTGCCAAAAGGGCAACATT
23 | AGATGCAACATTTGCATTTGAGTGCATTAATTAAAAAC
24 | 


--------------------------------------------------------------------------------
/ingest/build-configs/manual-upload/Snakefile:
--------------------------------------------------------------------------------
 1 | """
 2 | This handles uploads of files downloaded from GISAID to AWS S3.
 3 | """
 4 | import os.path
 5 | 
 6 | 
# Use default configuration values. Extend with Snakemake's --configfile/--config options.
configfile: os.path.join(workflow.basedir, "config.yaml")

# Use custom configuration from analysis directory (i.e. working dir), if any.
if os.path.exists("config.yaml"):
    configfile: "config.yaml"


wildcard_constraints:
    # Constrain GISAID pair names to YYYY-MM-DD with an optional -N suffix
    # (e.g. 2024-01-31 or 2024-01-31-2)
    gisaid_pair = r'\d{4}-\d{2}-\d{2}(-\d+)?'
18 | 
19 | 
# Aggregate target rule: requests the metadata and sequences `.upload` flag
# files for every entry of `gisaid_pairs` in the config. It has no outputs of
# its own.
rule upload_gisaid_pairs:
    input:
        upload_flags=expand([
            "data/{gisaid_pair}-metadata.upload",
            "data/{gisaid_pair}-sequences.upload",
        ], gisaid_pair=config["gisaid_pairs"]),
26 | 
27 | 
# Upload one GISAID metadata spreadsheet (`data/{gisaid_pair}-metadata.xls`)
# to `<s3_dst>/{gisaid_pair}-metadata.xls.zst` with the vendored `upload-to-s3`
# script. The script's combined stdout/stderr is echoed and also written to the
# `.upload` flag file, which marks the upload as done for Snakemake.
# NOTE(review): the `.zst` destination suffix presumably makes `upload-to-s3`
# compress the file — confirm against the vendored script.
rule upload_gisaid_metadata:
    input:
        metadata="data/{gisaid_pair}-metadata.xls",
    output:
        flag="data/{gisaid_pair}-metadata.upload",
    params:
        s3_dst=config["s3_dst"],
    shell:
        r"""
        {workflow.basedir}/../../vendored/upload-to-s3 \
            --quiet \
            {input.metadata:q} \
            {params.s3_dst:q}/{wildcards.gisaid_pair}-metadata.xls.zst \
            2>&1 | tee {output.flag:q}
        """
43 | 
44 | 
# Upload one GISAID sequences file (`data/{gisaid_pair}-sequences.fasta`) to
# `<s3_dst>/{gisaid_pair}-sequences.fasta.zst` with the vendored `upload-to-s3`
# script. The script's combined stdout/stderr is echoed and also written to the
# `.upload` flag file, which marks the upload as done for Snakemake.
# NOTE(review): the `.zst` destination suffix presumably makes `upload-to-s3`
# compress the file — confirm against the vendored script.
rule upload_gisaid_sequences:
    input:
        sequences="data/{gisaid_pair}-sequences.fasta",
    output:
        flag="data/{gisaid_pair}-sequences.upload",
    params:
        s3_dst=config["s3_dst"],
    shell:
        r"""
        {workflow.basedir}/../../vendored/upload-to-s3 \
            --quiet \
            {input.sequences:q} \
            {params.s3_dst:q}/{wildcards.gisaid_pair}-sequences.fasta.zst \
            2>&1 | tee {output.flag:q}
        """
60 | 


--------------------------------------------------------------------------------
/scripts/sequence_export.py:
--------------------------------------------------------------------------------
 1 | # This script exports the sequence JSON needed by the old, deprecated version of auspice.
 2 | import argparse, json
 3 | from random import sample
 4 | import numpy as np
 5 | from Bio import Phylo, AlignIO
 6 | 
 7 | 
 8 | if __name__ == '__main__':
 9 |     parser = argparse.ArgumentParser(
10 |         description="",
11 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter
12 |     )
13 | 
14 |     parser.add_argument('--tree', type=str, required=True,
15 |                         help="newick file with the tree")
16 |     parser.add_argument('--alignment', type=str, help="json file with ancestral reconstructions. assumes full sequence")
17 |     parser.add_argument('--translations', nargs='+', help="fasta files with ancestral translations")
18 |     parser.add_argument('--genes', nargs='+', help="names of the genes corresponding to the translations")
19 |     parser.add_argument('--output', type=str, help="names of files to write selected strains to, one for each gene")
20 | 
21 |     args = parser.parse_args()
22 | 
23 |     with open(args.alignment) as fh:
24 |         nuc = json.load(fh)["nodes"]
25 | 
26 |     T = Phylo.read(args.tree, 'newick')
27 |     root_seq=nuc[T.root.name]['sequence']
28 |     sequence_json = {'root':{'nuc':root_seq}}
29 |     for n in T.find_clades(order='preorder'):
30 |         sequence_json[n.name]={'nuc':{p:d for p,a,d in zip(range(len(root_seq)), root_seq, nuc[n.name]['sequence']) if a!=d}}
31 | 
32 |     for gene, fname in zip(args.genes, args.translations):
33 |         aln = {s.name:str(s.seq) for s in AlignIO.read(fname, 'fasta')}
34 |         root_seq = aln[T.root.name]
35 |         sequence_json['root'][gene]=root_seq
36 |         for n in T.find_clades(order='preorder'):
37 |             sequence_json[n.name][gene] = {p:d for p,a,d in zip(range(len(root_seq)), root_seq, aln[n.name]) if a!=d}
38 | 
39 |     with open(args.output, 'wt') as fh:
40 |         json.dump(sequence_json, fh)
41 | 
42 | 


--------------------------------------------------------------------------------
/ingest/vendored/trigger:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -euo pipefail
 3 | 
# Send a `repository_dispatch` request for the given event type to a GitHub
# repository's `/dispatches` API endpoint using curl.

# Optional token for authentication; default to empty so that `set -u` does not
# abort when the variable is unset.
: "${PAT_GITHUB_DISPATCH:=}"

# Required positional arguments. Anything left after `shift 2` is passed
# through to curl unchanged.
github_repo="${1:?A GitHub repository with owner and repository name is required as the first argument.}"
event_type="${2:?An event type is required as the second argument.}"
shift 2

# With neither extra curl options nor a token there are no credentials to
# send, so print usage help to stderr and exit.
if [[ $# -eq 0 && -z $PAT_GITHUB_DISPATCH ]]; then
    cat >&2 <<.
You must specify options to curl for your GitHub credentials.  For example, you
can specify your GitHub username, and will be prompted for your password:

  $0 $github_repo $event_type --user <your-github-username>

Be sure to enter a personal access token¹ as your password since GitHub has
discontinued password authentication to the API starting on November 13, 2020².

You can also store your credentials or a personal access token in a netrc
file³:

  machine api.github.com
  login <your-username>
  password <your-token>

and then tell curl to use it:

  $0 $github_repo $event_type --netrc

which will then not require you to type your password every time.

¹ https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token-for-the-command-line
² https://docs.github.com/en/rest/overview/other-authentication-methods#via-username-and-password
³ https://ec.haxx.se/usingcurl/usingcurl-netrc
.
    exit 1
fi

# Value for the extra `-H` header below: a bearer Authorization header when a
# token is set, otherwise ':'.
# NOTE(review): ':' presumably acts as a placeholder that adds no real header —
# confirm against curl's -H handling.
auth=':'
if [[ -n $PAT_GITHUB_DISPATCH ]]; then
  auth="Authorization: Bearer ${PAT_GITHUB_DISPATCH}"
fi

# POST the event; `-f` makes curl exit non-zero on HTTP errors so the failure
# branch below is taken. The event type is spliced into the JSON body as-is.
if curl -fsS "https://api.github.com/repos/${github_repo}/dispatches" \
    -H 'Accept: application/vnd.github.v3+json' \
    -H 'Content-Type: application/json' \
    -H "$auth" \
    -d '{"event_type":"'"$event_type"'"}' \
    "$@"
then
    echo "Successfully triggered $event_type"
else
    echo "Request failed" >&2
    exit 1
fi
57 | 


--------------------------------------------------------------------------------
/ingest/scripts/annotate-with-gihsn:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Annotates whether the GISAID record is part of the Global Influenza Hospital
 4 | Surveillance Network (GIHSN) by checking for "GIHSN" in the strain name.
 5 | """
 6 | import argparse
 7 | import re
 8 | import sys
 9 | from pathlib import Path
10 | from typing import Iterable
11 | from augur.io.json import dump_ndjson, load_ndjson
12 | 
SCRIPT_NAME = Path(sys.argv[0]).stem


def print_err(*args):
    """Print *args* to stderr, prefixed with the script name in brackets."""
    prefix = f"[{SCRIPT_NAME}] "
    print(prefix, *args, file=sys.stderr)
17 | 
GIHSN_PATTERN = r"(GIHSN)"


def annotate_records_with_gihsn(records: Iterable, strain_field: str, gihsn_field: str) -> Iterable:
    """
    Yield a copy of each record with *gihsn_field* set to the string "True"
    when the *strain_field* value contains "GIHSN", and "False" otherwise.

    The input records are not modified. Raises an Exception when a record has
    no *strain_field* (or its value is None).
    """
    for original in records:
        annotated = original.copy()
        strain = annotated.get(strain_field)

        if strain is None:
            raise Exception(f"Records must have the expected strain field: {strain_field!r}")

        is_gihsn = re.search(GIHSN_PATTERN, strain) is not None
        annotated[gihsn_field] = str(is_gihsn)

        yield annotated
38 | 
39 | 
if __name__ == '__main__':
    # Command-line entry point: read NDJSON records from stdin, annotate each
    # one, and hand the result to dump_ndjson.
    cli = argparse.ArgumentParser(description=__doc__)

    cli.add_argument("--strain-field", default="strain",
        help="The record field containing the GISAID strain name")
    cli.add_argument("--gihsn-field", default="gihsn_sample",
        help=("The name of the new field to add to the record to indicate if it "
              "is part of the Global Influenza Hospital Surveillance Network (GIHSN)"))

    options = cli.parse_args()

    incoming = load_ndjson(sys.stdin)
    dump_ndjson(annotate_records_with_gihsn(incoming, options.strain_field, options.gihsn_field))
54 | 


--------------------------------------------------------------------------------
/config/h3n2/ha/reference.fasta:
--------------------------------------------------------------------------------
 1 | >CY163680.1 Influenza A virus (A/Wisconsin/67/2005(H3N2)) hemagglutinin (HA) gene, complete cds
 2 | GGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAA
 3 | ACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATA
 4 | GTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAG
 5 | GTGGAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGG
 6 | AGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGC
 7 | AACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGG
 8 | AGTTTAACGATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAACAAGCTCTTCTTGCAAAAGGAG
 9 | ATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAAC
10 | GTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACA
11 | ATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAAC
12 | TGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACA
13 | ATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCA
14 | AAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCAT
15 | CACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGT
16 | CCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTA
17 | GAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGG
18 | TTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAAT
19 | CAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAAT
20 | TCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTC
21 | ATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAAC
22 | AAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAA
23 | TATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAG
24 | AGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATC
25 | CTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCT
26 | GCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGAGTGCATTAATTAAAAACAC
27 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/ha/CY163680/reference.fasta:
--------------------------------------------------------------------------------
 1 | >CY163680.1 Influenza A virus (A/Wisconsin/67/2005(H3N2)) hemagglutinin (HA) gene, complete cds
 2 | GGATAATTCTATTAACCATGAAGACTATCATTGCTTTGAGCTACATTCTATGTCTGGTTTTCGCTCAAAA
 3 | ACTTCCCGGAAATGACAACAGCACGGCAACGCTGTGCCTTGGGCACCATGCAGTACCAAACGGAACGATA
 4 | GTGAAAACAATCACGAATGACCAAATTGAAGTTACTAATGCTACTGAGCTGGTTCAGAGTTCCTCAACAG
 5 | GTGGAATATGCGACAGTCCTCATCAGATCCTTGATGGAGAAAACTGCACACTAATAGATGCTCTATTGGG
 6 | AGACCCTCAGTGTGATGGCTTCCAAAATAAGAAATGGGACCTTTTTGTTGAACGCAGCAAAGCCTACAGC
 7 | AACTGTTACCCTTATGATGTGCCGGATTATGCCTCCCTTAGGTCACTAGTTGCCTCATCCGGCACACTGG
 8 | AGTTTAACGATGAAAGCTTCAATTGGACTGGAGTCACTCAAAATGGAACAAGCTCTTCTTGCAAAAGGAG
 9 | ATCTAATAACAGTTTCTTTAGTAGATTGAATTGGTTGACCCACTTAAAATTCAAATACCCAGCATTGAAC
10 | GTGACTATGCCAAACAATGAAAAATTTGACAAATTGTACATTTGGGGGGTTCACCACCCGGTTACGGACA
11 | ATGACCAAATCTTCCTGTATGCTCAAGCATCAGGAAGAATCACAGTCTCTACCAAAAGAAGCCAACAAAC
12 | TGTAATCCCGAATATCGGATCTAGACCCAGAATAAGGAATATCCCCAGCAGAATAAGCATCTATTGGACA
13 | ATAGTAAAACCGGGAGACATACTTTTGATTAACAGCACAGGGAATCTAATTGCTCCTAGGGGTTACTTCA
14 | AAATACGAAGTGGGAAAAGCTCAATAATGAGATCAGATGCACCCATTGGCAAATGCAATTCTGAATGCAT
15 | CACTCCAAATGGAAGCATTCCCAATGACAAACCATTTCAAAATGTAAACAGGATCACATATGGGGCCTGT
16 | CCCAGATATGTTAAGCAAAACACTCTGAAATTGGCAACAGGGATGCGAAATGTACCAGAGAAACAAACTA
17 | GAGGCATATTTGGCGCAATCGCGGGTTTCATAGAAAATGGTTGGGAGGGAATGGTGGATGGTTGGTACGG
18 | TTTCAGGCATCAAAATTCTGAGGGAATAGGACAAGCAGCAGATCTCAAAAGCACTCAAGCAGCAATCAAT
19 | CAAATCAATGGGAAGCTGAATAGGTTGATCGGGAAAACCAACGAGAAATTCCATCAGATTGAAAAAGAAT
20 | TCTCAGAAGTAGAAGGGAGAATTCAGGACCTCGAGAAATATGTTGAGGACACTAAAATAGATCTCTGGTC
21 | ATACAACGCGGAGCTTCTTGTTGCCCTGGAGAACCAACATACAATTGATCTAACTGACTCAGAAATGAAC
22 | AAACTGTTTGAAAGAACAAAGAAGCAACTGAGGGAAAATGCTGAGGATATGGGCAATGGTTGTTTCAAAA
23 | TATACCACAAATGTGACAATGCCTGCATAGGATCAATCAGAAATGGAACTTATGACCATGATGTATACAG
24 | AGATGAAGCATTAAACAACCGGTTCCAGATCAAAGGCGTTGAGCTGAAGTCAGGATACAAAGATTGGATC
25 | CTATGGATTTCCTTTGCCATATCATGTTTTTTGCTTTGTGTTGCTTTGTTGGGGTTCATCATGTGGGCCT
26 | GCCAAAAAGGCAACATTAGGTGCAACATTTGCATTTGAGTGCATTAATTAAAAACAC
27 | 


--------------------------------------------------------------------------------
/nextclade/dataset_config/h3n2/pa/pathogen.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "shortcuts": [
 3 |       "flu_h3n2_pa",
 4 |       "nextstrain/flu/h3n2/pa"
 5 |     ],
 6 | "phenotypeData":[
 7 |     {
 8 |       "name": "PAI",
 9 |       "nameFriendly": "PAI",
10 |       "description": "This column displays a score associated with reduced susceptibility to polymerase inhibitors. The score is a weighted sum over substitutions at positions known to confer resistance: known resistance substitutions count x1, major resistance substitutions count x2, and substitutions to amino acids not previously described to confer resistance count x0.5.",
11 |       "cds": "PA",
12 |       "aaRange": {
13 |         "begin":0,
14 |         "end": 460
15 |       },
16 |       "data": [
17 |         {
18 |           "name": "PAI_markers",
19 |           "weight": 1,
20 |           "locations": {
21 |             "27": {"P":1, "default":0.5},
22 |             "22": {"G":1, "K":2, "R": 2, "default":0.5},
23 |             "33": {"R":1, "default":0.5},
24 |             "35": {"V":2, "default":0.5},
25 |             "36": {"T":2, "default":0.5},
26 |             "37": {"F":2, "L":1, "M":2, "N":2, "S":1, "T":2,  "V":1, "default":0.5},
27 |             "118": {"D":1, "default":0.5},
28 |             "197": {"K":1, "default":0.5},
29 |             "198": {"G":1, "default":0.5}
30 |           }
31 |         }
32 |       ]
33 |     }
34 |   ],
35 |   "mutLabels": {
36 |     "aaMutLabelMap": {
37 |       "PA:23G":["Baloxavir"],
38 |       "PA:23K":["Baloxavir"],
39 |       "PA:23R":["Baloxavir"],
40 |       "PA:28P":["Baloxavir"],
41 |       "PA:34R":["Baloxavir"],
42 |       "PA:36V":["Baloxavir"],
43 |       "PA:37T":["Baloxavir"],
44 |       "PA:38F":["Baloxavir"],
45 |       "PA:38L":["Baloxavir"],
46 |       "PA:38M":["Baloxavir"],
47 |       "PA:38N":["Baloxavir"],
48 |       "PA:38S":["Baloxavir"],
49 |       "PA:38T":["Baloxavir"],
50 |       "PA:38V":["Baloxavir"],
51 |       "PA:119D":["Baloxavir"],
52 |       "PA:198K":["Baloxavir"],
53 |       "PA:199G":["Baloxavir"]
54 |     }
55 |   }
56 | }
57 | 


--------------------------------------------------------------------------------