├── tests
    ├── testthat
    │   ├── test-epiflu.R
    │   ├── data
    │   │   └── test.fa
    │   ├── test-core.R
    │   ├── test-login.R
    │   ├── test-fasta.R
    │   ├── test-download.R
    │   └── test-query.R
    └── testthat.R
├── NAMESPACE
├── .Rbuildignore
├── .gitignore
├── man
    ├── read_fasta.Rd
    ├── login.Rd
    ├── download.Rd
    └── query.Rd
├── GISAIDR.Rproj
├── DESCRIPTION
├── R
    ├── read_fasta.R
    ├── export_fasta.R
    ├── query.R
    ├── internal_query.R
    ├── login.R
    ├── core.R
    └── download.R
├── .github
    └── workflows
    │   └── r.yml
├── README.md
└── GISAID_LOCATIONS.txt


/tests/testthat/test-epiflu.R:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | exportPattern("^[[:alpha:]]+")
2 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | .DS_Store
6 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(GISAIDR)
3 | 
4 | test_check("GISAIDR")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/data/test.fa:
--------------------------------------------------------------------------------
1 | >A
2 | TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAG
3 | >B
4 | TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAG
5 | >C
6 | TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAG
7 | >D
8 | TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAG
9 | 


--------------------------------------------------------------------------------
/tests/testthat/test-core.R:
--------------------------------------------------------------------------------
1 | test_that("createCommand returns list", {
2 |   expect_true(is.list(createCommand(1,2,3,"Go", list())))
3 | })
4 | 
5 | 
6 | test_that("formatDataForRequest returns str", {
7 |   expect_true(is.character(formatDataForRequest(1,2,3, list(), 1)))
8 | })
9 | 


--------------------------------------------------------------------------------
/man/read_fasta.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/read_fasta.R
 3 | \name{read_fasta}
 4 | \alias{read_fasta}
 5 | \title{Read FASTA amino acids file into a dataframe}
 6 | \usage{
 7 | read_fasta(file = NULL, get_sequence = TRUE)
 8 | }
 9 | \description{
10 | This function reads a FASTA amino acids file into a dataframe
11 | }
12 | \details{
13 | TAKEN FROM AMPIR: https://github.com/Legana/ampir
14 | }
15 | 


--------------------------------------------------------------------------------
/GISAIDR.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: GISAIDR
 2 | Type: Package
 3 | Title: R wrapper for GISAID "API"
 4 | Version: 0.9.10
 5 | Author: Wytamma Wirth
 6 | Maintainer: Wytamma Wirth <wytamma.wirth@me.com>
 7 | Description: programmatically interact with the GISAID database query.
 8 | License: Please see the GISAID license
 9 | Encoding: UTF-8
10 | LazyData: true
11 | Depends: R (>= 3.6.0)
12 | Suggests: 
13 |     testthat (>= 3.0.0)
14 | Config/testthat/edition: 3
15 | Imports: 
16 |     httr,
17 |     rjson,
18 |     xml2,
19 |     openssl
20 | RoxygenNote: 7.1.1
21 | 


--------------------------------------------------------------------------------
/man/login.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/login.R
 3 | \name{login}
 4 | \alias{login}
 5 | \title{Login to GISAID}
 6 | \usage{
 7 | login(username, password, database = "EpiCoV")
 8 | }
 9 | \arguments{
10 | \item{username}{GISAID username.}
11 | 
12 | \item{password}{GISAID password.}
13 | }
14 | \value{
15 | credentials used to query GISAID.
16 | }
17 | \description{
18 | Login to GISAID
19 | }
20 | \examples{
21 | username = Sys.getenv("GISAIDR_USERNAME")
22 | password = Sys.getenv("GISAIDR_PASSWORD")
23 | login(username, password)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/download.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/download.R
 3 | \name{download}
 4 | \alias{download}
 5 | \title{Download from GISAID}
 6 | \usage{
 7 | download(
 8 |   credentials,
 9 |   list_of_accession_ids,
10 |   get_sequence = FALSE,
11 |   clean_up = TRUE
12 | )
13 | }
14 | \arguments{
15 | \item{credentials}{GISAID credentials.}
16 | 
17 | \item{list_of_accession_ids}{list of accession_id from GISAID}
18 | 
19 | \item{get_sequence}{load sequences into data.frame after download}
20 | 
21 | \item{clean_up}{delete downloaded files (e.g. fasta files) after download}
22 | }
23 | \value{
24 | data.frame of complete data
25 | }
26 | \description{
27 | Download from GISAID
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-login.R:
--------------------------------------------------------------------------------
 1 | test_that("login returns credentials", {
 2 |   username = Sys.getenv("GISAIDR_USERNAME")
 3 |   password = Sys.getenv("GISAIDR_PASSWORD")
 4 |   credentials <- login(username = username, password = password)
 5 |   expect_true(is.list(credentials))
 6 | 
 7 | })
 8 | 
 9 | test_that("invalid login fails", {
10 |   username = 'NOT_A_REAL_PERSON'
11 |   password = Sys.getenv("GISAIDR_PASSWORD")
12 |   expect_error(login(username = username, password = password),
13 |                "Username or password wrong!")
14 | })
15 | 
16 | test_that("EpiPox login works", {
17 |   username = Sys.getenv("GISAIDR_USERNAME")
18 |   password = Sys.getenv("GISAIDR_PASSWORD")
19 |   credentials <- login(username = username, password = password, database="EpiPox")
20 |   expect_true(is.list(credentials))
21 | })
22 | 
23 | 
24 | test_that("EpiRSV login works", {
25 |   username = Sys.getenv("GISAIDR_USERNAME")
26 |   password = Sys.getenv("GISAIDR_PASSWORD")
27 |   credentials <- login(username = username, password = password, database="EpiRSV")
28 |   expect_true(is.list(credentials))
29 | })
30 | 


--------------------------------------------------------------------------------
/R/read_fasta.R:
--------------------------------------------------------------------------------
 1 | #' Read FASTA amino acids file into a dataframe
 2 | #'
 3 | #' This function reads a FASTA amino acids file into a dataframe
 4 | #'
 5 | #' TAKEN FROM AMPIR: https://github.com/Legana/ampir
 6 | 
 7 | 
 8 | read_fasta <- function (file = NULL, get_sequence=TRUE) {
 9 |   fasta_lines <- readLines(file)
10 | 
11 |   ### get sequence names
12 |   seq_name_index <- grep(">", fasta_lines)
13 |   strain <- gsub(">", "", fasta_lines[seq_name_index])
14 | 
15 |   if (get_sequence) {
16 |     ### get sequence
17 |     seq_aa_start_index <- seq_name_index + 1
18 |     seq_aa_end_index <- c(seq_name_index, length(fasta_lines)+1)[-1]-1
19 | 
20 |     sequence <- rep(NA, length(seq_name_index))
21 | 
22 |     ### replace NA content with actual sequence content, and concatenate the lines
23 |     for(i in seq_along(seq_name_index)){
24 |       seq_aa_start <- seq_aa_start_index[i]
25 |       seq_aa_end   <- seq_aa_end_index[i]
26 |       sequence[i] <- gsub("[[:space:]]", "",
27 |                           paste(fasta_lines[seq_aa_start:seq_aa_end],
28 |                                 collapse = ""))
29 |     }
30 | 
31 |     return(data.frame(strain, sequence, stringsAsFactors = FALSE))
32 |   } else {
33 |     return(data.frame(strain, stringsAsFactors = FALSE))
34 |   }
35 | 
36 | }
37 | 


--------------------------------------------------------------------------------
/.github/workflows/r.yml:
--------------------------------------------------------------------------------
 1 | # This workflow uses actions that are not certified by GitHub.
 2 | # They are provided by a third-party and are governed by
 3 | # separate terms of service, privacy policy, and support
 4 | # documentation.
 5 | #
 6 | # See https://github.com/r-lib/actions/tree/master/examples#readme for
 7 | # additional example workflows available for the R community.
 8 | 
 9 | name: Build
10 | 
11 | on:
12 |   push:
13 |     branches: [ master ]
14 |   pull_request:
15 |     branches: [ master ]
16 |   schedule:
17 |     - cron: '0 0 * * *'
18 | 
19 | 
20 | jobs:
21 |   build:
22 |     runs-on: ${{ matrix.config.os }}
23 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
24 |     strategy:
25 |       fail-fast: false
26 |       max-parallel: 1
27 |       matrix:
28 |         config:
29 |           - {os: macOS-latest,   r: 'release'}
30 |           - {os: windows-latest, r: 'release'}
31 | 
32 |     steps:
33 |       - uses: actions/checkout@v2
34 |       - name: Set up R ${{ matrix.config.os }} (${{ matrix.config.r }})
35 |         uses: r-lib/actions/setup-r@v2
36 |         with:
37 |           r-version: ${{ matrix.config.r }}
38 |           http-user-agent: ${{ matrix.config.http-user-agent }}
39 |       - name: Install dependencies
40 |         run: |
41 |           install.packages(c("remotes", "rcmdcheck", "curl"))
42 |           remotes::install_deps(dependencies = TRUE)
43 |         shell: Rscript {0}
44 |       - name: Check
45 |         env:
46 |           GISAIDR_PASSWORD: ${{ secrets.GISAIDR_PASSWORD }}
47 |           GISAIDR_USERNAME: ${{ secrets.GISAIDR_USERNAME }}
48 |         run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
49 |         shell: Rscript {0}
50 | 


--------------------------------------------------------------------------------
/man/query.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/query.R
 3 | \name{query}
 4 | \alias{query}
 5 | \title{Query GISAID Database}
 6 | \usage{
 7 | query(
 8 |   credentials,
 9 |   location = NULL,
10 |   lineage = NULL,
11 |   from = NULL,
12 |   from_subm = NULL,
13 |   to = NULL,
14 |   to_subm = NULL,
15 |   start_index = 0,
16 |   nrows = 50,
17 |   load_all = FALSE,
18 |   low_coverage_excl = FALSE,
19 |   complete = FALSE,
20 |   high_coverage = FALSE,
21 |   collection_date_complete = FALSE,
22 |   total = FALSE
23 | )
24 | }
25 | \arguments{
26 | \item{credentials}{GISAID credentials.}
27 | 
28 | \item{location}{search for entries based on geographic location.}
29 | 
30 | \item{lineage}{search for entries based on pango lineage designations.}
31 | 
32 | \item{from}{search from specific collection date.}
33 | 
34 | \item{from_subm}{search from specific submission date.}
35 | 
36 | \item{to}{search to specific collection date.}
37 | 
38 | \item{to_subm}{search to specific submission date.}
39 | 
40 | \item{start_index}{page through results.}
41 | 
42 | \item{nrows}{number of results to return.}
43 | 
44 | \item{load_all}{return all results.}
45 | 
46 | \item{low_coverage_excl}{exclude low coverage entries from the results.}
47 | 
48 | \item{complete}{include only complete entries in the results.}
49 | 
50 | \item{high_coverage}{include only high coverage entries in the results.}
51 | 
52 | \item{collection_date_complete}{include only entries with complete in collection date the results.}
53 | 
54 | \item{total}{returns the total number of sequences matching the query.}
55 | }
56 | \value{
57 | Dataframe.
58 | }
59 | \description{
60 | Query GISAID Database
61 | }
62 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fasta.R:
--------------------------------------------------------------------------------
 1 | test_that("read_fasta works", {
 2 |   fa_df <- read_fasta('data/test.fa')
 3 |   expect_equal(names(fa_df), c('strain', 'sequence'))
 4 |   expect_equal(fa_df$strain, c('A', 'B', 'C', 'D'))
 5 |   expect_equal(fa_df$sequence[4], 'TTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAG')
 6 | })
 7 | 
 8 | 
 9 | test_that("export_fasta works", {
10 | 
11 |   # Define a temporary file for testing
12 |   temp_file <- tempfile(fileext = ".fasta")
13 | 
14 |   # Mock seqs data frame
15 |   seqs <- data.frame(country = c("Country1", "Country2"),
16 |                      pangolin_lineage = c("Lineage1", "Lineage2"),
17 |                      accession_id = c("Accession1", "Accession2"),
18 |                      date = as.Date(c("2023-01-01", "2023-02-01")),
19 |                      description = c("Description1", "Description2"),
20 |                      sequence = c("AGCT", "TCGA"))
21 | 
22 |   # Apply the function
23 |   export_fasta(seqs, temp_file, columns = c("country", "pangolin_lineage", "accession_id", "date"), date_format = "%Y-%m")
24 | 
25 |   # Check if file exists
26 |   expect_true(file.exists(temp_file))
27 | 
28 |   # Read the generated file
29 |   fasta_content <- readLines(temp_file)
30 | 
31 |   # Check the format of the FASTA headers
32 |   fasta_headers <- fasta_content[seq(1, length(fasta_content), 2)]
33 |   expected_headers <- c(">Country1@Lineage1@Accession1@2023-01",
34 |                         ">Country2@Lineage2@Accession2@2023-02")
35 |   expect_equal(fasta_headers, expected_headers)
36 | 
37 |   # Check the sequences
38 |   fasta_sequences <- trimws(fasta_content[seq(2, length(fasta_content), 2)])
39 |   expected_sequences <- c("AGCT", "TCGA")
40 |   expect_equal(fasta_sequences, expected_sequences)
41 | 
42 |   # Remove temporary file
43 |   unlink(temp_file)
44 | })
45 | 


--------------------------------------------------------------------------------
/R/export_fasta.R:
--------------------------------------------------------------------------------
 1 | #' Export Fasta
 2 | #'
 3 | #' @param seqs dataframe containing sequence information, output of function `query`
 4 | #' @param out_file_name output file name
 5 | #' @param export_dated_only Should entries without dates be exported? Set to TRUE to exlude sequences without a date (default)
 6 | #' @param delimiter fasta sequence delimiter. Default `@`
 7 | #' @param data_format Optional, specify date format eg for ISO format use '%Y-%m-%d'
 8 | #' @param columns Specify columns in the input dataframe to use as fasta headers
 9 | #' @return data.frame of complete data
10 | export_fasta <- function(
11 |     seqs,
12 |     out_file_name,
13 |     export_dated_only = T,
14 |     delimiter='@',
15 |     date_format=NULL,
16 |     columns=c('country', 'pangolin_lineage', 'accession_id', 'date')
17 | ){
18 |   if (all(columns %in% names(seqs))) {
19 |     if('date' %in% columns){
20 |       if (is.null(date_format)) {
21 |         dates <- round(lubridate::decimal_date(lubridate::ymd(seqs$date)), 3)
22 |       } else {
23 |         dates <- format(as.Date(seqs$date), date_format)
24 |       }
25 |       if(export_dated_only){
26 |         seqs <- seqs[!is.na(dates), ]
27 |         dates <- dates[!is.na(dates)]
28 |       }
29 |       seqs$date <- dates
30 |     }
31 |     newnames <- do.call(paste, c(lapply(columns, function(x) {seqs[,x]}), sep = delimiter))
32 |   } else {
33 |     missing_columns <- setdiff(columns, names(seqs))
34 |     message(sprintf("Missing columns %s. Defaulting to `description`", missing_columns))
35 |     newnames <- paste(seqs$description, sep = delimiter)
36 |   }
37 | 
38 |   cat('', file = out_file_name)
39 |   for(i in 1:nrow(seqs)){
40 |     cat('>', newnames[i], '\n', sep= '', file = out_file_name, append = T)
41 |     cat(seqs$sequence[i], '\n', file = out_file_name, append = T)
42 |   }
43 | }
44 | 


--------------------------------------------------------------------------------
/tests/testthat/test-download.R:
--------------------------------------------------------------------------------
 1 | username = Sys.getenv("GISAIDR_USERNAME")
 2 | password = Sys.getenv("GISAIDR_PASSWORD")
 3 | credentials <- login(username = username, password = password)
 4 | 
 5 | test_that("download returns data.frame", {
 6 |   df <- query(credentials = credentials, nrows = 2)
 7 |   list_of_accession_ids <- df$accession_id
 8 |   full_df <- download(credentials, list_of_accession_ids)
 9 |   expect_true(is.data.frame(full_df))
10 | })
11 | 
12 | test_that("get sequence works", {
13 |   df <- query(credentials = credentials, nrows = 2)
14 |   list_of_accession_ids <- df$accession_id
15 |   full_df <- download(credentials, list_of_accession_ids, get_sequence=TRUE)
16 |   expect_true(hasName(full_df, "sequence"))
17 | })
18 | 
19 | test_that("download_files works for EpiCoV", {
20 |   df <- query(credentials = credentials, nrows = 2)
21 |   list_of_accession_ids <- df$accession_id
22 |   download_results <- download_files(credentials, list_of_accession_ids, dates_and_location=TRUE, patient_status=TRUE, sequencing_technology=TRUE, augur_input=TRUE)
23 |   dates_and_location <- download_results$dates_and_location
24 |   patient_status <- download_results$patient_status
25 |   sequencing_technology <- download_results$sequencing_technology
26 |   augur_input_metadata <- download_results$augur_input$metadata
27 |   augur_input_sequences <- download_results$augur_input$sequences
28 | 
29 |   download_results <- download_files(credentials, list_of_accession_ids, sequences=TRUE)
30 |   sequences = download_results$sequences
31 | 
32 |   expect_true(
33 |     is.data.frame(dates_and_location) && is.data.frame(patient_status) && is.data.frame(sequencing_technology) && is.data.frame(augur_input_metadata) &&
34 |       !is.null(augur_input_sequences) && !is.null(sequences)
35 |   )
36 | })
37 | 
38 | credentials <- login(username = username, password = password, database="EpiPox")
39 | 
40 | test_that("download_files works for EpiPox", {
41 |   df <- query(credentials = credentials, nrows = 2)
42 |   list_of_accession_ids <- df$accession_id
43 |   download_results <- download_files(credentials, list_of_accession_ids, dates_and_location=TRUE, patient_status=TRUE, sequencing_technology=TRUE, sequences=TRUE)
44 |   dates_and_location <- download_results$dates_and_location
45 |   patient_status <- download_results$patient_status
46 |   sequencing_technology <- download_results$sequencing_technology
47 |   sequences = download_results$sequences
48 | 
49 |   expect_true(
50 |     is.data.frame(dates_and_location) && is.data.frame(patient_status) && is.data.frame(sequencing_technology) &&
51 |       !is.null(sequences)
52 |   )
53 | })
54 | 
55 | credentials <- login(username = username, password = password, database="EpiRSV")
56 | 
57 | test_that("download_files works for EpiRSV", {
58 |   df <- query(credentials = credentials, nrows = 2)
59 |   list_of_accession_ids <- df$accession_id
60 |   download_results <- download_files(credentials, list_of_accession_ids, dates_and_location=TRUE, patient_status=TRUE, sequencing_technology=TRUE, sequences=TRUE)
61 |   dates_and_location <- download_results$dates_and_location
62 |   patient_status <- download_results$patient_status
63 |   sequencing_technology <- download_results$sequencing_technology
64 |   sequences = download_results$sequences
65 | 
66 |   expect_true(
67 |     is.data.frame(dates_and_location) && is.data.frame(patient_status) && is.data.frame(sequencing_technology) &&
68 |       !is.null(sequences)
69 |   )
70 | })
71 | 


--------------------------------------------------------------------------------
/R/query.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Query GISAID Database
  3 | #'
  4 | #' @param credentials GISAID credentials.
  5 | #' @param text full text search.
  6 | #' @param location search for entries based on geographic location.
  7 | #' @param lineage search for entries based on pango lineage designations.
  8 | #' @param variant search for entries based on variant designation
  9 | #' @param from search from specific collection date.
 10 | #' @param to search to specific collection date.
 11 | #' @param from_subm search from specific submission date.
 12 | #' @param to_subm search to specific submission date.
 13 | #' @param virus_name search for a virus_name.
 14 | #' @param order_by order results by a column.
 15 | #' @param order_asc order_by results in ascending order.
 16 | #' @param start_index page through results.
 17 | #' @param nrows number of results to return.
 18 | #' @param load_all return all results.
 19 | #' @param low_coverage_excl exclude low coverage entries from the results.
 20 | #' @param complete include only complete entries in the results.
 21 | #' @param high_coverage include only high coverage entries in the results.
 22 | #' @param collection_date_complete include only entries with complete in collection date the results.
 23 | #' @param total returns the total number of sequences matching the query.
 24 | #' @param fast returns all of the accession_ids that match the query.
 25 | #' @param aa_substitution_ceid returns all sequences with the selected amino acid mutation
 26 | #' @param nucl_mutation_ceid returns all sequences with the selected nucleotide mutation
 27 | #' @param subtype returns all sequences with the selected subtype
 28 | #' @return data.frame
 29 | query <-
 30 |   function(credentials,
 31 |            text = NULL,
 32 |            location = NULL,
 33 |            lineage = NULL,
 34 |            variant = NULL,
 35 |            from = NULL,
 36 |            from_subm = NULL,
 37 |            to = NULL,
 38 |            to_subm = NULL,
 39 |            virus_name = NULL,
 40 |            order_by = NULL,
 41 |            aa_substitution = NULL,
 42 |            nucl_mutation = NULL,
 43 |            subtype = NULL,
 44 |            order_asc = TRUE,
 45 |            start_index = 0,
 46 |            nrows = 50,
 47 |            load_all = FALSE,
 48 |            low_coverage_excl = FALSE,
 49 |            complete = FALSE,
 50 |            high_coverage = FALSE,
 51 |            collection_date_complete = FALSE,
 52 |            total = FALSE,
 53 |            fast = FALSE) {
 54 | 
 55 |     if (nrows > 50 && !total && !load_all && !fast) {
 56 |       message(paste0("Loading entries in batches..."))
 57 |       batches <- create_batches(start_index = start_index, nrows = nrows)
 58 |       results <- data.frame()
 59 |       for (i in 1:nrow(batches)) {
 60 |         results <- rbind(results, internal_query(
 61 |           credentials = credentials,
 62 |           text = text,
 63 |           location = location,
 64 |           lineage = lineage,
 65 |           variant = variant,
 66 |           from = from,
 67 |           from_subm = from_subm,
 68 |           to = to,
 69 |           to_subm = to_subm,
 70 |           virus_name = virus_name,
 71 |           order_by = order_by,
 72 |           order_asc = order_asc,
 73 |           start_index = batches[i,1],
 74 |           nrows = batches[i,2],
 75 |           low_coverage_excl = low_coverage_excl,
 76 |           complete = complete,
 77 |           high_coverage = high_coverage,
 78 |           collection_date_complete = collection_date_complete,
 79 |           aa_substitution = aa_substitution,
 80 |           nucl_mutation = nucl_mutation,
 81 |           subtype = subtype
 82 |         ))
 83 |       }
 84 |       results <- results[!duplicated(results$accession_id), ] # Remove duplicate entries
 85 |       rownames(results) <- NULL # reset index
 86 |       return(results)
 87 |     } else if (total | load_all | fast) {
 88 |       return(
 89 |         internal_query(
 90 |           credentials = credentials,
 91 |           text = text,
 92 |           location = location,
 93 |           lineage = lineage,
 94 |           variant = variant,
 95 |           from = from,
 96 |           from_subm = from_subm,
 97 |           to = to,
 98 |           to_subm = to_subm,
 99 |           virus_name = virus_name,
100 |           order_by = order_by,
101 |           order_asc = order_asc,
102 |           load_all = load_all,
103 |           low_coverage_excl = low_coverage_excl,
104 |           complete = complete,
105 |           high_coverage = high_coverage,
106 |           collection_date_complete = collection_date_complete,
107 |           aa_substitution = aa_substitution,
108 |           nucl_mutation = nucl_mutation,
109 |           total = total,
110 |           fast = fast,
111 |           subtype = subtype
112 |         )
113 |       )
114 |     } else  {
115 |       return(
116 |         internal_query(
117 |           credentials = credentials,
118 |           text = text,
119 |           location = location,
120 |           lineage = lineage,
121 |           variant = variant,
122 |           from = from,
123 |           from_subm = from_subm,
124 |           to = to,
125 |           to_subm = to_subm,
126 |           virus_name = virus_name,
127 |           order_by = order_by,
128 |           order_asc = order_asc,
129 |           start_index = start_index,
130 |           nrows = nrows,
131 |           low_coverage_excl = low_coverage_excl,
132 |           complete = complete,
133 |           high_coverage = high_coverage,
134 |           collection_date_complete = collection_date_complete,
135 |           aa_substitution = aa_substitution,
136 |           nucl_mutation = nucl_mutation,
137 |           subtype = subtype
138 |         )
139 |       )
140 |     }
141 | 
142 | 
143 |   }
144 | 


--------------------------------------------------------------------------------
/tests/testthat/test-query.R:
--------------------------------------------------------------------------------
  1 | username = Sys.getenv("GISAIDR_USERNAME")
  2 | password = Sys.getenv("GISAIDR_PASSWORD")
  3 | credentials <- login(username = username, password = password)
  4 | # will break if log in fails...
  5 | 
  6 | test_that("basic query works", {
  7 |   df <- query(credentials = credentials)
  8 |   expect_true(is.data.frame(df))
  9 | })
 10 | 
 11 | test_that("can change number of rows", {
 12 |   df <- query(credentials = credentials, nrows = 100, location = "Africa / ...")
 13 |   expect_equal(nrow(df), 100)
 14 | })
 15 | 
 16 | test_that("can change index", {
 17 |   df1 <- query(credentials = credentials,
 18 |                start_index = 0)
 19 |   df2 <- query(credentials = credentials,
 20 |                start_index = 49)
 21 |   expect_true(df2[1, 1] == df1[50, 1])
 22 | })
 23 | 
 24 | test_that("expried session fails", {
 25 |   sid <- credentials$sid
 26 |   credentials$sid = "890C95496CO0CQ007Z8HDSG34GF1JZ3Z"
 27 |   expect_error(query(
 28 |     credentials = credentials
 29 |   ),
 30 |   "The session has expired. Please login again.")
 31 |   credentials$sid <- sid
 32 | })
 33 | 
 34 | test_that("location search works", {
 35 |   df <- query(credentials = credentials,
 36 |               location = 'Australia')
 37 |   expect_true(all(lapply(df$location,
 38 |                          function(x)
 39 |                            grepl("Australia", x, fixed = TRUE))))
 40 | })
 41 | 
 42 | test_that("lineage search works", {
 43 |   df <- query(credentials = credentials,
 44 |               lineage = 'W.1')
 45 |   # need a better way to test this...
 46 |   expect_true(is.data.frame(df))
 47 |   expect_true(nrow(df) == 50)
 48 | })
 49 | 
 50 | test_that("combination search works", {
 51 |   df <- query(credentials = credentials,
 52 |               location = 'Australia',
 53 |               lineage = 'W.1')
 54 |   expect_true(df$accession_id[1] == "EPI_ISL_678350")
 55 | })
 56 | 
 57 | test_that("load all works", {
 58 |   df <- query(credentials = credentials,
 59 |               lineage = 'W.1',
 60 |               load_all = TRUE)
 61 |   expect_true(nrow(df) > 50)
 62 | })
 63 | 
 64 | test_that("date search works", {
 65 |   df <-
 66 |     query(credentials = credentials,
 67 |           from = '2021-04-05',
 68 |           to = '2021-04-05')
 69 |   expect_true(nrow(df) == 50)
 70 |   expect_true(df[10,]$collection_date == "2021-04-05")
 71 | })
 72 | 
 73 | test_that("low_coverage_excl works", {
 74 |   df <- query(credentials = credentials, low_coverage_excl = TRUE)
 75 |   expect_true(nrow(df) == 50)
 76 |   expect_true(length(grep("Long stretches of NNNs", df$information)) == 0)
 77 | })
 78 | 
 79 | test_that("complete works", {
 80 |   df <- query(credentials = credentials, complete = TRUE)
 81 |   expect_true(nrow(df) == 50)
 82 |   expect_true(all(df$length > 29000))
 83 | })
 84 | 
 85 | test_that("submission date search works", {
 86 |   df <-
 87 |     query(credentials = credentials,
 88 |           from_subm = '2021-04-05',
 89 |           to_subm = '2021-04-05')
 90 |   expect_true(nrow(df) == 50)
 91 |   expect_true(all(df$submission_date == "2021-04-05"))
 92 | })
 93 | 
 94 | test_that("collection date complete works", {
 95 |   df <-
 96 |     query(
 97 |       credentials = credentials,
 98 |       lineage = 'BA.1',
 99 |       location = 'Australia',
100 |       collection_date_complete = T
101 |     )
102 |   expect_true(nrow(df) == 50)
103 |   expect_true(all(nchar(df$collection_date) == 10))
104 | })
105 | 
106 | test_that("high coverage works", {
107 |   df <-
108 |     query(credentials = credentials, high_coverage = T)
109 |   expect_true(nrow(df) == 50)
110 |   expect_true(length(grep("warn_sign", df$information)) == 0)
111 | })
112 | 
113 | test_that("total returns total", {
114 |   total <-
115 |     query(credentials = credentials, total=T)
116 |   expect_true(is.numeric(total))
117 | })
118 | 
119 | test_that("variant search works", {
120 |   df <-
121 |     query(credentials = credentials, variant='omicron')
122 |   # variant information is not returned from query or download...
123 |   # need a better way to test this...
124 |   expect_true(is.data.frame(df))
125 |   expect_true(nrow(df) == 50)
126 | })
127 | 
128 | test_that("virus name search works", {
129 |   df <-
130 |     query(credentials = credentials, virus_name='hCoV-19/Ireland/D-BHTEST/2022')
131 |   expect_true(is.data.frame(df))
132 |   expect_true(nrow(df) == 1)
133 |   expect_true(df[,'virus_name'] == 'hCoV-19/Ireland/D-BHTEST/2022')
134 | })
135 | 
136 | test_that("fast works", {
137 |   df <- query(credentials = credentials,
138 |               lineage = 'W.1',
139 |               fast = TRUE)
140 |   expect_true(nrow(df) > 50)
141 | })
142 | 
143 | test_that("order_by works", {
144 |   df <- query(credentials = credentials, order_by = 'submission_date')
145 |   expect_true(df$submission_date[1] == "2020-01-10")
146 | })
147 | 
148 | test_that("aa_substitution works", {
149 |   df <- query(credentials = credentials,
150 |       aa_substitution = 'Spike_E484Q, Spike_H69del, -N_P13L',
151 |       to_subm =  '2023-02-22',
152 |       order_by='submission_date')
153 |   expect_true(is.data.frame(df))
154 |   expect_equal(df$submission_date[1], "2021-01-25")
155 |   expect_equal(df$virus_name[1], "hCoV-19/England/PORT-2E19CC/2021")
156 | })
157 | 
158 | test_that("nucl_mutation works", {
159 |   df <- query(credentials = credentials,
160 |     nucl_mutation = '-T23599G, -C10029T, -C14408T, -A23403G, T22679C, G28881A, A24424T',
161 |     to_subm = '2023-02-22',
162 |     order_by='submission_date')
163 |   expect_true(is.data.frame(df))
164 | 
165 | 
166 |   expect_equal(df$submission_date[1],"2021-12-29")
167 | })
168 | 
169 | test_that("text search works", {
170 |   accession_ids = c("EPI_ISL_17398411", "EPI_ISL_17199001", "EPI_ISL_17409201", "EPI_ISL_17243716")
171 |   df <- query(credentials = credentials, text = paste(accession_ids, collapse = "\n"))
172 |   expect_true(nrow(df) == 4)
173 | })
174 | 
175 | test_that("empty result returns empty df", {
176 |   df <- query(
177 |     credentials = credentials,
178 |     lineage =  "FAKE.1.1.2",
179 |     location = 'Australia',
180 |     fast = TRUE
181 |   )
182 |   expect_true(nrow(df) == 0)
183 | })
184 | 
185 | # -----------------------------------------------------------------------------
186 | # EpiPox
187 | 
188 | credentials <- login(username = username, password = password, database="EpiPox")
189 | # will break if log in fails...
190 | 
191 | test_that("EpiPox basic query works", {
192 |   df <- query(credentials = credentials)
193 |   expect_true(is.data.frame(df))
194 | })
195 | 
196 | test_that("EpiPox lineage search works using clades", {
197 |   df <- query(credentials = credentials, lineage = 'IIb')
198 |   # need a better way to test this...
199 |   expect_true(is.data.frame(df))
200 |   expect_true(nrow(df) == 50)
201 | })
202 | 
203 | test_that("EpiPox lineage search works using lineages", {
204 |   df <- query(credentials = credentials, lineage = 'IIb B.1.20')
205 |   # need a better way to test this...
206 |   expect_true(is.data.frame(df))
207 |   expect_true(nrow(df) == 50)
208 | })
209 | 
210 | test_that("EpiPox high coverage works", {
211 |   df <- query(credentials = credentials, high_coverage = TRUE)
212 |   expect_true(nrow(df) == 50)
213 |   expect_true(length(grep("warn_sign", df$information)) == 0)
214 | })
215 | 
216 | # -----------------------------------------------------------------------------
217 | # EpiRSV
218 | 
219 | credentials <- login(username = username, password = password, database="EpiRSV")
220 | # will break if log in fails...
221 | 
222 | test_that("EpiRSV basic query works", {
223 |   df <- query(credentials = credentials)
224 |   expect_true(is.data.frame(df))
225 | })
226 | 
227 | test_that("EpiRSV subtype search works", {
228 |   total_a <- query(credentials = credentials, subtype="A", total=TRUE)
229 |   total_b <- query(credentials = credentials, subtype="B", total=TRUE)
230 |   expect_true(total_a != total_b)
231 | })
232 | 
233 | test_that("EpiRSV complete works", {
234 |   df <- query(credentials = credentials, complete = TRUE)
235 |   expect_true(nrow(df) == 50)
236 |   expect_true(all(df$length > 15000))
237 | })
238 | 
239 | test_that("EpiRSV high coverage works", {
240 |   df <- query(credentials = credentials, high_coverage = TRUE)
241 |   expect_true(nrow(df) == 50)
242 |   expect_true(length(grep("warn_sign", df$information)) == 0)
243 | })
244 | 


--------------------------------------------------------------------------------
/R/internal_query.R:
--------------------------------------------------------------------------------
  1 | #' Query GISAID Database
  2 | #'
  3 | #' @param credentials GISAID credentials.
  4 | #' @param text full text search.
  5 | #' @param location search for entries based on geographic location.
  6 | #' @param lineage search for entries based on pango lineage designations.
  7 | #' @param variant search for entries based on variant designation
  8 | #' @param from search from specific collection date.
  9 | #' @param to search to specific collection date.
 10 | #' @param from_subm search from specific submission date.
 11 | #' @param to_subm search to specific submission date.
 12 | #' @param virus_name search for a virus_name.
 13 | #' @param order_by order results by a column.
 14 | #' @param order_asc order_by results in ascending order.
 15 | #' @param start_index page through results.
 16 | #' @param nrows number of results to return.
 17 | #' @param load_all return all results.
 18 | #' @param low_coverage_excl exclude low coverage entries from the results.
 19 | #' @param complete include only complete entries in the results.
 20 | #' @param high_coverage include only high coverage entries in the results.
 21 | #' @param collection_date_complete include only entries with complete in collection date the results.
 22 | #' @param total returns the total number of sequences matching the query.
 23 | #' @param fast returns all of the accession_ids that match the query.
 24 | #' @param aa_substitution returns all sequences with the amino acid mutation(s), negative selection by '-' prefix
 25 | #' @param nucl_mutation returns all sequences with the nucleotide mutation(s), negative selection by '-' prefix
 26 | #' @param subtype returns all sequences with the subtype
 27 | #' @return Dataframe.
 28 | internal_query <-
 29 |   function(credentials,
 30 |            text = NULL,
 31 |            location = NULL,
 32 |            lineage = NULL,
 33 |            variant = NULL,
 34 |            from = NULL,
 35 |            from_subm = NULL,
 36 |            to = NULL,
 37 |            to_subm = NULL,
 38 |            virus_name = NULL,
 39 |            order_by = NULL,
 40 |            aa_substitution = NULL,
 41 |            nucl_mutation = NULL,
 42 |            subtype = NULL,
 43 |            order_asc = TRUE,
 44 |            start_index = 0,
 45 |            nrows = 50,
 46 |            load_all = FALSE,
 47 |            low_coverage_excl = FALSE,
 48 |            complete = FALSE,
 49 |            high_coverage = FALSE,
 50 |            collection_date_complete = FALSE,
 51 |            total = FALSE,
 52 |            fast = FALSE
 53 |   ) {
 54 | 
 55 |     df <- tryCatch({
 56 |       queue = list()
 57 | 
 58 |       # Simple Text Filter (EpiCoV only)
 59 |       if (!is.null(text) && credentials$database == "EpiCoV") {
 60 |         new_queue <- create_search_queue(credentials, credentials$text_ceid, text, 'DoSimpleSearch')
 61 |         queue     <- append(queue, new_queue)
 62 |       }
 63 | 
 64 |       # Location Filter (All)
 65 |       if (!is.null(location)) {
 66 |         new_queue <- create_search_queue(credentials, credentials$location_ceid, location, 'FilterChange')
 67 |         queue     <- append(queue, new_queue)
 68 |       }
 69 | 
 70 |       # Collection Date Complete (All)
 71 |       if (collection_date_complete) {
 72 |         new_queue <- create_search_queue(credentials, credentials$collection_date_complete_ceid, list('coldc'), 'FilterChange')
 73 |         queue     <- append(queue, new_queue)
 74 |       }
 75 | 
 76 |       # Lineage (EpiCoV, EpiPox)
 77 |       if (!is.null(lineage)) {
 78 |         if (credentials$database == "EpiCoV"){
 79 |           new_queue <- create_search_queue(credentials, credentials$lineage_ceid, lineage, 'LineageChange')
 80 |           queue     <- append(queue, new_queue)
 81 |         } else if (credentials$database == "EpiPox") {
 82 |           new_queue <- create_search_queue(credentials, credentials$lineage_ceid, lineage, 'FilterChange')
 83 |           queue     <- append(queue, new_queue)
 84 |         }
 85 |       }
 86 | 
 87 |       # Subtype (EpiRSV)
 88 |       if (!is.null(subtype) && credentials$database == "EpiRSV") {
 89 |         new_queue <- create_search_queue(credentials, credentials$subtype_ceid, subtype, 'FilterChange')
 90 |         queue     <- append(queue, new_queue)
 91 |       }
 92 | 
 93 |       # Variant (EpiCoV)
 94 |       if (!is.null(variant) && credentials$database == "EpiCoV") {
 95 |         new_queue <- create_search_queue(credentials, credentials$variant_ceid, variant, 'VariantsChange')
 96 |         queue     <- append(queue, new_queue)
 97 |       }
 98 | 
 99 |       # Virus Name (All)
100 |       if (!is.null(virus_name)) {
101 |         new_queue <- create_search_queue(credentials, credentials$virus_name_ceid, virus_name, 'FilterChange')
102 |         queue     <- append(queue, new_queue)
103 |       }
104 | 
105 |       # From, collection date (All)
106 |       if (!is.null(from)) {
107 |         new_queue <- create_search_queue(credentials, credentials$from_ceid, from, 'FilterChange')
108 |         queue     <- append(queue, new_queue)
109 |       }
110 | 
111 |       # From, submission date (All)
112 |       if (!is.null(from_subm)) {
113 |         new_queue <- create_search_queue(credentials, credentials$from_sub_ceid, from_subm, 'FilterChange')
114 |         queue     <- append(queue, new_queue)
115 |       }
116 | 
117 |       # To, collection date (All)
118 |       if (!is.null(to)) {
119 |         new_queue <- create_search_queue(credentials, credentials$to_ceid, to, 'FilterChange')
120 |         queue     <- append(queue, new_queue)
121 |       }
122 | 
123 |       # To, submission date (All)
124 |       if (!is.null(to_subm)) {
125 |         new_queue <- create_search_queue(credentials, credentials$to_sub_ceid, to_subm, 'FilterChange')
126 |         queue     <- append(queue, new_queue)
127 |       }
128 | 
129 |       # Amino acid changes (all)
130 |       if (!is.null(aa_substitution)) {
131 |         new_queue <- create_search_queue(credentials, credentials$aa_substitution_ceid, aa_substitution, 'FilterChange')
132 |         queue     <- append(queue, new_queue)
133 |       }
134 | 
135 |       # Nucleotide changes (EpiCoV)
136 |       if (!is.null(nucl_mutation) && credentials$database == "EpiCoV") {
137 |         new_queue <- create_search_queue(credentials, credentials$nucl_mutation_ceid, nucl_mutation, 'FilterChange')
138 |         queue     <- append(queue, new_queue)
139 |       }
140 | 
141 |       # Low Coverage Exclude (All)
142 |       if (low_coverage_excl) {
143 |         new_queue <- create_search_queue(credentials, credentials$low_coverage_excl_ceid, list('lowco'), 'FilterChange')
144 |         queue     <- append(queue, new_queue)
145 |       }
146 | 
147 |       # Quality (mixed)
148 |       if (credentials$database == 'EpiCoV') {
149 |         if (complete) {
150 |           new_queue <- create_search_queue(credentials, credentials$complete_ceid, list('complete'), 'FilterChange')
151 |           queue     <- append(queue, new_queue)
152 |         }
153 |         if (high_coverage) {
154 |           new_queue <- create_search_queue(credentials, credentials$highq_ceid, list('highq'), 'FilterChange')
155 |           queue     <- append(queue, new_queue)
156 |         }
157 |       } else {
158 |         quality <- list()
159 | 
160 |         if (complete) {
161 |           quality <- append(quality, 'complete')
162 |         }
163 |         if (high_coverage) {
164 |           quality <- append(quality, 'highq')
165 |         }
166 | 
167 |         if (length(quality) > 0) {
168 |           new_queue <- create_search_queue(credentials, credentials$quality_ceid, quality, 'FilterChange')
169 |           queue     <- append(queue, new_queue)
170 |         }
171 |       }
172 | 
173 |       if (length(queue) > 0) {
174 |         command_queue <- list(queue = queue)
175 |         data <-
176 |           formatDataForRequest(
177 |             sid = credentials$sid,
178 |             wid = credentials$wid,
179 |             pid = credentials$pid,
180 |             queue = command_queue,
181 |             timestamp = timestamp()
182 |           )
183 |         response <- httr::POST(GISAID_URL, httr::add_headers(.headers = headers), body = data)
184 |         response_data <- parseResponse(response)
185 |         log.debug(response_data)
186 |       }
187 | 
188 |       queue = list()
189 | 
190 |       # ordering
191 |       if (!is.null(order_by)) {
192 |         if (credentials$database == 'EpiCoV') {
193 |           order_by = covid_order_by_col_map[[order_by]]
194 |         } else {
195 |           # epipox and epirsv are missing the host column
196 |           order_by = other_order_by_col_map[[order_by]]
197 |         }
198 |         command <- createCommand(
199 |           wid = credentials$wid,
200 |           pid = credentials$pid,
201 |           cid = credentials$query_cid,
202 |           cmd = 'SetSorting',
203 |           params = list(order_by = order_by, order_asc = order_asc)
204 |         )
205 |         queue <- append(queue, list(command))
206 |       }
207 | 
208 |       # pagination
209 |       command <- createCommand(
210 |         wid = credentials$wid,
211 |         pid = credentials$pid,
212 |         cid = credentials$query_cid,
213 |         cmd = 'SetPaginating',
214 |         params = list(start_index = start_index, rows_per_page = nrows)
215 |       )
216 |       queue <- append(queue, list(command))
217 | 
218 |       # get data
219 |       command <- createCommand(
220 |         wid = credentials$wid,
221 |         pid = credentials$pid,
222 |         cid = credentials$query_cid,
223 |         cmd = 'GetData'
224 |       )
225 | 
226 |       queue <- append(queue, list(command))
227 | 
228 |       command_queue <- list(queue = queue)
229 | 
230 |       data <-
231 |         formatDataForRequest(
232 |           sid = credentials$sid,
233 |           wid = credentials$wid,
234 |           pid = credentials$pid,
235 |           queue = command_queue,
236 |           timestamp = timestamp()
237 |         )
238 |       response <- httr::GET(paste0(GISAID_URL, '?', data))
239 |       response_data <- parseResponse(response)
240 | 
241 |       if (total) {
242 |         return(as.numeric(response_data$totalRecords))
243 |       }
244 | 
245 |       if (fast) {
246 |         log.debug(paste0(GISAID_URL, "?sid=", credentials$sid))
247 |         accession_id_count <- response_data$totalRecords
248 |         log.info(paste('Selecting all', accession_id_count, "accession_ids."))
249 |         df <- get_accession_ids(credentials = credentials)
250 |         log.info(paste("Returning", start_index, "-", nrow(df), "of", accession_id_count, "accession_ids."))
251 | 
252 |         if (accession_id_count > nrow(df)) {
253 |           log.warn(paste("Could only get", nrow(df), "accession_ids. Narrow your search."))
254 |         }
255 |         return(df)
256 |       }
257 | 
258 |       if (load_all && response_data$totalRecords > nrows) {
259 |         log.info(paste("Loading all", response_data$totalRecords, "entries."))
260 |         return(
261 |           query(
262 |             credentials = credentials,
263 |             text = text,
264 |             location = location,
265 |             lineage = lineage,
266 |             variant = variant,
267 |             from = from,
268 |             from_subm = from_subm,
269 |             to = to,
270 |             to_subm = to_subm,
271 |             aa_substitution = aa_substitution,
272 |             nucl_mutation = nucl_mutation,
273 |             nrows = response_data$totalRecords,
274 |             # set load_all to false to break the recursion
275 |             load_all = FALSE,
276 |             low_coverage_excl = low_coverage_excl,
277 |             complete = complete,
278 |             high_coverage = high_coverage,
279 |             collection_date_complete = collection_date_complete,
280 |             subtype = subtype
281 |           )
282 |         )
283 |       }
284 |       log.info(paste("Returning", start_index, "-", start_index + response_data$recordsReturned, "of ", response_data$totalRecords, "entries."))
285 |       log.debug(response_data$records)
286 |       if (length(response_data$records) >= 1) {
287 |         df <- data.frame(do.call(rbind, response_data$records))
288 |         df <-
289 |           data.frame(lapply(df, function(col) {
290 |             col[sapply(col, is.null)] <- NA
291 |             unlist(col)
292 |           }))
293 |       } else {
294 |         df <- data.frame(response_data$records)
295 |       }
296 |       df <- setColumnNames(df, credentials$database)
297 |       df <- setDataTypes(df)
298 |     },
299 |     finally = {
300 |       # reset search params
301 |       resetQuery(credentials)
302 |     })
303 |     return(df)
304 |   }
305 | 


--------------------------------------------------------------------------------
/R/login.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' Login to GISAID
  4 | #'
  5 | #' @param username GISAID username.
  6 | #' @param password GISAID password.
  7 | #' @return credentials used to query GISAID.
  8 | #' @examples
  9 | #' username = Sys.getenv("GISAIDR_USERNAME")
 10 | #' password = Sys.getenv("GISAIDR_PASSWORD")
 11 | #' login(username, password)
 12 | login <- function(username, password, database="EpiCoV") {
 13 |   if (!database %in% c("EpiCoV", "EpiRSV", "EpiPox")) {
 14 |     stop(sprintf("Database must be EpiCoV, EpiRSV or EpiPox (database=%s)", database))
 15 |   }
 16 |   # get a session ID
 17 |   response <- send_request()
 18 |   home_page_text = httr::content(response, as = 'text')
 19 |   session_id <-
 20 |     extract_first_match("name=\"sid\" value='([^']*)", home_page_text)
 21 |   response <- send_request(paste0('sid=', session_id))
 22 | 
 23 |   # Load the home page
 24 |   # TODO: ensure it's the covid-19 page
 25 |   login_page_text = httr::content(response, as = 'text')
 26 | 
 27 |   # extract the other IDs for log in stage 1
 28 |   WID <- extract_first_match('"WID"] = "([^"]*)', login_page_text)
 29 |   login_page_ID <-
 30 |     extract_first_match('PID"] = "([^"]*)', login_page_text)
 31 |   login_component_ID <-
 32 |     extract_first_match("sys.getC\\('(.*)').call\\('doLogin'", login_page_text)
 33 | 
 34 |   # frontend page
 35 |   # create doLogin command
 36 |   doLogin_command <- createCommand(
 37 |     wid = WID,
 38 |     pid = login_page_ID,
 39 |     cid = login_component_ID,
 40 |     cmd = 'doLogin',
 41 |     params = list(login = username, hash = openssl::md5(password))
 42 |   )
 43 | 
 44 |   # add the doLogin command to queue
 45 |   # commands can be built up into a pipeline in the queue
 46 |   queue <- list(queue = list(doLogin_command))
 47 |   data <-
 48 |     formatDataForRequest(session_id, WID, login_page_ID, queue, timestamp())
 49 | 
 50 |   response <- send_request(method = 'POST', data=data)
 51 |   response_data <- parseResponse(response)
 52 | 
 53 |   if (length(grep("^sys.goPage", response_data$responses[[1]]$data)) == 0) {
 54 |     # handle event that the default page is not frontend
 55 |     default_page <- send_request(paste0('sid=', session_id))
 56 |     default_page_text = httr::content(default_page, as = 'text')
 57 | 
 58 |     EpiCov_CID <- extract_first_match("sys.call\\('(.{5,20})','Go'", default_page_text)
 59 | 
 60 |     goto_EpiCov_page_command <- createCommand(
 61 |       wid = WID,
 62 |       pid = login_page_ID,
 63 |       cid = EpiCov_CID,
 64 |       cmd = 'Go',
 65 |       params = list(page = 'corona2020')
 66 |     )
 67 | 
 68 |     queue <- list(queue = list(goto_EpiCov_page_command))
 69 | 
 70 |     data <-
 71 |       formatDataForRequest(session_id, WID, login_page_ID, queue, timestamp())
 72 | 
 73 |     response <- send_request(data)
 74 |     response_data <- parseResponse(response)
 75 |   }
 76 | 
 77 |   frontend_page_ID <- extract_first_match("\\('(.*)')",response_data$responses[[1]]$data)
 78 | 
 79 |   frontend_page <- send_request(paste0('sid=', session_id, '&pid=', frontend_page_ID))
 80 |   frontend_page_text = httr::content(frontend_page, as = 'text')
 81 |   if (grepl('sys.openOverlay', frontend_page_text, fixed = TRUE)) {
 82 |     # extract overlay pid
 83 |     overlay_window_ID <- paste0('wid_', extract_first_match("openOverlay\\('wid_(.{5,20})','pid_.{5,20}'", frontend_page_text))
 84 |     overlay_page_ID <- paste0('pid_', extract_first_match("openOverlay\\('wid_.{5,20}','pid_(.{5,20})'", frontend_page_text))
 85 | 
 86 |     # load overlay
 87 |     overlay_page <- send_request(paste0('sid=', session_id, '&pid=', overlay_page_ID))
 88 |     overlay_page_text <- httr::content(overlay_page, as = 'text')
 89 | 
 90 |     # extract close cid
 91 |     close_overlay_cid <- extract_first_match("createComponent\\('(.{5,20})','CloseButtonComponent", overlay_page_text)
 92 | 
 93 |     # send close cmd
 94 |     close_overlay_command <- createCommand(
 95 |       wid = overlay_window_ID,
 96 |       pid = overlay_page_ID,
 97 |       cid = close_overlay_cid,
 98 |       cmd = 'Back'
 99 |     )
100 |     queue <- list(queue = list(close_overlay_command))
101 |     data <-
102 |       formatDataForRequest(session_id, overlay_window_ID, overlay_page_ID, queue, timestamp())
103 |     response <- send_request(data)
104 |     response_data <- parseResponse(response)
105 |   }
106 | 
107 |   if (database=="EpiRSV") {
108 |     EpiRSV_CID <- extract_first_match("sys.call\\('(.{5,20})','Go'", frontend_page_text)
109 | 
110 |     goto_EpiRSV_page_command <- createCommand(
111 |       wid = WID,
112 |       pid = frontend_page_ID,
113 |       cid = EpiRSV_CID,
114 |       cmd = 'Go',
115 |       params = list(page = 'rsv')
116 |     )
117 | 
118 |     queue <- list(queue = list(goto_EpiRSV_page_command))
119 | 
120 |     data <-
121 |       formatDataForRequest(session_id, WID, login_page_ID, queue, timestamp())
122 | 
123 |     response <- send_request(data)
124 |     response_data <- parseResponse(response)
125 |     RSV_page_ID <-
126 |       extract_first_match("\\('(.*)')",response_data$responses[[1]]$data)
127 |     RSV_page <- send_request(paste0('sid=', session_id, '&pid=', RSV_page_ID))
128 |     RSV_page_text = httr::content(RSV_page, as = 'text')
129 | 
130 |     RSV_actionbar_component_ID <-
131 |       extract_first_match("sys-actionbar-action-ni\" onclick=\"sys.getC\\('([^']*)",
132 |                           RSV_page_text)
133 | 
134 |     response_data <- go_to_page(session_id, WID, RSV_page_ID, RSV_actionbar_component_ID, 'page_rsv.BrowsePage')
135 |     customSearch_page_ID <-
136 |       extract_first_match("\\('(.*)')",response_data$responses[[1]]$data)
137 |   } else if (database=="EpiPox") {
138 |     EpiPox_CID <- extract_first_match("sys.call\\('(.{5,20})','Go'", frontend_page_text)
139 | 
140 |     goto_EpiPox_page_command <- createCommand(
141 |       wid = WID,
142 |       pid = frontend_page_ID,
143 |       cid = EpiPox_CID,
144 |       cmd = 'Go',
145 |       params = list(page = 'mpox')
146 |     )
147 | 
148 |     queue <- list(queue = list(goto_EpiPox_page_command))
149 | 
150 |     data <-
151 |       formatDataForRequest(session_id, WID, login_page_ID, queue, timestamp())
152 | 
153 |     response <- send_request(data)
154 |     response_data <- parseResponse(response)
155 |     POX_page_ID <-
156 |       extract_first_match("\\('(.*)')",response_data$responses[[1]]$data)
157 |     POX_page <- send_request(paste0('sid=', session_id, '&pid=', POX_page_ID))
158 |     POX_page_text = httr::content(POX_page, as = 'text')
159 | 
160 |     POX_actionbar_component_ID <-
161 |       extract_first_match("sys-actionbar-action-ni\" onclick=\"sys.getC\\('([^']*)",
162 |                           POX_page_text)
163 | 
164 |     response_data <- go_to_page(session_id, WID, POX_page_ID, POX_actionbar_component_ID, 'page_mpox.BrowsePage')
165 |     customSearch_page_ID <-
166 |       extract_first_match("\\('(.*)')",response_data$responses[[1]]$data)
167 |   } else {
168 |     # check for overlay
169 | 
170 |     COVID_actionbar_component_ID <-
171 |       extract_first_match("sys-actionbar-action-ni\" onclick=\"sys.getC\\('([^']*)",
172 |                           frontend_page_text)
173 |     response_data <- go_to_page(session_id, WID, frontend_page_ID, COVID_actionbar_component_ID, 'page_corona2020.Corona2020BrowsePage')
174 |     customSearch_page_ID <-
175 |       extract_first_match("\\('(.*)')",response_data$responses[[1]]$data)
176 |   }
177 |   customSearch_page_response <- send_request(paste0('sid=', session_id, '&pid=', customSearch_page_ID))
178 |   customSearch_page_text = httr::content(customSearch_page_response, as = 'text')
179 | 
180 |   query_cid <- extract_first_match("div class=\"sys-datatable.{0,30}\" id=\"(.{5,20})_table", customSearch_page_text)
181 | 
182 |   #selectAll_ceid <- extract_first_match("onclick=\"sys.getC\\(\"(.{5,20})\"\\).selectAll", customSearch_page_text)
183 | 
184 |     # Search
185 |   if (database == 'EpiRSV'){
186 |     SearchComponent <- 'SearchComponent'
187 |   } else if (database == 'EpiPox') {
188 |     SearchComponent <- 'SearchComponent'
189 |   } else {
190 |     SearchComponent <- 'Corona2020SearchComponent'
191 |   }
192 |   search_cid <- extract_first_match(sprintf("sys.createComponent\\('(.{5,20})','%s'", SearchComponent), customSearch_page_text)
193 | 
194 |   # Location
195 |   location_ceid <- extract_search_ceid('covv_location', customSearch_page_text)
196 | 
197 |   # Lineage
198 |   if (database == 'EpiCoV'){
199 |     lineage_ceid <- extract_search_ceid('pangolin_lineage', customSearch_page_text)
200 |   } else if (database == 'EpiPox') {
201 |     lineage_ceid <- extract_search_ceid('covsurver_cladelineage', customSearch_page_text)
202 |   } else {
203 |     lineage_ceid <- NULL
204 |   }
205 | 
206 |   # Subtype
207 |   if (database == 'EpiRSV'){
208 |     subtype_ceid <- extract_search_ceid('covv_subtype', customSearch_page_text)
209 |   } else {
210 |     subtype_ceid <- NULL
211 |   }
212 | 
213 |   # Virus Name
214 |   virus_name_ceid <- extract_search_ceid('covv_virus_name', customSearch_page_text)
215 | 
216 |   # From
217 |   from_ceid <- extract_search_ceid('covv_collection_date_from', customSearch_page_text)
218 | 
219 |   # from submission
220 |   from_sub_ceid <- extract_search_ceid('covv_subm_date_from', customSearch_page_text)
221 | 
222 |   # To
223 |   to_ceid <- extract_search_ceid('covv_collection_date_to', customSearch_page_text)
224 | 
225 |   # To submission
226 |   to_sub_ceid <- extract_search_ceid('covv_subm_date_to', customSearch_page_text)
227 | 
228 |   # low_coverage_excl
229 |   if (database != 'EpiPox'){
230 |     low_coverage_excl_ceid <- extract_search_ceid('low_quality', customSearch_page_text)
231 |   } else {
232 |     low_coverage_excl_ceid <- NULL
233 |   }
234 |   if (database == 'EpiCoV'){
235 |     # full text search
236 |     text_ceid <-
237 |       extract_search_ceid("fts", customSearch_page_text)
238 |     # AA substitution/mutation- ", " separated values
239 |     aa_substitution_ceid <- extract_search_ceid('mutation', customSearch_page_text)
240 |     # nucleotide substitution/nuc mutation, ", " separated values
241 |     nucl_mutation_ceid <- extract_search_ceid('nuc_mutation', customSearch_page_text)
242 |     # Highq
243 |     highq_ceid <-
244 |       extract_search_ceid("highq", customSearch_page_text)
245 |     # Complete
246 |     complete_ceid <-
247 |       extract_search_ceid("complete", customSearch_page_text)
248 |     # collection date complete
249 |     collection_date_complete_ceid <-
250 |       extract_search_ceid('coldc', customSearch_page_text)
251 |     # variants
252 |     variant_ceid <-
253 |       extract_search_ceid('variants', customSearch_page_text)
254 |     # quality not used by EpiCov
255 |     quality_ceid <- NULL
256 |   } else {
257 |     text_ceid <- NULL
258 |     aa_substitution_ceid <- extract_search_ceid('mutation', customSearch_page_text)
259 |     nucl_mutation_ceid <- NULL
260 |     variant_ceid <- NULL
261 |     complete_ceid <- NULL
262 |     highq_ceid <- NULL
263 |     # Complete and Highq
264 |     quality_ceid <-
265 |       extract_search_ceid("quality'", customSearch_page_text) # avoid match with quality2
266 |     # collection date complete
267 |     collection_date_complete_ceid <-
268 |       extract_search_ceid('quality2', customSearch_page_text)
269 |   }
270 | 
271 |   # send selection command
272 |   selection_pid_wid <- get_selection_panel(session_id, WID, customSearch_page_ID, query_cid)
273 | 
274 |   #load panel
275 |   # REFACTOR
276 |   selection_page <-
277 |     send_request(paste0('sid=', session_id, '&pid=', selection_pid_wid$pid))
278 | 
279 |   selection_page_text = httr::content(selection_page, as = 'text')
280 | 
281 |   selection_panel_cid <- extract_first_match("onselect=\"sys.getC\\('(.{5,20})')", selection_page_text)
282 |   selection_ceid <- extract_first_match("getFI\\('(.{5,20})').onSelect", selection_page_text)
283 | 
284 |   send_back_cmd(session_id, selection_pid_wid$wid, selection_pid_wid$pid, selection_panel_cid)
285 | 
286 |   # back
287 |   credentials <-
288 |     list(
289 |       database = database,
290 |       pid = customSearch_page_ID,
291 |       sid = session_id,
292 |       wid = WID,
293 |       query_cid = query_cid,
294 |       selection_panel_cid = selection_panel_cid,
295 |       selection_ceid = selection_ceid,
296 |       download_panel_cid = query_cid,
297 |       text_ceid = text_ceid,
298 |       location_ceid = location_ceid,
299 |       search_cid = search_cid,
300 |       aa_substitution_ceid = aa_substitution_ceid,
301 |       nucl_mutation_ceid = nucl_mutation_ceid,
302 |       lineage_ceid = lineage_ceid,
303 |       virus_name_ceid = virus_name_ceid,
304 |       from_ceid = from_ceid,
305 |       from_sub_ceid = from_sub_ceid,
306 |       to_ceid = to_ceid,
307 |       to_sub_ceid = to_sub_ceid,
308 |       low_coverage_excl_ceid = low_coverage_excl_ceid,
309 |       highq_ceid = highq_ceid,
310 |       complete_ceid = complete_ceid,
311 |       collection_date_complete_ceid = collection_date_complete_ceid,
312 |       quality_ceid = quality_ceid,
313 |       variant_ceid = variant_ceid,
314 |       subtype_ceid = subtype_ceid
315 |     )
316 | 
317 |   return(credentials)
318 | }
319 | 


--------------------------------------------------------------------------------
/R/core.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | GISAID_URL = "https://www.epicov.org/epi3/frontend"
  5 | 
  6 | headers = c(accept = "application/json, text/javascript, */*; q=0.01",
  7 |             "content-type" = "application/x-www-form-urlencoded; charset=UTF-8")
  8 | 
  9 | timestamp <- function() {
 10 |   return(as.character(as.integer(Sys.time()) * 1000))
 11 | }
 12 | 
 13 | createCommand <-
 14 |   function(wid,
 15 |            pid,
 16 |            cid,
 17 |            cmd,
 18 |            params = setNames(list(), character(0)),
 19 |            equiv = NULL) {
 20 |     ev = list(
 21 |       wid = wid,
 22 |       pid = pid,
 23 |       cid = cid,
 24 |       cmd = cmd,
 25 |       params = params,
 26 |       equiv = equiv
 27 |     )
 28 |     return(ev)
 29 |   }
 30 | 
 31 | formatDataForRequest <-
 32 |   function(sid, wid, pid, queue, timestamp, mode = 'ajax') {
 33 |     data <- paste0(
 34 |       "sid=",
 35 |       sid,
 36 |       "&wid=",
 37 |       wid,
 38 |       "&pid=",
 39 |       pid,
 40 |       "&data=",
 41 |       utils::URLencode(rjson::toJSON(queue), reserved = TRUE),
 42 |       "&ts=",
 43 |       timestamp,
 44 |       "&mode=",
 45 |       mode
 46 |     )
 47 |     return(data)
 48 |   }
 49 | 
 50 | parseResponse <- function(res) {
 51 |   j = httr::content(res, as = 'parsed')
 52 |   if (length(j$responses) == 0 & length(j) == 2) {
 53 |     warning("There was an error please see: https://github.com/Wytamma/GISAIDR/issues/1")
 54 |     stop("Error! Please login again.")
 55 |   }
 56 |   if (isTRUE(grep('Error', j$responses[[1]]$data) == 1)) {
 57 |     warning(utils::URLdecode(strsplit(j$responses[[1]]$data, '"')[[1]][2]))
 58 |     stop("Internal server Error.")
 59 |   }
 60 |   if (isTRUE(grep('expired', j$responses[[1]]$data) == 1)) {
 61 |     stop("The session has expired. Please login again.")
 62 |   }
 63 |   if (isTRUE(grep('password', j$responses[[1]]$data) == 1)) {
 64 |     # make a better check
 65 |     stop("Username or password wrong!")
 66 |   }
 67 |   if (isTRUE(grep('No data.', j$responses[[1]]$data) == 1)) {
 68 |     # make a better check
 69 |     stop("No data found.")
 70 |   }
 71 |   if (isTRUE(grep('You have no access to this page', j$responses[[1]]$data) == 1)) {
 72 |     stop("You have don't have access to this page!")
 73 |   }
 74 |   return(j)
 75 | 
 76 | }
 77 | 
 78 | 
 79 | 
 80 | get_accession_ids <- function(credentials) {
 81 |   # select all check box
 82 |   #onclick="sys.getC("c_rfsc9v_w9").selectAll(this)"
 83 |   queue = list()
 84 |   # data: {"queue":[{"wid":"wid_rfsc9v_2ktt","pid":"pid_rfsc9v_2kwd","cid":"c_rfsc9v_w9","cmd":"CallAsync","params":{"col_name":"c","checked":true,"_async_cmd":"SelectAll"},"equiv":null}]}
 85 |   command <- createCommand(
 86 |     wid = credentials$wid,
 87 |     pid = credentials$pid,
 88 |     cid = credentials$query_cid,
 89 |     cmd = 'CallAsync',
 90 |     params = list(col_name = 'c', checked = TRUE, '_async_cmd' = 'SelectAll')
 91 |   )
 92 |   queue <- append(queue, list(command))
 93 |   command_queue <- list(queue = queue)
 94 | 
 95 |   data <-
 96 |     formatDataForRequest(
 97 |       sid = credentials$sid,
 98 |       wid = credentials$wid,
 99 |       pid = credentials$pid,
100 |       queue = command_queue,
101 |       timestamp = timestamp()
102 |     )
103 |   res <-
104 |     httr::POST(GISAID_URL, httr::add_headers(.headers = headers), body = data)
105 |   j = httr::content(res, as = 'parsed')
106 | 
107 |   # {"callback_response": {"msg": null, "async_id": "_rfsc9v_2o8a"}, "__ready__": true}
108 |   # wait for selection
109 |   # extract check_async
110 |   check_async_id = j$callback_response$async_id
111 |   # while generateDownloadDone not ready
112 |   is_ready = FALSE
113 |   while (!is_ready) {
114 |     res <- httr::GET(paste0('https://www.epicov.org/epi3/check_async/', check_async_id, '?_=', timestamp()))
115 |     j <- parseResponse(res)
116 |     is_ready <- j$is_ready
117 |     if (!is_ready) {
118 |       Sys.sleep(1)
119 |     }
120 |   }
121 |   log.debug(j)
122 | 
123 |   # select button
124 |   selection_pid_wid <- get_selection_panel(credentials$sid, credentials$wid, credentials$pid, credentials$query_cid)
125 |   selection_page <-
126 |     send_request(paste0('sid=', credentials$sid, '&pid=', selection_pid_wid$pid))
127 | 
128 |   # csv button
129 |   #{"queue":[{"wid":"wid_rfsc9v_2p1c","pid":"pid_rfsc9v_2p1d","cid":"c_rfsc9v_15u","cmd":"Download","params":{},"equiv":null}]}
130 |   queue = list()
131 |   command <- createCommand(
132 |     wid = selection_pid_wid$wid,
133 |     pid = selection_pid_wid$pid,
134 |     cid = credentials$selection_panel_cid,
135 |     cmd = 'Download',
136 |     params = setNames(list(), character(0))
137 |   )
138 |   queue <- append(queue, list(command))
139 |   command_queue <- list(queue = queue)
140 | 
141 |   data <-
142 |     formatDataForRequest(
143 |       sid = credentials$sid,
144 |       wid = credentials$wid,
145 |       pid = credentials$pid,
146 |       queue = command_queue,
147 |       timestamp = timestamp()
148 |     )
149 |   res <-
150 |     httr::POST(GISAID_URL, httr::add_headers(.headers = headers), body = data)
151 |   j = httr::content(res, as = 'parsed')
152 |   url <- extract_first_match("sys.downloadFile\\(\"(.*)\",", j$responses[[1]]$data)
153 |   log.debug(paste0('https://www.epicov.org/', url))
154 |   tryCatch(
155 |     df <- read.csv(paste0('https://www.epicov.org/', url), header=F, col.names = c('accession_id')),
156 |     error = function(e) df <- data.frame(col.names = c('accession_id'))
157 |   )
158 |   # back
159 |   send_back_cmd(credentials$sid, selection_pid_wid$wid, selection_pid_wid$pid, credentials$selection_panel_cid)
160 |   resetQuery(credentials)
161 |   return(df)
162 | }
163 | 
164 | get_selection_panel <- function(session_id, WID, customSearch_page_ID, query_cid) {
165 |   # selection changes every time you open it
166 |   selection_command <- createCommand(
167 |     wid = WID,
168 |     pid = customSearch_page_ID,
169 |     cid = query_cid,
170 |     cmd = 'Selection',
171 |     params = setNames(list(), character(0)) #hack for empty {}
172 |   )
173 |   queue <- list(queue = list(selection_command))
174 | 
175 |   data <-
176 |     formatDataForRequest(session_id, WID, customSearch_page_ID, queue, timestamp())
177 | 
178 |   response <-
179 |     send_request(method='POST', data=data)
180 | 
181 |   response_data <- parseResponse(response)
182 |   # extract PID
183 |   # selection changes every time
184 |   selection_pid <-
185 |     strsplit(response_data$responses[[1]]$data, "'")[[1]][4]
186 |   selection_wid <-
187 |     strsplit(response_data$responses[[1]]$data, "'")[[1]][2]
188 |   log.debug(sprintf("get_selection_panel (response_data): %s", response_data))
189 |   list(pid=selection_pid, wid=selection_wid)
190 | }
191 | 
192 | get_download_panel <- function(session_id, WID, customSearch_page_ID, query_cid) {
193 |   # selection changes every time you open it
194 |   selection_command <- createCommand(
195 |     wid = WID,
196 |     pid = customSearch_page_ID,
197 |     cid = query_cid,
198 |     cmd = 'DownloadAllSequences',
199 |     params = setNames(list(), character(0)) #hack for empty {}
200 |   )
201 |   queue <- list(queue = list(selection_command))
202 | 
203 |   data <-
204 |     formatDataForRequest(session_id, WID, customSearch_page_ID, queue, timestamp())
205 | 
206 |   response <-
207 |     send_request(method='POST', data=data)
208 | 
209 |   response_data <- parseResponse(response)
210 |   log.debug(sprintf("get_download_panel_pid_wid (response_data): %s", response_data))
211 |   # extract PID
212 |   # selection changes every time
213 |   download_pid <-
214 |     strsplit(response_data$responses[[1]]$data, "'")[[1]][4]
215 |   download_wid <-
216 |     strsplit(response_data$responses[[1]]$data, "'")[[1]][2]
217 | 
218 |   list(pid=download_pid, wid=download_wid)
219 | }
220 | 
221 | send_back_cmd <- function(session_id, WID, PID, CID ) {
222 |   # send back command to get back to page
223 |   # {"queue":[{"wid":"wid_r8fuui_7jgp","pid":"pid_r8fuui_7jgq","cid":"c_r8fuui_3uj","cmd":"Back","params":{},"equiv":null}]}
224 |   selection_command <- createCommand(
225 |     wid = WID,
226 |     pid = PID,
227 |     cid = CID,
228 |     cmd = 'Back',
229 |     params = setNames(list(), character(0)) #hack for empty {}
230 |   )
231 |   queue <- list(queue = list(selection_command))
232 | 
233 |   data <-
234 |     formatDataForRequest(session_id, WID, PID, queue, timestamp())
235 | 
236 |   response <-
237 |     send_request(method='POST', data=data)
238 | 
239 |   response_data <- parseResponse(response)
240 | }
241 | 
242 | select_entries <- function(credentials, list_of_accession_ids) {
243 |   accession_ids_string <- paste(list_of_accession_ids, collapse=", ")
244 | 
245 |   selection_pid_wid <- get_selection_panel(credentials$sid, credentials$wid, credentials$pid, credentials$query_cid)
246 | 
247 |   #load panel
248 |   selection_page <-
249 |     send_request(paste0('sid=', credentials$sid, '&pid=', selection_pid_wid$pid))
250 | 
251 |   ev1 <- createCommand(
252 |     wid = selection_pid_wid$wid,
253 |     pid = selection_pid_wid$pid,
254 |     cid = credentials$selection_panel_cid,
255 |     cmd = 'setTarget',
256 |     params = list(cvalue=accession_ids_string, ceid=credentials$selection_ceid), #hack for empty {}
257 |     equiv = paste0("ST", credentials$selection_ceid)
258 |   )
259 | 
260 |   ev2 <- createCommand(
261 |     wid = selection_pid_wid$wid,
262 |     pid = selection_pid_wid$pid,
263 |     cid = credentials$selection_panel_cid,
264 |     cmd = 'ChangeValue',
265 |     params = list(cvalue=accession_ids_string, ceid=credentials$selection_ceid), #hack for empty {}
266 |     equiv = paste0("CV", credentials$selection_ceid)
267 |   )
268 |   ev3 <- createCommand(
269 |     wid = selection_pid_wid$wid,
270 |     pid = selection_pid_wid$pid,
271 |     cid = credentials$selection_panel_cid,
272 |     cmd = 'OK',
273 |     params = setNames(list(), character(0)) #hack for empty {}
274 |   )
275 |   json_queue <- list(queue = list(ev1, ev2, ev3))
276 |   data <- formatDataForRequest(credentials$sid, selection_pid_wid$wid, selection_pid_wid$pid, json_queue, timestamp())
277 |   response <-
278 |     send_request(method='POST', data=data)
279 |   response_data <-parseResponse(response)
280 |   log.debug(response_data)
281 |   if (isTRUE(grep('Back', response_data$responses[[2]]$data) == 1)) {
282 |     send_back_cmd(credentials$sid, selection_pid_wid$wid, selection_pid_wid$pid, credentials$selection_panel_cid)
283 |   }
284 | 
285 |   return(response)
286 | }
287 | 
288 | resetQuery <- function(credentials) {
289 |   queue = list()
290 |   command <- createCommand(
291 |     wid = credentials$wid,
292 |     pid = credentials$pid,
293 |     cid = credentials$search_cid,
294 |     cmd = "Reset"
295 |   )
296 |   queue <- append(queue, list(command))
297 |   command_queue <- list(queue = queue)
298 |   data <-
299 |     formatDataForRequest(
300 |       sid = credentials$sid,
301 |       wid = credentials$wid,
302 |       pid = credentials$pid,
303 |       queue = command_queue,
304 |       timestamp = timestamp()
305 |     )
306 |   res <-
307 |     httr::POST(GISAID_URL, httr::add_headers(.headers = headers), body = data)
308 | }
309 | 
310 | extract_search_ceid <- function(identifier, t) {
311 |   regex <- paste0(".createFI\\('(.*)','.*Widget','", identifier)
312 |   log.debug(sprintf("Extracting '%s' from '%s'", regex, substr(t, 0, 30)))
313 |   ceid <-
314 |     regmatches(t,
315 |                regexpr(
316 |                  regex,
317 |                  t,
318 |                  perl = TRUE
319 |                ))
320 |   ceid <- strsplit(ceid, "'")
321 |   tryCatch(
322 |     ceid <- ceid[[1]][length(ceid[[1]]) - 4],
323 |     error = function(e) {
324 |       warning(paste0("Could not extract ", regex, " from ", substr(t, 0, 30)))
325 |       e
326 |     }
327 |   )
328 | 
329 |   return(ceid)
330 | }
331 | 
332 | log.debug <- function(msg) {
333 |   if (Sys.getenv("GISAIDR_DEBUG") == 1) {
334 |     message(msg)
335 |   }
336 |   invisible()
337 | }
338 | 
339 | log.error <- function(msg) {
340 |   message(paste0(Sys.time(), "\tERROR: ", gsub("\n", " ", msg)))
341 |   flush.console()
342 |   invisible()
343 | }
344 | 
345 | log.warn <- function(msg) {
346 |   message(paste0(Sys.time(), "\tWARNING: ", gsub("\n", " ", msg)))
347 |   flush.console()
348 |   invisible()
349 | }
350 | 
351 | log.info <- function(msg, level=1) {
352 |   if (Sys.getenv("GISAIDR_VERBOSITY") != ""){
353 |     verbosity <- Sys.getenv("GISAIDR_VERBOSITY")
354 |   } else {
355 |     verbosity <- 1
356 |   }
357 |   if (verbosity >= level){
358 |     message(paste0(Sys.time(), "\tINFO: ", gsub("\n", " ", msg)))
359 |   }
360 |   flush.console()
361 |   invisible()
362 | }
363 | 
364 | send_request <-
365 |   function(parameter_string = "",
366 |            data = NULL,
367 |            method = 'GET') {
368 |     URL <- paste0(GISAIDR::GISAID_URL, '?', parameter_string)
369 |     if (is.null(data)) {
370 |       data <- ""
371 |     }
372 |     log.debug(sprintf("Sending request:\n Method -> %s\n URL -> %s\n data -> %s", method, URL, data))
373 |     if (method == 'GET') {
374 |       response <- httr::GET(URL)
375 |     } else if (method == 'POST') {
376 |       response <-
377 |         httr::POST(URL, httr::add_headers(.headers = GISAIDR::headers), body = data)
378 |     } else {
379 |       stop(sprintf("Method '%s' not allowed", method))
380 |     }
381 |     if (response$status_code >= 500) {
382 |       warning(sprintf("An error occurred while trying to %s %s", method, URL))
383 |       stop("Server error!")
384 |     }
385 |     response
386 |   }
387 | 
388 | extract_first_match <- function(regex, text) {
389 |   log.debug(sprintf("Extracting '%s' from '%s'", regex, substr(text, 0, 30)))
390 |   matches <- regmatches(text, regexec(regex, text))
391 |   tryCatch(
392 |     return(matches[[1]][[2]]),
393 |     error = function(e) stop(sprintf("Could not extract '%s' from '%s'", regex, substr(text, 0, 100)))
394 |   )
395 | 
396 | }
397 | 
398 | 
399 | go_to_page <- function(session_id, WID, PID, CID, link) {
400 |   go_command <- createCommand(
401 |     wid = WID,
402 |     pid = PID,
403 |     cid = CID,
404 |     cmd = 'Go',
405 |     params = list(link = link)
406 |   )
407 | 
408 |   queue <- list(queue = list(go_command))
409 | 
410 |   data <-
411 |     formatDataForRequest(session_id, WID, PID, queue, timestamp())
412 | 
413 |   response <- send_request(data)
414 |   response_data <- parseResponse(response)
415 |   return(response_data)
416 | }
417 | 
418 | #
419 | # Variants <-
420 | #   list(
421 | #     alpha = 'B.1.1.7 / Q.*',
422 | #     beta = 'B.1.351 / B.1.351.2 / B.1.351.3',
423 | #     gamma = 'P.1 / P.1.*',
424 | #     delta = 'B.1.617.2 / AY.*',
425 | #     epsilon = 'B.1.427 / B.1.429',
426 | #     eta = 'B.1.525',
427 | #     iota = 'B.1.526',
428 | #     kappa = 'B.1.617.1',
429 | #     lambda = 'C.37 / C.37.1',
430 | #     mu = 'B.1.621 / B.1.621.1',
431 | #     omicron = 'B.1.1.529 / BA.*',
432 | #     GH_490R = 'B.1.640 / B.1.640.*'
433 | #   )
434 | 
435 | 
436 | create_batches <- function(start_index, nrows, batch_size=50) {
437 |   batches <- cbind(
438 |       seq(0,nrows,batch_size),
439 |       c(seq(batch_size,nrows,batch_size),
440 |         nrows)
441 |   )
442 |   batches <- batches + start_index
443 |   if (batches[nrow(batches),1] - batches[nrow(batches),2] == 0) {
444 |     batches <- head(batches, -1)
445 |   }
446 |   #colnames(batches) <- c('start_index', 'nrows')
447 |   return (batches)
448 | }
449 | 
450 | covid_order_by_col_map <-
451 |   list(
452 |     id = "b",
453 |     virus_name = "d",
454 |     passage_details_history = "e",
455 |     accession_id = "f",
456 |     collection_date = "g",
457 |     submission_date = "h",
458 |     information = "i",
459 |     length = "j",
460 |     host = "k",
461 |     location = "l",
462 |     originating_lab = "m",
463 |     submitting_lab = "n"
464 |   )
465 | 
466 | other_order_by_col_map <-
467 |   list(
468 |     id = "b",
469 |     virus_name = "d",
470 |     passage_details_history = "e",
471 |     accession_id = "f",
472 |     collection_date = "g",
473 |     submission_date = "h",
474 |     information = "i",
475 |     length = "j",
476 |     location = "k",
477 |     originating_lab = "l",
478 |     submitting_lab = "m"
479 |   )
480 | 
481 | 
482 | setColumnNames <- function(df, database) {
483 |   if (database == 'EpiRSV') {
484 |     names(df)[names(df) == "b"] <- "id"
485 |     names(df)[names(df) == "d"] <- "virus_name"
486 |     names(df)[names(df) == "e"] <- "passage_details_history"
487 |     names(df)[names(df) == "f"] <- "accession_id"
488 |     names(df)[names(df) == "g"] <- "collection_date"
489 |     names(df)[names(df) == "h"] <- "submission_date"
490 |     names(df)[names(df) == "i"] <- "information"
491 |     names(df)[names(df) == "j"] <- "length"
492 |     names(df)[names(df) == "k"] <- "location"
493 |     names(df)[names(df) == "l"] <- "originating_lab"
494 |     names(df)[names(df) == "m"] <- "submitting_lab"
495 |   } else if (database == 'EpiPox') {
496 |     names(df)[names(df) == "b"] <- "id"
497 |     names(df)[names(df) == "d"] <- "virus_name"
498 |     names(df)[names(df) == "e"] <- "passage_details_history"
499 |     names(df)[names(df) == "f"] <- "accession_id"
500 |     names(df)[names(df) == "g"] <- "collection_date"
501 |     names(df)[names(df) == "h"] <- "submission_date"
502 |     names(df)[names(df) == "i"] <- "information"
503 |     names(df)[names(df) == "j"] <- "length"
504 |     names(df)[names(df) == "k"] <- "location"
505 |     names(df)[names(df) == "l"] <- "originating_lab"
506 |     names(df)[names(df) == "m"] <- "submitting_lab"
507 |   } else {
508 |     colnames(df)[colnames(df) %in% c("b", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n")] <-
509 |       c(
510 |         "id",
511 |         "virus_name",
512 |         "passage_details_history",
513 |         "accession_id",
514 |         "collection_date",
515 |         "submission_date",
516 |         "information",
517 |         "length",
518 |         "host",
519 |         "location",
520 |         "originating_lab",
521 |         "submitting_lab"
522 |       )
523 |   }
524 |   return(df)
525 | }
526 | 
527 | setDataTypes <- function(df) {
528 |   # date
529 |   return(df)
530 | }
531 | 
532 | create_search_queue <- function(credentials, ceid, cvalue, cmd) {
533 |   queue = list()
534 |   command <- createCommand(
535 |     wid = credentials$wid,
536 |     pid = credentials$pid,
537 |     cid = credentials$search_cid,
538 |     cmd = 'setTarget',
539 |     params = list(cvalue = cvalue, ceid = ceid),
540 |     equiv = paste0('ST', ceid)
541 |   )
542 |   queue <- append(queue, list(command))
543 | 
544 |   command <- createCommand(
545 |     wid = credentials$wid,
546 |     pid = credentials$pid,
547 |     cid = credentials$search_cid,
548 |     cmd = 'ChangeValue',
549 |     params = list(cvalue = cvalue, ceid = ceid),
550 |     equiv = paste0('CV', ceid)
551 |   )
552 | 
553 |   queue <- append(queue, list(command))
554 | 
555 |   command <- createCommand(
556 |     wid = credentials$wid,
557 |     pid = credentials$pid,
558 |     cid = credentials$search_cid,
559 |     cmd = cmd,
560 |     params = list(ceid = ceid),
561 |   )
562 | 
563 |   queue <- append(queue, list(command))
564 | 
565 |   return(queue)
566 | }
567 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # GISAIDR
  2 | 
  3 | [![Build](https://github.com/Wytamma/GISAIDR/actions/workflows/r.yml/badge.svg)](https://github.com/Wytamma/GISAIDR/actions/workflows/r.yml) 
  4 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5363500.svg)](https://doi.org/10.5281/zenodo.5363500)
  5 | 
  6 | Programmatically interact with the GISAID EpiCoV, EpiPox, and EpiRSV databases.
  7 | 
  8 | > [!TIP]
  9 | > Please consider moving your research focus to an open pathogen on [pathoplexus.org](https://pathoplexus.org/).
 10 | 
 11 | 
 12 | ## Citation
 13 | 
 14 | If you use GISAIDR in your research please cite as:
 15 | 
 16 | Wytamma Wirth, & Sebastian Duchene. (2022). GISAIDR: Programmatically interact with the GISAID databases. Zenodo. <https://doi.org/10.5281/zenodo.6474693>
 17 | 
 18 | ## Installation
 19 | 
 20 | Install from github using `devtools`.
 21 | 
 22 | ``` r
 23 | install.packages("devtools") # if you have not installed "devtools" package
 24 | devtools::install_github("Wytamma/GISAIDR")
 25 | library(GISAIDR)
 26 | ```
 27 | 
 28 | ## Login
 29 | 
 30 | Get `username` and `password` from [GISAID](https://www.epicov.org/).
 31 | 
 32 | ``` r
 33 | username = Sys.getenv("GISAIDR_USERNAME")
 34 | password = Sys.getenv("GISAIDR_PASSWORD")
 35 | ```
 36 | 
 37 | Login and save your credentials (they are used for all future database queries)
 38 | 
 39 | ``` r
 40 | credentials <- login(username = username, password = password)
 41 | ```
 42 | 
 43 | ## Select a database
 44 | 
 45 | The EpiCoV database is selected by default, however, GISAIDR also works with the EpiRSV and EpiPox databases (limited testing).
 46 | 
 47 | ``` r
 48 | credentials <- login(username = username, password = password, database="EpiRSV")
 49 | # or
 50 | credentials <- login(username = username, password = password, database="EpiPox")
 51 | ```
 52 | 
 53 | Note: You need a GISAID account with access to EpiRSV and EpiPox.
 54 | 
 55 | ## Get Data
 56 | 
 57 | Query the database with `query()` using your credentials
 58 | 
 59 | ``` r
 60 | df <- query(credentials = credentials)
 61 | head(df[0:6])
 62 | ```
 63 | 
 64 | | \#  | id              | virus_name                    | passage_details_history | accession_id    | collection_date | submission_date |
 65 | |-----|-----------------|-------------------------------|-------------------------|-----------------|-----------------|-----------------|
 66 | | 1   | EPI_ISL_1789201 | hCoV-19/USA/IL-S21WGS954/2021 | Original                | EPI_ISL_1789201 | 2021-04-16      | 2021-04-29      |
 67 | | 2   | EPI_ISL_1789200 | hCoV-19/USA/IL-S21WGS885/2021 | Original                | EPI_ISL_1789200 | 2021-04-02      | 2021-04-29      |
 68 | | 3   | EPI_ISL_1789199 | hCoV-19/USA/IL-S21WGS884/2021 | Original                | EPI_ISL_1789199 | 2021-04-12      | 2021-04-29      |
 69 | | 4   | EPI_ISL_1789198 | hCoV-19/USA/IL-S21WGS883/2021 | Original                | EPI_ISL_1789198 | 2021-04-14      | 2021-04-29      |
 70 | | 5   | EPI_ISL_1789197 | hCoV-19/USA/IL-S21WGS882/2021 | Original                | EPI_ISL_1789197 | 2021-04-15      | 2021-04-29      |
 71 | | 6   | EPI_ISL_1789196 | hCoV-19/USA/IL-S21WGS881/2021 | Original                | EPI_ISL_1789196 | 2021-04-13      | 2021-04-29      |
 72 | 
 73 | ### Pagination
 74 | 
 75 | Use `nrows` and `start_index` to page through results. GISAID limits the number of results returned with each request to 50. Internally GISAIDR runs a loop to batch queries with > 50 rows requested. See fast option below.
 76 | 
 77 | ``` r
 78 | df <- query(credentials = credentials, nrows = 1000, start_index = 100)
 79 | nrow(df)
 80 | ```
 81 | 
 82 | [1] 1000
 83 | 
 84 | ### Fast query
 85 | 
 86 | Use `fast` to load all of the accesion_ids that match the query. These accesion_ids can then be used in the `download` function to download up to 5000 sequences at a time. 
 87 | 
 88 | ``` r
 89 | df <- query(
 90 |   credentials = credentials, 
 91 |   location = "Oceania", 
 92 |   from_subm = "2022-07-26", 
 93 |   to_subm = "2022-07-28",
 94 |   fast = TRUE
 95 | )
 96 | head(df$accession_id)
 97 | ```
 98 | Selecting all 484 accession_ids.  
 99 | Returning 0-484 of 484 accession_ids.  
100 | [1] "EPI_ISL_14061265" "EPI_ISL_14061266" "EPI_ISL_14061267" "EPI_ISL_14061268" "EPI_ISL_14061269" "EPI_ISL_14061270"
101 | 
102 | ### Ordering
103 | 
104 | Use `order_by` to order the results or `query` by a column. Use `order_asc` to change the direction of `order_by` (defaults to TRUE).  
105 | 
106 | ``` r
107 | df <- query(credentials = credentials, order_by = 'submission_date')
108 | df$submission_date
109 | ```
110 | [1] "2020-01-10" "2020-01-10" "2020-01-11" "2020-01-11" "2020-01-11" "2020-01-12" "2020-01-14"  
111 | [8] "2020-01-14" "2020-01-14" "2020-01-14" "2020-01-16" "2020-01-17" "2020-01-17" ...
112 | 
113 | ### Full text search
114 | 
115 | Use `text` for full text search.
116 | 
117 | ``` r
118 | accession_ids = c("EPI_ISL_17398411", "EPI_ISL_17199001", "EPI_ISL_17409201", "EPI_ISL_17243716")
119 | df <- query(credentials = credentials, text = paste(accession_ids, collapse = "\n"))
120 | > df$accession_id
121 | ```
122 | [1] "EPI_ISL_17199001" "EPI_ISL_17243716" "EPI_ISL_17398411" "EPI_ISL_17409201"
123 | 
124 | ### Search by location
125 | 
126 | Use `location` to search for entries based on geographic location.
127 | 
128 | ``` r
129 | df <- query(credentials = credentials, location = 'Australia')
130 | df$location
131 | ```
132 | 
133 | [1] "Oceania / Australia / Western Australia" "Oceania / Australia / Queensland"  
134 | [3] "Oceania / Australia / Queensland" "Oceania / Australia / Queensland"  
135 | [5] "Oceania / Australia / Western Australia" ...
136 | 
137 | A list of GISAID locations (not complete) can be found in [GISAID_LOCATIONS.txt](https://github.com/Wytamma/GISAIDR/blob/master/GISAID_LOCATIONS.txt). The location search is hierarchical e.g. querying for 'Africa / ...' will return all the regions within Africa, while querying for 'Africa / Angola / ...' will only return the regions in Angola. Region can be further subdivided by specifying more levels e.g. 'North America / USA / Alabama / Butler County'. The search uses pattern matching and does not have to follow the hierarchical format above.
138 | 
139 | ### Search by lineage (EpiCoV)
140 | 
141 | Use `lineage` to search for entries based on pango lineage designations.
142 | 
143 | ``` r
144 | df <- query(credentials = credentials, lineage = 'B.1.1.7')
145 | full_df <- download(credentials = credentials, list_of_accession_ids = df$accession_id)  # see below for download() info.
146 | full_df$pangolin_lineage
147 | ```
148 | 
149 | [1] "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7"  
150 | [11] "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7" "B.1.1.7"  
151 | [21] ...
152 | 
153 | ### Search by Variant (EpiCoV)
154 | 
155 | Variants can be queried by name e.g. 'omicron', 'gh/490r', 'delta', 'alpha', 'beta', 'gamma', 'lambda', or 'mu'. Unfortunately GISAID doesn't return the variant designation from the query or download so variants must be confirmed with pangolin_lineage or GISAID_clade.
156 | 
157 | ``` r
158 | # VOC Omicron GRA (B.1.1.529+BA.*) first detected in Botswana/Hong Kong/South Africa
159 | omicron_df <- query(credentials = credentials, variant = 'omicron')
160 | omicron_full_df <- download(credentials = credentials, list_of_accession_ids = omicron_df$accession_id)
161 | omicron_full_df$pangolin_lineage
162 | ```
163 | 
164 | [1] "BA.2" "BA.2" "BA.2.10.1" "BA.2" "BA.2" "BA.5" "BA.2" "BA.2" "BA.2.3"  
165 | [10] "BA.4" "BA.2" "BA.2" "BA.2" "BA.2" "BA.2" "BA.2" "BA.2" "BA.1.17"  
166 | [19] ...
167 | 
168 | ### Search by collection date
169 | 
170 | Use `from` and `to` to search for entries from specific dates.
171 | 
172 | ``` r
173 | df <- query(credentials = credentials, from = '2021-04-05', to = '2021-04-06')
174 | df$collection_date
175 | ```
176 | 
177 | [1] "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05"  
178 | [8] "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05"  
179 | [15] ...
180 | 
181 | ### Search by submission date
182 | 
183 | Use `from_subm` and `to_subm` to search for entries from specific dates.
184 | 
185 | ``` r
186 | df <- query(credentials = credentials, from_subm = '2021-04-05', to_subm = '2021-04-05')
187 | df$submission_date
188 | ```
189 | 
190 | [1] "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05"  
191 | [8] "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05" "2021-04-05"  
192 | [15] ...
193 | 
194 | ### Search by virus name
195 | 
196 | Use `virus_name` to search for entries using the virus name.
197 | 
198 | ``` r
199 | df <- query(credentials = credentials, virus_name="hCoV-19/Ireland/D-BHTEST/2022")
200 | df$virus_name
201 | ```
202 | 
203 | [1] "hCoV-19/Ireland/D-BHTEST/2022"
204 | 
205 | Search for multiple virus names using a list.
206 | 
207 | ```
208 | virus_names <- list("hCoV-19/Ireland/KY-Enfer-230922004_A4/2022", "hCoV-19/Ireland/CO-Enfer-240922010_E9/2022")
209 | df <- query(credentials = credentials, virus_name=virus_names)
210 | df$virus_name
211 | ```
212 | [1] "hCoV-19/Ireland/CO-Enfer-240922010_E9/2022" "hCoV-19/Ireland/KY-Enfer-230922004_A4/2022"
213 | 
214 | You can also match parts of the virus name e.g.
215 | 
216 | ```
217 | df <- query(credentials = credentials, virus_name="hCoV-19/Ireland")
218 | df$virus_name
219 | ```
220 | [1] "hCoV-19/Ireland/KY-Enfer-260922007_C6/2022"  "hCoV-19/Ireland/KY-Enfer-260922007_C4/2022"   
221 | [3] "hCoV-19/Ireland/KY-Enfer-260922007_C2/2022"  "hCoV-19/Ireland/KY-Enfer-260922007_C10/2022"  
222 | [5] "hCoV-19/Ireland/KY-Enfer-260922007_C1/2022"  "hCoV-19/Ireland/CO-Enfer-260922007_B7/2022"...  
223 | 
224 | ### Search by AA Substitutions and Nucleotide Mutations
225 | 
226 | Use `aa_substitution` and `nucl_mutation` to search for entries using amino acid Substitutions and nucleotide mutations.
227 | 
228 | ``` r
229 | aa_substitution_df <- query(credentials = credentials, aa_substitution = 'Spike_E484Q, Spike_H69del, -N_P13L')
230 | nucl_mutation_df <- query(credentials = credentials, nucl_mutation = '-T23599G, -C10029T')
231 | ```
232 | 
233 | ### Exclude low coverage entries
234 | 
235 | Use `low_coverage_excl` to exclude low coverage entries from the results.
236 | 
237 | ``` r
238 | df <- query(credentials = credentials, low_coverage_excl = TRUE)
239 | grep("Long stretches of NNNs", df$information)
240 | ```
241 | 
242 | integer(0)
243 | 
244 | ### Include only complete entries
245 | 
246 | GISAID considers genomes \>29,000 nt as complete. Use `complete` to include only complete entries in the results.
247 | 
248 | ``` r
249 | df <- query(credentials = credentials, complete = TRUE)
250 | all(df$length > 29000)
251 | ```
252 | 
253 | [1] TRUE
254 | 
255 | ### Include only high coverage entries
256 | 
257 | GISAID considers genomes with \<1% Ns and \<0.05% unique amino acid mutations as high coverage . Use `high_coverage` to include only high coverage entries in the results.
258 | 
259 | ``` r
260 | df <- query(credentials = credentials, high_coverage = TRUE)
261 | length(grep("warn_sign", df$information)) == 0
262 | ```
263 | 
264 | [1] TRUE
265 | 
266 | ### Include only entries with complete collection date
267 | 
268 | Use `collection_date_complete` to include only entries with complete collection date.
269 | 
270 | ``` r
271 | df <- query(credentials = credentials, collection_date_complete = TRUE)
272 | ```
273 | 
274 | ### Load all entries
275 | 
276 | Use `load_all` to get all the entries that match your query without having to specify `nrows`.
277 | 
278 | ``` r
279 | df <- query(credentials = credentials, lineage = 'W.1', load_all = TRUE)
280 | nrow(df)
281 | ```
282 | 
283 | [1] 100
284 | 
285 | Note: you may end up downloading the entire GISAID database if your query is too general.
286 | 
287 | ### Get total query matches
288 | 
289 | Use `total` to get the number of entries that match you query.
290 | 
291 | ``` r
292 | total <- query(credentials = credentials, total = TRUE)
293 | total
294 | ```
295 | 
296 | [1] 10145747
297 | 
298 | ## Download Files
299 | 
300 | The `download_files` function provides flexible downloading of specific data types from GISAID. Unlike the deprecated `download` function, it allows you to selectively download different types of files and returns them in a structured format.
301 | 
302 | ### Basic usage
303 | 
304 | To download specific file types, set the corresponding parameter to `TRUE`:
305 | 
306 | ``` r
307 | df <- query(credentials = credentials, location = "Australia", nrows = 10)
308 | list_of_accession_ids <- df$accession_id
309 | 
310 | # Download dates and location metadata
311 | results <- download_files(
312 |   credentials = credentials,
313 |   list_of_accession_ids = list_of_accession_ids,
314 |   dates_and_location = TRUE
315 | )
316 | head(results$dates_and_location)
317 | ```
318 | 
319 | ### Available download types
320 | 
321 | The function supports the following download types:
322 | 
323 | - `dates_and_location`: Date and Location metadata (TSV format)
324 | - `patient_status`: Patient Status metadata (TSV format) 
325 | - `sequencing_technology`: Sequencing Technology metadata (TSV format)
326 | - `sequences`: Nucleotide sequences (FASTA format)
327 | - `augur_input`: Combined metadata and sequences for Augur analysis (EpiCoV only)
328 | 
329 | ### Download multiple file types
330 | 
331 | You can download multiple file types in a single call:
332 | 
333 | ``` r
334 | results <- download_files(
335 |   credentials = credentials,
336 |   list_of_accession_ids = list_of_accession_ids,
337 |   dates_and_location = TRUE,
338 |   patient_status = TRUE,
339 |   sequences = TRUE
340 | )
341 | 
342 | # Access different data types
343 | location_data <- results$dates_and_location
344 | patient_data <- results$patient_status
345 | sequence_data <- results$sequences
346 | ```
347 | 
348 | ### Augur input format (EpiCoV only)
349 | 
350 | For phylogenetic analysis with Augur, you can download the special `augur_input` format which includes both metadata and sequences:
351 | 
352 | ``` r
353 | results <- download_files(
354 |   credentials = credentials,
355 |   list_of_accession_ids = list_of_accession_ids,
356 |   augur_input = TRUE
357 | )
358 | 
359 | metadata <- results$augur_input$metadata
360 | sequences <- results$augur_input$sequences
361 | ```
362 | 
363 | Note: The `augur_input` option is only available for the EpiCoV database. If you specify both `augur_input` and `sequences`, the standalone `sequences` download will be skipped to avoid redundancy.
364 | 
365 | ### Return format
366 | 
367 | The function returns a named list where each element corresponds to a requested download type:
368 | 
369 | - For metadata types (`dates_and_location`, `patient_status`, `sequencing_technology`): Returns a data frame
370 | - For `sequences`: Returns a character string containing FASTA data
371 | - For `augur_input`: Returns a list with `metadata` (data frame) and `sequences` (character string) elements
372 | 
373 | ### File cleanup
374 | 
375 | By default, temporary files are automatically cleaned up after download. You can preserve them by setting `clean_up = FALSE`:
376 | 
377 | ``` r
378 | results <- download_files(
379 |   credentials = credentials,
380 |   list_of_accession_ids = list_of_accession_ids,
381 |   sequences = TRUE,
382 |   clean_up = FALSE
383 | )
384 | ```
385 | 
386 | Note: A maximum of 5000 sequences can be downloaded at a time.
387 | 
388 | ## Download (deprecated)
389 | 
390 | To download the full data set you need a list of accession IDs (which can be obtained from `query` results). This will also download the sequence data for each entry.
391 | 
392 | ``` r
393 | full_df_with_seq <- download(
394 |     credentials = credentials, 
395 |     list_of_accession_ids = list_of_accession_ids, 
396 | )
397 | full_df_with_seq$sequence
398 | ```
399 | 
400 | [1] "AGATCTGTTCTCTAAACGAACTTTAAAATCT...  
401 | [2] "AGATCTGTTCTCTAAACGAACTTTAAAATCT...  
402 | [3] "AGATCTGTTCTCTAAACGAACTTTAAAATCT...  
403 | ...
404 | 
405 | You can stop GISAIDR from loading the sequence data into the memory by setting get_sequence=FALSE. Note: the sequence data will still be downloaded.
406 | 
407 | ``` r
408 | df <- query(credentials = credentials)
409 | list_of_accession_ids <- df$accession_id
410 | full_df <- download(credentials = credentials, list_of_accession_ids = list_of_accession_ids, get_sequence=FALSE)
411 | colnames(full_df)
412 | ```
413 | 
414 | [1] "strain" "virus" "gisaid_epi_isl" "genbank_accession"  
415 | [5] "date" "region" "country" "division"  
416 | [9] "location" "region_exposure" "country_exposure" "division_exposure"  
417 | [13] "segment" "length" "host" "age"  
418 | [17] "sex" "Nextstrain_clade" "pangolin_lineage" "GISAID_clade"  
419 | [21] "originating_lab" "submitting_lab" "authors" "url"  
420 | [25] "title" "paper_url" "date_submitted" "purpose_of_sequencing"
421 | 
422 | Note: a maximum of 5000 results can be downloaded at a time.
423 | 
424 | ### Export to fasta file
425 | 
426 | Use the export_fasta function to write sequence data to a file in fasta format. The sequence names will be [country\@pango_lineage\@accesion_id\@date](mailto:country@pango_lineage@accesion_id@date), with the date in decimal format (requires the [lubridate](https://cran.r-project.org/web/packages/lubridate/index.html) package). The default is to only export sequences for which a decimal date could be set. To prevent this, use the argument export_dated_only = F.
427 | 
428 | ``` r
429 | export_fasta(full_df_with_seq, out_file_name = 'GISAID_sequences.fasta')
430 | ```
431 | 
432 | Date format (default: decimal year) and delimiter (default: \@) can be set with the date_format and delimiter arguments respectively.
433 | 
434 | ``` r
435 | export_fasta(full_df_with_seq, out_file_name = 'GISAID_sequences.fasta', date_format='%Y-%m-%d', delimiter='|')
436 | ```
437 | 
438 | Use the `columns` argument to choose which columns are included in the export.
439 | 
440 | ``` r
441 | export_fasta(full_df_with_seq, out_file_name = 'GISAID_sequences.fasta', columns = c("accession_id", "country", "pangolin_lineage", "date"))
442 | ```
443 | 
444 | ## Errors
445 | 
446 | GISAIDR relies on the custom selection interface of [gisaid.org](https://www.gisaid.org/). If GISAIDR is giving you errors, first check that it is not gisaid.org producing these errors. We can't do anything to fix errors with gisaid.org.
447 | 
448 | If you have an epiflu account (i.e. you were using GISAID before COVID-19) you may have issues logging in as GISAID may default you to the epiflu database.
449 | 
450 | ## Updating
451 | 
452 | When updating GISAIDR run `detach("package:GISAIDR", unload=TRUE)` first to ensure the update is applied.
453 | 
454 | ## Examples
455 | 
456 | Download all of the 2020 entries from Asia but outside China.
457 | 
458 | ``` r
459 | library(GISAIDR)
460 | 
461 | # country list from GISAID
462 | Asia <- c('Asia / Afghanistan',
463 |  'Asia / Armenia',
464 |  'Asia / Bahrain',
465 |  'Asia / Bangladesh',
466 |  'Asia / Brunei',
467 |  'Asia / Cambodia',
468 |  # 'Asia / China', # remove China
469 |  'Asia / Georgia',
470 |  'Asia / Hong Kong',
471 |  'Asia / India',
472 |  'Asia / Indonesia / ...',
473 |  'Asia / Iran',
474 |  'Asia / Iraq',
475 |  'Asia / Israel',
476 |  'Asia / Japan',
477 |  'Asia / Jordan / ...',
478 |  'Asia / Kazakhstan / ...',
479 |  'Asia / Kuwait',
480 |  'Asia / Lebanon',
481 |  'Asia / Malaysia',
482 |  'Asia / Mongolia',
483 |  'Asia / Myanmar',
484 |  'Asia / Nepal',
485 |  'Asia / Oman / ...',
486 |  'Asia / Pakistan',
487 |  'Asia / Palestine / ...',
488 |  'Asia / Philippines',
489 |  'Asia / Qatar / ...',
490 |  'Asia / Saudi Arabia',
491 |  'Asia / Singapore',
492 |  'Asia / South Korea',
493 |  'Asia / Sri Lanka',
494 |  'Asia / Taiwan',
495 |  'Asia / Thailand',
496 |  'Asia / Timor-Leste',
497 |  'Asia / United Arab Emirates',
498 |  'Asia / Uzbekistan',
499 |  'Asia / Vietnam')
500 | 
501 | credentials <- login(username = username, password = password)
502 | 
503 | asia_not_china_df <- data.frame()
504 | for (country in Asia) {
505 |   print(country)
506 |   df <- query(credentials = credentials, location = country, load_all = TRUE, from = "2020-01-01", to = "2020-12-31")
507 |   asia_not_china_df <- rbind(asia_not_china_df, df)
508 | }
509 | head(asia_not_china_df)
510 | ```
511 | 
512 | ## Dev guide
513 | 
514 | 1.  Go to the search interface on <https://www.epicov.org/epi3/frontend> (EpiCoV \> Seach).
515 | 2.  Right click on the feature you want to add (e.g. the `complete` checkbox) and inspect the source code.
516 | 3.  Find the `ceid` for this element (`<div id="ce_qxos9a_bi">`) e.g. `ce_qxos9a_bi`
517 | 4.  Find the the value of the checkbox element (`<input class="sys-event-hook" name="ce_qxos9a_bi_name" style="vertical-align: middle;" type="checkbox" value="complete">`) e.g. `complete`.
518 | 5.  Search for the id in the page source. In one of the `<script>` header tags you'll find a createFI() function e.g. `this.getForm().createFI('ce_qxos9a_bi','CheckboxWidget','quality'`. The `ceid` is dynamic so you need to use the widget name i.e. `quality` to find the `ceid` dynamically.
519 | 6.  In the GISAIDR `login.R` file add code to extract the `ceid` using the `extract_search_ceid()` function and the widget e.g.
520 | 
521 | ``` r
522 | # Complete 
523 | quality_ceid <- extract_search_ceid("quality'", customSearch_page_text)
524 | ```
525 | 
526 | 1.  Add the extracted `ceid` to the list of `credentials` e.g. `complete_ceid = complete_ceid`
527 | 2.  Add the new argument and default value to **all** `query()` function in `query.R` and `internal_query.R` e.g. `complete = FALSE`.
528 | 3.  Add the new argument and default value to the `(load_all && j$totalRecords > nrows)` load_all recursion loop so all paginations will continue using the argument.
529 | 4.  Create and append a search queue to the main queue if the `complete` argument is used. Create the command using the `create_search_queue()` function. Use the `complete_ceid` for the `ceid` and the checkbox value (identified in step 4) for the `cvalue` e.g.
530 | 
531 | ``` r
532 | if (complete) {
533 |   queue <-
534 |     append(
535 |       queue,
536 |       create_search_queue(credentials,
537 |                           ceid=credentials$complete_ceid,
538 |                           cvalue=list('complete'),
539 |                           cmd='FilterChange')
540 |     )
541 | }
542 | ```
543 | 
544 | 1.  Add the new argument to the recursive load all `query()` call e.g. `complete = complete`
545 | 2.  Add test for new feature in `test-query.R` e.g.
546 | 
547 | ``` r
548 | test_that("complete works", {
549 |   df <- query(credentials = credentials, complete = TRUE)
550 |   expect_true(all(df$length > 29000))
551 | })
552 | ```
553 | 
554 | 1.  Run **all** tests. If tests pass create a PR to merge the new feature.
555 | 
556 | See [this PR](https://github.com/Wytamma/GISAIDR/pull/9/commits/72e0a4456563d03ded0626b7d47e0f4f90d2ef44) for the full details.
557 | 


--------------------------------------------------------------------------------
/R/download.R:
--------------------------------------------------------------------------------
  1 | #' Download from GISAID
  2 | #'
  3 | #' @param credentials GISAID credentials.
  4 | #' @param list_of_accession_ids list of accession_id from GISAID
  5 | #' @param get_sequence load sequences into data.frame after download
  6 | #' @param clean_up delete downloaded files (e.g. fasta files) after download
  7 | #' @return data.frame of complete data
  8 | download <- function(credentials, list_of_accession_ids, get_sequence=TRUE, clean_up=TRUE) {
  9 |   if (length(list_of_accession_ids) > 5000) {
 10 |     stop('Can only download a maxium of 5000 samples at a time.')
 11 |   } else if (length(list_of_accession_ids) == 0) {
 12 |     stop('Select at least one sequence!')
 13 |   }
 14 |   # select
 15 |   message('Selecting entries...')
 16 | 
 17 |   response <- select_entries(credentials = credentials, list_of_accession_ids=list_of_accession_ids)
 18 | 
 19 |   # download_cmd <- 'ToolDownload'
 20 |   # download_pid_wid <- list(pid=credentials$pid, wid=credentials$wid)
 21 |   download_cmd <- 'Download'
 22 |   download_pid_wid <- get_download_panel(credentials$sid, credentials$wid, credentials$pid, credentials$query_cid)
 23 |   #load panel
 24 |   downloal_page <-
 25 |     send_request(paste0('sid=', credentials$sid, '&pid=', download_pid_wid$pid))
 26 | 
 27 |   download_page_text = httr::content(downloal_page, as = 'text')
 28 |   if (isTRUE(grep('captcha', download_page_text) == 1)) {
 29 |     # Manual captcha
 30 |     cat("Captcha required! Please go to the following url, enter the captcha and close the window.\n")
 31 |     cat(paste0(GISAIDR::GISAID_URL, '?', 'sid=', credentials$sid))
 32 |     readline(prompt="Press [enter] to continue once you have completed the captcha")
 33 |     downloal_page <- send_request(paste0('sid=', credentials$sid))
 34 |     download_page_text = httr::content(downloal_page, as = 'text')
 35 | 
 36 |     download_pid_wid$pid  <- extract_first_match(",'(.{5,20})',new Object\\(\\{'window_name", download_page_text)
 37 |     download_pid_wid$wid  <- extract_first_match("sys.openOverlay\\('(.{5,20})','", download_page_text)
 38 | 
 39 |     downloal_page <-
 40 |       send_request(paste0('sid=', credentials$sid, '&pid=', download_pid_wid$pid))
 41 |     download_page_text = httr::content(downloal_page, as = 'text')
 42 | 
 43 |   }
 44 |   if (credentials$database == 'EpiRSV') {
 45 |     credentials$download_panel_cid <- extract_first_match("'(.{5,20})','DownloadSelectionComponent", download_page_text)
 46 |     #send_back_cmd(credentials$sid, download_pid_wid$wid, download_pid_wid$pid, credentials$download_panel_cid)
 47 |   } else if (credentials$database == 'EpiPox') {
 48 |     credentials$download_panel_cid <- extract_first_match("'(.{5,20})','DownloadSelectionComponent", download_page_text)
 49 |   } else {
 50 |     credentials$download_panel_cid <- extract_first_match("'(.{5,20})','DownloadSelectionComponent", download_page_text)
 51 |     radio_button_widget_cid <- extract_first_match("'(.{5,20})','RadiobuttonWidget", download_page_text)
 52 |     # augur
 53 |     queue = list()
 54 |     command <- createCommand(
 55 |       wid = download_pid_wid$wid,
 56 |       pid = download_pid_wid$pid,
 57 |       cid = credentials$download_panel_cid,
 58 |       cmd = 'setTarget',
 59 |       params = list(cvalue="augur_input", ceid=radio_button_widget_cid)
 60 |     )
 61 |     queue <- append(queue, list(command))
 62 |     command <- createCommand(
 63 |       wid = download_pid_wid$wid,
 64 |       pid = download_pid_wid$pid,
 65 |       cid = credentials$download_panel_cid,
 66 |       cmd = 'ChangeValue',
 67 |       params = list(cvalue="augur_input", ceid=radio_button_widget_cid)
 68 |     )
 69 |     queue <- append(queue, list(command))
 70 |     command <- createCommand(
 71 |       wid = download_pid_wid$wid,
 72 |       pid = download_pid_wid$pid,
 73 |       cid = credentials$download_panel_cid,
 74 |       cmd = 'FormatChange',
 75 |       params = list(ceid=radio_button_widget_cid)
 76 |     )
 77 |     queue <- append(queue, list(command))
 78 |     command_queue <- list(queue = queue)
 79 |     data <- formatDataForRequest(credentials$sid, download_pid_wid$wid, download_pid_wid$pid, command_queue, timestamp())
 80 |     response <- send_request(method = 'POST', data=data)
 81 |     response_data <- parseResponse(response)
 82 | 
 83 |     # accept agreement
 84 |     ev <- createCommand(
 85 |       wid = download_pid_wid$wid,
 86 |       pid = download_pid_wid$pid,
 87 |       cid = credentials$download_panel_cid,
 88 |       cmd = "DownloadReminder",
 89 |       params = setNames(list(), character(0)) #hack for empty {}
 90 |     )
 91 |     json_queue <- list(queue = list(ev))
 92 |     data <- formatDataForRequest(credentials$sid, download_pid_wid$wid, download_pid_wid$pid, json_queue, timestamp())
 93 |     res <-
 94 |       httr::POST(GISAID_URL, httr::add_headers(.headers = headers), body = data)
 95 |     j <- parseResponse(res) #
 96 | 
 97 |     download_pid_wid$wid <- extract_first_match("sys.openOverlay\\('(.{5,20})',", j$responses[[3]]$data)
 98 | 
 99 |     download_pid_wid$pid <- extract_first_match(",'(.{5,20})',new Object", j$responses[[3]]$data)
100 | 
101 |     agreement_page <- send_request(paste0('sid=', credentials$sid, '&pid=', download_pid_wid$pid, '&wid=', download_pid_wid$wid, '&mode=page') , method = "POST")
102 |     agreement_page_text = httr::content(agreement_page, as = 'text')
103 |     # extract the cid for download button
104 |     credentials$download_panel_cid <- extract_first_match("'(.{5,20})','Corona2020DownloadReminderButtonsComponent", agreement_page_text)
105 |     # accept the agreement
106 |     agree_check_box_ceid <-  extract_first_match("createFI\\('(.{5,20})','CheckboxWidget'", agreement_page_text)
107 |     queue = list()
108 |     command <- createCommand(
109 |       wid = download_pid_wid$wid,
110 |       pid = download_pid_wid$pid,
111 |       cid = credentials$download_panel_cid,
112 |       cmd = 'setTarget',
113 |       params = list(cvalue=list("agreed"), ceid=agree_check_box_ceid)
114 |     )
115 |     queue <- append(queue, list(command))
116 |     command <- createCommand(
117 |       wid = download_pid_wid$wid,
118 |       pid = download_pid_wid$pid,
119 |       cid = credentials$download_panel_cid,
120 |       cmd = 'ChangeValue',
121 |       params = list(cvalue=list("agreed"), ceid=agree_check_box_ceid)
122 |     )
123 |     queue <- append(queue, list(command))
124 |     command <- createCommand(
125 |       wid = download_pid_wid$wid,
126 |       pid = download_pid_wid$pid,
127 |       cid = credentials$download_panel_cid,
128 |       cmd = 'Agreed',
129 |       params = list(ceid=agree_check_box_ceid)
130 |     )
131 |     queue <- append(queue, list(command))
132 |     command_queue <- list(queue = queue)
133 |     data <- formatDataForRequest(credentials$sid, download_pid_wid$wid, download_pid_wid$pid, command_queue, timestamp())
134 |     response <- send_request(method = 'POST', data=data)
135 |     response_data <- parseResponse(response)
136 |     log.debug(response_data)
137 |   }
138 | 
139 |   ev <- createCommand(
140 |     wid = download_pid_wid$wid,
141 |     pid = download_pid_wid$pid,
142 |     cid = credentials$download_panel_cid,
143 |     cmd = download_cmd,
144 |     params = setNames(list(), character(0)) #hack for empty {}
145 |   )
146 |   json_queue <- list(queue = list(ev))
147 |   data <- formatDataForRequest(credentials$sid, download_pid_wid$wid, download_pid_wid$pid, json_queue, timestamp())
148 |   message('Compressing data. Please wait...')
149 |   res <-
150 |     httr::POST(GISAID_URL, httr::add_headers(.headers = headers), body = data)
151 |   j <- parseResponse(res)
152 |   # extract check_async
153 |   check_async_id = strsplit(j$responses[[1]]$data, "'")[[1]][4]
154 | 
155 |   # while generateDownloadDone not ready
156 |   is_ready = FALSE
157 |   while (!is_ready) {
158 |     res <- httr::GET(paste0('https://www.epicov.org/epi3/check_async/', check_async_id, '?_=', timestamp()))
159 |     j <- parseResponse(res)
160 |     is_ready <- j$is_ready
161 |     if (!is_ready) {
162 |       Sys.sleep(1)
163 |     }
164 |   }
165 |   log.debug(j)
166 |   # get download link
167 |   message('Data ready.')
168 |   ev <- createCommand(
169 |     wid = credentials$wid,
170 |     pid = credentials$pid,
171 |     cid = credentials$query_cid,
172 |     cmd = "generateDownloadDone",
173 |     params = setNames(list(), character(0)) #hack for empty {}
174 |   )
175 |   json_queue <- list(queue = list(ev))
176 |   data <- formatDataForRequest(credentials$sid, credentials$wid, credentials$pid, json_queue, timestamp())
177 |   res <-
178 |     send_request(method = 'POST', data=data)
179 |   j <- parseResponse(res)
180 | 
181 |   # extract download url
182 |   download_url <- paste0("https://www.epicov.org",strsplit(j$responses[[1]]$data, '"')[[1]][2])
183 |   message(paste("Download url:", download_url))
184 |   # download zip
185 |   tryCatch({
186 |     message('Downloading...')
187 | 
188 |     if (credentials$database == "EpiCoV") {
189 |       tmpTarFile <- "gisaidr_data_tmp.tar"
190 |       download.file(download_url, tmpTarFile, quiet = TRUE, method = 'auto', mode = "wb")
191 |       # unzip
192 |       untar(tmpTarFile, exdir="gisaidr_data_tmp", restore_times = FALSE, verbose=FALSE)
193 |       # load into df
194 |       metadataFile <- list.files("gisaidr_data_tmp", pattern = "*.metadata.tsv")[1]
195 |       if (is.na(metadataFile)) {
196 |         message("gisaid_data files:")
197 |         message(list.files("gisaidr_data_tmp"))
198 |         stop("Could not find metadata file.")
199 |       }
200 |       if(.Platform$OS.type == "unix") {
201 |         metadataFilePath <- paste0("gisaidr_data_tmp/", metadataFile)
202 |       } else {
203 |         metadataFilePath <- paste0("gisaidr_data_tmp\\", metadataFile)
204 |       }
205 |       df <- read.csv(metadataFilePath, sep="\t", quote="")
206 |       df <- df[order(df$gisaid_epi_isl, decreasing = TRUE),]
207 |       colnames(df)[3] <- "accession_id"
208 |       if (get_sequence) {
209 |         # join sequence
210 |         sequencesFile <- list.files("gisaidr_data_tmp", pattern = "*.sequences.fasta")[1]
211 |         if (is.na(sequencesFile)) {
212 |           stop("Could not find sequences file.")
213 |         }
214 |         if(.Platform$OS.type == "unix") {
215 |           sequencesFilePath <- paste0("gisaidr_data_tmp/", sequencesFile)
216 |         } else {
217 |           sequencesFilePath <- paste0("gisaidr_data_tmp\\", sequencesFile)
218 |         }
219 |         seq_df <- read_fasta(sequencesFilePath)
220 |         df <- merge(x = df, y = seq_df, by = "strain", all = TRUE)
221 |       }
222 |     } else {
223 |       sequencesFile <- "gisaidr_data_tmp.fasta"
224 |       download.file(download_url, sequencesFile, quiet = TRUE, method = 'auto')
225 |       fdf <- read_fasta(sequencesFile, get_sequence)
226 |       df <- merge(data.frame(do.call('rbind', strsplit(as.character(fdf$strain), '|', fixed=TRUE))), fdf, by = 0)
227 |       df <- df[-c(1)]
228 |       if (get_sequence) {
229 |         names(df) <- c('strain', 'accession_id', 'collection_date', 'description', 'sequence')
230 |       } else {
231 |         names(df) <- c('strain', 'accession_id', 'collection_date', 'description')
232 |       }
233 |     }
234 | 
235 |   }, finally = {
236 |     # clean up
237 |     if (clean_up){
238 |       if (credentials$database == "EpiCoV") {
239 |         if (file.exists(tmpTarFile)) {
240 |           file.remove(tmpTarFile)
241 |         }
242 |         if (file.exists("gisaidr_data_tmp")) {
243 |           unlink("gisaidr_data_tmp", recursive = TRUE)
244 |         }
245 |       } else {
246 |         if (file.exists(sequencesFile)) {
247 |           file.remove(sequencesFile)
248 |         }
249 |       }
250 |     }
251 |   })
252 |   # replace ? with NA
253 |   df[ df == "?" ] <- NA
254 |   return(df)
255 | }
256 | 
257 | #' Download additional files from GISAID.
258 | #'
259 | #' @param credentials GISAID credentials.
260 | #' @param list_of_accession_ids list of accession_id from GISAID
261 | #' @param dates_and_location TRUE if Date and Location metadata should be downloaded
262 | #' @param patient_status TRUE if Patient Status metadata should be downloaded
263 | #' @param sequencing_technology TRUE if Sequencing Technology metadata should be downloaded
264 | #' @param sequences TRUE if Nucleotide sequences should be downloaded
265 | #' @param augur_input TRUE if Augur Input should be downloaded (metadata + sequences)
266 | #' @param clean_up delete downloaded files (e.g. fasta files) after download
267 | #' @return named list of 'metadata' df and/or 'sequences' CHAR str
268 | download_files <- function(
269 |     credentials,
270 |     list_of_accession_ids,
271 |     dates_and_location    = NULL,
272 |     patient_status        = NULL,
273 |     sequencing_technology = NULL,
274 |     sequences             = NULL,
275 |     augur_input           = NULL,
276 |     clean_up              = TRUE
277 | ) {
278 | 
279 |   # Store the possible download type results
280 |   download_results <- list(
281 |     dates_and_location    = dates_and_location,
282 |     patient_status        = patient_status,
283 |     sequencing_technology = sequencing_technology,
284 |     augur_input           = augur_input,
285 |     sequences             = sequences
286 |   )
287 | 
288 |   # ---------------------------------------------------------------------------
289 |   # Select Accession
290 | 
291 |   if (length(list_of_accession_ids) > 5000) {
292 |     stop(log.error("Can only download a maxium of 5000 samples at a time."))
293 |   } else if (length(list_of_accession_ids) == 0) {
294 |     stop(log.error("Select at least one sequence!"))
295 |   }
296 | 
297 |   log.info("Selecting entries.", level=2)
298 |   response <- select_entries(credentials = credentials, list_of_accession_ids=list_of_accession_ids)
299 | 
300 |   # ---------------------------------------------------------------------------
301 |   # Download Type Loop
302 | 
303 |   for (download_type in names(download_results)){
304 |     if (is.null(download_results[[download_type]])) { next }
305 |     if (download_type == "augur_input" && credentials$database != "EpiCoV"){
306 |       stop(log.error("The download type augur_input is only available for EpiCoV"))
307 |     }
308 |     # Downloading both augur_input and sequences is redundant
309 |     if (download_type == "sequences" && !is.null(download_results[["augur_input"]])){
310 |       log.warn("Download types augur_input and sequences are redundant, skipping sequences download.")
311 |     }
312 | 
313 | 
314 |     log.info(paste("Download type:", download_type), level=2)
315 | 
316 |     # ---------------------------------------------------------------------------
317 |     # Open Download Panel
318 | 
319 |     log.info("Opening download panel.", level=2)
320 |     download_cmd       <- 'Download'
321 |     download_pid_wid   <- get_download_panel(credentials$sid, credentials$wid, credentials$pid, credentials$query_cid)
322 |     download_page      <- send_request(paste0('sid=', credentials$sid, '&pid=', download_pid_wid$pid))
323 |     download_page_text <- httr::content(download_page, as = 'text')
324 | 
325 |     if (isTRUE(grep('captcha', download_page_text) == 1)) {
326 |       # Manual Captcha
327 |       cat("Captcha required! Please go to the following url, enter the captcha and close the window.\n")
328 |       cat(paste0(GISAIDR::GISAID_URL, '?', 'sid=', credentials$sid))
329 |       readline(prompt="Press [enter] to continue once you have completed the captcha")
330 |       downloal_page <- send_request(paste0('sid=', credentials$sid))
331 |       download_page_text = httr::content(downloal_page, as = 'text')
332 | 
333 |       download_pid_wid$pid  <- extract_first_match(",'(.{5,20})',new Object\\(\\{'window_name", download_page_text)
334 |       download_pid_wid$wid  <- extract_first_match("sys.openOverlay\\('(.{5,20})','", download_page_text)
335 | 
336 |       downloal_page <-
337 |         send_request(paste0('sid=', credentials$sid, '&pid=', download_pid_wid$pid))
338 |       download_page_text = httr::content(downloal_page, as = 'text')
339 | 
340 |     }
341 | 
342 |     # Identify the download panel CID basedon which database we're in
343 |     if (credentials$database == 'EpiRSV') {
344 |       credentials$download_panel_cid <- extract_first_match("'(.{5,20})','DownloadSelectionComponent", download_page_text)
345 |     } else if (credentials$database == 'EpiPox') {
346 |       credentials$download_panel_cid <- extract_first_match("'(.{5,20})','DownloadSelectionComponent", download_page_text)
347 |     } else {
348 |       credentials$download_panel_cid <- extract_first_match("'(.{5,20})','DownloadSelectionComponent", download_page_text)
349 |     }
350 | 
351 |     # Store separate variables for the initial download (tmpFile) and
352 |     # any subsequent extraction, such as metadataFile and sequencesFile
353 |     # which come from the augur_input option.
354 |     if (download_type == 'dates_and_location'){
355 |       radio_button_value <- 'dateloc'
356 |       tmpFile            <- 'gisaidr_dates_and_location_tmp.tsv'
357 |       metadataFile       <- tmpFile
358 |       sequencesFile      <- NULL
359 |     } else if (download_type == 'patient_status'){
360 |       radio_button_value <- 'meta_epi'
361 |       tmpFile            <- 'gisaidr_patient_status_tmp.tsv'
362 |       metadataFile       <- tmpFile
363 |       sequencesFile      <- NULL
364 |     } else if (download_type == 'sequencing_technology'){
365 |       radio_button_value <- 'meta_tech'
366 |       tmpFile            <- 'gisaidr_sequencing_technology_tmp.tsv'
367 |       metadataFile       <- tmpFile
368 |       sequencesFile      <- NULL
369 |     } else if (download_type == 'sequences'){
370 |       radio_button_value <- 'fasta'
371 |       tmpFile            <- 'gisaidr_sequences_tmp.fasta'
372 |       metadataFile       <- NULL
373 |       sequencesFile      <- tmpFile
374 |     } else if (download_type == 'augur_input'){
375 |       radio_button_value <- 'augur_input'
376 |       tmpFile            <- 'gisaidr_augur_input_tmp.tar'
377 |       metadataFile       <- 'gisaidr_augur_input_metadata_tmp.tsv'
378 |       sequencesFile      <- 'gisaidr_augur_input_sequences_tmp.fasta'
379 |     }
380 | 
381 |     # ---------------------------------------------------------------------------
382 |     # Select Download Type
383 | 
384 |     log.info("Configuring download options.", level=2)
385 |     radio_button_widget_cid <- extract_first_match("'(.{5,20})','RadiobuttonWidget", download_page_text)
386 | 
387 |     queue = list()
388 |     command <- createCommand(
389 |       wid = download_pid_wid$wid,
390 |       pid = download_pid_wid$pid,
391 |       cid = credentials$download_panel_cid,
392 |       cmd = 'setTarget',
393 |       params = list(cvalue=radio_button_value, ceid=radio_button_widget_cid)
394 |     )
395 |     queue <- append(queue, list(command))
396 |     command <- createCommand(
397 |       wid = download_pid_wid$wid,
398 |       pid = download_pid_wid$pid,
399 |       cid = credentials$download_panel_cid,
400 |       cmd = 'ChangeValue',
401 |       params = list(cvalue=radio_button_value, ceid=radio_button_widget_cid)
402 |     )
403 |     queue <- append(queue, list(command))
404 |     command <- createCommand(
405 |       wid = download_pid_wid$wid,
406 |       pid = download_pid_wid$pid,
407 |       cid = credentials$download_panel_cid,
408 |       cmd = 'FormatChange',
409 |       params = list(ceid=radio_button_widget_cid)
410 |     )
411 |     queue         <- append(queue, list(command))
412 |     command_queue <- list(queue = queue)
413 |     data          <- formatDataForRequest(credentials$sid, download_pid_wid$wid, download_pid_wid$pid, command_queue, timestamp())
414 |     response      <- send_request(method = 'POST', data=data)
415 |     response_data <- parseResponse(response)
416 | 
417 |     # ---------------------------------------------------------------------------
418 |     # Accept EpiCoV Agreement
419 | 
420 |     if (credentials$database == 'EpiCoV') {
421 |       log.info("Accepting EpiCoV agreement.", level=2)
422 |       ev <- createCommand(
423 |         wid = download_pid_wid$wid,
424 |         pid = download_pid_wid$pid,
425 |         cid = credentials$download_panel_cid,
426 |         cmd = "DownloadReminder",
427 |         params = setNames(list(), character(0)) #hack for empty {}
428 |       )
429 |       json_queue    <- list(queue = list(ev))
430 |       data          <- formatDataForRequest(credentials$sid, download_pid_wid$wid, download_pid_wid$pid, json_queue, timestamp())
431 |       response      <- httr::POST(GISAID_URL, httr::add_headers(.headers = headers), body = data)
432 |       response_data <- parseResponse(response)
433 | 
434 |       download_pid_wid$wid <- extract_first_match("sys.openOverlay\\('(.{5,20})',", response_data$responses[[3]]$data)
435 |       download_pid_wid$pid <- extract_first_match(",'(.{5,20})',new Object", response_data$responses[[3]]$data)
436 | 
437 |       agreement_page      <- send_request(paste0('sid=', credentials$sid, '&pid=', download_pid_wid$pid, '&wid=', download_pid_wid$wid, '&mode=page') , method = "POST")
438 |       agreement_page_text <- httr::content(agreement_page, as = 'text')
439 |       # extract the cid for download button
440 |       credentials$download_panel_cid <- extract_first_match("'(.{5,20})','Corona2020DownloadReminderButtonsComponent", agreement_page_text)
441 |       # accept the agreement
442 |       agree_check_box_ceid <-  extract_first_match("createFI\\('(.{5,20})','CheckboxWidget'", agreement_page_text)
443 |       queue = list()
444 |       command <- createCommand(
445 |         wid = download_pid_wid$wid,
446 |         pid = download_pid_wid$pid,
447 |         cid = credentials$download_panel_cid,
448 |         cmd = 'setTarget',
449 |         params = list(cvalue=list("agreed"), ceid=agree_check_box_ceid)
450 |       )
451 |       queue <- append(queue, list(command))
452 |       command <- createCommand(
453 |         wid = download_pid_wid$wid,
454 |         pid = download_pid_wid$pid,
455 |         cid = credentials$download_panel_cid,
456 |         cmd = 'ChangeValue',
457 |         params = list(cvalue=list("agreed"), ceid=agree_check_box_ceid)
458 |       )
459 |       queue <- append(queue, list(command))
460 |       command <- createCommand(
461 |         wid = download_pid_wid$wid,
462 |         pid = download_pid_wid$pid,
463 |         cid = credentials$download_panel_cid,
464 |         cmd = 'Agreed',
465 |         params = list(ceid=agree_check_box_ceid)
466 |       )
467 |       queue         <- append(queue, list(command))
468 |       command_queue <- list(queue = queue)
469 |       data          <- formatDataForRequest(credentials$sid, download_pid_wid$wid, download_pid_wid$pid, command_queue, timestamp())
470 |       response      <- send_request(method = 'POST', data=data)
471 |       response_data <- parseResponse(response)
472 |     }
473 | 
474 |     # ---------------------------------------------------------------------------
475 |     # Generate Download URL
476 | 
477 |     log.info("Generating download URL.", level=2)
478 |     ev <- createCommand(
479 |       wid = download_pid_wid$wid,
480 |       pid = download_pid_wid$pid,
481 |       cid = credentials$download_panel_cid,
482 |       cmd = download_cmd,
483 |       params = setNames(list(), character(0)) #hack for empty {}
484 |     )
485 |     json_queue <- list(queue = list(ev))
486 |     data       <- formatDataForRequest(credentials$sid, download_pid_wid$wid, download_pid_wid$pid, json_queue, timestamp())
487 | 
488 |     log.info("Compressing data.", level=2)
489 |     response <- httr::POST(GISAID_URL, httr::add_headers(.headers = headers), body = data)
490 |     response_data   <- parseResponse(response)
491 | 
492 |     # extract check_async
493 |     check_async_id = strsplit(response_data$responses[[1]]$data, "'")[[1]][4]
494 | 
495 |     # while generateDownloadDone not ready
496 |     is_ready = FALSE
497 |     while (!is_ready) {
498 |       response      <- httr::GET(paste0('https://www.epicov.org/epi3/check_async/', check_async_id, '?_=', timestamp()))
499 |       response_data <- parseResponse(response)
500 |       is_ready      <- response_data$is_ready
501 |       if (!is_ready) { Sys.sleep(1) }
502 |     }
503 | 
504 |     # ---------------------------------------------------------------------------
505 |     # Get Download URL
506 | 
507 |     log.info("Data ready.", level=2)
508 |     ev <- createCommand(
509 |       wid = credentials$wid,
510 |       pid = credentials$pid,
511 |       cid = credentials$query_cid,
512 |       cmd = "generateDownloadDone",
513 |       params = setNames(list(), character(0)) #hack for empty {}
514 |     )
515 |     json_queue    <- list(queue = list(ev))
516 |     data          <- formatDataForRequest(credentials$sid, credentials$wid, credentials$pid, json_queue, timestamp())
517 |     response      <- send_request(method = 'POST', data=data)
518 |     response_data <- parseResponse(response)
519 | 
520 |     # extract download url
521 |     download_url <- paste0("https://www.epicov.org",strsplit(response_data$responses[[1]]$data, '"')[[1]][2])
522 |     log.info(paste("Download url:", download_url), level=2)
523 | 
524 |     # ---------------------------------------------------------------------------
525 |     # Download File
526 | 
527 |     tryCatch({
528 |       log.info(paste("Downloading file:", tmpFile), level=2)
529 | 
530 |       # Augur Input
531 |       if (grepl("\\.tar$", tmpFile)){
532 |         download.file(download_url, tmpFile, quiet = TRUE, method = 'auto', mode = "wb")
533 |         augur_input_dir <- "gisaidr_augur_input_tmp"
534 |         # Remove existing directories from tar archives
535 |         if (dir.exists(augur_input_dir)){
536 |           unlink(augur_input_dir, recursive=TRUE)
537 |         }
538 |         # Decompress tar archive
539 |         untar(tmpFile, exdir=augur_input_dir, restore_times = FALSE, verbose=FALSE)
540 |         # Extract Sequences
541 |         sequencesFile <- list.files("gisaidr_augur_input_tmp", pattern = "*.sequences.fasta")[1]
542 |         if (is.na(sequencesFile)) { stop(log.error("Could not find sequences file.")) }
543 |         sequencesFile <- file.path("gisaidr_augur_input_tmp", sequencesFile)
544 |         # Extract Metadata
545 |         metadataFile <- list.files("gisaidr_augur_input_tmp", pattern = "*.metadata.tsv")[1]
546 |         if (is.na(metadataFile)) { stop(log.error("Could not find metadata file.")) }
547 |         metadataFile <- file.path("gisaidr_augur_input_tmp", metadataFile)
548 |       }
549 |       else {
550 |         download.file(download_url, tmpFile, quiet = TRUE, method = 'auto')
551 |       }
552 | 
553 |       if (!is.null(metadataFile) && file.exists(metadataFile)){
554 |         metadata <- read.csv(metadataFile, sep="\t", quote="", check.names=FALSE)
555 |         # Replace "?" with "NA" in the augur_input metadata
556 |         if ( download_type == "augur_input" ) { metadata[ metadata == "?" ] <- NA }
557 |       }
558 |       if (!is.null(sequencesFile) && file.exists(sequencesFile)){
559 |         sequences <- readChar(sequencesFile, file.info(tmpFile)$size)
560 |       }
561 | 
562 |       # Decide on how we want to organize the download results based on type
563 |       if (download_type == "augur_input"){
564 |         download_results[[download_type]] <- list(metadata=metadata, sequences=sequences)
565 |       } else if (download_type == "sequences"){
566 |         download_results[[download_type]] <- sequences
567 |       } else {
568 |         download_results[[download_type]] <- metadata
569 |       }
570 | 
571 |     }, finally = {
572 |       if (clean_up){
573 |         log.info("Cleaning up temporary files.", level=2)
574 |         if (!is.null(tmpFile)       && file.exists(tmpFile))       { file.remove(tmpFile) }
575 |         if (!is.null(metadataFile)  && file.exists(metadataFile))  { file.remove(metadataFile) }
576 |         if (!is.null(sequencesFile) && file.exists(sequencesFile)) { file.remove(sequencesFile) }
577 |         if (dir.exists("gisaidr_augur_input_tmp"))                 { unlink("gisaidr_augur_input_tmp", recursive=TRUE) }
578 |       }
579 |     })
580 |   }
581 | 
582 |   return(download_results)
583 | }
584 | 


--------------------------------------------------------------------------------
/GISAID_LOCATIONS.txt:
--------------------------------------------------------------------------------
  1 | Africa / ...
  2 | Africa / Algeria / ...
  3 | Africa / Algeria / Adrar
  4 | Africa / Algeria / Ain Salah
  5 | Africa / Algeria / Alger
  6 | Africa / Algeria / Algiers
  7 | Africa / Algeria / Blida
  8 | Africa / Algeria / Bordj-Bou-Arreridj
  9 | Africa / Algeria / Boufarik
 10 | Africa / Algeria / Bouira
 11 | Africa / Algeria / El Oued
 12 | Africa / Algeria / Laghouat
 13 | Africa / Algeria / Ouargla
 14 | Africa / Algeria / Sétif
 15 | Africa / Algeria / Tipaza
 16 | Africa / Algeria / Tizi-Ouzou
 17 | Africa / Angola / ...
 18 | Africa / Angola / Cabinda
 19 | Africa / Angola / LUANDA
 20 | Africa / Angola / Luanda
 21 | Africa / Angola / Luanda / Alvalade
 22 | Africa / Angola / Luanda / Bairro Azul
 23 | Africa / Angola / Luanda / Belas
 24 | Africa / Angola / Luanda / Benfica
 25 | Africa / Angola / Luanda / C Sequele
 26 | Africa / Angola / Luanda / Cacuaco
 27 | Africa / Angola / Luanda / Camama
 28 | Africa / Angola / Luanda / Cond Jardim Do Edem
 29 | Africa / Angola / Luanda / Cond Vila Flor
 30 | Africa / Angola / Luanda / Ingombota
 31 | Africa / Angola / Luanda / Lar Do Patriota
 32 | Africa / Angola / Luanda / Luanda
 33 | Africa / Angola / Luanda / Maianga
 34 | Africa / Angola / Luanda / Morro Bento
 35 | Africa / Angola / Luanda / Nelito Soares
 36 | Africa / Angola / Luanda / Regedoria
 37 | Africa / Angola / Luanda / Samba
 38 | Africa / Angola / Luanda / Talatona
 39 | Africa / Angola / Luanda / Viana
 40 | Africa / Benin / ...
 41 | Africa / Benin / Cotonou
 42 | Africa / Benin / Oueme
 43 | Africa / Botswana
 44 | Africa / Burkina Faso / ...
 45 | Africa / Burkina Faso / Banfora
 46 | Africa / Burkina Faso / Batie
 47 | Africa / Burkina Faso / Batié
 48 | Africa / Burkina Faso / Bobo Dioulasso
 49 | Africa / Burkina Faso / Boromo
 50 | Africa / Burkina Faso / Dandé
 51 | Africa / Burkina Faso / Dano
 52 | Africa / Burkina Faso / Diebougou
 53 | Africa / Burkina Faso / Gaoua
 54 | Africa / Burkina Faso / Houndé
 55 | Africa / Burkina Faso / Loumbila
 56 | Africa / Burkina Faso / N'Dorola
 57 | Africa / Burkina Faso / Orodara
 58 | Africa / Burkina Faso / Ziniare
 59 | Africa / Burundi
 60 | Africa / Cabo Verde / ...
 61 | Africa / Cabo Verde / Mindelo
 62 | Africa / Cabo Verde / Praia
 63 | Africa / Cameroon / ...
 64 | Africa / Cameroon / Bafoussam
 65 | Africa / Cameroon / Douala
 66 | Africa / Cameroon / Dschang
 67 | Africa / Cameroon / Ebolowa
 68 | Africa / Cameroon / Garoua
 69 | Africa / Cameroon / Kribi
 70 | Africa / Cameroon / Sangmelima
 71 | Africa / Cameroon / Yaounde
 72 | Africa / Cameroon / Yaoundé
 73 | Africa / Canary Islands
 74 | Africa / Central African Republic
 75 | Africa / Chad / ...
 76 | Africa / Chad / N'Djamena
 77 | Africa / Comoros / ...
 78 | Africa / Comoros / Moheli
 79 | Africa / Comoros / Ngazidja
 80 | Africa / Cote d'Ivoire
 81 | Africa / Democratic Republic of the Congo
 82 | Africa / Djibouti
 83 | Africa / Egypt
 84 | Africa / Equatorial Guinea
 85 | Africa / Eswatini / ...
 86 | Africa / Eswatini / Hhohho
 87 | Africa / Eswatini / Manzini
 88 | Africa / Eswatini / Mbabane
 89 | Africa / Eswatini / Shiselweni
 90 | Africa / Ethiopia
 91 | Africa / Gabon
 92 | Africa / Gambia / ...
 93 | Africa / Gambia / Basse
 94 | Africa / Gambia / Kombo
 95 | Africa / Gambia / West Coast Region
 96 | Africa / Ghana
 97 | Africa / Guinea / ...
 98 | Africa / Guinea / Beyla
 99 | Africa / Guinea / Boffa
100 | Africa / Guinea / Boke
101 | Africa / Guinea / Conakry
102 | Africa / Guinea / Coyah
103 | Africa / Guinea / Dabola
104 | Africa / Guinea / Dubreka
105 | Africa / Guinea / Dubréka
106 | Africa / Guinea / Forecariah
107 | Africa / Guinea / Guekedou
108 | Africa / Guinea / Kerouane
109 | Africa / Guinea / Kindia
110 | Africa / Guinea / Kissidougou
111 | Africa / Guinea / Mamou
112 | Africa / Guinea / Siguri
113 | Africa / Guinea / Tougue
114 | Africa / Guinea Bissau
115 | Africa / Kenya
116 | Africa / Lesotho / ...
117 | Africa / Lesotho / Berea
118 | Africa / Lesotho / Botha Bothe
119 | Africa / Lesotho / Leribe
120 | Africa / Lesotho / Mafeteng
121 | Africa / Lesotho / Maseru
122 | Africa / Lesotho / Mohales' Hoek
123 | Africa / Lesotho / Qachas' Nek
124 | Africa / Lesotho / Quthing
125 | Africa / Libya
126 | Africa / Madagascar / ...
127 | Africa / Madagascar / Analamanga / Antananarivo
128 | Africa / Madagascar / Analamanga / Fenoarivo
129 | Africa / Madagascar / Antananarivo
130 | Africa / Madagascar / Antananarivo / Analamanga
131 | Africa / Madagascar / Antananarivo / Analamanga / Ambohidratrimo
132 | Africa / Madagascar / Antananarivo / Analamanga / Andramasina
133 | Africa / Madagascar / Antananarivo / Analamanga / Antananarivo-Atsimondrano
134 | Africa / Madagascar / Antananarivo / Analamanga / Antananarivo-Avaradrano
135 | Africa / Madagascar / Antananarivo / Analamanga / Antananarivo-Renivohitra
136 | Africa / Madagascar / Antananarivo / Itasy / Arivonimamo
137 | Africa / Madagascar / Antananarivo / Itasy / Miarinarivo
138 | Africa / Madagascar / Antananarivo / Vakinankaratra / Antsirabe-I
139 | Africa / Madagascar / Antsiranana / Diana / Antsiranana-I
140 | Africa / Madagascar / Antsiranana / Diana / Nosy-Be
141 | Africa / Madagascar / Fianarantsoa / Amoron'i-Mania / Ambositra
142 | Africa / Madagascar / Fianarantsoa / Haute-Matsiatra / Fianarantsoa-I
143 | Africa / Madagascar / Mahajanga / Boeny / Mahajanga-I
144 | Africa / Madagascar / Toamasina
145 | Africa / Madagascar / Toliara / Atsimo-Andrefana / Toliary-I
146 | Africa / Madagascar / Toliara / Menabe / Morondava
147 | Africa / Malawi
148 | Africa / Mali
149 | Africa / Mauritius
150 | Africa / Mayotte / ...
151 | Africa / Mayotte / MAMOUDZOU
152 | Africa / Mayotte / Mamoudzou
153 | Africa / Morocco
154 | Africa / Morocoo / ...
155 | Africa / Morocoo / Rabat
156 | Africa / Mozambique
157 | Africa / Namibia / ...
158 | Africa / Namibia / Erongo
159 | Africa / Namibia / Hardap
160 | Africa / Namibia / Karas
161 | Africa / Namibia / Kavango
162 | Africa / Namibia / Kavango East
163 | Africa / Namibia / Kavango West
164 | Africa / Namibia / Kharas
165 | Africa / Namibia / Khomas
166 | Africa / Namibia / Kunene
167 | Africa / Namibia / Manzini
168 | Africa / Namibia / Ohangwena
169 | Africa / Namibia / Omaheke
170 | Africa / Namibia / Omusati
171 | Africa / Namibia / Oshana
172 | Africa / Namibia / Oshikoto
173 | Africa / Namibia / Otjozondjupa
174 | Africa / Namibia / Zambezi
175 | Africa / Niger / ...
176 | Africa / Niger / Agadez
177 | Africa / Niger / Maradi
178 | Africa / Niger / Niamey
179 | Africa / Niger / Zinder
180 | Africa / Nigeria
181 | Africa / Republic of Congo / ...
182 | Africa / Republic of Congo / Brazzaville
183 | Africa / Republic of the Congo
184 | Africa / Reunion
185 | Africa / Rwanda
186 | Africa / Senegal
187 | Africa / Sierra Leone
188 | Africa / Somalia / ...
189 | Africa / Somalia / Awdal
190 | Africa / Somalia / Banaadir
191 | Africa / Somalia / Benadir
192 | Africa / Somalia / Marodi-Jex
193 | Africa / Somalia / Mudug
194 | Africa / South Africa
195 | Africa / South Sudan
196 | Africa / Togo
197 | Africa / Tunisia
198 | Africa / Uganda
199 | Africa / Zambia
200 | Africa / Zimbabwe
201 | Asia / ...
202 | Asia / Afghanistan
203 | Asia / Armenia
204 | Asia / Bahrain
205 | Asia / Bangladesh
206 | Asia / Brunei
207 | Asia / Cambodia
208 | Asia / China
209 | Asia / Georgia
210 | Asia / Hong Kong
211 | Asia / India
212 | Asia / Indonesia
213 | Asia / Iran
214 | Asia / Iraq
215 | Asia / Israel
216 | Asia / Japan
217 | Asia / Jordan / ...
218 | Asia / Jordan / Amman
219 | Asia / Jordan / Aqaba
220 | Asia / Jordan / Irbid
221 | Asia / Jordan / Mafraq
222 | Asia / Jordan / Zarqa
223 | Asia / Kazakhstan / ...
224 | Asia / Kazakhstan / ALmaty
225 | Asia / Kazakhstan / Akmola region
226 | Asia / Kazakhstan / Aktobe City
227 | Asia / Kazakhstan / Aktobe region
228 | Asia / Kazakhstan / Almata region
229 | Asia / Kazakhstan / Almaty
230 | Asia / Kazakhstan / Almaty Region
231 | Asia / Kazakhstan / Almaty Region / Almaty
232 | Asia / Kazakhstan / Almaty Region / Esik city
233 | Asia / Kazakhstan / Almaty Region / Kaskelen city
234 | Asia / Kazakhstan / Almaty Region / Taldykorgan
235 | Asia / Kazakhstan / Almaty Region / Tengizchevroil
236 | Asia / Kazakhstan / Atyrau
237 | Asia / Kazakhstan / Atyrau region
238 | Asia / Kazakhstan / Atyrau region / Tengizchevroil
239 | Asia / Kazakhstan / East Kazakhstan region
240 | Asia / Kazakhstan / Jambyl Region
241 | Asia / Kazakhstan / Karaganda
242 | Asia / Kazakhstan / Karaganda / Temirtau
243 | Asia / Kazakhstan / Karaganda region
244 | Asia / Kazakhstan / Karaganda region / Temirtau
245 | Asia / Kazakhstan / Kostanay
246 | Asia / Kazakhstan / Kostanay region
247 | Asia / Kazakhstan / Kyzylorda
248 | Asia / Kazakhstan / Kyzylorda region
249 | Asia / Kazakhstan / Kyzylorda region / Kyzylorda
250 | Asia / Kazakhstan / Mangistau region
251 | Asia / Kazakhstan / Mangystau Region
252 | Asia / Kazakhstan / Mangystau Region / Aktau
253 | Asia / Kazakhstan / North Kazakhstan Region
254 | Asia / Kazakhstan / Nur-Sultan
255 | Asia / Kazakhstan / Pavlodar
256 | Asia / Kazakhstan / Pavlodar region
257 | Asia / Kazakhstan / South Kazakhstan Region
258 | Asia / Kazakhstan / Talgar
259 | Asia / Kazakhstan / Turkestan region
260 | Asia / Kazakhstan / Turkestan region / Turkestan
261 | Asia / Kazakhstan / West Kazakhstan Region
262 | Asia / Kazakhstan / West Kazakhstan Region / Oral
263 | Asia / Kazakhstan / West-Kazakhstan
264 | Asia / Kazakhstan / West-kazakhstan region
265 | Asia / Kazakhstan / Zhambyl
266 | Asia / Kazakhstan / Zhambyl region
267 | Asia / Kazakhstan / Zhambyl region / Taraz
268 | Asia / Kuwait
269 | Asia / Lebanon
270 | Asia / Malaysia
271 | Asia / Maldives
272 | Asia / Mongolia
273 | Asia / Myanmar
274 | Asia / Nepal
275 | Asia / Oman / ...
276 | Asia / Oman / Al Buraimi
277 | Asia / Oman / Al Wosta
278 | Asia / Oman / Al Wusta
279 | Asia / Oman / AlWusta
280 | Asia / Oman / Buraimi
281 | Asia / Oman / Buraymi / Buraymi
282 | Asia / Oman / Dakhiliyah
283 | Asia / Oman / Dakhiliyah / Bahla
284 | Asia / Oman / Dakhiliyah / Manah
285 | Asia / Oman / Dakhiliyah / Nizwa
286 | Asia / Oman / Dakhiliyah / Samail
287 | Asia / Oman / Dakhilya
288 | Asia / Oman / Dakhliyah
289 | Asia / Oman / Dhahira
290 | Asia / Oman / Dhahirah / Ibri
291 | Asia / Oman / Dhakliyah
292 | Asia / Oman / Dhofar
293 | Asia / Oman / Musandam
294 | Asia / Oman / Muscat
295 | Asia / Oman / Muscat / Al Amrat
296 | Asia / Oman / Muscat / As Seeb
297 | Asia / Oman / Muscat / Bawshar
298 | Asia / Oman / Muscat / Mutrah
299 | Asia / Oman / Muscat / Qurayat
300 | Asia / Oman / North Batinah
301 | Asia / Oman / North Batinah / Sohar
302 | Asia / Oman / North Sharqiya
303 | Asia / Oman / North Sharqiyah
304 | Asia / Oman / North Sharqiyah / Ibra
305 | Asia / Oman / South Batinah
306 | Asia / Oman / South Batinah / Al Awabi
307 | Asia / Oman / South Batinah / Ar Rustaq
308 | Asia / Oman / South Batinah / Barka
309 | Asia / Oman / South Batinah / Nakhal
310 | Asia / Oman / South Batinah / Rustaq
311 | Asia / Oman / South Sharqiyah
312 | Asia / Pakistan
313 | Asia / Palestine / ...
314 | Asia / Palestine / Bethlehem
315 | Asia / Palestine / Hebron
316 | Asia / Palestine / Jenin
317 | Asia / Palestine / Jericho
318 | Asia / Palestine / Jerusalem
319 | Asia / Palestine / Nablus
320 | Asia / Palestine / Ramallah
321 | Asia / Palestine / Tulkarem
322 | Asia / Palestine / West Bank / Al-Khalil
323 | Asia / Palestine / West Bank / Bethlehem
324 | Asia / Palestine / West Bank / Jenin
325 | Asia / Palestine / West Bank / Jericho
326 | Asia / Palestine / West Bank / Nablus
327 | Asia / Palestine / West Bank / Qalqilia
328 | Asia / Palestine / West Bank / Qalqilya
329 | Asia / Palestine / West Bank / Qalqlieh
330 | Asia / Palestine / West Bank / Ramallah
331 | Asia / Palestine / West Bank / Salfit
332 | Asia / Palestine / West Bank / Tubas
333 | Asia / Palestine / West Bank / Tulkarem
334 | Asia / Philippines
335 | Asia / Qatar / ...
336 | Asia / Qatar / Ad Dawhah
337 | Asia / Qatar / Ad-Dawhah
338 | Asia / Qatar / Doha
339 | Asia / Saudi Arabia
340 | Asia / Singapore
341 | Asia / South Korea
342 | Asia / Sri Lanka
343 | Asia / Taiwan
344 | Asia / Thailand
345 | Asia / Timor-Leste
346 | Asia / United Arab Emirates
347 | Asia / Uzbekistan
348 | Asia / Vietnam
349 | Europe / ...
350 | Europe / Albania
351 | Europe / Andorra
352 | Europe / Austria
353 | Europe / Azerbaijan
354 | Europe / Belarus
355 | Europe / Belgium
356 | Europe / Bosnia and Herzegovina
357 | Europe / Bulgaria
358 | Europe / Coatia / ...
359 | Europe / Coatia / Varazdin County
360 | Europe / Crimea
361 | Europe / Croatia
362 | Europe / Cyprus
363 | Europe / Czech Republic
364 | Europe / Denmark
365 | Europe / Estonia
366 | Europe / Faroe Islands
367 | Europe / Finland
368 | Europe / France
369 | Europe / Germany
370 | Europe / Gibraltar
371 | Europe / Greece
372 | Europe / Hungary / ...
373 | Europe / Hungary / Bacs-Kiskun
374 | Europe / Hungary / Bacs-Kiskun County
375 | Europe / Hungary / Balassagyarmat
376 | Europe / Hungary / Balatonfured
377 | Europe / Hungary / Baranya
378 | Europe / Hungary / Baranya county
379 | Europe / Hungary / Bekesszentandras
380 | Europe / Hungary / Borsod-Abauj-Zemplen County
381 | Europe / Hungary / Budapest
382 | Europe / Hungary / Bács-Kiskun county
383 | Europe / Hungary / Békés County
384 | Europe / Hungary / Békés county
385 | Europe / Hungary / Csongrad County
386 | Europe / Hungary / Csongrad-Csanad
387 | Europe / Hungary / Csongrád county
388 | Europe / Hungary / Debrecen
389 | Europe / Hungary / Fejer
390 | Europe / Hungary / Gyor-Moson-Sopron
391 | Europe / Hungary / Győr-Moson-Sopron county
392 | Europe / Hungary / Hajdu-Bihar County
393 | Europe / Hungary / Hajdú-Bihar
394 | Europe / Hungary / Heves
395 | Europe / Hungary / Heves county
396 | Europe / Hungary / Jasz-Nagykun-Szolnok
397 | Europe / Hungary / Jász-Nagykun-Szolnok county
398 | Europe / Hungary / Kecskemet
399 | Europe / Hungary / Komarom-Esztergom
400 | Europe / Hungary / Komárom-Esztergom county
401 | Europe / Hungary / Nograd
402 | Europe / Hungary / Nógrád
403 | Europe / Hungary / Oroshaza
404 | Europe / Hungary / Pest
405 | Europe / Hungary / Pest county
406 | Europe / Hungary / Somogy county
407 | Europe / Hungary / Szabolcs-Szatmar-Bereg County
408 | Europe / Hungary / Szabolcs-Szatmár Bereg county
409 | Europe / Hungary / Szabolcs-Szatmár-Bereg
410 | Europe / Hungary / Szeged
411 | Europe / Hungary / Vas
412 | Europe / Hungary / Veszprem
413 | Europe / Hungary / Veszprém county
414 | Europe / Iceland / ...
415 | Europe / Iceland / Reykjavik
416 | Europe / Ireland
417 | Europe / Israel / ...
418 | Europe / Israel / Herzliya
419 | Europe / Italy
420 | Europe / Kosovo
421 | Europe / Latvia
422 | Europe / Liechtenstein
423 | Europe / Lithuania
424 | Europe / Lombardy / ...
425 | Europe / Lombardy / Milan
426 | Europe / Luxembourg
427 | Europe / Malta
428 | Europe / Moldova
429 | Europe / Monaco
430 | Europe / Montenegro
431 | Europe / Netherlands
432 | Europe / North Macedonia
433 | Europe / Norway
434 | Europe / Poland
435 | Europe / Portugal
436 | Europe / Romania
437 | Europe / Russia
438 | Europe / Serbia
439 | Europe / Slovakia
440 | Europe / Slovenia
441 | Europe / Spain
442 | Europe / Sweden
443 | Europe / Switzerland
444 | Europe / Turkey
445 | Europe / Ukraine
446 | Europe / United Kingdom
447 | North America / ...
448 | North America / Antigua and Barbuda
449 | North America / Bahamas
450 | North America / Barbados
451 | North America / Belize
452 | North America / Bermuda
453 | North America / British Virgin Islands
454 | North America / Canada
455 | North America / Cayman Islands
456 | North America / Costa Rica
457 | North America / Cuba
458 | North America / Dominica
459 | North America / Dominican Republic
460 | North America / El Salvador / ...
461 | North America / El Salvador / San Salvador
462 | North America / Grenada
463 | North America / Guadeloupe
464 | North America / Guatemala
465 | North America / Haiti
466 | North America / Honduras
467 | North America / Jamaica
468 | North America / Martinique / ...
469 | North America / Martinique / Fort de France
470 | North America / Martinique / Fort-De-France
471 | North America / Martinique / Fort-de-France
472 | North America / Martinique / Lamentin
473 | North America / Martinique / Le Lorrain
474 | North America / Martinique / Schoelcher
475 | North America / Martinique / Trinite
476 | North America / Mexico / ...
477 | North America / Mexico / Aguascalientes
478 | North America / Mexico / Baja California
479 | North America / Mexico / Baja California Sur
480 | North America / Mexico / Campeche
481 | North America / Mexico / Chiapas
482 | North America / Mexico / Chihuahua
483 | North America / Mexico / Ciudad de Mexico
484 | North America / Mexico / Coahuila
485 | North America / Mexico / Coahuila de Zaragoza
486 | North America / Mexico / Colima
487 | North America / Mexico / Distrito Federal
488 | North America / Mexico / Durango
489 | North America / Mexico / Estado de Mexico
490 | North America / Mexico / Guanajuato
491 | North America / Mexico / Guerrero
492 | North America / Mexico / Hidalgo
493 | North America / Mexico / Jalisco
494 | North America / Mexico / Mexico
495 | North America / Mexico / Mexico City
496 | North America / Mexico / Michoacan
497 | North America / Mexico / Morelos
498 | North America / Mexico / Nayarit
499 | North America / Mexico / Nuevo Leon
500 | North America / Mexico / Oaxaca
501 | North America / Mexico / Puebla
502 | North America / Mexico / Queretaro
503 | North America / Mexico / Quintana Roo
504 | North America / Mexico / San Luis Potosi
505 | North America / Mexico / Sinaloa
506 | North America / Mexico / Sonora
507 | North America / Mexico / State of Mexico
508 | North America / Mexico / Tabasco
509 | North America / Mexico / Tamaulipas
510 | North America / Mexico / Tlaxcala
511 | North America / Mexico / Veracruz
512 | North America / Mexico / Yucatan
513 | North America / Mexico / Yucatán
514 | North America / Mexico / Zacatecas
515 | North America / Panama
516 | North America / Saint Barthelemy
517 | North America / Saint Kitts and Nevis
518 | North America / Saint Lucia
519 | North America / Saint Martin
520 | North America / Saint Vincent and the Grenadines
521 | North America / Sint Eustatius
522 | North America / Sint Maarten
523 | North America / Turks and Caicos Islands
524 | North America / USA
525 | North America / USA / Alabama
526 | North America / USA / Alaska
527 | North America / USA / Arizona
528 | North America / USA / Arkansas
529 | North America / USA / California
530 | North America / USA / Colorado
531 | North America / USA / Connecticut
532 | North America / USA / Delaware
533 | North America / USA / District of Columbia
534 | North America / USA / Dominican Republic
535 | North America / USA / Florida
536 | North America / USA / Georgia
537 | North America / USA / Hawaii
538 | North America / USA / Idaho
539 | North America / USA / Illinois
540 | North America / USA / Indiana
541 | North America / USA / Iowa
542 | North America / USA / Kansas
543 | North America / USA / Kentucky
544 | North America / USA / Louisiana
545 | North America / USA / Maine
546 | North America / USA / Maryland
547 | North America / USA / Massachusetts
548 | North America / USA / Michigan
549 | North America / USA / Minnesota
550 | North America / USA / Mississippi
551 | North America / USA / Missouri
552 | North America / USA / Montana
553 | North America / USA / Nebraska
554 | North America / USA / Nevada
555 | North America / USA / New Hampshire
556 | North America / USA / New Jersey
557 | North America / USA / New Mexico
558 | North America / USA / New York
559 | North America / USA / North Carolina
560 | North America / USA / North Dakota
561 | North America / USA / Ohio
562 | North America / USA / Oklahoma
563 | North America / USA / Oregon
564 | North America / USA / Pennsylvania
565 | North America / USA / Puerto Rico
566 | North America / USA / Rhode Island
567 | North America / USA / South Carolina
568 | North America / USA / South Dakota
569 | North America / USA / Tennessee
570 | North America / USA / Texas
571 | North America / USA / Utah
572 | North America / USA / Vermont
573 | North America / USA / Virgin Islands
574 | North America / USA / Virgin Islands of the U.S.
575 | North America / USA / Virginia
576 | North America / USA / Washington
577 | North America / USA / West Virginia
578 | North America / USA / Wisconsin
579 | North America / USA / Wyoming
580 | Oceania / ...
581 | Oceania / Australia
582 | Oceania / Australia / Australian Capital Territory
583 | Oceania / Australia / New South Wales
584 | Oceania / Australia / New South Wales / Sydney
585 | Oceania / Australia / New South Wales / Tweed Heads
586 | Oceania / Australia / Northern Territory
587 | Oceania / Australia / Queensland
588 | Oceania / Australia / Queensland / Brisbane
589 | Oceania / Australia / Queensland / Cairns
590 | Oceania / Australia / Queensland / Gold Coast
591 | Oceania / Australia / Queensland / Northern Queensland
592 | Oceania / Australia / Queensland / Rockhampton
593 | Oceania / Australia / Queensland / Southeast Queensland
594 | Oceania / Australia / Queensland / Sunshine Coast
595 | Oceania / Australia / South Australia
596 | Oceania / Australia / Tasmania
597 | Oceania / Australia / Victoria
598 | Oceania / Australia / Victoria / Clayton
599 | Oceania / Australia / Victoria / Melbourne
600 | Oceania / Australia / Western Australia
601 | Oceania / Australia / Western Australia / Perth
602 | Oceania / French Polynesia / Tahiti / Papeete
603 | Oceania / Guam
604 | Oceania / New Zealand
605 | Oceania / New Zealand / Auckland
606 | Oceania / New Zealand / Auckland / Counties Manukau
607 | Oceania / New Zealand / Auckland / Waitemata
608 | Oceania / New Zealand / Bay of Plenty
609 | Oceania / New Zealand / Canterbury
610 | Oceania / New Zealand / Capital and Coast
611 | Oceania / New Zealand / Combined Auckland
612 | Oceania / New Zealand / Combined Wellington
613 | Oceania / New Zealand / Counties Manukau
614 | Oceania / New Zealand / Hawkes Bay
615 | Oceania / New Zealand / Hutt Valley
616 | Oceania / New Zealand / Lakes
617 | Oceania / New Zealand / MidCentral
618 | Oceania / New Zealand / Nelson Marlborough
619 | Oceania / New Zealand / Northland
620 | Oceania / New Zealand / Otago
621 | Oceania / New Zealand / South Canterbury
622 | Oceania / New Zealand / Southern
623 | Oceania / New Zealand / Taranaki
624 | Oceania / New Zealand / Waikato
625 | Oceania / New Zealand / Wairarapa
626 | Oceania / New Zealand / Waitemata
627 | Oceania / New Zealand / Wellington
628 | Oceania / New Zealand / Whanganui
629 | Oceania / Northern Mariana Islands
630 | Oceania / Papua New Guinea
631 | Oceania / USA / Guam
632 | Oceania / USA / Northern Mariana Islands
633 | Oceania / Wallis and Futuna / Wallis
634 | South America / ...
635 | South America / Anguilla
636 | South America / Antigua and Barbuda / ...
637 | South America / Antigua and Barbuda / Caribbean
638 | South America / Argentina
639 | South America / Aruba
640 | South America / Barbados / ...
641 | South America / Barbados / Caribbean
642 | South America / Bolivia
643 | South America / Bonaire / ...
644 | South America / Bonaire / Bonaire
645 | South America / Brazil
646 | South America / British Virgin Islands
647 | South America / Chile / ...
648 | South America / Chile / Aisen / ...
649 | South America / Chile / Aisen / Aisen
650 | South America / Chile / Aisen / Cochrane
651 | South America / Chile / Aisen / Coyhaique
652 | South America / Chile / Aisen / Guaitecas
653 | South America / Chile / Aisen / Rio Ibanez
654 | South America / Chile / Antofagasta
655 | South America / Chile / Araucania / ...
656 | South America / Chile / Araucania / Angol
657 | South America / Chile / Araucania / Collipulli
658 | South America / Chile / Araucania / Lonquimay
659 | South America / Chile / Araucania / Padre Las Casas
660 | South America / Chile / Araucania / Pitrufquen
661 | South America / Chile / Araucania / Renaico
662 | South America / Chile / Araucania / Temuco
663 | South America / Chile / Araucania / Tolten
664 | South America / Chile / Araucania / Villarrica
665 | South America / Chile / Arica and Parinacota / ...
666 | South America / Chile / Arica and Parinacota / Arica
667 | South America / Chile / Arica y Parinacota / ...
668 | South America / Chile / Arica y Parinacota / Arica
669 | South America / Chile / Atacama / ...
670 | South America / Chile / Atacama / Caldera
671 | South America / Chile / Atacama / Chanaral
672 | South America / Chile / Atacama / Chañaral / El Salvador
673 | South America / Chile / Atacama / Copiapo
674 | South America / Chile / Atacama / Copiapo / Caldera
675 | South America / Chile / Atacama / Copiapo / Chanaral
676 | South America / Chile / Atacama / Copiapo / Paipote
677 | South America / Chile / Atacama / Copiapo / Tierra Amarilla
678 | South America / Chile / Atacama / Diego De Almagro
679 | South America / Chile / Atacama / Domeyko
680 | South America / Chile / Atacama / Huasco
681 | South America / Chile / Atacama / Huasco / Freirina
682 | South America / Chile / Atacama / Tierra Amarilla
683 | South America / Chile / Atacama / Vallenar
684 | South America / Chile / Aysen / ...
685 | South America / Chile / Aysen / Aisen
686 | South America / Chile / Aysen / Cochrane
687 | South America / Chile / Aysen / Coyhaique
688 | South America / Chile / Biobio / ...
689 | South America / Chile / Biobio / Antuco
690 | South America / Chile / Biobio / Cabrero
691 | South America / Chile / Biobio / Chiguayante
692 | South America / Chile / Biobio / Chillan
693 | South America / Chile / Biobio / Concepcion
694 | South America / Chile / Biobio / Contulmo
695 | South America / Chile / Biobio / Coronel
696 | South America / Chile / Biobio / Diguillin / Chillan
697 | South America / Chile / Biobio / Hualpen
698 | South America / Chile / Biobio / Hualqui
699 | South America / Chile / Biobio / Los Angeles
700 | South America / Chile / Biobio / Lota
701 | South America / Chile / Biobio / Mulchen
702 | South America / Chile / Biobio / Nacimiento
703 | South America / Chile / Biobio / Negrete
704 | South America / Chile / Biobio / Quilleco
705 | South America / Chile / Biobio / San Pedro De La Paz
706 | South America / Chile / Biobio / San Pedro de la Paz
707 | South America / Chile / Biobio / Santa Juana
708 | South America / Chile / Biobio / Talcahuano
709 | South America / Chile / Biobio / Tucapel
710 | South America / Chile / Biobio / Yumbel
711 | South America / Chile / Coquimbo
712 | South America / Chile / El Maule / ...
713 | South America / Chile / El Maule / Colbun
714 | South America / Chile / El Maule / Constitucion
715 | South America / Chile / El Maule / Licanten
716 | South America / Chile / El Maule / Maule
717 | South America / Chile / El Maule / Parral
718 | South America / Chile / El Maule / Retiro
719 | South America / Chile / El Maule / Rio Claro
720 | South America / Chile / El Maule / San Clemente
721 | South America / Chile / El Maule / San Javier
722 | South America / Chile / El Maule / San Rafael
723 | South America / Chile / El Maule / Talca
724 | South America / Chile / La Araucania / ...
725 | South America / Chile / La Araucania / Angol
726 | South America / Chile / La Araucania / Carahue
727 | South America / Chile / La Araucania / Cholchol
728 | South America / Chile / La Araucania / Collipulli
729 | South America / Chile / La Araucania / Cunco
730 | South America / Chile / La Araucania / Freire
731 | South America / Chile / La Araucania / Galvarino
732 | South America / Chile / La Araucania / Lautaro
733 | South America / Chile / La Araucania / Loncoche
734 | South America / Chile / La Araucania / Padre Las Casas
735 | South America / Chile / La Araucania / Perquenco
736 | South America / Chile / La Araucania / Pitrufquen
737 | South America / Chile / La Araucania / Pucon
738 | South America / Chile / La Araucania / Saavedra
739 | South America / Chile / La Araucania / Temuco
740 | South America / Chile / La Araucania / Vilcun
741 | South America / Chile / La Araucania / Villarrica
742 | South America / Chile / Libertador General Bernardo O'Higgins / ...
743 | South America / Chile / Libertador General Bernardo O'Higgins / Pichidegua
744 | South America / Chile / Libertador General Bernardo O'Higgins / Rancagua
745 | South America / Chile / Los Lagos / ...
746 | South America / Chile / Los Lagos / Ancud
747 | South America / Chile / Los Lagos / Calbuco
748 | South America / Chile / Los Lagos / Cochamo
749 | South America / Chile / Los Lagos / Fresia
750 | South America / Chile / Los Lagos / Frutillar
751 | South America / Chile / Los Lagos / Llanquihue
752 | South America / Chile / Los Lagos / Llanquihue / Puerto Montt
753 | South America / Chile / Los Lagos / Los Muermos
754 | South America / Chile / Los Lagos / Maullin
755 | South America / Chile / Los Lagos / Osorno
756 | South America / Chile / Los Lagos / Puerto Montt
757 | South America / Chile / Los Lagos / Puerto Varas
758 | South America / Chile / Los Lagos / Quellon
759 | South America / Chile / Los Rios / ...
760 | South America / Chile / Los Rios / Futrono
761 | South America / Chile / Los Rios / La Union
762 | South America / Chile / Los Rios / Lago Ranco
763 | South America / Chile / Los Rios / Lanco
764 | South America / Chile / Los Rios / Los Lagos
765 | South America / Chile / Los Rios / Mafil
766 | South America / Chile / Los Rios / Mariquina
767 | South America / Chile / Los Rios / Paillaco
768 | South America / Chile / Los Rios / Panguipulli
769 | South America / Chile / Los Rios / Rio Bueno
770 | South America / Chile / Los Rios / Valdivia
771 | South America / Chile / Magallanes / ...
772 | South America / Chile / Magallanes / Natales
773 | South America / Chile / Magallanes / Porvenir
774 | South America / Chile / Magallanes / Puerto Natales
775 | South America / Chile / Magallanes / Puerto Wiilliams
776 | South America / Chile / Magallanes / Puerto Williams
777 | South America / Chile / Magallanes / Punta Arenas
778 | South America / Chile / Magallanes / Timaukel
779 | South America / Chile / Maule / ...
780 | South America / Chile / Maule / Cauquenes
781 | South America / Chile / Maule / Colbun
782 | South America / Chile / Maule / Constitucion
783 | South America / Chile / Maule / Curepto
784 | South America / Chile / Maule / Curico
785 | South America / Chile / Maule / Empedrado
786 | South America / Chile / Maule / Hualane
787 | South America / Chile / Maule / Licanten
788 | South America / Chile / Maule / Linares
789 | South America / Chile / Maule / Longavi
790 | South America / Chile / Maule / Maule
791 | South America / Chile / Maule / Molina
792 | South America / Chile / Maule / Pelarco
793 | South America / Chile / Maule / Pencahue
794 | South America / Chile / Maule / Rio Claro
795 | South America / Chile / Maule / Romeral
796 | South America / Chile / Maule / Sagrada Familia
797 | South America / Chile / Maule / San Clemente
798 | South America / Chile / Maule / San Rafael
799 | South America / Chile / Maule / Talca
800 | South America / Chile / Maule / Teno
801 | South America / Chile / Maule / Villa Alegre
802 | South America / Chile / Maule / Yerbas Buenas
803 | South America / Chile / Nuble / ...
804 | South America / Chile / Nuble / AIquen
805 | South America / Chile / Nuble / Bulnes
806 | South America / Chile / Nuble / Chillan
807 | South America / Chile / Nuble / Chillan Viejo
808 | South America / Chile / Nuble / Coelemu
809 | South America / Chile / Nuble / Coihueco
810 | South America / Chile / Nuble / El Carmen
811 | South America / Chile / Nuble / Niquen
812 | South America / Chile / Nuble / Pemuco
813 | South America / Chile / Nuble / Quillon
814 | South America / Chile / Nuble / San Carlos
815 | South America / Chile / Nuble / San Ignacio
816 | South America / Chile / Nuble / San Nicolas
817 | South America / Chile / Nuble / Treguaco
818 | South America / Chile / Nuble / Yungay
819 | South America / Chile / O'Higgins / ...
820 | South America / Chile / O'Higgins / Codegua
821 | South America / Chile / O'Higgins / Coinco
822 | South America / Chile / O'Higgins / Coltauco
823 | South America / Chile / O'Higgins / Donihue
824 | South America / Chile / O'Higgins / Graneros
825 | South America / Chile / O'Higgins / La Estrella
826 | South America / Chile / O'Higgins / Litueche
827 | South America / Chile / O'Higgins / Lolol
828 | South America / Chile / O'Higgins / Machali
829 | South America / Chile / O'Higgins / Malloa
830 | South America / Chile / O'Higgins / Marchihue
831 | South America / Chile / O'Higgins / Mostazal
832 | South America / Chile / O'Higgins / Nancagua
833 | South America / Chile / O'Higgins / Navidad
834 | South America / Chile / O'Higgins / Olivar
835 | South America / Chile / O'Higgins / Paredones
836 | South America / Chile / O'Higgins / Peumo
837 | South America / Chile / O'Higgins / Pichidegua
838 | South America / Chile / O'Higgins / Pichilemu
839 | South America / Chile / O'Higgins / Quinta De Tilcoco
840 | South America / Chile / O'Higgins / Rancagua
841 | South America / Chile / O'Higgins / Rengo
842 | South America / Chile / O'Higgins / Requinoa
843 | South America / Chile / O'Higgins / San Fernando
844 | South America / Chile / O'Higgins / San Francisco de Mostazal
845 | South America / Chile / O'Higgins / San Vicente
846 | South America / Chile / Puerto Montt
847 | South America / Chile / Region Metropolitana de Santiago
848 | South America / Chile / Tarapaca / ...
849 | South America / Chile / Tarapaca / Alto Hospicio
850 | South America / Chile / Tarapaca / Iquique
851 | South America / Chile / Tarapaca / Pozo Almonte
852 | South America / Chile / Tarapacá / ...
853 | South America / Chile / Valparaiso
854 | South America / Chile / Ñuble / ...
855 | South America / Chile / �uble / ...
856 | South America / Colombia
857 | South America / Curacao
858 | South America / Dominica / ...
859 | South America / Dominica / Caribbean
860 | South America / Ecuador
861 | South America / French Guiana
862 | South America / Grenada
863 | South America / Guyana
864 | South America / Guyane
865 | South America / Jamaica / ...
866 | South America / Jamaica / Caribbean
867 | South America / Montserrat / ...
868 | South America / Montserrat / Caribbean
869 | South America / Paraguay
870 | South America / Peru
871 | South America / Saint Kitts and Nevis / ...
872 | South America / Saint Kitts and Nevis / Caribbean
873 | South America / Saint Lucia
874 | South America / Saint Vincent and the Grenadines
875 | South America / Suriname
876 | South America / Trinidad and Tobago
877 | South America / Uruguay
878 | South America / Venezuela
879 | 


--------------------------------------------------------------------------------