├── client ├── bazel │ └── BUILD ├── go │ ├── BUILD │ ├── go.work │ └── util │ │ ├── go.mod │ │ ├── BUILD.bazel │ │ ├── command │ │ ├── BUILD.bazel │ │ └── Autocomplete.go │ │ ├── go.sum │ │ └── UsageConfig.go ├── apps │ └── public │ │ └── Datasets │ │ └── v2 │ │ ├── BUILD │ │ ├── datasets │ │ ├── .gitignore │ │ ├── flags │ │ │ ├── BUILD.bazel │ │ │ ├── GeneTaxonFilter.go │ │ │ ├── AnnotatedOnlyFlag.go │ │ │ ├── ReferenceOnlyFlag.go │ │ │ ├── TypeMaterialFlag.go │ │ │ ├── SkipZipValidation.go │ │ │ ├── TaxExactMatchFlag.go │ │ │ ├── ExcludeAtypical.go │ │ │ ├── DownloadPreviewFlag.go │ │ │ ├── ChromosomesFlag.go │ │ │ ├── DehydratedFlag.go │ │ │ ├── TaxParentsFlag.go │ │ │ ├── TaxChildrenFlag.go │ │ │ ├── ExcludeMultiIsolate.go │ │ │ ├── SearchStringFlag.go │ │ │ ├── DebugFlag.go │ │ │ ├── WpDownloadFlags.go │ │ │ ├── AssemblySourceFlag.go │ │ │ ├── common.go │ │ │ ├── HostTaxonFilter.go │ │ │ ├── JsonLinesAndLimitFlag.go │ │ │ ├── TaxonomyDownloadReportFlag.go │ │ │ ├── GenomeReportFlag.go │ │ │ ├── VirusSummaryReportFlag.go │ │ │ ├── TaxonomyReportFlag.go │ │ │ ├── MetaGenomeDerived.go │ │ │ ├── TaxonRankFilterFlag.go │ │ │ ├── FastaFilter.go │ │ │ ├── VirusDownloadReportFlag.go │ │ │ ├── GenomeAssemblyLevel.go │ │ │ ├── OrthologTaxonFilterFlag.go │ │ │ ├── LimitFlag.go │ │ │ ├── GenomeIncludeFlag.go │ │ │ ├── GeneIncludeFlag.go │ │ │ ├── AssemblyVersionFlag.go │ │ │ ├── VirusIncludeFlag.go │ │ │ ├── ReleasedDateFlag.go │ │ │ ├── InputFileFlag.go │ │ │ └── VirusFilterFlags.go │ │ ├── GenomeAccessionRetriever.go │ │ ├── DownloadVirus.go │ │ ├── SummaryVirus.go │ │ ├── ResponseHandling.go │ │ ├── Summary.go │ │ ├── BUILD.bazel │ │ ├── DownloadTaxonomy.go │ │ ├── SummaryGeneTaxon.go │ │ ├── SummaryGenomeAccession.go │ │ ├── DownloadGeneGeneId.go │ │ ├── SummaryGenomeTaxon.go │ │ ├── TaxonomyIdRetriever.go │ │ ├── DownloadGeneLocusTag.go │ │ ├── SummaryVirusGenomeTaxon.go │ │ ├── progressbar.go │ │ ├── DownloadGeneTaxon.go │ │ ├── DownloadVirusGenomeAccession.go │ │ 
├── DownloadGeneSymbol.go │ │ ├── SummaryVirusGenome.go │ │ ├── GeneIdRetriever.go │ │ ├── DownloadVirusGenomeTaxon.go │ │ ├── DownloadGenomeAccession.go │ │ ├── DownloadGene.go │ │ ├── DownloadGenomeTaxon.go │ │ ├── DownloadVirusProtein.go │ │ ├── DownloadVirusGenome.go │ │ ├── SummaryGeneLocusTag.go │ │ ├── SummaryGeneAccession.go │ │ ├── SummaryTaxonomyTaxon.go │ │ ├── SummaryGeneId.go │ │ ├── DownloadGeneAccession.go │ │ ├── DownloadTaxonomyTaxon.go │ │ ├── SummaryGeneSymbol.go │ │ ├── VirusDownloader.go │ │ ├── SummaryVirusGenomeAccession.go │ │ ├── TaxonomyDownloader.go │ │ └── Download.go │ │ ├── cmd │ │ └── datasets │ │ │ ├── main.go │ │ │ └── BUILD.bazel │ │ └── go.mod ├── openapi │ ├── templates │ │ ├── .gitignore │ │ ├── go │ │ │ ├── go.mod.mustache │ │ │ └── model_enum.mustache │ │ ├── make_patch.sh │ │ └── go.patch │ ├── configs │ │ └── openapigen.go.config │ └── BUILD ├── .envrc ├── .gitignore ├── BUILD ├── .bazelrc ├── README.md ├── workspace_status.sh └── MODULE.bazel ├── training ├── images │ ├── readme.md │ ├── data-package.png │ ├── datasets0902.png │ └── datasets-commands.png ├── readme.md ├── 2023-01-03-datasets-cli14 │ ├── images │ │ ├── readme.md │ │ ├── ants.png │ │ ├── dataformat1.png │ │ ├── datasets1.png │ │ ├── data-packages.png │ │ ├── datasets-gene.png │ │ ├── elmo_workflow.png │ │ ├── dataformat-gene.png │ │ ├── elmo_workflow-1.png │ │ ├── elmo_workflow-2.png │ │ ├── elmo_workflow-3.png │ │ ├── dataformat-genome.png │ │ ├── dataformat-genome-org.png │ │ ├── datasets-s-genome-tax.png │ │ ├── data-package-contents-op2.png │ │ ├── dataformat-genome-3fields.png │ │ ├── dataformat-genome-contign50.png │ │ └── dataformat-genome-3fields-accession.png │ ├── datasets.yml │ └── readme.md ├── ASM-NGS │ └── README.md └── TAGC-2024 │ └── README.md ├── .github └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── LICENSE.md ├── CONTRIBUTING.md └── README.md /client/bazel/BUILD: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /client/go/BUILD: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/BUILD: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/.gitignore: -------------------------------------------------------------------------------- 1 | *.go.dev 2 | -------------------------------------------------------------------------------- /client/openapi/templates/.gitignore: -------------------------------------------------------------------------------- 1 | openapi-generator-cli-*.jar 2 | -------------------------------------------------------------------------------- /client/.envrc: -------------------------------------------------------------------------------- 1 | export USE_BAZEL_VERSION=7.x 2 | export GCC_VERSION=13.2.0 3 | -------------------------------------------------------------------------------- /client/.gitignore: -------------------------------------------------------------------------------- 1 | bazel-bin 2 | bazel-client 3 | bazel-out 4 | bazel-testlogs 5 | -------------------------------------------------------------------------------- /training/images/readme.md: -------------------------------------------------------------------------------- 1 | Folder for storing images for NCBI Datasets workshops and trainings 2 | -------------------------------------------------------------------------------- /training/readme.md: -------------------------------------------------------------------------------- 1 | This folder contains workshops and training information from NCBI Datasets 2 | 
-------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/readme.md: -------------------------------------------------------------------------------- 1 | Folder containing images for the jupyter notebooks 2 | -------------------------------------------------------------------------------- /training/images/data-package.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/images/data-package.png -------------------------------------------------------------------------------- /training/images/datasets0902.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/images/datasets0902.png -------------------------------------------------------------------------------- /training/images/datasets-commands.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/images/datasets-commands.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/ants.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/ants.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/dataformat1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/dataformat1.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/datasets1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/datasets1.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/data-packages.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/data-packages.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/datasets-gene.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/datasets-gene.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/elmo_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/elmo_workflow.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/dataformat-gene.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/dataformat-gene.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/elmo_workflow-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/elmo_workflow-1.png -------------------------------------------------------------------------------- 
/training/2023-01-03-datasets-cli14/images/elmo_workflow-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/elmo_workflow-2.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/elmo_workflow-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/elmo_workflow-3.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/dataformat-genome.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/dataformat-genome.png -------------------------------------------------------------------------------- /client/go/go.work: -------------------------------------------------------------------------------- 1 | go 1.23.0 2 | 3 | toolchain go1.23.4 4 | 5 | use ./apps/public/Datasets/v2 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | use ./go/util 16 | 17 | -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/dataformat-genome-org.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/dataformat-genome-org.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/datasets-s-genome-tax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/datasets-s-genome-tax.png 
-------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/data-package-contents-op2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/data-package-contents-op2.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/dataformat-genome-3fields.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/dataformat-genome-3fields.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/dataformat-genome-contign50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/dataformat-genome-contign50.png -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/images/dataformat-genome-3fields-accession.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/datasets/HEAD/training/2023-01-03-datasets-cli14/images/dataformat-genome-3fields-accession.png -------------------------------------------------------------------------------- /client/go/util/go.mod: -------------------------------------------------------------------------------- 1 | module datasets/util 2 | 3 | go 1.23 4 | 5 | require github.com/spf13/cobra v1.8.1 6 | 7 | require ( 8 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 9 | github.com/spf13/pflag v1.0.5 // indirect 10 | ) 11 | -------------------------------------------------------------------------------- /client/go/util/BUILD.bazel: 
-------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "util", 5 | srcs = ["UsageConfig.go"], 6 | importpath = "datasets/util", 7 | visibility = ["//visibility:public"], 8 | deps = ["@com_github_spf13_cobra//:cobra"], 9 | ) 10 | -------------------------------------------------------------------------------- /client/go/util/command/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "command", 5 | srcs = ["Autocomplete.go"], 6 | importpath = "datasets/util/command", 7 | visibility = ["//visibility:public"], 8 | deps = ["@com_github_spf13_cobra//:cobra"], 9 | ) 10 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/cmd/datasets/main.go: -------------------------------------------------------------------------------- 1 | //go:generate ${GO_PATH}/bin/goversioninfo -manifest=../../resource/goversioninfo.exe.manifest -64 -product-version=${VER} -file-version=${VER} 2 | 3 | package main 4 | 5 | import ( 6 | "datasets_cli/v2/datasets" 7 | ) 8 | 9 | func main() { 10 | datasets.Execute() 11 | } 12 | -------------------------------------------------------------------------------- /training/ASM-NGS/README.md: -------------------------------------------------------------------------------- 1 | Content presented at the joint NCBI workshop at the ASM-NGS conference on 10/16/2024. 
2 | 3 | - Conference website: [https://asm.org/Events/ASM-NGS/Home](https://asm.org/Events/ASM-NGS/Home) 4 | - Workshop Wiki: [https://github.com/ncbi/workshop-asm-ngs-2024/wiki](https://github.com/ncbi/workshop-asm-ngs-2024/wiki) 5 | -------------------------------------------------------------------------------- /client/BUILD: -------------------------------------------------------------------------------- 1 | load("@gazelle//:def.bzl", "gazelle") 2 | 3 | # gazelle:resolve go datasets/openapi/v2 //openapi:golib.v2 4 | gazelle(name = "gazelle") 5 | 6 | genrule( 7 | name = "version", 8 | srcs = [".git"], 9 | outs = ["version.txt"], 10 | cmd_bash = "git --git-dir=$(location :.git) describe --always --dirty |tee $@", 11 | ) 12 | -------------------------------------------------------------------------------- /client/openapi/configs/openapigen.go.config: -------------------------------------------------------------------------------- 1 | { 2 | "packageName": "datasets", 3 | "isGoSubmodule": true, 4 | "enumClassPrefix": true, 5 | "gitRepoId": "datasets", 6 | "gitUserId": "ncbi", 7 | "templateDir": "go", 8 | "files": { 9 | "api.mustache": { 10 | "templateType": "API", 11 | "destinationFilename": ".go" 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/datasets.yml: -------------------------------------------------------------------------------- 1 | name: ncbi_datasets 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - pandas 8 | - ncbi-datasets-cli 9 | - blast 10 | - jq 11 | - jupyter 12 | - tree 13 | - seqkit 14 | - mafft 15 | - fasttree 16 | - biopython 17 | - matplotlib 18 | prefix: /opt/anaconda3/envs/ncbi_datasets 19 | -------------------------------------------------------------------------------- /client/openapi/templates/go/go.mod.mustache: -------------------------------------------------------------------------------- 1 | module 
datasets/openapi/v2 2 | 3 | go 1.18 4 | 5 | require ( 6 | {{#hasOAuthMethods}} 7 | golang.org/x/oauth2 v0.0.0-20210323180902-22b0adad7558 8 | {{/hasOAuthMethods}} 9 | {{#withAWSV4Signature}} 10 | github.com/aws/aws-sdk-go v1.34.14 11 | {{/withAWSV4Signature}} 12 | {{#importValidator}} 13 | gopkg.in/validator.v2 v2.0.1 14 | {{/importValidator}} 15 | ) 16 | -------------------------------------------------------------------------------- /client/.bazelrc: -------------------------------------------------------------------------------- 1 | build --workspace_status_command=workspace_status.sh 2 | 3 | ### 4 | # Correctness guarantees 5 | ### 6 | test --incompatible_exclusive_test_sandboxed 7 | build --incompatible_strict_action_env 8 | build --action_env=$LD_LIBRARY_PATH 9 | 10 | ### 11 | # Convenience 12 | ### 13 | build --sandbox_fake_hostname 14 | build --sandbox_fake_username 15 | build --show_timestamps 16 | 17 | build --java_runtime_version=remotejdk_21 18 | -------------------------------------------------------------------------------- /training/2023-01-03-datasets-cli14/readme.md: -------------------------------------------------------------------------------- 1 | # NCBI Datasets CLI v14 tutorial 2 | 3 | This folder contains an updated version of the Jupyter notebook presented at the CSHL Genomic Informatics 2021 as part of an NCBI Datasets workshop. Modifications include images and commands to reflect the NCBI Datasets CLI changes from v13 to v14. 
4 | 5 | To create a conda environment with all the packages to run the Jupyter Notebook, download the file `datasets.yml` and run the following command: 6 | 7 | `conda env create --file datasets.yml` 8 | 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | ### Thank you 17 | Thanks for your feedback--your feature requests help improve NCBI Datasets. 18 | -------------------------------------------------------------------------------- /client/openapi/templates/make_patch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VERSION=7.2.0 4 | CLI=openapi-generator-cli-${VERSION}.jar 5 | 6 | if [[ ! 
-e ${CLI} ]]; then 7 | curl "https://repo1.maven.org/maven2/org/openapitools/openapi-generator-cli/${VERSION}/${CLI}" > ${CLI} 8 | fi 9 | 10 | for template in go; do 11 | PATCH_FILES=$(find ${template} -type f) 12 | rm -rf a b 13 | mkdir -p a b 14 | ( cd a; echo ${PATCH_FILES} | xargs -r unzip ../${CLI} ) 15 | ln -s ../${template} b/${template} 16 | diff -Naur a b > ${template}.patch 17 | rm -rf a b 18 | done 19 | -------------------------------------------------------------------------------- /training/TAGC-2024/README.md: -------------------------------------------------------------------------------- 1 | # NCBI Datasets TAGC 2024 Workshop Notebook 2 | 3 | This folder contains a Jupyter notebook that will be presented as part of an NCBI Datasets workshop at TAGC (The Allied Genetics Conference) 2024. 4 | 5 | To create a conda environment with all the packages to run the Jupyter Notebook, run the following command: 6 | 7 | `conda create -n tagc -c conda-forge -c bioconda ncbi-datasets-cli jq tree clustalo jupyterlab -y` 8 | 9 | Then activate the environment: 10 | 11 | `conda activate tagc` 12 | 13 | Then launch jupyter lab: 14 | 15 | `jupyter lab` 16 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "flags", 5 | srcs = glob( 6 | ["*.go"], 7 | exclude = ["*_test.go"], 8 | ), 9 | importpath = "datasets_cli/v2/datasets/flags", 10 | visibility = ["//visibility:public"], 11 | deps = [ 12 | "//openapi:golib.v2", 13 | "@com_github_araddon_dateparse//:dateparse", 14 | "@com_github_spf13_cobra//:cobra", 15 | "@com_github_spf13_pflag//:pflag", 16 | "@com_github_thediveo_enumflag_v2//:enumflag", 17 | ], 18 | ) 19 | 20 | -------------------------------------------------------------------------------- /client/README.md: 
-------------------------------------------------------------------------------- 1 | # Building Datasets Client 2 | ## Setup 3 | The build system depends on having `bazel` available. You should use [bazelisk](https://github.com/bazelbuild/bazelisk). 4 | 5 | Second, you must have the file `workspace_status.sh` available in your PATH. 6 | 7 | ## Building 8 | From this directory, run: 9 | 10 | ``` 11 | bazel build apps/public/Datasets/... 12 | ``` 13 | 14 | 15 | 16 | The executable will be available at `bazel-bin/apps/public/Datasets/v2/cmd/datasets/datasets_/datasets`. 17 | 18 | ## Alternative installations 19 | `datasets` is also available through [conda](https://anaconda.org/conda-forge/ncbi-datasets-cli). 20 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/GeneTaxonFilter.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type GeneTaxonFilterFlag struct { 9 | FlagInterface 10 | Taxon string 11 | } 12 | 13 | func NewGeneTaxonFilterFlag() *GeneTaxonFilterFlag { 14 | return &GeneTaxonFilterFlag{} 15 | } 16 | 17 | func (gtff *GeneTaxonFilterFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.StringVar(&gtff.Taxon, "taxon", "human", "Define species (NCBI taxid, common or scientific name) for gene symbol") 19 | } 20 | 21 | func (gtff *GeneTaxonFilterFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/GenomeAccessionRetriever.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | ) 6 | 7 | type GenomeAccRetriever struct { 8 | assemblyAccessions []string 9 | 
DefaultPageProcessorFuncs[openapi.V2reportsAssemblyDataReport, *openapi.V2reportsAssemblyDataReportPage] 10 | } 11 | 12 | func (r *GenomeAccRetriever) ReportName() string { 13 | return "genome" 14 | } 15 | 16 | func (assmRetriever *GenomeAccRetriever) ProcessPage(ppage *openapi.V2reportsAssemblyDataReportPage) { 17 | for _, report := range ppage.GetReports() { 18 | assmRetriever.assemblyAccessions = append(assmRetriever.assemblyAccessions, report.GetAccession()) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadVirus.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | func createVirusCmd() *cobra.Command { 8 | cmd := &cobra.Command{ 9 | Use: "virus", 10 | Short: "Download a virus data package", 11 | Long: "Download a virus genome or SARS-CoV-2 protein data package as a zip file.", 12 | Example: ` datasets download virus genome taxon sars-cov-2 --host dog 13 | datasets download virus protein S --host dog --filename SARS2-spike-dog.zip`, 14 | Args: cobra.NoArgs, 15 | RunE: ParentCommandRunE, 16 | } 17 | 18 | cmd.AddCommand(createDownloadVirusGenomeCmd()) 19 | cmd.AddCommand(createDownloadVirusProteinCmd()) 20 | 21 | return cmd 22 | } 23 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryVirus.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | cmdflags "datasets_cli/v2/datasets/flags" 5 | 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | func createSummaryVirusCmd() *cobra.Command { 10 | flagSets := []cmdflags.FlagInterface{} 11 | 12 | cmd := &cobra.Command{ 13 | Use: "virus", 14 | Short: "Print a data report containing virus genome metadata", 15 | Long: ` 16 | Print a data report containing virus genome metadata 
by accession or taxon. The data report is returned in JSON format.`, 17 | Args: cobra.NoArgs, 18 | RunE: ParentCommandRunE, 19 | } 20 | 21 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 22 | 23 | cmd.AddCommand(createSummaryVirusGenomeCmd()) 24 | 25 | return cmd 26 | } 27 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/AnnotatedOnlyFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type AnnotatedOnlyFlag struct { 9 | FlagInterface 10 | annotatedOnly bool 11 | } 12 | 13 | func NewAnnotatedOnlyFlag() *AnnotatedOnlyFlag { 14 | return &AnnotatedOnlyFlag{annotatedOnly: false} 15 | } 16 | 17 | func (ao *AnnotatedOnlyFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&ao.annotatedOnly, "annotated", false, "Limit to annotated genomes") 19 | } 20 | 21 | func (ao *AnnotatedOnlyFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (ao *AnnotatedOnlyFlag) AnnotatedOnly() bool { 26 | return ao.annotatedOnly 27 | } 28 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/ReferenceOnlyFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type ReferenceOnlyFlag struct { 9 | FlagInterface 10 | referenceOnly bool 11 | } 12 | 13 | func NewReferenceOnlyFlag() *ReferenceOnlyFlag { 14 | return &ReferenceOnlyFlag{referenceOnly: false} 15 | } 16 | 17 | func (ro *ReferenceOnlyFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&ro.referenceOnly, "reference", false, "Limit to reference genomes") 19 | } 20 | 21 | func (ro *ReferenceOnlyFlag) PreRunE(cmd *cobra.Command, args 
[]string) (err error) { 22 | return nil 23 | } 24 | 25 | func (ro *ReferenceOnlyFlag) ReferenceOnly() bool { 26 | return ro.referenceOnly 27 | } 28 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/TypeMaterialFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type TypeMaterialFlag struct { 9 | FlagInterface 10 | typeMaterial bool 11 | } 12 | 13 | func NewTypeMaterialFlag() *TypeMaterialFlag { 14 | return &TypeMaterialFlag{typeMaterial: false} 15 | } 16 | 17 | func (tm *TypeMaterialFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&tm.typeMaterial, "from-type", false, "Only return records with type material") 19 | } 20 | 21 | func (tm *TypeMaterialFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (tm *TypeMaterialFlag) TypeMaterial() bool { 26 | return tm.typeMaterial 27 | } 28 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/SkipZipValidation.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type SkipZipValidation struct { 9 | FlagInterface 10 | skipValidation bool 11 | } 12 | 13 | func NewSkipZipValidationFlag() *SkipZipValidation { 14 | return &SkipZipValidation{} 15 | } 16 | 17 | func (sv *SkipZipValidation) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&sv.skipValidation, "fast-zip-validation", false, "Skip zip checksum validation after download") 19 | } 20 | 21 | func (lf *SkipZipValidation) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (sv *SkipZipValidation) IsSkipValidation() bool { 26 | return 
sv.skipValidation 27 | } 28 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/TaxExactMatchFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type TaxExactMatch struct { 9 | FlagInterface 10 | taxExactMatch bool 11 | } 12 | 13 | func NewTaxExactMatchFlag() *TaxExactMatch { 14 | return &TaxExactMatch{taxExactMatch: false} 15 | } 16 | 17 | func (sv *TaxExactMatch) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&sv.taxExactMatch, "tax-exact-match", false, "Exclude sub-species when a species-level taxon is specified") 19 | } 20 | 21 | func (lf *TaxExactMatch) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (sv *TaxExactMatch) IsTaxExactMatch() bool { 26 | return sv.taxExactMatch 27 | } 28 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/ExcludeAtypical.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type ExcludeAtypicalFlag struct { 9 | FlagInterface 10 | excludeAtypical bool 11 | } 12 | 13 | func NewExcludeAtypicalFlag() *ExcludeAtypicalFlag { 14 | return &ExcludeAtypicalFlag{excludeAtypical: false} 15 | } 16 | 17 | func (tm *ExcludeAtypicalFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&tm.excludeAtypical, "exclude-atypical", false, "Exclude atypical assemblies") 19 | } 20 | 21 | func (lf *ExcludeAtypicalFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (tm *ExcludeAtypicalFlag) ExcludeAtypical() bool { 26 | return tm.excludeAtypical 27 | } 28 | 
-------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/DownloadPreviewFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type DownloadPreviewFlag struct { 9 | FlagInterface 10 | downloadPreview bool 11 | } 12 | 13 | func NewDownloadPreviewFlag() *DownloadPreviewFlag { 14 | return &DownloadPreviewFlag{downloadPreview: false} 15 | } 16 | 17 | func (dp *DownloadPreviewFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&dp.downloadPreview, "preview", false, "Show information about the requested data package") 19 | } 20 | 21 | func (dp *DownloadPreviewFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (dp *DownloadPreviewFlag) IsPreview() bool { 26 | return dp.downloadPreview 27 | } 28 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/ChromosomesFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type ChromosomesFlag struct { 9 | FlagInterface 10 | chromosomes []string 11 | } 12 | 13 | func NewChromosomesFlag() *ChromosomesFlag { 14 | cf := &ChromosomesFlag{} 15 | return cf 16 | } 17 | 18 | func (cf *ChromosomesFlag) RegisterFlags(flags *pflag.FlagSet) { 19 | flags.StringSliceVar(&cf.chromosomes, "chromosomes", []string{}, "Limit to a specified, comma-delimited list of chromosomes, or 'all' for all chromosomes") 20 | } 21 | 22 | func (cf *ChromosomesFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 23 | return nil 24 | } 25 | 26 | func (cf *ChromosomesFlag) GetChromosomes() []string { 27 | return cf.chromosomes 28 | } 29 | 
-------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/DehydratedFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type DehydratedFlag struct { 9 | FlagInterface 10 | dehydrated bool 11 | } 12 | 13 | func NewDehydratedFlag() *DehydratedFlag { 14 | return &DehydratedFlag{dehydrated: false} 15 | } 16 | 17 | func (df *DehydratedFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&df.dehydrated, "dehydrated", false, "Download a dehydrated zip archive including the data report and locations of data files (use the rehydrate command to retrieve data files).") 19 | } 20 | 21 | func (df *DehydratedFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (df *DehydratedFlag) Dehydrated() bool { 26 | return df.dehydrated 27 | } 28 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/TaxParentsFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type TaxParentsFlag struct { 9 | FlagInterface 10 | taxParents bool 11 | } 12 | 13 | func NewTaxParentsFlag() *TaxParentsFlag { 14 | return &TaxParentsFlag{taxParents: false} 15 | } 16 | 17 | func (tpf *TaxParentsFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&tpf.taxParents, "parents", false, "Include all parents of the requested taxon") 19 | } 20 | 21 | func (tpf *TaxParentsFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (tpf *TaxParentsFlag) GetParents() bool { 26 | return tpf.taxParents 27 | } 28 | 29 | func (tpf *TaxParentsFlag) SetParents(val bool) { 30 | tpf.taxParents = val 
31 | } 32 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/TaxChildrenFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type TaxChildrenFlag struct { 9 | FlagInterface 10 | taxChildren bool 11 | } 12 | 13 | func NewTaxChildrenFlag() *TaxChildrenFlag { 14 | return &TaxChildrenFlag{taxChildren: false} 15 | } 16 | 17 | func (tcf *TaxChildrenFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&tcf.taxChildren, "children", false, "Return all taxonomic children of the requested taxon") 19 | } 20 | 21 | func (tcf *TaxChildrenFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (tcf *TaxChildrenFlag) GetChildren() bool { 26 | return tcf.taxChildren 27 | } 28 | 29 | func (tcf *TaxChildrenFlag) SetChildren(val bool) { 30 | tcf.taxChildren = val 31 | } 32 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/ExcludeMultiIsolate.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type ExcludeMultiIsolateFlag struct { 9 | FlagInterface 10 | excludeMultiIsolate bool 11 | } 12 | 13 | func NewExcludeMultiIsolateFlag() *ExcludeMultiIsolateFlag { 14 | return &ExcludeMultiIsolateFlag{excludeMultiIsolate: false} 15 | } 16 | 17 | func (mi *ExcludeMultiIsolateFlag) RegisterFlags(flags *pflag.FlagSet) { 18 | flags.BoolVar(&mi.excludeMultiIsolate, "exclude-multi-isolate", false, "Exclude assemblies from multi-isolate projects") 19 | } 20 | 21 | func (lf *ExcludeMultiIsolateFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 22 | return nil 23 | } 24 | 25 | func (mi 
*ExcludeMultiIsolateFlag) ExcludeMultiIsolate() bool { 26 | return mi.excludeMultiIsolate 27 | } 28 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/SearchStringFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | ) 7 | 8 | type SearchStringFlag struct { 9 | FlagInterface 10 | searchStrings []string 11 | } 12 | 13 | func NewSearchStringFlag() *SearchStringFlag { 14 | ssf := &SearchStringFlag{} 15 | return ssf 16 | } 17 | 18 | func (ssf *SearchStringFlag) RegisterFlags(flags *pflag.FlagSet) { 19 | flags.StringSliceVar(&ssf.searchStrings, "search", []string{}, `Limit results to genomes with specified text in the searchable fields: 20 | species and infraspecies, assembly name and submitter. 21 | To search multiple strings, use the flag multiple times.`) 22 | } 23 | 24 | func (ssf *SearchStringFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 25 | return nil 26 | } 27 | 28 | func (ssf *SearchStringFlag) GetSearchText() []string { 29 | return ssf.searchStrings 30 | } 31 | -------------------------------------------------------------------------------- /client/go/util/go.sum: -------------------------------------------------------------------------------- 1 | github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 2 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 3 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 4 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 5 | github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= 6 | github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= 7 | github.com/spf13/pflag v1.0.5 
h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 8 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 10 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | ### Before opening an issue, please: 10 | 11 | - [ ] Make sure you are using the latest version using `datasets --version` 12 | - [ ] Review our [documentation](https://www.ncbi.nlm.nih.gov/datasets/docs/download-and-install/) 13 | 14 | **Describe the bug** 15 | A clear and concise description of what the bug is. 16 | 17 | **To Reproduce** 18 | Please provide a reproducible example, with steps to reproduce the behavior _and_ any relevant taxonomic, genome assembly, or gene identifiers. 19 | 20 | Steps to reproduce the behavior: 21 | 1. Go to '...' 22 | 2. Click on '....' 23 | 3. Scroll down to '....' 24 | 4. See error 25 | 26 | **Expected behavior** 27 | A clear and concise description of what you expected to happen. 28 | 29 | ### Thank you 30 | 31 | Thanks for your feedback--your bug reports help improve NCBI Datasets. 32 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # Public Domain notice 2 | 3 | **National Center for Biotechnology Information** 4 | 5 | This software is a "United States Government Work" under the terms of the 6 | United States Copyright Act. 
It was written as part of the authors' 7 | official duties as United States Government employees and thus cannot 8 | be copyrighted. This software is freely available to the public for 9 | use. The National Library of Medicine and the U.S. Government have not 10 | placed any restriction on its use or reproduction. 11 | 12 | Although all reasonable efforts have been taken to ensure the accuracy 13 | and reliability of the software and data, the NLM and the U.S. 14 | Government do not and cannot warrant the performance or results that 15 | may be obtained by using this software or data. The NLM and the U.S. 16 | Government disclaim all warranties, express or implied, including 17 | warranties of performance, merchantability or fitness for any 18 | particular purpose. 19 | 20 | Please cite NCBI in any work or product based on this material. 21 | 22 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/DebugFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "os" 5 | "strconv" 6 | 7 | "github.com/spf13/cobra" 8 | "github.com/spf13/pflag" 9 | ) 10 | 11 | type DebugFlag struct { 12 | FlagInterface 13 | RunDebug bool 14 | } 15 | 16 | func (df *DebugFlag) useEnv(envVarName, argName string) (val string) { 17 | val = os.Getenv(envVarName) 18 | return 19 | } 20 | 21 | func (df *DebugFlag) useEnvBool(envVarName, argName string, defaultVal bool) bool { 22 | if val, err := strconv.ParseBool(df.useEnv(envVarName, argName)); err == nil { 23 | return val 24 | } 25 | return defaultVal 26 | } 27 | 28 | func NewDebugFlag() *DebugFlag { 29 | return &DebugFlag{} 30 | } 31 | 32 | func (df *DebugFlag) RegisterFlags(flags *pflag.FlagSet) { 33 | flags.BoolVar(&df.RunDebug, "debug", df.useEnvBool("DATASETS_DEBUG", "debug", false), "Emit debugging info") 34 | } 35 | 36 | func (df *DebugFlag) Debug() bool { 37 | return df.RunDebug 38 | } 39 | 
// CreateErrorMessageFromMessageOrError turns the outcome of an HTTP call
// into a single error value. Cases are checked in this order:
//
//   - inError is non-nil: it is returned unchanged.
//   - resp is nil: a "Gateway Error" error is returned.
//   - resp.StatusCode is 300 or above: printfTemplate is formatted with
//     resp.Status as its only argument.
//
// In every other case the result is nil.
func CreateErrorMessageFromMessageOrError(resp *http.Response, inError error, printfTemplate string) (err error) {
	switch {
	case inError != nil:
		return inError
	case resp == nil:
		return errors.New("Gateway Error")
	case resp.StatusCode >= 300:
		return fmt.Errorf(printfTemplate, resp.Status)
	default:
		return nil
	}
}
-------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/Summary.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | func createSummaryCmd() *cobra.Command { 8 | 9 | cmd := &cobra.Command{ 10 | Use: "summary", 11 | Short: "Print a data report containing gene, genome, taxonomy or virus metadata", 12 | Long: "Print a data report containing gene, genome, taxonomy or virus metadata in JSON format.", 13 | Example: ` datasets summary genome accession GCF_000001405.40 14 | datasets summary genome taxon "mus musculus" 15 | datasets summary gene gene-id 672 16 | datasets summary gene symbol brca1 --taxon "mus musculus" 17 | datasets summary gene accession NP_000483.3 18 | datasets summary taxonomy taxon "mus musculus" 19 | datasets summary virus genome accession NC_045512.2 20 | datasets summary virus genome taxon sars-cov-2 --host dog`, 21 | Args: cobra.NoArgs, 22 | RunE: ParentCommandRunE, 23 | } 24 | 25 | cmd.AddCommand(createSummaryGeneCmd()) 26 | cmd.AddCommand(createSummaryGenomeCmd()) 27 | cmd.AddCommand(createSummaryVirusCmd()) 28 | cmd.AddCommand(createSummaryTaxonomyCmd()) 29 | 30 | return cmd 31 | } 32 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_library") 2 | 3 | go_library( 4 | name = "datasets", 5 | srcs = glob( 6 | ["*.go"], 7 | exclude = ["*_test.go"], 8 | ), 9 | importpath = "datasets_cli/v2/datasets", 10 | visibility = ["//visibility:public"], 11 | deps = [ 12 | "//apps/public/Datasets/v2/datasets/flags", 13 | "//go/util", 14 | "//go/util/command", 15 | "//openapi:golib.v2", 16 | "@com_github_antihax_optional//:optional", 17 | "@com_github_docker_go_units//:go-units", 18 | 
"@com_github_gosuri_uiprogress//:uiprogress", 19 | "@com_github_hashicorp_go_cleanhttp//:go-cleanhttp", 20 | "@com_github_hashicorp_go_retryablehttp//:go-retryablehttp", 21 | "@com_github_spf13_afero//:afero", 22 | "@com_github_spf13_cobra//:cobra", 23 | "@com_github_thediveo_enumflag_v2//:enumflag", 24 | "@com_gitlab_metakeule_fmtdate//:fmtdate", 25 | "@org_golang_x_exp//slices", 26 | "@org_golang_x_text//language", 27 | "@org_golang_x_text//message", 28 | ], 29 | ) 30 | 31 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadTaxonomy.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | func createTaxonomyCmd() *cobra.Command { 10 | inputDescription := "taxonomy taxon (NCBI Taxonomy ID, scientific or common name at any tax rank)" 11 | cmd := &cobra.Command{ 12 | Use: "taxonomy", 13 | Short: "Download a taxonomy data package", 14 | Long: fmt.Sprintf(` 15 | Download a taxonomy data package by %s. 
16 | 17 | The default taxonomy data package includes the following files: 18 | * taxonomy_report.jsonl 19 | * taxonomy_summary.tsv 20 | * dataset_catalog.json (a list of files and file types included in the data package) 21 | A taxonomy names data report can also be added to the package 22 | * names_report.jsonl`, inputDescription), 23 | Example: ` datasets download taxonomy taxon "bos taurus" 24 | datasets download taxonomy taxon human,"drosophila melanogaster" --include names 25 | datasets download taxonomy taxon 10116 --parents --children`, 26 | 27 | Args: cobra.NoArgs, 28 | RunE: ParentCommandRunE, 29 | } 30 | 31 | cmd.AddCommand(createDownloadTaxonomyTaxonCmd()) 32 | return cmd 33 | } 34 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/cmd/datasets/BUILD.bazel: -------------------------------------------------------------------------------- 1 | load("@rules_go//go:def.bzl", "go_binary", "go_library") 2 | 3 | go_library( 4 | name = "datasets_lib", 5 | srcs = ["main.go"], 6 | importpath = "datasets_cli/v2/cmd/datasets", 7 | visibility = ["//visibility:private"], 8 | deps = ["//apps/public/Datasets/v2/datasets"], 9 | ) 10 | 11 | go_binary( 12 | name = "datasets", 13 | embed = [":datasets_lib"], 14 | visibility = ["//visibility:public"], 15 | x_defs = {"datasets_cli/v2/datasets.AppVersion": "{STABLE_SOFTWARE_VERSION_WITHOUT_PREFIX}"}, 16 | ) 17 | 18 | # [ 19 | # go_binary( 20 | # name = "{os}-{arch}/datasets".format(os=goos, arch=goarch), 21 | # embed = [":datasets_lib"], 22 | # goos = goos, 23 | # goarch = goarch, 24 | # visibility = ["//visibility:public"], 25 | # x_defs = {"datasets_cli/v2/datasets.AppVersion": "{STABLE_SOFTWARE_VERSION_WITHOUT_PREFIX}"}, 26 | # ) 27 | # for goos, goarchs in { 28 | # "darwin": ["arm64", "amd64"], 29 | # "linux": ["amd64", "arm64", "arm"], 30 | # "windows": ["amd64"], 31 | # }.items() 32 | # for goarch in goarchs 33 | 34 | # ] 35 | 
# normalizeTag prints its first argument with every '+' and '/' replaced by
# '-'. An empty or missing argument prints nothing.
function normalizeTag()
{
    local TAG="$1"
    TAG=${TAG//+/-}
    TAG=${TAG//\//-}
    # Print through a fixed '%s' format: using the tag itself as the format
    # string would interpret any '%' or backslash sequences it contains.
    printf '%s' "$TAG"
}
The data report is returned in JSON format.`, inputDescription), 18 | Example: ` datasets summary gene taxon "pacific white shrimp"`, 19 | Args: cmdflags.ExpectOnePositionalArgument(inputDescription), 20 | PreRunE: cmdflags.ExecutePreRunEFor(sGeneFlag.cmdFlagSet), 21 | RunE: func(cmd *cobra.Command, args []string) error { 22 | 23 | taxId, taxError := RetrieveTaxIdForTaxon( 24 | args[0], 25 | false, 26 | openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_GENE, 27 | "gene", 28 | ) 29 | if taxError != nil { 30 | return taxError 31 | } 32 | 33 | request := openapi.NewV2GeneDatasetReportsRequest() 34 | request.SetTaxon(taxId) 35 | 36 | cli, cliErr := createOAClient() 37 | if cliErr != nil { 38 | return cliErr 39 | } 40 | return geneSummaryPagePrinter(sGeneFlag, NewDefaultRequestIterator(request), getGeneApi(cli)) 41 | }, 42 | } 43 | 44 | return cmd 45 | } 46 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/JsonLinesAndLimitFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/spf13/cobra" 8 | "github.com/spf13/pflag" 9 | ) 10 | 11 | type JsonLinesAndLimitFlag struct { 12 | FlagInterface 13 | limitFlag *LimitFlag 14 | jsonLines bool 15 | } 16 | 17 | func NewJsonLineAndLimitFlag(limitType string) *JsonLinesAndLimitFlag { 18 | return &JsonLinesAndLimitFlag{ 19 | limitFlag: NewLimitFlag(limitType), 20 | jsonLines: false, 21 | } 22 | } 23 | 24 | func (jll *JsonLinesAndLimitFlag) RegisterFlags(flags *pflag.FlagSet) { 25 | flags.BoolVar(&jll.jsonLines, "as-json-lines", false, "Output results in JSON Lines format") 26 | jll.limitFlag.RegisterFlags(flags) 27 | } 28 | 29 | func (jll *JsonLinesAndLimitFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 30 | // Add error checking here. 
31 | err = jll.limitFlag.PreRunE(cmd, args) 32 | if err != nil { 33 | return err 34 | } 35 | if strings.ToLower(jll.limitFlag.LimitRaw()) == "none" && jll.jsonLines { 36 | err = fmt.Errorf("Setting 'limit' to 0 is not compatible with 'as-json-lines'") 37 | return 38 | } 39 | return nil 40 | } 41 | 42 | func (jll *JsonLinesAndLimitFlag) JsonLines() bool { 43 | return jll.jsonLines 44 | } 45 | 46 | func (jll *JsonLinesAndLimitFlag) RetrievalCount() int { 47 | return jll.limitFlag.maxRetrieval 48 | } 49 | 50 | func (jll *JsonLinesAndLimitFlag) CountOnly() bool { 51 | return jll.limitFlag.countOnly 52 | } 53 | 54 | func (jll *JsonLinesAndLimitFlag) LimitRaw() string { 55 | return jll.limitFlag.limitRaw 56 | } 57 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/TaxonomyDownloadReportFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | 6 | "github.com/spf13/cobra" 7 | "github.com/spf13/pflag" 8 | "github.com/thediveo/enumflag/v2" 9 | ) 10 | 11 | type TaxonomyDownloadIncludeFlags enumflag.Flag 12 | 13 | var ( 14 | ArgTaxonomyDownloadIncludeFlags []TaxonomyDownloadIncludeFlags 15 | ) 16 | 17 | const ( 18 | AuxNames TaxonomyDownloadIncludeFlags = iota 19 | ) 20 | 21 | var TaxonomyDownloadIncludeFlagIds = map[TaxonomyDownloadIncludeFlags][]string{ 22 | AuxNames: {"names"}, 23 | } 24 | 25 | var TaxonomyDownloadIncludeFlagOpenapi = map[TaxonomyDownloadIncludeFlags]openapi.V2TaxonomyDatasetRequestTaxonomyReportType{ 26 | AuxNames: openapi.V2TAXONOMYDATASETREQUESTTAXONOMYREPORTTYPE_NAMES_REPORT, 27 | } 28 | 29 | type TaxonomyDownloadIncludeFlag struct { 30 | FlagInterface 31 | IncludeReports []TaxonomyDownloadIncludeFlags 32 | } 33 | 34 | func NewTaxonomyDownloadIncludeFlag() *TaxonomyDownloadIncludeFlag { 35 | tif := &TaxonomyDownloadIncludeFlag{ 36 | IncludeReports: 
[]TaxonomyDownloadIncludeFlags{}, 37 | } 38 | return tif 39 | } 40 | 41 | func (tif *TaxonomyDownloadIncludeFlag) RegisterFlags(flags *pflag.FlagSet) { 42 | flags.Var( 43 | enumflag.NewSlice(&((*tif).IncludeReports), "string", TaxonomyDownloadIncludeFlagIds, enumflag.EnumCaseInsensitive), 44 | "include", 45 | `Add report to download: 46 | * names: taxonomy names report 47 | `) 48 | } 49 | 50 | func (tif *TaxonomyDownloadIncludeFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 51 | return nil 52 | } 53 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/GenomeReportFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | "github.com/thediveo/enumflag/v2" 7 | ) 8 | 9 | type GenomeReportMode enumflag.Flag 10 | 11 | // Define the enumeration values for ReportMode. 12 | const ( 13 | Genome GenomeReportMode = iota 14 | Sequence 15 | IdsOnly 16 | ) 17 | 18 | var GenomeReportModeIds = map[GenomeReportMode][]string{ 19 | Genome: {"genome"}, 20 | Sequence: {"sequence"}, 21 | IdsOnly: {"ids_only"}, 22 | } 23 | 24 | type GenomeReportFlag struct { 25 | FlagInterface 26 | genomeReport GenomeReportMode 27 | } 28 | 29 | func NewGenomeReportFlag() *GenomeReportFlag { 30 | grf := &GenomeReportFlag{ 31 | genomeReport: Genome, 32 | } 33 | return grf 34 | } 35 | 36 | func (grf *GenomeReportFlag) GenomeReport() bool { 37 | return grf.genomeReport == Genome 38 | } 39 | 40 | func (grf *GenomeReportFlag) SequenceReport() bool { 41 | return grf.genomeReport == Sequence 42 | } 43 | 44 | func (grf *GenomeReportFlag) IdsOnly() bool { 45 | return grf.genomeReport == IdsOnly 46 | } 47 | 48 | func (grf *GenomeReportFlag) RegisterFlags(flags *pflag.FlagSet) { 49 | flags.Var( 50 | enumflag.New(&grf.genomeReport, "string", GenomeReportModeIds, enumflag.EnumCaseInsensitive), 51 | 
"report", 52 | `Choose the output type: 53 | * genome: Retrieve the primary genome report 54 | * sequence: Retrieve the sequence report 55 | * ids_only: Retrieve only the genome identifiers 56 | `) 57 | } 58 | 59 | func (grf *GenomeReportFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 60 | return nil 61 | } 62 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/VirusSummaryReportFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | "github.com/thediveo/enumflag/v2" 7 | ) 8 | 9 | type VirusSummaryReportMode enumflag.Flag 10 | 11 | // Define the enumeration values for FooMode. 12 | const ( 13 | DEFAULT VirusSummaryReportMode = iota 14 | ANNOTATION 15 | ) 16 | 17 | // Map enumeration values to their textual representations (value 18 | // identifiers). 19 | var VirusSummaryReportModeIds = map[VirusSummaryReportMode][]string{ 20 | DEFAULT: {"virus"}, 21 | ANNOTATION: {"annotation"}, 22 | } 23 | 24 | var ArgsVirusSummaryReportMode VirusSummaryReportMode 25 | 26 | type VirusSummaryReportFlag struct { 27 | FlagInterface 28 | virusReport VirusSummaryReportMode 29 | } 30 | 31 | func NewVirusSummaryReportFlag() *VirusSummaryReportFlag { 32 | vsrf := &VirusSummaryReportFlag{ 33 | virusReport: ArgsVirusSummaryReportMode, 34 | } 35 | return vsrf 36 | } 37 | 38 | // TODO: * "biosample": returns a biosample report 39 | const summaryReportFlagLongDesc string = `Specify report virus genome report summary type 40 | * virus: returns a primary virus data report 41 | * annotation: returns a virus annotation report 42 | ` 43 | 44 | func (vsrf *VirusSummaryReportFlag) RegisterFlags(flags *pflag.FlagSet) { 45 | flags.Var( 46 | enumflag.New(&ArgsVirusSummaryReportMode, "string", VirusSummaryReportModeIds, enumflag.EnumCaseInsensitive), 47 | "report", 48 | 
# Download the pinned Go SDK release via the rules_go go_sdk extension.
go_sdk = use_extension("@rules_go//go:extensions.bzl", "go_sdk")
go_sdk.download(version = "1.23.4")
-------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryGenomeAccession.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | cmdflags "datasets_cli/v2/datasets/flags" 5 | 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | func createSummaryGenomeAccessionCmd(sgf SummaryGenomeFlag, assemblyRequestFlag AssemblyRequestFlag) *cobra.Command { 10 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeGenomeAcc, cmdflags.AsIntegerFalse) 11 | flagSets := []cmdflags.FlagInterface{iff} 12 | 13 | cmd := &cobra.Command{ 14 | Use: "accession ", 15 | Short: "Print a data report containing assembled genome metadata by Assembly or BioProject accession", 16 | Long: ` 17 | Print a data report containing assembled genome metadata by Assembly or BioProject accession. The data report is returned in JSON format.`, 18 | Example: ` datasets summary genome accession GCF_000001405.40 19 | datasets summary genome accession GCA_003774525.2 GCA_000001635 20 | datasets summary genome accession GCF_000001405.40 --report sequence --as-json-lines 21 | datasets summary genome accession PRJNA31257`, 22 | 23 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 24 | 25 | RunE: func(cmd *cobra.Command, args []string) error { 26 | // This could potentially change avf.AssemblyVersion 27 | accessions := assemblyRequestFlag.assemblyVersionFlag.UpdateForInputAccessions(iff.InputIDArgs) 28 | request, err := GetGenomeReportsAccessionRequest(accessions) 29 | if err != nil { 30 | return err 31 | } 32 | return getGenomeSummary(NewGenomeAccessionRequestIter(request), sgf, assemblyRequestFlag) 33 | }, 34 | } 35 | 36 | cmdflags.RegisterAllFlags(flagSets, cmd.PersistentFlags()) 37 | 38 | return cmd 39 | } 40 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/TaxonomyReportFlag.go: 
-------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | "github.com/spf13/pflag" 6 | "github.com/thediveo/enumflag/v2" 7 | ) 8 | 9 | type TaxonomyReportMode enumflag.Flag 10 | 11 | // Define the enumeration values for ReportMode. 12 | const ( 13 | Taxonomy TaxonomyReportMode = iota 14 | Names 15 | TaxIdsOnly 16 | ) 17 | 18 | var TaxonomyReportModeIds = map[TaxonomyReportMode][]string{ 19 | Taxonomy: {"taxonomy"}, 20 | Names: {"names"}, 21 | TaxIdsOnly: {"ids_only"}, 22 | } 23 | 24 | type TaxonomyReportFlag struct { 25 | FlagInterface 26 | taxonomyReport TaxonomyReportMode 27 | } 28 | 29 | func NewTaxonomyReportFlag() *TaxonomyReportFlag { 30 | trf := &TaxonomyReportFlag{ 31 | taxonomyReport: Taxonomy, 32 | } 33 | return trf 34 | } 35 | 36 | func (trf *TaxonomyReportFlag) TaxonomyReport() bool { 37 | return trf.taxonomyReport == Taxonomy 38 | } 39 | 40 | func (trf *TaxonomyReportFlag) NamesReport() bool { 41 | return trf.taxonomyReport == Names 42 | } 43 | 44 | func (trf *TaxonomyReportFlag) IdsOnly() bool { 45 | return trf.taxonomyReport == TaxIdsOnly 46 | } 47 | 48 | func (trf *TaxonomyReportFlag) RegisterFlags(flags *pflag.FlagSet) { 49 | flags.Var( 50 | enumflag.New(&trf.taxonomyReport, "string", TaxonomyReportModeIds, enumflag.EnumCaseInsensitive), 51 | "report", 52 | `Choose the output type: 53 | * taxonomy: Retrieve the primary taxonomy report 54 | * names: Retrieve the taxonomy names report 55 | * ids_only: Retrieve only the taxonomy identifiers 56 | `) 57 | } 58 | 59 | func (trf *TaxonomyReportFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 60 | return nil 61 | } 62 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadGeneGeneId.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | cmdflags 
"datasets_cli/v2/datasets/flags" 5 | 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | func createDownloadGeneGeneIDCmd(dgf DownloadGeneFlag) *cobra.Command { 10 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeGeneId, cmdflags.AsIntegerTrue) 11 | svf := cmdflags.NewSkipZipValidationFlag() 12 | otf := cmdflags.NewOrthologTaxonFilterFlag() 13 | flagSets := []cmdflags.FlagInterface{iff, otf, svf} 14 | 15 | cmd := &cobra.Command{ 16 | Use: "gene-id ", 17 | Short: "Download a gene data package by NCBI Gene ID", 18 | Example: ` datasets download gene gene-id 672 19 | datasets download gene gene-id 2597 14433`, 20 | Long: ` 21 | Download a gene data package by NCBI Gene ID. Gene data packages include gene, transcript and protein sequences and one or more data reports. Data packages are downloaded as a zip archive. 22 | 23 | The default gene data package includes the following files: 24 | * rna.fna (transcript sequences) 25 | * protein.faa (protein sequences) 26 | * data_report.jsonl (data report with gene metadata) 27 | * dataset_catalog.json (a list of files and file types included in the data package)`, 28 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 29 | 30 | RunE: func(cmd *cobra.Command, args []string) error { 31 | downloader, err := NewGeneDownloader(dgf.previewFlag.IsPreview(), dgf.geneIncludeFlag, dgf.filterFlag, WithGeneIds(iff, otf)) 32 | if err != nil { 33 | return err 34 | } 35 | return downloader.Download(svf.IsSkipValidation()) 36 | }, 37 | } 38 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 39 | 40 | return cmd 41 | } 42 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/MetaGenomeDerived.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | "github.com/spf13/cobra" 6 | "github.com/spf13/pflag" 7 | "github.com/thediveo/enumflag/v2" 8 | ) 9 | 10 | type 
MetagenomeDerived enumflag.Flag 11 | 12 | // Define the enumeration values for MetagenomeDerived 13 | const ( 14 | MetagenomeDerived_All MetagenomeDerived = iota 15 | Only 16 | Exclude 17 | ) 18 | 19 | var MetagenomeDerivedIds = map[MetagenomeDerived][]string{ 20 | MetagenomeDerived_All: {"all", "METAGENOME_DERIVED_UNSET"}, 21 | Only: {"only", "metagenome_derived_only"}, 22 | Exclude: {"exclude", "metagenome_derived_exclude"}, 23 | } 24 | 25 | type MetaGenomeDerivedFlag struct { 26 | FlagInterface 27 | metaGenomeDerived MetagenomeDerived 28 | } 29 | 30 | func NewMetaGenomeDerivedFlag() *MetaGenomeDerivedFlag { 31 | mgd := &MetaGenomeDerivedFlag{ 32 | metaGenomeDerived: MetagenomeDerived_All, 33 | } 34 | return mgd 35 | } 36 | 37 | func (mgd *MetaGenomeDerivedFlag) GetMetaGenomeFilter() openapi.V2AssemblyDatasetDescriptorsFilterMetagenomeDerivedFilter { 38 | return openapi.V2AssemblyDatasetDescriptorsFilterMetagenomeDerivedFilter(MetagenomeDerivedIds[mgd.metaGenomeDerived][1]) 39 | } 40 | 41 | func (mgd *MetaGenomeDerivedFlag) RegisterFlags(flags *pflag.FlagSet) { 42 | flags.Var( 43 | enumflag.New(&mgd.metaGenomeDerived, "string", MetagenomeDerivedIds, enumflag.EnumCaseInsensitive), 44 | "mag", 45 | "Limit to metagenome assembled genomes (only) or remove them from the results (exclude)") 46 | } 47 | 48 | func (mgd *MetaGenomeDerivedFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 49 | return nil 50 | } 51 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryGenomeTaxon.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | "fmt" 7 | 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func createSummaryGenomeTaxonCmd(sgf SummaryGenomeFlag, assemblyRequestFlag AssemblyRequestFlag) *cobra.Command { 12 | inputDescription := "taxon (NCBI 
Taxonomy ID, scientific or common name at any tax rank)" 13 | tem := cmdflags.NewTaxExactMatchFlag() 14 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeTaxon, cmdflags.AsIntegerFalse) 15 | flagSets := []cmdflags.FlagInterface{tem, iff} 16 | 17 | cmd := &cobra.Command{ 18 | Use: "taxon", 19 | Short: fmt.Sprintf("Print a data report containing genome metadata by %s", inputDescription), 20 | Long: fmt.Sprintf(` 21 | Print a data report containing genome metadata by %s. The data report is returned in JSON format.`, inputDescription), 22 | Example: ` datasets summary genome taxon human 23 | datasets summary genome taxon "mus musculus" 24 | datasets summary genome taxon 10116`, 25 | 26 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 27 | RunE: func(cmd *cobra.Command, args []string) (err error) { 28 | var taxIdsMap, taxErr = RetrieveTaxIdsForTaxons(cmd, iff.InputIDArgs, true, openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_GENOME, "genome") 29 | if taxErr != nil { 30 | return taxErr 31 | } 32 | 33 | taxons := getMapListValues(taxIdsMap) 34 | return getGenomeSummary(NewDefaultRequestIterator(GetGenomeReportsTaxonRequest(taxons, tem.IsTaxExactMatch())), sgf, assemblyRequestFlag) 35 | }, 36 | } 37 | 38 | cmdflags.RegisterAllFlags(flagSets, cmd.PersistentFlags()) 39 | 40 | return cmd 41 | } 42 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/TaxonomyIdRetriever.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | ) 6 | 7 | type TaxonomyIdRetriever struct { 8 | taxIdsMap map[int32]bool 9 | taxRanks []openapi.V2reportsRankType 10 | DefaultPageProcessorFuncs[openapi.V2reportsTaxonomyReportMatch, *openapi.V2reportsTaxonomyDataReportPage] 11 | } 12 | 13 | func NewTaxonomyIdRetriever() TaxonomyIdRetriever { 14 | return TaxonomyIdRetriever{ 15 | taxIdsMap: make(map[int32]bool), 16 | } 
17 | } 18 | 19 | func (taxidRetriever *TaxonomyIdRetriever) ReportName() string { 20 | return "taxonomy" 21 | } 22 | 23 | func (taxidRetriever *TaxonomyIdRetriever) ProcessPage(ppage *openapi.V2reportsTaxonomyDataReportPage) { 24 | for _, report := range ppage.GetReports() { 25 | if report.HasTaxonomy() { 26 | // Don't include the root taxid 27 | if report.Taxonomy.GetTaxId() != 1 { 28 | taxidRetriever.taxIdsMap[report.Taxonomy.GetTaxId()] = true 29 | } 30 | } 31 | } 32 | } 33 | 34 | func (taxidRetriever *TaxonomyIdRetriever) AddTaxIds(taxIds []int32) { 35 | for _, taxId := range taxIds { 36 | taxidRetriever.taxIdsMap[taxId] = true 37 | } 38 | } 39 | 40 | func (taxidRetriever *TaxonomyIdRetriever) SetRanks(ranks []openapi.V2reportsRankType) { 41 | taxidRetriever.taxRanks = ranks 42 | } 43 | 44 | func (taxidRetriever *TaxonomyIdRetriever) GetRanks() []openapi.V2reportsRankType { 45 | return taxidRetriever.taxRanks 46 | } 47 | 48 | func (taxidRetriever *TaxonomyIdRetriever) GetTaxIds() []int32 { 49 | taxIds := make([]int32, len(taxidRetriever.taxIdsMap)) 50 | i := 0 51 | for taxId := range taxidRetriever.taxIdsMap { 52 | taxIds[i] = taxId 53 | i++ 54 | } 55 | 56 | return taxIds 57 | } 58 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/TaxonRankFilterFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | 6 | "fmt" 7 | "github.com/spf13/cobra" 8 | "github.com/spf13/pflag" 9 | 10 | "strings" 11 | ) 12 | 13 | type TaxonRankFilterFlag struct { 14 | FlagInterface 15 | taxonRankFilter []string 16 | openapiRanks []openapi.V2reportsRankType 17 | } 18 | 19 | func NewTaxonRankFilterFlag() *TaxonRankFilterFlag { 20 | trf := &TaxonRankFilterFlag{} 21 | return trf 22 | } 23 | 24 | func (trf *TaxonRankFilterFlag) RegisterFlags(flags *pflag.FlagSet) { 25 | 
flags.StringSliceVar(&trf.taxonRankFilter, "rank", trf.taxonRankFilter, "Limit taxons to only those with the specified ranks. When enabled, the children and parents flags will be enabled as well.") 26 | } 27 | 28 | func (lf *TaxonRankFilterFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 29 | for _, rank := range lf.taxonRankFilter { 30 | // Provide a little flexibility - automatically convert to upper case and replace dashes with underscores 31 | cleanedRank := strings.Replace(strings.ToUpper(rank), "-", "_", -1) 32 | var rankType *openapi.V2reportsRankType 33 | rankType, err = openapi.NewV2reportsRankTypeFromValue(cleanedRank) 34 | if err == nil { 35 | lf.openapiRanks = append(lf.openapiRanks, *rankType) 36 | } else { 37 | rankTextReplacer := strings.NewReplacer("[", "", "]", "") 38 | allowedRanks := rankTextReplacer.Replace(fmt.Sprintf("%v", openapi.AllowedV2reportsRankTypeEnumValues)) 39 | return fmt.Errorf("Invalid value '%v' for rank. Valid ranks are:\n%s", rank, allowedRanks) 40 | } 41 | } 42 | return 43 | } 44 | 45 | func (trf *TaxonRankFilterFlag) GetRanks() []openapi.V2reportsRankType { 46 | return trf.openapiRanks 47 | } 48 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/FastaFilter.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/spf13/cobra" 8 | "github.com/spf13/pflag" 9 | ) 10 | 11 | type FastaFilterFlag struct { 12 | FlagInterface 13 | fastaFilter []string 14 | fastaFilterFile string 15 | filters []string 16 | } 17 | 18 | func NewFastaFilterFlag() *FastaFilterFlag { 19 | return &FastaFilterFlag{} 20 | } 21 | 22 | func (fff *FastaFilterFlag) RegisterFlags(flags *pflag.FlagSet) { 23 | flags.StringSliceVar(&fff.fastaFilter, "fasta-filter", []string{}, "Limit protein and RNA sequence files to the specified RefSeq nucleotide and protein accessions") 24 
| flags.StringVar(&fff.fastaFilterFile, "fasta-filter-file", "", "Limit protein and RNA sequence files to the specified RefSeq nucleotide and protein accessions included in the specified file") 25 | } 26 | 27 | func (fff *FastaFilterFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 28 | if fff.fastaFilterFile != "" { 29 | fp, fileErr := os.Open(fff.fastaFilterFile) 30 | if fileErr != nil { 31 | return fmt.Errorf("'%s' opening input file: '%s'", fileErr.Error(), fff.fastaFilterFile) 32 | } 33 | defer fp.Close() 34 | fff.filters = readLines(fp) 35 | // Check if any accessions were read 36 | if len(fff.filters) == 0 { 37 | return fmt.Errorf( 38 | "No identifiers read from file: '%s'\n File should have 1 identifier per row and no spaces or quotes", 39 | fff.fastaFilterFile, 40 | ) 41 | } 42 | } 43 | 44 | // Set other fields 45 | if len(fff.fastaFilter) > 0 { 46 | fff.filters = append(fff.filters, fff.fastaFilter...) 47 | } 48 | 49 | return nil 50 | } 51 | 52 | func (fff *FastaFilterFlag) Filters() []string { 53 | return fff.filters 54 | } 55 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadGeneLocusTag.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | cmdflags "datasets_cli/v2/datasets/flags" 5 | 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | func createDownloadGeneLocusTagCmd(dgf DownloadGeneFlag) *cobra.Command { 10 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeLocusTag, cmdflags.AsIntegerFalse) 11 | otf := cmdflags.NewOrthologTaxonFilterFlag() 12 | svf := cmdflags.NewSkipZipValidationFlag() 13 | gtff := cmdflags.NewGeneTaxonFilterFlag() 14 | 15 | flagSets := []cmdflags.FlagInterface{iff, otf, svf, gtff} 16 | 17 | cmd := &cobra.Command{ 18 | Use: "locus-tag ", 19 | Short: "Download a gene data package by locus tag", 20 | Example: ` datasets download gene locus-tag b0001 21 | datasets 
download gene locus-tag b0001 ArthCt125`, 22 | Long: ` 23 | Download a gene data package by locus tag. Gene data packages include gene, transcript and protein sequences and one or more data reports. Data packages are downloaded as a zip archive. 24 | 25 | The default gene data package includes the following files: 26 | * rna.fna (transcript sequences) 27 | * protein.faa (protein sequences) 28 | * data_report.jsonl (data report with gene metadata) 29 | * dataset_catalog.json (a list of files and file types included in the data package)`, 30 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 31 | RunE: func(cmd *cobra.Command, args []string) error { 32 | downloader, err := NewGeneDownloader(dgf.previewFlag.IsPreview(), dgf.geneIncludeFlag, dgf.filterFlag, WithLocusTags(iff, otf)) 33 | if err != nil { 34 | return err 35 | } 36 | return downloader.Download(svf.IsSkipValidation()) 37 | }, 38 | } 39 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 40 | 41 | return cmd 42 | } 43 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryVirusGenomeTaxon.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | "fmt" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | func createSummaryVirusGenomeTaxCmd(vsf SummaryVirusFlag) *cobra.Command { 11 | inputDescription := "taxon (NCBI Taxonomy ID, scientific or common name for any virus at any tax rank)" 12 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeTaxon, cmdflags.AsIntegerFalse, cmdflags.WithLimit(MAX_VIRUS_TAXONS)) 13 | flagSets := []cmdflags.FlagInterface{iff} 14 | 15 | cmd := &cobra.Command{ 16 | Use: "taxon", 17 | Short: fmt.Sprintf("Print a data report containing virus genome metadata by %s", inputDescription), 18 | Long: fmt.Sprintf(` 19 | Print a data report containing virus genome metadata by 
%s. The data report is returned in JSON format 20 | 21 | Refer to NCBI's [download and install](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/download-and-install/) documentation for information about getting started with the command-line tools.`, inputDescription), 22 | Example: " datasets summary virus genome taxon sars-cov-2 --host dog", 23 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 24 | 25 | RunE: func(cmd *cobra.Command, args []string) (err error) { 26 | if err != nil { 27 | return err 28 | } 29 | var taxIdsMap, taxErr = RetrieveTaxIdsForTaxons(cmd, iff.InputIDArgs, true, openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_ALL, "virus", 10239) 30 | if taxErr != nil { 31 | return taxErr 32 | } 33 | 34 | err = executeSummaryVirusGenomeCmd(getMapListValues(taxIdsMap), vsf, nil) 35 | 36 | return err 37 | 38 | }, 39 | } 40 | cmdflags.RegisterAllFlags(flagSets, cmd.PersistentFlags()) 41 | 42 | return cmd 43 | } 44 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/progressbar.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "time" 7 | 8 | units "github.com/docker/go-units" 9 | "github.com/gosuri/uiprogress" 10 | ) 11 | 12 | func byteCountSI(b int64) string { 13 | return units.HumanSizeWithPrecision(float64(b), 3) 14 | } 15 | 16 | func byteCountPerSecSI(b int64, seconds float64) string { 17 | return units.HumanSizeWithPrecision(float64(b)/seconds, 3) + "/s" 18 | } 19 | 20 | type copyProgressBar struct { 21 | io.Writer 22 | total int64 // Total # of bytes written 23 | bar *uiprogress.Bar 24 | startTime time.Time 25 | filename string 26 | status string 27 | } 28 | 29 | func (progressBar *copyProgressBar) Write(p []byte) (n int, err error) { 30 | n, err = progressBar.Writer.Write(p) 31 | progressBar.total += int64(n) 32 | if err == nil { 33 | progressBar.status = byteCountPerSecSI(progressBar.total, 
time.Since(progressBar.startTime).Seconds()) 34 | } 35 | return 36 | } 37 | 38 | func (progressBar *copyProgressBar) Copy(dest io.Writer, src io.Reader) (n int64, err error) { 39 | if !argNoProgress && progressBar.bar == nil { 40 | progressBar.bar = progress.AddBar(1) 41 | progressBar.bar.LeftEnd = ' ' 42 | progressBar.bar.RightEnd = ' ' 43 | progressBar.bar.Width = 2 44 | progressBar.bar.Width = 2 45 | progressBar.bar.PrependFunc(func(b *uiprogress.Bar) string { 46 | return "Downloading: " + progressBar.filename 47 | }) 48 | progressBar.bar.AppendFunc(func(b *uiprogress.Bar) string { 49 | return fmt.Sprintf("%s %s", byteCountSI(progressBar.total), progressBar.status) 50 | }) 51 | } 52 | progressBar.status = "connecting" 53 | progressBar.total = 0 54 | progressBar.startTime = time.Now() 55 | progressBar.Writer = dest 56 | n, err = io.Copy(progressBar, src) 57 | progressBar.status = "done" 58 | return 59 | } 60 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadGeneTaxon.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | "fmt" 7 | 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func createDownloadGeneTaxonCmd(dgf DownloadGeneFlag) *cobra.Command { 12 | 13 | inputDescription := "taxon (NCBI Taxonomy ID, scientific or common name for a species)" 14 | svf := cmdflags.NewSkipZipValidationFlag() 15 | flagSets := []cmdflags.FlagInterface{svf} 16 | 17 | cmd := &cobra.Command{ 18 | Use: "taxon ", 19 | Short: fmt.Sprintf("Download a gene data package by %s", inputDescription), 20 | Example: ` datasets download gene taxon "drosophila melanogaster" --include protein,cds`, 21 | Long: fmt.Sprintf(` 22 | Download a gene data package by %s. Gene data packages include gene, transcript and protein sequences and one or more data reports. 
Data packages are downloaded as a zip archive. 23 | 24 | The default gene data package includes the following files: 25 | * rna.fna (transcript sequences) 26 | * protein.faa (protein sequences) 27 | * data_report.jsonl (data report with gene metadata) 28 | * dataset_catalog.json (a list of files and file types included in the data package)`, inputDescription), 29 | Args: cmdflags.ExpectOnePositionalArgument(inputDescription), 30 | RunE: func(cmd *cobra.Command, args []string) error { 31 | taxId, taxError := RetrieveTaxIdForTaxon( 32 | args[0], 33 | false, 34 | openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_GENE, 35 | "gene", 36 | ) 37 | if taxError != nil { 38 | return taxError 39 | } 40 | 41 | downloader, err := NewGeneDownloader(dgf.previewFlag.IsPreview(), dgf.geneIncludeFlag, dgf.filterFlag, WithTaxon(taxId)) 42 | if err != nil { 43 | return err 44 | } 45 | return downloader.Download(svf.IsSkipValidation()) 46 | }, 47 | } 48 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 49 | 50 | return cmd 51 | } 52 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/VirusDownloadReportFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | "github.com/spf13/cobra" 6 | "github.com/spf13/pflag" 7 | "github.com/thediveo/enumflag/v2" 8 | ) 9 | 10 | type VirusDownloadReportSelection enumflag.Flag 11 | 12 | var ( 13 | ArgsVirusDownloadReportSelection []VirusDownloadReportSelection = []VirusDownloadReportSelection{DOWNLOAD_DATASET_REPORT} 14 | ) 15 | 16 | const ( 17 | DOWNLOAD_DATASET_REPORT VirusDownloadReportSelection = iota 18 | DOWNLOAD_ANNOTATION 19 | ) 20 | 21 | var VirusDownloadReportSelectionIds = map[VirusDownloadReportSelection][]string{ 22 | DOWNLOAD_DATASET_REPORT: {"DATASET_REPORT"}, 23 | DOWNLOAD_ANNOTATION: {"ANNOTATION"}, 24 | } 25 | 26 | var VirusDownloadReportSelectionOpenapi 
// RegisterFlags adds the "report" flag to flags. The flag accepts a
// case-insensitive, comma-separated list of the identifiers in
// VirusDownloadReportSelectionIds and uses reportFlagLongDesc as its help text.
//
// NOTE(review): the flag value is bound to the package-level
// ArgsVirusDownloadReportSelection, not to vrf.virusReport. The struct field
// is only initialized from that variable in NewVirusDownloadReportFlag, so it
// presumably does not reflect values parsed from the command line — confirm
// which of the two the callers read before changing the binding.
func (vrf *VirusDownloadReportFlag) RegisterFlags(flags *pflag.FlagSet) {
	flags.Var(
		enumflag.NewSlice(&ArgsVirusDownloadReportSelection, "string(,string)", VirusDownloadReportSelectionIds, enumflag.EnumCaseInsensitive),
		"report",
		reportFlagLongDesc,
	)
}
GenomeAssemblyLevelOpenapi = map[GenomeAssemblyLevel]openapi.V2reportsAssemblyLevel{ 27 | Chromosome: openapi.V2REPORTSASSEMBLYLEVEL_CHROMOSOME, 28 | Scaffold: openapi.V2REPORTSASSEMBLYLEVEL_SCAFFOLD, 29 | Contig: openapi.V2REPORTSASSEMBLYLEVEL_CONTIG, 30 | Complete: openapi.V2REPORTSASSEMBLYLEVEL_COMPLETE_GENOME, 31 | } 32 | 33 | type GenomeAssemblyLevelFlag struct { 34 | FlagInterface 35 | genomeAssemblyLevels []GenomeAssemblyLevel 36 | } 37 | 38 | func NewGenomeAssemblyLevelFlag() *GenomeAssemblyLevelFlag { 39 | gal := &GenomeAssemblyLevelFlag{ 40 | genomeAssemblyLevels: []GenomeAssemblyLevel{}, 41 | } 42 | return gal 43 | } 44 | 45 | func (gal *GenomeAssemblyLevelFlag) GetAssemblyLevels() (assemblyLevels []openapi.V2reportsAssemblyLevel) { 46 | for _, level := range gal.genomeAssemblyLevels { 47 | assemblyLevels = append(assemblyLevels, GenomeAssemblyLevelOpenapi[level]) 48 | } 49 | return 50 | } 51 | 52 | func (gal *GenomeAssemblyLevelFlag) RegisterFlags(flags *pflag.FlagSet) { 53 | flags.Var( 54 | enumflag.NewSlice(&gal.genomeAssemblyLevels, "string", GenomeAssemblyLevelIds, enumflag.EnumCaseInsensitive), 55 | "assembly-level", 56 | `Limit to genomes at one or more assembly levels (comma-separated): 57 | * chromosome 58 | * complete 59 | * contig 60 | * scaffold 61 | `) 62 | } 63 | 64 | func (gal *GenomeAssemblyLevelFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 65 | return nil 66 | } 67 | -------------------------------------------------------------------------------- /client/openapi/BUILD: -------------------------------------------------------------------------------- 1 | # gazelle:ignore 2 | 3 | load("@aspect_bazel_lib//lib:copy_to_directory.bzl", "copy_to_directory") 4 | load("@rules_go//go:def.bzl", "go_library") 5 | load("@openapi_tools_generator_bazel//:defs.bzl", "openapi_generator") 6 | 7 | package(default_visibility = ["//visibility:public"]) 8 | 9 | ## Golang 10 | filegroup( 11 | name = "golang_template", 12 | srcs = glob([ 13 | 
# Generate the Go client sources from the OpenAPI spec, one target per API version.
# The generator config is shared by all versions, so its path is a plain string
# (it contains no "{ver}" placeholder to substitute).
[
    openapi_generator(
        name = "openapi.genlib.{ver}.go".format(ver = version),
        config = "configs/openapigen.go.config",
        generator = "go",
        spec = ":openapi3_{ver}_yaml".format(ver = version),
        template_dir = "templates/go",
    )
    for version in [
        "v2",
    ]
]
iff = cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeVirusAcc, cmdflags.AsIntegerFalse) 11 | svf = cmdflags.NewSkipZipValidationFlag() 12 | flagSets = []cmdflags.FlagInterface{iff, svf} 13 | 14 | return iff, svf, flagSets 15 | } 16 | 17 | func createDownloadVirusGenomeAccessionCmd(dvf DownloadVirusFlag) *cobra.Command { 18 | 19 | iff, svf, flagSets := initInputFlagVirusAccession() 20 | 21 | cmd := &cobra.Command{ 22 | Use: "accession ", 23 | Short: "Download a virus genome data package by accession.", 24 | Long: ` 25 | Download a virus genome data package by GenBank or RefSeq nucleotide accession. Virus genome data packages include genome, transcript and protein sequences, annotation and one or more data reports. Data packages are downloaded as a zip archive. 26 | 27 | The default virus genome data package includes the following files: 28 | * genomic.fna (genomic sequences) 29 | * data_report.jsonl (data report with virus genome metadata) 30 | * dataset_catalog.json (a list of files and file types included in the data package)`, 31 | Example: ` datasets download virus genome accession NC_045512.2 --include genome,cds,protein`, 32 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 33 | RunE: func(cmd *cobra.Command, args []string) (err error) { 34 | idArgs := iff.InputIDArgs 35 | if len(idArgs) == 0 { 36 | return errors.New("Input accessions not specified") 37 | } 38 | downloader, warning, err := NewVirusDownloader(VirusDownloadWithAccession(idArgs, dvf)) 39 | if err != nil { 40 | if warning != "" { 41 | cmd.Println(warning) 42 | } 43 | return err 44 | } 45 | if warning != "" { 46 | cmd.Println(warning) 47 | } 48 | return downloader.Download(svf.IsSkipValidation()) 49 | 50 | }, 51 | } 52 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 53 | return cmd 54 | } 55 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadGeneSymbol.go: 
-------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | func createDownloadGeneSymbolCmd(dgf DownloadGeneFlag) *cobra.Command { 11 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeGeneSymbol, cmdflags.AsIntegerFalse) 12 | otf := cmdflags.NewOrthologTaxonFilterFlag() 13 | svf := cmdflags.NewSkipZipValidationFlag() 14 | gtff := cmdflags.NewGeneTaxonFilterFlag() 15 | 16 | flagSets := []cmdflags.FlagInterface{iff, otf, svf, gtff} 17 | 18 | cmd := &cobra.Command{ 19 | Use: "symbol ", 20 | Short: "Download a gene data package by gene symbol", 21 | Example: ` datasets download gene symbol tp53 22 | datasets download gene symbol brca1 --taxon "mus musculus"`, 23 | Long: ` 24 | Download a gene data package by gene symbol and taxon (NCBI Taxonomy ID, scientific or common name for a species). If no taxon is specified, data will be returned for human (--taxon human). Gene data packages include gene, transcript and protein sequences and one or more data reports. Data packages are downloaded as a zip archive. 
25 | 26 | The default gene data package includes the following files: 27 | * rna.fna (transcript sequences) 28 | * protein.faa (protein sequences) 29 | * data_report.jsonl (data report with gene metadata) 30 | * dataset_catalog.json (a list of files and file types included in the data package)`, 31 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 32 | RunE: func(cmd *cobra.Command, args []string) error { 33 | taxId, taxError := RetrieveTaxIdForTaxon( 34 | gtff.Taxon, 35 | false, 36 | openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_GENE, 37 | "gene", 38 | ) 39 | if taxError != nil { 40 | return taxError 41 | } 42 | 43 | downloader, err := NewGeneDownloader(dgf.previewFlag.IsPreview(), dgf.geneIncludeFlag, dgf.filterFlag, WithSymbolAndTaxon(iff, otf, taxId)) 44 | if err != nil { 45 | return err 46 | } 47 | return downloader.Download(svf.IsSkipValidation()) 48 | }, 49 | } 50 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 51 | 52 | return cmd 53 | } 54 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/OrthologTaxonFilterFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | "github.com/spf13/cobra" 6 | "github.com/spf13/pflag" 7 | "strings" 8 | ) 9 | 10 | type OrthologTaxonFilterFlag struct { 11 | FlagInterface 12 | orthologTaxonFilter []string 13 | orthologTaxIdFilter []string 14 | } 15 | 16 | // Might need to parameterize for download vs summary to fix the help text. 17 | func NewOrthologTaxonFilterFlag() *OrthologTaxonFilterFlag { 18 | otf := &OrthologTaxonFilterFlag{} 19 | return otf 20 | } 21 | 22 | func (otf *OrthologTaxonFilterFlag) RegisterFlags(flags *pflag.FlagSet) { 23 | flags.StringSliceVar(&otf.orthologTaxonFilter, "ortholog", otf.orthologTaxonFilter, "Retrieves data for an ortholog set. 
// Contains reports whether needle is an element of haystack, comparing
// elements case-insensitively. Elements are compared whole (no substring or
// whitespace-trimmed matching), and a nil or empty haystack never matches.
func Contains(haystack []string, needle string) bool {
	for _, candidate := range haystack {
		// EqualFold compares in place; the previous ToLower == ToLower form
		// allocated two new strings for every element visited.
		if strings.EqualFold(candidate, needle) {
			return true
		}
	}
	return false
}
countOnly bool 19 | } 20 | 21 | func NewLimitFlag(limitType string) *LimitFlag { 22 | lf := &LimitFlag{ 23 | limitType: limitType, 24 | } 25 | return lf 26 | } 27 | 28 | // Unused AFAICT 29 | // func NewLimitFlagFor(limitType string, limitArgument string) *LimitFlag { 30 | // lf := &LimitFlag{ 31 | // limitType: limitType, 32 | // limitRaw: limitArgument, 33 | // } 34 | // return lf 35 | // } 36 | 37 | func (lf *LimitFlag) RegisterFlags(flags *pflag.FlagSet) { 38 | flags.StringVar(&lf.limitRaw, "limit", "all", strings.Replace(`Limit the number of TYPE summaries returned 39 | * all: returns all matching TYPE summaries 40 | * a number: returns the specified number of matching TYPE summaries 41 | `, "TYPE", lf.limitType, -1)) 42 | } 43 | 44 | func (lf *LimitFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 45 | return lf.SetRetrievalAndCount() 46 | } 47 | 48 | func (lf *LimitFlag) RetrievalCount() int { 49 | return lf.maxRetrieval 50 | } 51 | 52 | func (lf *LimitFlag) CountOnly() bool { 53 | return lf.countOnly 54 | } 55 | 56 | func (lf *LimitFlag) LimitRaw() string { 57 | return lf.limitRaw 58 | } 59 | 60 | // Helper function 61 | func (lf *LimitFlag) SetRetrievalAndCount() (err error) { 62 | lf.maxRetrieval = math.MaxInt 63 | lf.countOnly = false 64 | if lf.limitRaw == "" { 65 | return 66 | } 67 | lowerCaseLimit := strings.ToLower(lf.limitRaw) 68 | if lowerCaseLimit == "none" || lowerCaseLimit == "0" { 69 | lf.maxRetrieval = 1 70 | lf.countOnly = true 71 | return 72 | } 73 | 74 | if lowerCaseLimit != "all" { 75 | lf.maxRetrieval, err = strconv.Atoi(lowerCaseLimit) 76 | if err != nil { 77 | err = fmt.Errorf("Invalid 'limit' value %s. 
Must be 'all', 'none', or a number.", lowerCaseLimit) 78 | } else if lf.maxRetrieval < 0 { 79 | lf.maxRetrieval = math.MaxInt 80 | } 81 | } 82 | return 83 | } 84 | -------------------------------------------------------------------------------- /client/openapi/templates/go.patch: -------------------------------------------------------------------------------- 1 | diff -Naur a/go/api.mustache b/go/api.mustache 2 | --- a/go/api.mustache 2023-12-22 07:08:14.000000000 -0500 3 | +++ b/go/api.mustache 2025-06-30 09:16:31.191652388 -0400 4 | @@ -365,6 +365,12 @@ 5 | return {{#returnType}}localVarReturnValue, {{/returnType}}localVarHTTPResponse, err 6 | } 7 | 8 | + // NCBI Datasets Customization to support zip streaming 9 | + if localVarHTTPResponse.Header.Get("Content-Type") != "application/json" { 10 | + return {{#returnType}}localVarReturnValue, {{/returnType}}localVarHTTPResponse, nil 11 | + } 12 | + // End NCBI Datasets Customization 13 | + 14 | localVarBody, err := io.ReadAll(localVarHTTPResponse.Body) 15 | localVarHTTPResponse.Body.Close() 16 | localVarHTTPResponse.Body = io.NopCloser(bytes.NewBuffer(localVarBody)) 17 | diff -Naur a/go/client.mustache b/go/client.mustache 18 | --- a/go/client.mustache 2023-12-22 07:08:14.000000000 -0500 19 | +++ b/go/client.mustache 2025-06-30 15:56:45.055166148 -0400 20 | @@ -262,7 +262,7 @@ 21 | } 22 | 23 | if c.cfg.Debug { 24 | - dump, err := httputil.DumpResponse(resp, true) 25 | + dump, err := httputil.DumpResponse(resp, false) 26 | if err != nil { 27 | return resp, err 28 | } 29 | diff -Naur a/go/go.mod.mustache b/go/go.mod.mustache 30 | --- a/go/go.mod.mustache 2023-12-22 07:08:14.000000000 -0500 31 | +++ b/go/go.mod.mustache 2025-06-30 14:24:16.846624024 -0400 32 | @@ -1,4 +1,4 @@ 33 | -module {{gitHost}}/{{gitUserId}}/{{gitRepoId}}{{#isGoSubmodule}}/{{packageName}}{{/isGoSubmodule}} 34 | +module datasets/openapi/v2 35 | 36 | go 1.18 37 | 38 | @@ -9,4 +9,7 @@ 39 | {{#withAWSV4Signature}} 40 | github.com/aws/aws-sdk-go 
v1.34.14 41 | {{/withAWSV4Signature}} 42 | + {{#importValidator}} 43 | + gopkg.in/validator.v2 v2.0.1 44 | + {{/importValidator}} 45 | ) 46 | diff -Naur a/go/model_enum.mustache b/go/model_enum.mustache 47 | --- a/go/model_enum.mustache 2023-12-22 07:08:14.000000000 -0500 48 | +++ b/go/model_enum.mustache 2025-07-01 11:31:36.892029397 -0400 49 | @@ -1,3 +1,5 @@ 50 | +import "os" 51 | + 52 | // {{{classname}}} {{{description}}}{{^description}}the model '{{{classname}}}'{{/description}} 53 | type {{{classname}}} {{{format}}}{{^format}}{{dataType}}{{/format}} 54 | 55 | @@ -35,7 +37,10 @@ 56 | } 57 | } 58 | 59 | - return fmt.Errorf("%+v is not a valid {{classname}}", value) 60 | + // If no match found, assign the raw value and display a warning 61 | + *v = {{{classname}}}(value) 62 | + fmt.Fprintf(os.Stderr, "Warning: '%v' is not a valid value for {{{classname}}}\n", value) 63 | + return nil 64 | } 65 | 66 | // New{{{classname}}}FromValue returns a pointer to a valid {{{classname}}} 67 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryVirusGenome.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | type SummaryVirusFlag struct { 11 | virusFilterFlags *cmdflags.VirusFilterFlags 12 | 13 | virusSumRptFlag *cmdflags.VirusSummaryReportFlag 14 | jsonLinesLimitFlag *cmdflags.JsonLinesAndLimitFlag 15 | cmdFlagSet []cmdflags.FlagInterface 16 | } 17 | 18 | func initSummaryVirusGenomeFlag() SummaryVirusFlag { 19 | 20 | vsrf := cmdflags.NewVirusSummaryReportFlag() 21 | jll := cmdflags.NewJsonLineAndLimitFlag("virus") 22 | vff := cmdflags.NewVirusFilterFlags(cmdflags.GenomeReleasedAfterDesc, cmdflags.VirusGenomeUpdatedAfterDesc) 23 | 24 | svf := SummaryVirusFlag{ 25 | 26 | virusSumRptFlag: vsrf, 27 | 
jsonLinesLimitFlag: jll, 28 | virusFilterFlags: vff, 29 | cmdFlagSet: []cmdflags.FlagInterface{vsrf, jll, vff}, 30 | } 31 | 32 | return svf 33 | } 34 | 35 | func (dvf *SummaryVirusFlag) PrepareAnnotationReportRequest(accs []string, taxons []string) *openapi.V2VirusAnnotationReportRequest { 36 | return dvf.virusFilterFlags.PrepareAnnotationReportRequest(accs, taxons) 37 | } 38 | 39 | func (dvf *SummaryVirusFlag) PrepareDatasetReportRequest(accs []string, taxons []string) *openapi.V2VirusDataReportRequest { 40 | return dvf.virusFilterFlags.PrepareDatasetReportRequest(accs, taxons) 41 | } 42 | 43 | func createSummaryVirusGenomeCmd() *cobra.Command { 44 | 45 | summaryVirusFlag := initSummaryVirusGenomeFlag() 46 | 47 | cmd := &cobra.Command{ 48 | 49 | Use: "genome", 50 | Short: "Print a data report containing virus genome metadata by accession or taxon", 51 | Long: ` 52 | Print a data report containing virus genome metadata by nucleotide accession or taxon. The data report is returned in JSON format. 
53 | 54 | Refer to NCBI's [download and install](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/download-and-install/) documentation for information about getting started with the command-line tools.`, 55 | Example: ` datasets summary virus genome accession NC_045512.2 56 | datasets summary virus genome taxon sars-cov-2 --host dog`, 57 | Args: cobra.NoArgs, 58 | PersistentPreRunE: cmdflags.PersistentPreRunEFor(summaryVirusFlag.cmdFlagSet, rootCmd), 59 | RunE: ParentCommandRunE, 60 | } 61 | 62 | cmdflags.RegisterAllFlags(summaryVirusFlag.cmdFlagSet, cmd.PersistentFlags()) 63 | 64 | cmd.AddCommand(createSummaryVirusGenomeTaxCmd(summaryVirusFlag)) 65 | cmd.AddCommand(createSummaryVirusGenomeAccCmd(summaryVirusFlag)) 66 | 67 | return cmd 68 | 69 | } 70 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/GeneIdRetriever.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | "strconv" 6 | ) 7 | 8 | // This implements the PageProcessor interface. 9 | // Client-side paged geneids can return dupicates, so we use a map. 
10 | type GeneIdRetriever struct { 11 | geneIdsMap map[int32]bool 12 | DefaultPageProcessorFuncs[openapi.V2reportsGeneReportMatch, *openapi.V2reportsGeneDataReportPage] 13 | } 14 | 15 | func (r *GeneIdRetriever) ReportName() string { 16 | return "gene" 17 | } 18 | 19 | func (geneIdRetriever *GeneIdRetriever) ProcessPage(ppage *openapi.V2reportsGeneDataReportPage) { 20 | for _, report := range ppage.GetReports() { 21 | gene := report.GetGene() 22 | // While 32 specifies what the integer must fit, it still returns an in64, thus the additional cast 23 | geneId64, e := strconv.ParseInt(gene.GetGeneId(), 10, 32) 24 | if e == nil { 25 | // Ignore errors 26 | geneIdRetriever.geneIdsMap[int32(geneId64)] = true 27 | } 28 | } 29 | } 30 | 31 | func (geneIdRetriever *GeneIdRetriever) GetGeneIds() []int32 { 32 | keys := make([]int32, len(geneIdRetriever.geneIdsMap)) 33 | i := 0 34 | for k := range geneIdRetriever.geneIdsMap { 35 | keys[i] = k 36 | i++ 37 | } 38 | return keys 39 | } 40 | 41 | func (geneIdRetriever *GeneIdRetriever) RetrievalCount(totalCount int) int { 42 | return len(geneIdRetriever.geneIdsMap) 43 | } 44 | 45 | func NewGeneIdRetriever() GeneIdRetriever { 46 | return GeneIdRetriever{ 47 | geneIdsMap: make(map[int32]bool), 48 | } 49 | } 50 | 51 | type GeneCounts struct { 52 | Genes int `json:"genes"` 53 | Transcripts int `json:"transcripts"` 54 | Proteins int `json:"proteins"` 55 | } 56 | 57 | type GeneCountRetriever struct { 58 | geneCounts GeneCounts 59 | geneIdsMap map[string]bool 60 | DefaultPageProcessorFuncs[openapi.V2reportsGeneReportMatch, *openapi.V2reportsGeneDataReportPage] 61 | } 62 | 63 | func (r *GeneCountRetriever) ReportName() string { 64 | return "gene" 65 | } 66 | 67 | func (geneCountRetriever *GeneCountRetriever) ProcessPage(ppage *openapi.V2reportsGeneDataReportPage) { 68 | for _, report := range ppage.GetReports() { 69 | gene := report.GetGene() 70 | if !geneCountRetriever.geneIdsMap[gene.GetGeneId()] { 71 | geneCountRetriever.geneCounts.Genes 
+= 1 72 | geneCountRetriever.geneCounts.Transcripts += int(gene.GetTranscriptCount()) 73 | geneCountRetriever.geneCounts.Proteins += int(gene.GetProteinCount()) 74 | geneCountRetriever.geneIdsMap[gene.GetGeneId()] = true 75 | } 76 | } 77 | } 78 | 79 | func NewGeneCountRetriever() GeneCountRetriever { 80 | return GeneCountRetriever{ 81 | geneIdsMap: make(map[string]bool), 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadVirusGenomeTaxon.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | "fmt" 7 | 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | var KNOWN_VIRUS_TAXONS = []string{"2697049", "197911", "2955291", "11320"} 12 | 13 | const MAX_VIRUS_TAXONS = 100 14 | 15 | func createDownloadVirusGenomeTaxonCmd(dvf DownloadVirusFlag) *cobra.Command { 16 | svf := cmdflags.NewSkipZipValidationFlag() 17 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeTaxon, cmdflags.AsIntegerFalse, cmdflags.WithLimit(MAX_VIRUS_TAXONS)) 18 | flagSets := []cmdflags.FlagInterface{svf, iff} 19 | 20 | inputDescription := "taxon (NCBI Taxonomy ID, scientific or common name for any virus at any tax rank)" 21 | 22 | cmd := &cobra.Command{ 23 | Use: "taxon ", 24 | Short: fmt.Sprintf("Download a virus genome data package by %s", inputDescription), 25 | Long: fmt.Sprintf(` 26 | Download a virus genome data package by %s. Virus genome data packages include genome, transcript and protein sequences, annotation and one or more data reports. Data packages are downloaded as a zip archive. 
27 | 28 | The default virus genome data package includes the following files: 29 | * genomic.fna (genomic sequences) 30 | * data_report.jsonl (data report with virus genome metadata) 31 | * dataset_catalog.json (a list of files and file types included in the data package)`, inputDescription), 32 | Example: ` datasets download virus genome taxon sars-cov-2 --host dog --include protein 33 | datasets download virus genome taxon coronaviridae --host "manis javanica"`, 34 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 35 | RunE: func(cmd *cobra.Command, args []string) (err error) { 36 | 37 | // To support Cloud builds, we need to allow 2697049 + FLU straight through 38 | // without any checks. 39 | var taxons []string 40 | if len(args) == 1 && cmdflags.Contains(KNOWN_VIRUS_TAXONS, args[0]) { 41 | taxons = append(taxons, args[0]) 42 | } else { 43 | taxIdsMap, taxErr := RetrieveTaxIdsForTaxons(cmd, iff.InputIDArgs, true, openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_ALL, "virus", 10239) 44 | if taxErr != nil { 45 | return taxErr 46 | } 47 | taxons = getMapListValues(taxIdsMap) 48 | } 49 | 50 | downloader, warning, err := NewVirusDownloader(VirusDownloadWithTaxon(taxons, dvf)) 51 | if err != nil { 52 | if warning != "" { 53 | cmd.Println(warning) 54 | } 55 | return err 56 | } 57 | if warning != "" { 58 | cmd.Println(warning) 59 | } 60 | return downloader.Download(svf.IsSkipValidation()) 61 | }, 62 | } 63 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 64 | 65 | return cmd 66 | } 67 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadGenomeAccession.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | func createDownloadGenomeAccessionCmd(dgf DownloadGenomeFlag, assemblyRequestFlag 
AssemblyRequestFlag) *cobra.Command { 11 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeGenomeAcc, cmdflags.AsIntegerFalse) 12 | flagSets := []cmdflags.FlagInterface{iff} 13 | 14 | cmd := &cobra.Command{ 15 | Use: "accession ", 16 | Short: "Download a genome data package by Assembly or BioProject accession", 17 | Long: ` 18 | Download a genome data package by Assembly or BioProject accession. Genome data packages may include assembled genome, transcript and protein sequences, annotation and one or more data reports. Data packages are downloaded as a zip archive. 19 | 20 | The default genome data package includes the following files: 21 | * __genomic.fna (genomic sequences) 22 | * assembly_data_report.jsonl (data report with genome assembly and annotation metadata) 23 | * dataset_catalog.json (a list of files and file types included in the data package)`, 24 | Example: ` datasets download genome accession GCF_000001405.40 --chromosomes X,Y --include protein,cds 25 | datasets download genome accession GCA_003774525.2 GCA_000001635 --chromosomes X,Y,Un.9 26 | datasets download genome accession GCA_003774525.2 --preview 27 | datasets download genome accession PRJNA289059 --include none`, 28 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 29 | RunE: func(cmd *cobra.Command, args []string) error { 30 | // This could potentially change AssemblyVersion 31 | accessions := assemblyRequestFlag.assemblyVersionFlag.UpdateForInputAccessions(iff.InputIDArgs) 32 | 33 | if dgf.downloadPreviewFlag.IsPreview() { 34 | request, err := GetGenomeReportsAccessionRequest(accessions) 35 | if err != nil { 36 | return err 37 | } 38 | request.SetReturnedContent(openapi.V2ASSEMBLYDATASETREPORTSREQUESTCONTENTTYPE_ASSM_ACC) 39 | err = updateAssemblyReportRequestOption(request, assemblyRequestFlag) 40 | if err != nil { 41 | return err 42 | } 43 | return getDownloadSummary(dgf, NewGenomeAccessionRequestIter(request)) 44 | } 45 | downloader, warning, err := 
NewGenomeDownloader(GenomeWithAccessions(accessions), dgf, assemblyRequestFlag) 46 | if warning != "" { 47 | cmd.PrintErrln(warning) 48 | } 49 | if err != nil { 50 | return err 51 | } 52 | err = downloader.Download(dgf.skipValidationFlag.IsSkipValidation()) 53 | return err 54 | }, 55 | } 56 | 57 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 58 | 59 | return cmd 60 | } 61 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadGene.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | cmdflags "datasets_cli/v2/datasets/flags" 5 | 6 | "github.com/spf13/cobra" 7 | ) 8 | 9 | type DownloadGeneFlag struct { 10 | geneIncludeFlag *cmdflags.GeneIncludeFlag 11 | previewFlag *cmdflags.DownloadPreviewFlag 12 | filterFlag *cmdflags.FastaFilterFlag 13 | cmdFlagSet []cmdflags.FlagInterface 14 | } 15 | 16 | func newDownloadGeneFlag(default_include_flags []cmdflags.GeneIncludeFlags) DownloadGeneFlag { 17 | gif := cmdflags.NewGeneIncludeFlag(default_include_flags) 18 | dpf := cmdflags.NewDownloadPreviewFlag() 19 | fff := cmdflags.NewFastaFilterFlag() 20 | 21 | dgf := DownloadGeneFlag{ 22 | geneIncludeFlag: gif, 23 | previewFlag: dpf, 24 | filterFlag: fff, 25 | cmdFlagSet: []cmdflags.FlagInterface{gif, dpf, fff}, 26 | } 27 | 28 | return dgf 29 | } 30 | 31 | func createGeneCmd() *cobra.Command { 32 | downloadGeneFlag := newDownloadGeneFlag(cmdflags.GeneDefault) 33 | 34 | cmd := &cobra.Command{ 35 | Use: "gene", 36 | Short: "Download a gene data package", 37 | Example: ` datasets download gene gene-id 672 38 | datasets download gene symbol brca1 --taxon "mus musculus" 39 | datasets download gene accession NP_000483.3 40 | datasets download gene gene-id 2778 --fasta-filter NC_000020.11,NM_001077490.3,NP_001070958.1`, 41 | Long: ` 42 | Download a gene data package. 
Gene data packages include gene, transcript and protein sequences and one or more data reports. Data packages are downloaded as a zip archive. 43 | 44 | The default gene data package for NM, NR, NP, XM, XR, XP and YP accessions: 45 | * rna.fna (transcript sequences) 46 | * protein.faa (protein sequences) 47 | * data_report.jsonl (data report with gene metadata) 48 | * dataset_catalog.json (a list of files and file types included in the data package)`, 49 | Args: cobra.NoArgs, 50 | RunE: ParentCommandRunE, 51 | PersistentPreRunE: cmdflags.PersistentPreRunEFor(downloadGeneFlag.cmdFlagSet, downloadCmd), 52 | PersistentPostRun: func(cmd *cobra.Command, args []string) { 53 | if !argNoProgress && !downloadGeneFlag.previewFlag.IsPreview() { 54 | progress.Stop() 55 | } 56 | }, 57 | } 58 | 59 | cmdflags.RegisterAllFlags(downloadGeneFlag.cmdFlagSet, cmd.PersistentFlags()) 60 | 61 | // Can't use the downloadGeneFlag created here in download gene accession, because default include values are different there 62 | cmd.AddCommand(createDownloadGeneGeneIDCmd(downloadGeneFlag)) 63 | cmd.AddCommand(createDownloadGeneSymbolCmd(downloadGeneFlag)) 64 | cmd.AddCommand(createDownloadGeneAccession()) 65 | cmd.AddCommand(createDownloadGeneTaxonCmd(downloadGeneFlag)) 66 | cmd.AddCommand(createDownloadGeneLocusTagCmd(downloadGeneFlag)) 67 | 68 | return cmd 69 | } 70 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadGenomeTaxon.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | "fmt" 5 | 6 | openapi "datasets/openapi/v2" 7 | cmdflags "datasets_cli/v2/datasets/flags" 8 | 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | func createDownloadGenomeTaxonCmd(dgf DownloadGenomeFlag, assemblyRequestFlag AssemblyRequestFlag) *cobra.Command { 13 | tem := cmdflags.NewTaxExactMatchFlag() 14 | iff := 
cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeTaxon, cmdflags.AsIntegerFalse) 15 | flagSets := []cmdflags.FlagInterface{tem, iff} 16 | 17 | inputDescription := "taxon (NCBI Taxonomy ID, scientific or common name at any tax rank)" 18 | cmd := &cobra.Command{ 19 | Use: "taxon ", 20 | Short: fmt.Sprintf("Download a genome data package by %s", inputDescription), 21 | Long: fmt.Sprintf(` 22 | Download a genome data package by %s. Genome data packages may include genome, transcript and protein sequences, annotation and one or more data reports. Data packages are downloaded as a zip archive. 23 | 24 | The default genome data package includes the following files: 25 | * __genomic.fna (genomic sequences) 26 | * assembly_data_report.jsonl (data report with genome assembly and annotation metadata) 27 | * dataset_catalog.json (a list of files and file types included in the data package)`, inputDescription), 28 | Example: ` datasets download genome taxon human --chromosomes 21 --include none 29 | datasets download genome taxon "bos taurus" --reference 30 | datasets download genome taxon human --preview 31 | datasets download genome taxon 10116 --include rna,protein`, 32 | 33 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 34 | 35 | RunE: func(cmd *cobra.Command, args []string) error { 36 | var taxIdsMap, taxErr = RetrieveTaxIdsForTaxons(cmd, iff.InputIDArgs, true, openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_GENOME, "genome") 37 | if taxErr != nil { 38 | return taxErr 39 | } 40 | taxons := getMapListValues(taxIdsMap) 41 | 42 | if dgf.downloadPreviewFlag.IsPreview() { 43 | request := GetGenomeReportsTaxonRequest(taxons, tem.IsTaxExactMatch()) 44 | request.SetReturnedContent(openapi.V2ASSEMBLYDATASETREPORTSREQUESTCONTENTTYPE_ASSM_ACC) 45 | err := updateAssemblyReportRequestOption(request, assemblyRequestFlag) 46 | if err != nil { 47 | return err 48 | } 49 | return getDownloadSummary(dgf, NewDefaultRequestIterator(request)) 50 | } 51 | downloader, warning, err := 
NewGenomeDownloader(GenomeWithTaxon(taxons, tem.IsTaxExactMatch()), dgf, assemblyRequestFlag) 52 | if warning != "" { 53 | cmd.PrintErrln(warning) 54 | } 55 | if err != nil { 56 | return err 57 | } 58 | return downloader.Download(dgf.skipValidationFlag.IsSkipValidation()) 59 | }, 60 | } 61 | 62 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 63 | 64 | return cmd 65 | } 66 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to NCBI Datasets 2 | 3 | This guide documents the various ways to contribute to NCBI Datasets, including information about how we handle pull requests. 4 | 5 | We encourage you to contribute to NCBI Datasets by opening [new GitHub issues](https://github.com/ncbi/datasets/issues/new/choose) to report bugs or request features in addition to proposing code modifications via the pull request process. While we may not be able to implement all feature requests, this feedback can help inform future work on NCBI Datasets. 6 | 7 | ## Bug reports 8 | 9 | Submitting a bug report is one of the simplest and most useful ways to contribute, as it helps us to quickly identify and fix issues. 10 | 11 | Report a bug using the New issue button on the [issues page](https://github.com/ncbi/datasets/issues/new/choose). A good bug report should include a clear and concise description of the bug and a reproducible example, with steps to reproduce the behavior, with any relevant identifiers. When possible, before submitting a bug report, please try to verify that you are able to reproduce the problem by ruling out common issues such as network connectivity interruptions. 12 | 13 | The NCBI Datasets team will triage the bug reports and assign an internal priority based on the extent and impact of the issue. 
Due to limited resources, not all bug reports will be handled immediately for a variety of reasons (such as a planned redesign, inability to reproduce the problem, etc.). All reports, however, are useful feedback for the team and help inform the project as it moves forward. 14 | 15 | ## Feature requests 16 | 17 | We encourage feature requests. Please be aware that not all feature requests can be supported immediately, but all requests help the team plan and design future improvements to the software. 18 | 19 | When requesting a new feature, please provide a clear and concise description of the feature you would like and any details about the problem this would solve for you. It is especially important to include information on how this would fit into your workflow in order to understand the context of the request. Often solutions to a feature request are implemented differently than an end-user may expect due to infrastructure or other issues. 20 | 21 | ## Pull requests (code changes) 22 | 23 | Our build system is not yet capable of incorporating code changes in a PR into the NCBI Datasets command-line tools. 24 | 25 | However, we are happy to review and consider code changes in a PR and will credit you in our release notes if we decide to incorporate the suggested code changes into our tools. 26 | 27 | Here is what you can expect if you create a PR: 28 | 1. We will review and discuss internally whether to incorporate the suggested code change 29 | 1. 
If we decide to change the code based on your suggested change, we will credit you in our release notes and close the PR 30 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadVirusProtein.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | 6 | "context" 7 | 8 | openapi "datasets/openapi/v2" 9 | cmdflags "datasets_cli/v2/datasets/flags" 10 | ) 11 | 12 | func downloadVirusProtein(cmd *cobra.Command, proteinNames []string, assmFilename string, dvf DownloadVirusFlag) (err error) { 13 | 14 | cli, err := createOAClient() 15 | if err != nil { 16 | return 17 | } 18 | 19 | request := openapi.NewV2Sars2ProteinDatasetRequest() 20 | request.SetProteins(proteinNames) 21 | dvf.prepareSars2ProteinDatasetRequest(request) 22 | 23 | f, err := afs.Create(assmFilename) 24 | if err != nil { 25 | return 26 | } 27 | 28 | length := int64(-1) // unknown length 29 | 30 | _, resp, err := cli.VirusAPI.Sars2ProteinDownloadPost(context.TODO()).V2Sars2ProteinDatasetRequest(*request).Execute() 31 | if err = handleHTTPResponse(resp, err); err != nil { 32 | return 33 | } 34 | err = downloadData(&f, resp, err, assmFilename, length, dvf.skipValFlag.IsSkipValidation()) 35 | return 36 | } 37 | 38 | func createDownloadVirusProteinCmd() *cobra.Command { 39 | 40 | downloadVirusProteinFlag := initDownloadVirusFlag( 41 | cmdflags.IncludeSequenceLongDescProtein, 42 | cmdflags.VirusProteinReleasedAfterDesc, 43 | cmdflags.VirusProteinUpdatedAfterDesc, 44 | ) 45 | cmd := &cobra.Command{ 46 | Use: "protein [flags]", 47 | Short: "Download a SARS-CoV-2 protein dataset by protein name", 48 | Long: ` 49 | Download a SARS-CoV-2 protein data package by protein name. SARS-CoV-2 protein 50 | data packages include CDS and protein sequence, annotation and a detailed data report. 51 | Datasets are downloaded as a zip file. 
52 | 53 | The default SARS-CoV-2 protein data package includes the following files: 54 | * cds.fna (nucleotide coding sequences) 55 | * protein.faa (protein sequences) 56 | * data_report.jsonl (data report with viral metadata) 57 | * dataset_catalog.json (a list of files and file types included in the data package) 58 | 59 | Allowed protein names are: ORF1ab, ORF1a, nsp1, nsp2, nsp3, nsp4, nsp5, nsp6, nsp7, nsp8, nsp9, nsp10, rdrp, nsp11, nsp13, nsp14, nsp15, nsp16, S, ORF3a, E, M, ORF6, ORF7a, ORF7b, ORF8, N, ORF10`, 60 | Example: ` datasets download virus protein S --host dog --filename SARS2-spike-dog.zip 61 | datasets download virus protein rdrp --refseq --filename SARS2-rdrp-refseq.zip`, 62 | Args: cmdflags.ExpectAtLeastOnePositionalArgument("SARS-Cov-2 protein name or symbol"), 63 | PreRunE: cmdflags.ExecutePreRunEFor(downloadVirusProteinFlag.cmdFlagSet), 64 | RunE: func(cmd *cobra.Command, args []string) (err error) { 65 | return downloadVirusProtein(cmd, args, argDownloadFilename, downloadVirusProteinFlag) 66 | }, 67 | } 68 | 69 | flags := cmd.PersistentFlags() 70 | 71 | cmdflags.RegisterAllFlags(downloadVirusProteinFlag.cmdFlagSet, flags) 72 | 73 | return cmd 74 | } 75 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/GenomeIncludeFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | 6 | "github.com/spf13/cobra" 7 | "github.com/spf13/pflag" 8 | "github.com/thediveo/enumflag/v2" 9 | ) 10 | 11 | type GenomeIncludeAnnotation enumflag.Flag 12 | 13 | const ( 14 | GenomeGff3 GenomeIncludeAnnotation = iota 15 | GenomeGbff 16 | GenomeGtf 17 | GenomeProtein 18 | GenomeRna 19 | GenomeCds 20 | GenomeSeq 21 | GenomeSequenceReport 22 | GenomeIncludeNoneFlag 23 | ) 24 | 25 | // Map enumeration values to their textual representations (value 26 | // identifiers). 
27 | var GenomeIncludeAnnotationIds = map[GenomeIncludeAnnotation][]string{ 28 | GenomeGff3: {"gff3"}, 29 | GenomeGbff: {"gbff"}, 30 | GenomeGtf: {"gtf"}, 31 | GenomeProtein: {"protein"}, 32 | GenomeRna: {"rna"}, 33 | GenomeCds: {"cds"}, 34 | GenomeSeq: {"genome"}, 35 | GenomeSequenceReport: {"seq-report"}, 36 | GenomeIncludeNoneFlag: {"none"}, 37 | } 38 | 39 | var GenomeIncludeAnnotationOpenapi = map[GenomeIncludeAnnotation]openapi.V2AnnotationForAssemblyType{ 40 | GenomeGff3: openapi.V2ANNOTATIONFORASSEMBLYTYPE_GENOME_GFF, 41 | GenomeGbff: openapi.V2ANNOTATIONFORASSEMBLYTYPE_GENOME_GBFF, 42 | GenomeGtf: openapi.V2ANNOTATIONFORASSEMBLYTYPE_GENOME_GTF, 43 | GenomeProtein: openapi.V2ANNOTATIONFORASSEMBLYTYPE_PROT_FASTA, 44 | GenomeRna: openapi.V2ANNOTATIONFORASSEMBLYTYPE_RNA_FASTA, 45 | GenomeCds: openapi.V2ANNOTATIONFORASSEMBLYTYPE_CDS_FASTA, 46 | GenomeSeq: openapi.V2ANNOTATIONFORASSEMBLYTYPE_GENOME_FASTA, 47 | GenomeSequenceReport: openapi.V2ANNOTATIONFORASSEMBLYTYPE_SEQUENCE_REPORT, 48 | } 49 | 50 | type GenomeIncludeAnnotationFlag struct { 51 | FlagInterface 52 | IncludeAnnotation []GenomeIncludeAnnotation 53 | } 54 | 55 | func NewGenomeIncludeAnnotationFlag(defaultGenomeIncludeAnnotationFlag []GenomeIncludeAnnotation) *GenomeIncludeAnnotationFlag { 56 | giaf := &GenomeIncludeAnnotationFlag{ 57 | IncludeAnnotation: defaultGenomeIncludeAnnotationFlag, 58 | } 59 | return giaf 60 | } 61 | 62 | func (giaf *GenomeIncludeAnnotationFlag) RegisterFlags(flags *pflag.FlagSet) { 63 | flags.Var( 64 | enumflag.NewSlice(&((*giaf).IncludeAnnotation), "string(,string)", GenomeIncludeAnnotationIds, enumflag.EnumCaseInsensitive), 65 | "include", 66 | `Specify the data files to include (comma-separated). 
67 | * genome: genomic sequence 68 | * rna: transcript 69 | * protein: amnio acid sequences 70 | * cds: nucleotide coding sequences 71 | * gff3: general feature file 72 | * gtf: gene transfer format 73 | * gbff: GenBank flat file 74 | * seq-report: sequence report file 75 | * none: do not retrieve any sequence files 76 | `) 77 | } 78 | 79 | func (giaf *GenomeIncludeAnnotationFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 80 | return nil 81 | } 82 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadVirusGenome.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | type DownloadVirusFlag struct { 11 | virusFilterFlags *cmdflags.VirusFilterFlags 12 | 13 | virusSeqFlags *cmdflags.VirusDownloadIncludeFlag 14 | skipValFlag *cmdflags.SkipZipValidation 15 | cmdFlagSet []cmdflags.FlagInterface 16 | } 17 | 18 | func (dvf *DownloadVirusFlag) addFiltersAndOptionsTo(request *openapi.V2VirusDatasetRequest) { 19 | dvf.virusFilterFlags.PrepareDownloadRequest(request) 20 | dvf.virusSeqFlags.PrepareDownloadRequest(request) 21 | } 22 | 23 | func (dvf *DownloadVirusFlag) prepareSars2ProteinDatasetRequest(request *openapi.V2Sars2ProteinDatasetRequest) { 24 | dvf.virusFilterFlags.PrepareSarsProteinDownloadRequest(request) 25 | dvf.virusSeqFlags.PrepareSarsProteinDownloadRequest(request) 26 | } 27 | 28 | func initDownloadVirusFlag(longDesc string, releasedAfterDesc string, updatedAfterDesc string) DownloadVirusFlag { 29 | 30 | vsf := cmdflags.NewVirusDownloadIncludeFlag(longDesc) 31 | svf := cmdflags.NewSkipZipValidationFlag() 32 | vff := cmdflags.NewVirusFilterFlags(releasedAfterDesc, updatedAfterDesc) 33 | 34 | dvf := DownloadVirusFlag{ 35 | skipValFlag: svf, 36 | virusFilterFlags: vff, 37 | 
virusSeqFlags: vsf, 38 | cmdFlagSet: []cmdflags.FlagInterface{svf, vff, vsf}, 39 | } 40 | 41 | return dvf 42 | } 43 | 44 | func createDownloadVirusGenomeCmd() *cobra.Command { 45 | 46 | downloadVirusGenomeFlag := initDownloadVirusFlag( 47 | cmdflags.IncludeSequenceLongDescGenome, 48 | cmdflags.GenomeReleasedAfterDesc, 49 | cmdflags.VirusGenomeUpdatedAfterDesc, 50 | ) 51 | 52 | cmd := &cobra.Command{ 53 | Use: "genome [command] ", 54 | Short: "Download a virus genome dataset by accession or taxon", 55 | Long: ` 56 | Download a virus genome data package by GenBank or RefSeq nucleotide accession. Virus genome data packages include genome, 57 | transcript and protein sequences, annotation and one or more data reports. Data packages are downloaded as a zip archive. 58 | 59 | The default virus genome data package includes the following files: 60 | * genomic.fna (genomic sequences) 61 | * data_report.jsonl (data report with virus genome metadata) 62 | * dataset_catalog.json (a list of files and file types included in the data package)`, 63 | Example: ` datasets download virus genome taxon sars-cov-2 --host dog --include protein 64 | datasets download virus genome taxon coronaviridae --host "manis javanica"`, 65 | RunE: ParentCommandRunE, 66 | PersistentPreRunE: cmdflags.PersistentPreRunEFor(downloadVirusGenomeFlag.cmdFlagSet, downloadCmd), 67 | } 68 | 69 | flags := cmd.PersistentFlags() 70 | 71 | cmdflags.RegisterAllFlags(downloadVirusGenomeFlag.cmdFlagSet, flags) 72 | 73 | cmd.AddCommand(createDownloadVirusGenomeAccessionCmd(downloadVirusGenomeFlag)) 74 | cmd.AddCommand(createDownloadVirusGenomeTaxonCmd(downloadVirusGenomeFlag)) 75 | 76 | return cmd 77 | } 78 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryGeneLocusTag.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags 
"datasets_cli/v2/datasets/flags" 6 | "fmt" 7 | 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func GeneDatasetReportRequestForLocusTags(cli *openapi.APIClient, iff *cmdflags.InputFileFlag, otf *cmdflags.OrthologTaxonFilterFlag) (*openapi.V2GeneDatasetReportsRequest, error) { 12 | request := openapi.NewV2GeneDatasetReportsRequest() 13 | 14 | geneLocusTags := iff.AsStringList() 15 | if !otf.IsOrthologRequested() { 16 | request.SetLocusTags(geneLocusTags) 17 | return request, nil 18 | } 19 | // Compute the gene-id list via ortholog 20 | // For each locus-tag, -> gene_id 21 | // then for each gene_id -> ortholog gene_ids 22 | inputGeneInts, errLocusTag := RetrieveGeneIdsForLocusTags(cli, geneLocusTags) 23 | if errLocusTag != nil { 24 | return nil, errLocusTag 25 | } 26 | 27 | geneInts, err := RetrieveOrthologGeneIdsFor(cli, otf, inputGeneInts) 28 | if err != nil { 29 | return nil, err 30 | } 31 | if len(geneInts) == 0 { 32 | return nil, fmt.Errorf("No gene orthologs found for the selected locus-tags") 33 | } 34 | request.SetGeneIds(geneInts) 35 | return request, nil 36 | } 37 | 38 | func RetrieveGeneIdsForLocusTags(cli *openapi.APIClient, geneLocusTags []string) (geneInts []int32, err error) { 39 | api := GeneDatasetApi{geneApi: cli.GeneAPI} 40 | geneIdRetriever := NewGeneIdRetriever() 41 | 42 | request := openapi.NewV2GeneDatasetReportsRequest() 43 | request.SetLocusTags(geneLocusTags) 44 | request.SetReturnedContent(openapi.V2GENEDATASETREPORTSREQUESTCONTENTTYPE_IDS_ONLY) 45 | _, err = ProcessAllPages[ 46 | *openapi.V2GeneDatasetReportsRequest, 47 | openapi.V2reportsGeneDataReportPage, 48 | openapi.V2reportsGeneReportMatch, 49 | *openapi.V2reportsGeneDataReportPage](NewGeneAccessionRequestIter(request), &api, &geneIdRetriever) 50 | if err != nil { 51 | return nil, err 52 | } 53 | 54 | return geneIdRetriever.GetGeneIds(), nil 55 | } 56 | 57 | func createSummaryGeneLocusTagCmd(sGeneFlag *SummaryGeneFlag) *cobra.Command { 58 | iff := 
cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeLocusTag, cmdflags.AsIntegerFalse) 59 | otf := cmdflags.NewOrthologTaxonFilterFlag() 60 | flagSets := []cmdflags.FlagInterface{iff, otf} 61 | 62 | cmd := &cobra.Command{ 63 | Use: "locus-tag ", 64 | Short: "Print a data report containing gene metadata by locus tag", 65 | Long: ` 66 | Print a data report containing gene metadata by locus tag. The data report is returned in JSON format.`, 67 | Example: ` datasets summary gene locus-tag b0001 68 | datasets summary gene locus-tag b0001 ArthCt125`, 69 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 70 | RunE: func(cmd *cobra.Command, args []string) (err error) { 71 | cli, cliErr := createOAClient() 72 | if cliErr != nil { 73 | return cliErr 74 | } 75 | 76 | request, err := GeneDatasetReportRequestForLocusTags(cli, iff, otf) 77 | if err != nil { 78 | return err 79 | } 80 | 81 | return geneSummaryPagePrinter(sGeneFlag, NewGeneLocusTagRequestIter(request), getGeneApi(cli)) 82 | }, 83 | } 84 | 85 | cmdflags.RegisterAllFlags(flagSets, cmd.PersistentFlags()) 86 | return cmd 87 | } 88 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryGeneAccession.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | "fmt" 7 | 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func GeneDatasetReportRequestForAccessions(cli *openapi.APIClient, iff *cmdflags.InputFileFlag, otf *cmdflags.OrthologTaxonFilterFlag) (*openapi.V2GeneDatasetReportsRequest, error) { 12 | request := openapi.NewV2GeneDatasetReportsRequest() 13 | 14 | geneAccessions := iff.AsStringList() 15 | if !otf.IsOrthologRequested() { 16 | request.SetAccessions(geneAccessions) 17 | return request, nil 18 | } 19 | // Compute the gene-id list via ortholog 20 | // For each accession, -> gene_id 21 | // then for 
each gene_id -> ortholog gene_ids 22 | inputGeneInts, errAcc := RetrieveGeneIdsForAccessions(cli, geneAccessions) 23 | if errAcc != nil { 24 | return nil, errAcc 25 | } 26 | 27 | geneInts, err := RetrieveOrthologGeneIdsFor(cli, otf, inputGeneInts) 28 | if err != nil { 29 | return nil, err 30 | } 31 | if len(geneInts) == 0 { 32 | return nil, fmt.Errorf("No gene orthologs found for the selected accessions") 33 | } 34 | request.SetGeneIds(geneInts) 35 | return request, nil 36 | } 37 | 38 | func RetrieveGeneIdsForAccessions(cli *openapi.APIClient, geneAccessions []string) (geneInts []int32, err error) { 39 | api := GeneDatasetApi{geneApi: cli.GeneAPI} 40 | geneIdRetriever := NewGeneIdRetriever() 41 | 42 | request := openapi.NewV2GeneDatasetReportsRequest() 43 | request.SetAccessions(geneAccessions) 44 | request.SetReturnedContent(openapi.V2GENEDATASETREPORTSREQUESTCONTENTTYPE_IDS_ONLY) 45 | _, err = ProcessAllPages[ 46 | *openapi.V2GeneDatasetReportsRequest, 47 | openapi.V2reportsGeneDataReportPage, 48 | openapi.V2reportsGeneReportMatch, 49 | *openapi.V2reportsGeneDataReportPage](NewGeneAccessionRequestIter(request), &api, &geneIdRetriever) 50 | if err != nil { 51 | return nil, err 52 | } 53 | 54 | return geneIdRetriever.GetGeneIds(), nil 55 | } 56 | 57 | func createSummaryGeneAccessionCmd(sGeneFlag *SummaryGeneFlag) *cobra.Command { 58 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeGeneAcc, cmdflags.AsIntegerFalse) 59 | otf := cmdflags.NewOrthologTaxonFilterFlag() 60 | flagSets := []cmdflags.FlagInterface{iff, otf} 61 | 62 | cmd := &cobra.Command{ 63 | Use: "accession ", 64 | Short: "Print a data report containing gene metadata by RefSeq nucleotide or protein accession", 65 | Long: ` 66 | Print a data report containing gene metadata by RefSeq nucleotide or protein accession. 
The data report is returned in JSON format.`, 67 | Example: ` datasets summary gene accession NP_000483.3 68 | datasets summary gene accession NM_000546.6 NM_000492.4`, 69 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 70 | RunE: func(cmd *cobra.Command, args []string) (err error) { 71 | cli, cliErr := createOAClient() 72 | if cliErr != nil { 73 | return cliErr 74 | } 75 | 76 | request, err := GeneDatasetReportRequestForAccessions(cli, iff, otf) 77 | if err != nil { 78 | return err 79 | } 80 | 81 | return geneSummaryPagePrinter(sGeneFlag, NewGeneAccessionRequestIter(request), getGeneApi(cli)) 82 | }, 83 | } 84 | 85 | cmdflags.RegisterAllFlags(flagSets, cmd.PersistentFlags()) 86 | return cmd 87 | } 88 | -------------------------------------------------------------------------------- /client/openapi/templates/go/model_enum.mustache: -------------------------------------------------------------------------------- 1 | import "os" 2 | 3 | // {{{classname}}} {{{description}}}{{^description}}the model '{{{classname}}}'{{/description}} 4 | type {{{classname}}} {{{format}}}{{^format}}{{dataType}}{{/format}} 5 | 6 | // List of {{{name}}} 7 | const ( 8 | {{#allowableValues}} 9 | {{#enumVars}} 10 | {{^-first}} 11 | {{/-first}} 12 | {{#enumClassPrefix}}{{{classname.toUpperCase}}}_{{/enumClassPrefix}}{{name}} {{{classname}}} = {{{value}}} 13 | {{/enumVars}} 14 | {{/allowableValues}} 15 | ) 16 | 17 | // All allowed values of {{{classname}}} enum 18 | var Allowed{{{classname}}}EnumValues = []{{{classname}}}{ 19 | {{#allowableValues}} 20 | {{#enumVars}} 21 | {{{value}}}, 22 | {{/enumVars}} 23 | {{/allowableValues}} 24 | } 25 | 26 | func (v *{{{classname}}}) UnmarshalJSON(src []byte) error { 27 | var value {{{format}}}{{^format}}{{dataType}}{{/format}} 28 | err := json.Unmarshal(src, &value) 29 | if err != nil { 30 | return err 31 | } 32 | enumTypeValue := {{{classname}}}(value) 33 | for _, existing := range Allowed{{{classname}}}EnumValues { 34 | if existing == enumTypeValue { 35 
| *v = enumTypeValue 36 | return nil 37 | } 38 | } 39 | 40 | // If no match found, assign the raw value and display a warning 41 | *v = {{{classname}}}(value) 42 | fmt.Fprintf(os.Stderr, "Warning: '%v' is not a valid value for {{{classname}}}\n", value) 43 | return nil 44 | } 45 | 46 | // New{{{classname}}}FromValue returns a pointer to a valid {{{classname}}} 47 | // for the value passed as argument, or an error if the value passed is not allowed by the enum 48 | func New{{{classname}}}FromValue(v {{{format}}}{{^format}}{{dataType}}{{/format}}) (*{{{classname}}}, error) { 49 | ev := {{{classname}}}(v) 50 | if ev.IsValid() { 51 | return &ev, nil 52 | } else { 53 | return nil, fmt.Errorf("invalid value '%v' for {{{classname}}}: valid values are %v", v, Allowed{{{classname}}}EnumValues) 54 | } 55 | } 56 | 57 | // IsValid return true if the value is valid for the enum, false otherwise 58 | func (v {{{classname}}}) IsValid() bool { 59 | for _, existing := range Allowed{{{classname}}}EnumValues { 60 | if existing == v { 61 | return true 62 | } 63 | } 64 | return false 65 | } 66 | 67 | // Ptr returns reference to {{{name}}} value 68 | func (v {{{classname}}}) Ptr() *{{{classname}}} { 69 | return &v 70 | } 71 | 72 | type Nullable{{{classname}}} struct { 73 | value *{{{classname}}} 74 | isSet bool 75 | } 76 | 77 | func (v Nullable{{classname}}) Get() *{{classname}} { 78 | return v.value 79 | } 80 | 81 | func (v *Nullable{{classname}}) Set(val *{{classname}}) { 82 | v.value = val 83 | v.isSet = true 84 | } 85 | 86 | func (v Nullable{{classname}}) IsSet() bool { 87 | return v.isSet 88 | } 89 | 90 | func (v *Nullable{{classname}}) Unset() { 91 | v.value = nil 92 | v.isSet = false 93 | } 94 | 95 | func NewNullable{{classname}}(val *{{classname}}) *Nullable{{classname}} { 96 | return &Nullable{{classname}}{value: val, isSet: true} 97 | } 98 | 99 | func (v Nullable{{{classname}}}) MarshalJSON() ([]byte, error) { 100 | return json.Marshal(v.value) 101 | } 102 | 103 | func (v 
*Nullable{{{classname}}}) UnmarshalJSON(src []byte) error { 104 | v.isSet = true 105 | return json.Unmarshal(src, &v.value) 106 | } 107 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryTaxonomyTaxon.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | "fmt" 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | type SummaryTaxonomyFlag struct { 11 | rptFlag *cmdflags.TaxonomyReportFlag 12 | childrenFlag *cmdflags.TaxChildrenFlag 13 | parentsFlag *cmdflags.TaxParentsFlag 14 | inputFile *cmdflags.InputFileFlag 15 | rankFlag *cmdflags.TaxonRankFilterFlag 16 | jsonLinesLimitFlag *cmdflags.JsonLinesAndLimitFlag 17 | cmdFlagSet []cmdflags.FlagInterface 18 | } 19 | 20 | func initSummaryTaxonomyFlag() SummaryTaxonomyFlag { 21 | rf := cmdflags.NewTaxonomyReportFlag() 22 | cf := cmdflags.NewTaxChildrenFlag() 23 | lf := cmdflags.NewTaxParentsFlag() 24 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeTaxon, cmdflags.AsIntegerFalse) 25 | raf := cmdflags.NewTaxonRankFilterFlag() 26 | jlf := cmdflags.NewJsonLineAndLimitFlag("taxonomy") 27 | 28 | stf := SummaryTaxonomyFlag{ 29 | rptFlag: rf, 30 | childrenFlag: cf, 31 | parentsFlag: lf, 32 | inputFile: iff, 33 | rankFlag: raf, 34 | jsonLinesLimitFlag: jlf, 35 | cmdFlagSet: []cmdflags.FlagInterface{rf, cf, lf, iff, raf, jlf}, 36 | } 37 | 38 | return stf 39 | } 40 | 41 | func createSummaryTaxonomyTaxonCmd() *cobra.Command { 42 | stf := initSummaryTaxonomyFlag() 43 | inputDescription := "taxon (NCBI Taxonomy ID, scientific or common name at any tax rank)" 44 | 45 | cmd := &cobra.Command{ 46 | Use: "taxon", 47 | Short: fmt.Sprintf("Print a data report containing taxonomy metadata by %s", inputDescription), 48 | Long: fmt.Sprintf(` 49 | Print a data report containing taxonomy metadata by %s. 
The data report is returned in JSON format.`, inputDescription), 50 | Example: ` datasets summary taxonomy taxon human 51 | datasets summary taxonomy taxon "mus musculus" "human" --report names 52 | datasets summary taxonomy taxon "human" --children 53 | datasets summary taxonomy taxon "mus musculus" --rank genus 54 | datasets summary taxonomy taxon "human" --parents --report names 55 | datasets summary taxonomy taxon 10116 --report ids_only`, 56 | 57 | PreRunE: cmdflags.ExecutePreRunEFor(stf.cmdFlagSet), 58 | 59 | RunE: func(cmd *cobra.Command, args []string) (err error) { 60 | var taxIdsMap, taxErr = RetrieveTaxIdsForTaxons(cmd, stf.inputFile.InputIDArgs, true, openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_ALL, "taxonomy") 61 | if taxErr != nil { 62 | return taxErr 63 | } 64 | 65 | taxIds := getMapKeys(taxIdsMap) 66 | 67 | // report error if more than one taxid is used in conjuction with children or parents flags 68 | if stf.childrenFlag.GetChildren() && len(taxIds) > 1 { 69 | return fmt.Errorf("The 'children' flag only supports a single taxon") 70 | } 71 | if stf.parentsFlag.GetParents() && len(taxIds) > 1 { 72 | return fmt.Errorf("The 'parents' flag only supports a single taxon") 73 | } 74 | if len(stf.rankFlag.GetRanks()) > 0 && len(taxIds) > 1 { 75 | return fmt.Errorf("The 'rank' flag, which automatically searches both children and parents, only supports a single taxon") 76 | } 77 | 78 | return getTaxonomySummary(taxIds, stf, taxIdsMap) 79 | }, 80 | } 81 | 82 | cmdflags.RegisterAllFlags(stf.cmdFlagSet, cmd.PersistentFlags()) 83 | 84 | return cmd 85 | } 86 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryGeneId.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | "sort" 6 | 7 | cmdflags "datasets_cli/v2/datasets/flags" 8 | 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | 
func createJsonLinesPrintDescriptor(reportMode GeneReportMode) func(openapi.V2reportsGeneReportMatch) { 13 | return func(report openapi.V2reportsGeneReportMatch) { 14 | if reportMode == Product { 15 | printResults(report.GetProduct()) 16 | return 17 | } 18 | printResults(report.GetGene()) 19 | } 20 | } 21 | 22 | func GeneIdsAsIntsForInputs(cli *openapi.APIClient, iff *cmdflags.InputFileFlag, otf *cmdflags.OrthologTaxonFilterFlag) (geneInts []int32, err error) { 23 | 24 | if !otf.IsOrthologRequested() { 25 | return iff.AsInt32List, nil 26 | } 27 | return RetrieveOrthologGeneIdsFor(cli, otf, iff.AsInt32List) 28 | } 29 | 30 | func RetrieveOrthologGeneIdsFor(cli *openapi.APIClient, otf *cmdflags.OrthologTaxonFilterFlag, inputGeneInts []int32) (geneInts []int32, err error) { 31 | 32 | geneOrthologApi := GeneOrthologApi{geneApi: cli.GeneAPI} 33 | 34 | geneIdRetriever := NewGeneIdRetriever() 35 | 36 | for _, geneId := range inputGeneInts { 37 | request := openapi.NewV2OrthologRequest() 38 | request.SetGeneId(geneId) 39 | request.SetReturnedContent(openapi.V2ORTHOLOGREQUESTCONTENTTYPE_IDS_ONLY) 40 | if !otf.RequestAllOrthologs() { 41 | request.SetTaxonFilter(otf.OrthologTaxonValue()) 42 | } 43 | 44 | _, err = ProcessAllPagesRequest[ 45 | *openapi.V2OrthologRequest, 46 | openapi.V2reportsGeneDataReportPage, 47 | openapi.V2reportsGeneReportMatch, 48 | *openapi.V2reportsGeneDataReportPage](request, &geneOrthologApi, &geneIdRetriever) 49 | if err != nil { 50 | return nil, err 51 | } 52 | } 53 | return geneIdRetriever.GetGeneIds(), nil 54 | } 55 | 56 | func createSummaryGeneIdCmd(sGeneFlag *SummaryGeneFlag) *cobra.Command { 57 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeGeneId, cmdflags.AsIntegerTrue) 58 | otf := cmdflags.NewOrthologTaxonFilterFlag() 59 | flagSets := []cmdflags.FlagInterface{iff, otf} 60 | 61 | cmd := &cobra.Command{ 62 | Use: "gene-id [flags]", 63 | Short: "Print a data report containing gene metadata by NCBI Gene ID", 64 | Long: ` 65 | Print 
a data report containing gene metadata by NCBI Gene ID. The data report is returned in JSON format.`, 66 | Example: ` datasets summary gene gene-id 672 67 | datasets summary gene gene-id 2597 14433`, 68 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 69 | RunE: func(cmd *cobra.Command, args []string) error { 70 | 71 | // This will go in a future PreRunE someday ... 72 | cli, cliErr := createOAClient() 73 | if cliErr != nil { 74 | return cliErr 75 | } 76 | 77 | geneInts, err := GeneIdsAsIntsForInputs(cli, iff, otf) 78 | if err != nil { 79 | return err 80 | } 81 | if len(geneInts) == 0 { 82 | cmd.PrintErrln("Warning: No gene orthologs found for the specified NCBI GeneID and taxon.") 83 | return nil 84 | } 85 | sort.Slice(geneInts, func(i, j int) bool { return geneInts[i] < geneInts[j] }) 86 | 87 | request := openapi.NewV2GeneDatasetReportsRequest() 88 | request.SetGeneIds(geneInts) 89 | return geneSummaryPagePrinter(sGeneFlag, NewGeneIdRequestIter(request), getGeneApi(cli)) 90 | }, 91 | } 92 | 93 | cmdflags.RegisterAllFlags(flagSets, cmd.PersistentFlags()) 94 | 95 | return cmd 96 | } 97 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadGeneAccession.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | "strings" 7 | 8 | "github.com/spf13/cobra" 9 | ) 10 | 11 | func createDownloadGeneAccession() *cobra.Command { 12 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeGeneAcc, cmdflags.AsIntegerFalse) 13 | svf := cmdflags.NewSkipZipValidationFlag() 14 | otf := cmdflags.NewOrthologTaxonFilterFlag() 15 | wdf := cmdflags.NewWpDownloadFlags() 16 | flagSets := []cmdflags.FlagInterface{iff, otf, svf, wdf} 17 | // Prokaryote and non-prokaryote accessions have different defaults, so do not set them here 18 | dgf := 
newDownloadGeneFlag([]cmdflags.GeneIncludeFlags{}) 19 | allFlags := append(dgf.cmdFlagSet, flagSets...) 20 | 21 | cmd := &cobra.Command{ 22 | Use: "accession ", 23 | Short: "Download a gene data package by RefSeq nucleotide or protein accession", 24 | Example: ` datasets download gene accession NP_000483.3 25 | datasets download gene accession NM_000546.6 NM_000492.4 26 | datasets download gene accession WP_000769114.1`, 27 | Long: ` 28 | Download a gene data package by RefSeq nucleotide or protein accession. Gene data packages include gene, transcript and protein sequences and one or more data reports. Data packages are downloaded as a zip archive. 29 | 30 | The default gene data package for NM, NR, NP, XM, XR, XP and YP accessions: 31 | * rna.fna (transcript sequences) 32 | * protein.faa (protein sequences) 33 | * data_report.jsonl (data report with gene metadata) 34 | * dataset_catalog.json (a list of files and file types included in the data package) 35 | 36 | The default gene data package for WP accessions: 37 | * gene.fna (gene sequences for all genomes on which the WP is annotated) 38 | * protein.faa (protein sequences) 39 | * data_report.jsonl (data report with gene metadata) 40 | * dataset_catalog.json (a list of files and file types included in the data package) 41 | * annotation_report.jsonl (annotated locations of WP proteins on bacterial genomes)`, 42 | PreRunE: cmdflags.ExecutePreRunEFor(allFlags), 43 | RunE: func(cmd *cobra.Command, args []string) error { 44 | if isProkaryoteAcc(iff.AsStringList()) { 45 | 46 | taxIdFilter := "" 47 | taxError := error(nil) 48 | if strings.Trim(wdf.TaxonFilter(), " ") != "" { 49 | taxIdFilter, taxError = RetrieveTaxIdForTaxon( 50 | wdf.TaxonFilter(), 51 | true, 52 | openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_ALL, 53 | "download gene by accession", 54 | ) 55 | if taxError != nil { 56 | return taxError 57 | } 58 | } 59 | downloader, err := NewGeneProkDownloader(dgf.previewFlag.IsPreview(), taxIdFilter, 
// isProkaryoteAcc reports whether every accession in acc_list starts with
// "WP_", compared case-insensitively. An empty or nil list yields true because
// no accession contradicts the condition.
func isProkaryoteAcc(acc_list []string) bool {
	const wpPrefix = "WP_"

	allProkaryote := true
	// The loop condition stops at the first accession without the prefix.
	for i := 0; allProkaryote && i < len(acc_list); i++ {
		allProkaryote = strings.HasPrefix(strings.ToUpper(acc_list[i]), wpPrefix)
	}
	return allProkaryote
}
If shell completion is not already enabled in your environment, enable it by 36 | executing the following once: 37 | ` + "```" + ` 38 | echo "autoload -U compinit; compinit" >> ~/.zshrc 39 | ` + "```" + ` 40 | 41 | 2. To load completions for each session, execute once: 42 | ` + "```" + ` 43 | ` + rootCmdName + ` completion zsh > "${fpath[1]}/_ncbi_` + rootCmdName + `" 44 | ` + "```" + ` 45 | 46 | You will need to start a new shell for this setup to take effect. 47 | ` 48 | 49 | case "fish": 50 | return `To load completions for each session, execute once: 51 | ` + "```" + ` 52 | ` + rootCmdName + ` completion fish > ~/.config/fish/completions/ncbi-` + rootCmdName + `.fish 53 | ` + "```" + ` 54 | ` 55 | case "powershell": 56 | return `Generate powershell autocompletion script. 57 | 58 | See the [golang cobra documentation](https://github.com/spf13/cobra/blob/master/powershell_completions.md) for details. 59 | ` 60 | } 61 | return "" 62 | } 63 | 64 | func shellCompletion(shell string, cmd *cobra.Command) error { 65 | switch shell { 66 | case "bash": 67 | return cmd.Root().GenBashCompletion(os.Stdout) 68 | case "zsh": 69 | return cmd.Root().GenZshCompletion(os.Stdout) 70 | case "fish": 71 | return cmd.Root().GenFishCompletion(os.Stdout, true) 72 | case "powershell": 73 | return cmd.Root().GenPowerShellCompletion(os.Stdout) 74 | } 75 | return errors.New("Internal error: unsupported shell") 76 | } 77 | 78 | func generateAutocompleteCmd(rootCmdName string, shell string) *cobra.Command { 79 | cmd := cobra.Command{ 80 | Use: shell, 81 | Short: fmt.Sprintf("Generate %s autocompletion script", shell), 82 | Long: getLongAutocompleteText(rootCmdName, shell), 83 | RunE: func(cmd *cobra.Command, args []string) error { 84 | return shellCompletion(shell, cmd) 85 | }, 86 | Args: cobra.MaximumNArgs(0), 87 | } 88 | return &cmd 89 | } 90 | 91 | func addAutocompletionShellCommands(rootCmdName string, cmd *cobra.Command) { 92 | autocompletes := []string{ 93 | "bash", 94 | "zsh", 95 | 
"fish", 96 | "powershell", 97 | } 98 | for _, shell := range autocompletes { 99 | cmd.AddCommand(generateAutocompleteCmd(rootCmdName, shell)) 100 | } 101 | } 102 | 103 | func NewAutocompleteCmd(rootCmd *cobra.Command) *cobra.Command { 104 | cmd := &cobra.Command{ 105 | Use: "completion", 106 | Short: "Generate autocompletion scripts", 107 | Long: `This sub-command generates files needed to enable auto-complete for several popular command-line interpreters. 108 | 109 | When enabled, the command-line interpreter can automatically fill in subcommands and options. 110 | 111 | A good introduction of command-line completion is found on [wikipedia](https://en.wikipedia.org/wiki/Command-line_completion).`, 112 | Args: cobra.MaximumNArgs(0), 113 | } 114 | addAutocompletionShellCommands(rootCmd.Name(), cmd) 115 | return cmd 116 | 117 | } 118 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/DownloadTaxonomyTaxon.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | "fmt" 5 | 6 | openapi "datasets/openapi/v2" 7 | cmdflags "datasets_cli/v2/datasets/flags" 8 | 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | type DownloadTaxonomyFlag struct { 13 | rptFlag *cmdflags.TaxonomyDownloadIncludeFlag 14 | childrenFlag *cmdflags.TaxChildrenFlag 15 | parentsFlag *cmdflags.TaxParentsFlag 16 | inputFile *cmdflags.InputFileFlag 17 | skipValidation *cmdflags.SkipZipValidation 18 | rankFlag *cmdflags.TaxonRankFilterFlag 19 | 20 | cmdFlagSet []cmdflags.FlagInterface 21 | } 22 | 23 | func initDownloadTaxonomyFlag() DownloadTaxonomyFlag { 24 | rf := cmdflags.NewTaxonomyDownloadIncludeFlag() 25 | cf := cmdflags.NewTaxChildrenFlag() 26 | lf := cmdflags.NewTaxParentsFlag() 27 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeTaxon, cmdflags.AsIntegerFalse) 28 | sv := cmdflags.NewSkipZipValidationFlag() 29 | raf := 
cmdflags.NewTaxonRankFilterFlag() 30 | 31 | stf := DownloadTaxonomyFlag{ 32 | rptFlag: rf, 33 | childrenFlag: cf, 34 | parentsFlag: lf, 35 | inputFile: iff, 36 | skipValidation: sv, 37 | rankFlag: raf, 38 | cmdFlagSet: []cmdflags.FlagInterface{rf, cf, lf, iff, sv, raf}, 39 | } 40 | 41 | return stf 42 | } 43 | 44 | func createDownloadTaxonomyTaxonCmd() *cobra.Command { 45 | downloadTaxonomyFlags := initDownloadTaxonomyFlag() 46 | 47 | inputDescription := "taxonomy taxon (NCBI Taxonomy ID, scientific or common name at any tax rank)" 48 | cmd := &cobra.Command{ 49 | Use: "taxon ", 50 | Short: fmt.Sprintf("Download a taxonomy package by %s", inputDescription), 51 | Long: fmt.Sprintf(` 52 | Download a taxonomy data package by %s. 53 | 54 | The default taxonomy data package includes the following files: 55 | * taxonomy_report.jsonl 56 | * taxonomy_summary.tsv 57 | * dataset_catalog.json (a list of files and file types included in the data package) 58 | A taxonomy names data report can also be added to the package 59 | * names_report.jsonl`, inputDescription), 60 | Example: ` datasets download taxonomy taxon "bos taurus" 61 | datasets download taxonomy taxon human,"drosophila melanogaster" --include names 62 | datasets download taxonomy taxon 10116 --parents --children`, 63 | 64 | PreRunE: cmdflags.ExecutePreRunEFor(downloadTaxonomyFlags.cmdFlagSet), 65 | 66 | RunE: func(cmd *cobra.Command, args []string) error { 67 | 68 | // Convert all taxons to valid taxids (and save as a set) before calling download 69 | var taxIdsMap, taxErr = RetrieveTaxIdsForTaxons( 70 | cmd, 71 | downloadTaxonomyFlags.inputFile.InputIDArgs, 72 | true, 73 | openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_ALL, 74 | "taxonomy", 75 | ) 76 | if taxErr != nil { 77 | return taxErr 78 | } 79 | 80 | taxIds, err := strToInt32ListErr(getMapKeys(taxIdsMap)) 81 | if err != nil { 82 | return err 83 | } 84 | 85 | // report error if more than one taxid is used in conjuction with children flag. 
86 | if downloadTaxonomyFlags.childrenFlag.GetChildren() && len(taxIds) > 1 { 87 | return fmt.Errorf("The 'children' flag only supports a single taxon") 88 | } 89 | if downloadTaxonomyFlags.parentsFlag.GetParents() && len(taxIds) > 1 { 90 | return fmt.Errorf("The 'parents' flag only supports a single taxon") 91 | } 92 | if len(downloadTaxonomyFlags.rankFlag.GetRanks()) > 0 && len(taxIds) > 1 { 93 | return fmt.Errorf("The 'rank' flag, which automatically searches both children and parents, only supports a single taxon") 94 | } 95 | 96 | downloader, warning, err := NewTaxonomyDownloader(taxIds, downloadTaxonomyFlags) 97 | if warning != "" { 98 | cmd.PrintErrln(warning) 99 | } 100 | if err != nil { 101 | return err 102 | } 103 | return downloader.Download(downloadTaxonomyFlags.skipValidation.IsSkipValidation()) 104 | }, 105 | } 106 | 107 | cmdflags.RegisterAllFlags(downloadTaxonomyFlags.cmdFlagSet, cmd.PersistentFlags()) 108 | 109 | return cmd 110 | } 111 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryGeneSymbol.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | func addSymbolAndTaxonTo(request *openapi.V2GeneDatasetReportsRequest, symbols []string, taxon string) { 11 | symbols_taxon := openapi.NewV2GeneDatasetReportsRequestSymbolsForTaxon() 12 | symbols_taxon.SetSymbols(symbols) 13 | symbols_taxon.SetTaxon(taxon) 14 | request.SetSymbolsForTaxon(*symbols_taxon) 15 | } 16 | 17 | func RetrieveGeneIdsForSymbolsAndTaxon(cli *openapi.APIClient, symbols []string, taxon string) (geneInts []int32, err error) { 18 | 19 | api := GeneDatasetApi{geneApi: cli.GeneAPI} 20 | geneIdRetriever := NewGeneIdRetriever() 21 | 22 | request := openapi.NewV2GeneDatasetReportsRequest() 23 | 
addSymbolAndTaxonTo(request, symbols, taxon) 24 | request.SetReturnedContent(openapi.V2GENEDATASETREPORTSREQUESTCONTENTTYPE_IDS_ONLY) 25 | _, err = ProcessAllPages[ 26 | *openapi.V2GeneDatasetReportsRequest, 27 | openapi.V2reportsGeneDataReportPage, 28 | openapi.V2reportsGeneReportMatch, 29 | *openapi.V2reportsGeneDataReportPage](NewGeneSymbolRequestIter(request), &api, &geneIdRetriever) 30 | if err != nil { 31 | return nil, err 32 | } 33 | 34 | return geneIdRetriever.GetGeneIds(), nil 35 | } 36 | 37 | func GeneDatasetReportRequestForSymbolAndTaxon(cli *openapi.APIClient, iff *cmdflags.InputFileFlag, otf *cmdflags.OrthologTaxonFilterFlag, taxId string) (*openapi.V2GeneDatasetReportsRequest, error) { 38 | request := openapi.NewV2GeneDatasetReportsRequest() 39 | 40 | geneSymbols := iff.AsStringList() 41 | if !otf.IsOrthologRequested() { 42 | addSymbolAndTaxonTo(request, geneSymbols, taxId) 43 | return request, nil 44 | } 45 | // Compute the gene-id list via ortholog 46 | // For each symbol, -> gene_id 47 | // then for each gene_id -> ortholog gene_ids 48 | inputGeneInts, errSymbol := RetrieveGeneIdsForSymbolsAndTaxon(cli, geneSymbols, taxId) 49 | 50 | if errSymbol != nil { 51 | return nil, errSymbol 52 | } 53 | 54 | geneInts, err := RetrieveOrthologGeneIdsFor(cli, otf, inputGeneInts) 55 | if err != nil { 56 | return nil, err 57 | } 58 | if len(geneInts) == 0 { 59 | return nil, nil 60 | } 61 | 62 | request.SetGeneIds(geneInts) 63 | return request, nil 64 | } 65 | 66 | func createSummaryGeneSymbolCmd(sGeneFlag *SummaryGeneFlag) *cobra.Command { 67 | iff := cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeGeneSymbol, cmdflags.AsIntegerFalse) 68 | otf := cmdflags.NewOrthologTaxonFilterFlag() 69 | gtff := cmdflags.NewGeneTaxonFilterFlag() 70 | flagSets := []cmdflags.FlagInterface{iff, otf, gtff} 71 | 72 | cmd := &cobra.Command{ 73 | Use: "symbol [flags]", 74 | Short: "Print a data report containing gene metadata by gene symbol", 75 | Long: ` 76 | Print a data report 
containing gene metadata by gene symbol and taxon (NCBI Taxonomy ID, scientific or common name for a species). If no taxon is specified, data will be returned for human. The data report is returned in JSON format.`, 77 | Example: ` datasets summary gene symbol tp53 78 | datasets summary gene symbol brca1 --taxon "mus musculus"`, 79 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 80 | RunE: func(cmd *cobra.Command, args []string) error { 81 | cli, cliErr := createOAClient() 82 | if cliErr != nil { 83 | return cliErr 84 | } 85 | 86 | taxId, taxError := RetrieveTaxIdForTaxon( 87 | gtff.Taxon, 88 | false, 89 | openapi.V2ORGANISMQUERYREQUESTTAXONRESOURCEFILTER_GENE, 90 | "gene", 91 | ) 92 | if taxError != nil { 93 | return taxError 94 | } 95 | request, err := GeneDatasetReportRequestForSymbolAndTaxon(cli, iff, otf, taxId) 96 | 97 | if err != nil { 98 | return err 99 | } 100 | if request == nil { 101 | return nil 102 | } 103 | 104 | return geneSummaryPagePrinter(sGeneFlag, NewGeneSymbolRequestIter(request), getGeneApi(cli)) 105 | }, 106 | } 107 | 108 | cmdflags.RegisterAllFlags(flagSets, cmd.PersistentFlags()) 109 | return cmd 110 | } 111 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/GeneIncludeFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "fmt" 5 | 6 | openapi "datasets/openapi/v2" 7 | 8 | "github.com/spf13/cobra" 9 | "github.com/spf13/pflag" 10 | "github.com/thediveo/enumflag/v2" 11 | ) 12 | 13 | type GeneIncludeFlags enumflag.Flag 14 | 15 | var ( 16 | GeneDefault []GeneIncludeFlags = []GeneIncludeFlags{Rna, Protein} 17 | GeneFastaDefault []openapi.V2Fasta = []openapi.V2Fasta{openapi.V2FASTA_RNA, openapi.V2FASTA_PROTEIN} 18 | GeneAccessionFastaDefault []openapi.V2Fasta = []openapi.V2Fasta{openapi.V2FASTA_GENE, openapi.V2FASTA_PROTEIN} 19 | ) 20 | 21 | const ( 22 | Gene GeneIncludeFlags = iota 23 | Rna 24 
| Protein 25 | Cds 26 | FivePrimeUtr 27 | ThreePrimeUtr 28 | ProductReport 29 | GeneIncludeNoneFlag 30 | ) 31 | 32 | var GeneIncludeFlagIds = map[GeneIncludeFlags][]string{ 33 | Gene: {"gene"}, 34 | Rna: {"rna"}, 35 | Protein: {"protein"}, 36 | Cds: {"cds"}, 37 | FivePrimeUtr: {"5p-utr"}, 38 | ThreePrimeUtr: {"3p-utr"}, 39 | ProductReport: {"product-report"}, 40 | GeneIncludeNoneFlag: {"None"}, 41 | } 42 | 43 | var GeneIncludeFlagsOpenapi = map[GeneIncludeFlags]openapi.V2Fasta{ 44 | Gene: openapi.V2FASTA_GENE, 45 | Rna: openapi.V2FASTA_RNA, 46 | Protein: openapi.V2FASTA_PROTEIN, 47 | Cds: openapi.V2FASTA_CDS, 48 | FivePrimeUtr: openapi.V2FASTA__5_P_UTR, 49 | ThreePrimeUtr: openapi.V2FASTA__3_P_UTR, 50 | } 51 | 52 | type GeneIncludeFlag struct { 53 | FlagInterface 54 | IncludeAnnotation []GeneIncludeFlags 55 | } 56 | 57 | func NewGeneIncludeFlag(defaultGeneIncludeFlag []GeneIncludeFlags) *GeneIncludeFlag { 58 | gif := &GeneIncludeFlag{ 59 | IncludeAnnotation: defaultGeneIncludeFlag, 60 | } 61 | return gif 62 | } 63 | 64 | func (gif *GeneIncludeFlag) RegisterFlags(flags *pflag.FlagSet) { 65 | flags.Var( 66 | enumflag.NewSlice(&((*gif).IncludeAnnotation), "string(,string)", GeneIncludeFlagIds, enumflag.EnumCaseInsensitive), 67 | "include", 68 | `Specify the data files to include (comma-separated). 
69 | * gene: gene sequence 70 | * rna: transcript 71 | * protein: amino acid sequences 72 | * cds: nucleotide coding sequences 73 | * 5p-utr: 5'-UTR 74 | * 3p-utr: 3'-UTR 75 | * product-report: gene transcript and protein locations and metadata 76 | * none: do not retrieve any sequence files 77 | `) 78 | } 79 | 80 | func (gif *GeneIncludeFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 81 | return nil 82 | } 83 | 84 | func (gif *GeneIncludeFlag) SetProkDownloadFlags(request *openapi.V2ProkaryoteGeneRequest) (err error) { 85 | // set defaults here because prok and non-prok gene-accession download have different defaults 86 | if len(gif.IncludeAnnotation) == 0 { 87 | request.SetIncludeAnnotationType(GeneAccessionFastaDefault) 88 | return 89 | } 90 | 91 | annotations := make([]openapi.V2Fasta, 0) 92 | for _, fl := range gif.IncludeAnnotation { 93 | if fl == GeneIncludeNoneFlag { 94 | continue 95 | } 96 | if fl != Gene && fl != Protein { 97 | return fmt.Errorf("File format %s is not supported for prokaryotic (WP_) downloads", GeneIncludeFlagIds[fl][0]) 98 | } 99 | annotations = append(annotations, GeneIncludeFlagsOpenapi[fl]) 100 | } 101 | request.SetIncludeAnnotationType(annotations) 102 | return 103 | } 104 | 105 | func (gif *GeneIncludeFlag) SetGeneDownloadFlags(request *openapi.V2GeneDatasetRequest) { 106 | // set defaults here because prok and non-prok gene-accession download have different defaults 107 | if len(gif.IncludeAnnotation) == 0 { 108 | request.SetIncludeAnnotationType(GeneFastaDefault) 109 | return 110 | } 111 | 112 | annotations := make([]openapi.V2Fasta, 0) 113 | for _, fl := range gif.IncludeAnnotation { 114 | if fl == GeneIncludeNoneFlag { 115 | continue 116 | } 117 | if fl == ProductReport { 118 | request.SetAuxReport(append(request.GetAuxReport(), openapi.V2GENEDATASETREQUESTGENEDATASETREPORTTYPE_PRODUCT_REPORT)) 119 | continue 120 | } 121 | annotations = append(annotations, GeneIncludeFlagsOpenapi[fl]) 122 | } 123 | 
// RemoveDuplicateStrings returns the distinct strings of strSlice in order of
// first occurrence. The input slice is not modified, and the result is always
// non-nil, even for nil or empty input.
func RemoveDuplicateStrings(strSlice []string) []string {
	// Both containers are sized for the worst case (no duplicates) so neither
	// has to grow while the input is scanned.
	seen := make(map[string]struct{}, len(strSlice))
	unique := make([]string, 0, len(strSlice))

	for _, s := range strSlice {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
// stripVersion returns a copy of accessions in which everything from the first
// '.' onward has been removed from each entry; entries without a '.' are
// copied unchanged. Order and length are preserved. It returns nil for empty
// input.
func stripVersion(accessions []string) []string {
	if len(accessions) == 0 {
		return nil
	}

	stripped := make([]string, 0, len(accessions))
	for _, acc := range accessions {
		// Cut yields the whole string as "before" when there is no '.'.
		base, _, _ := strings.Cut(acc, ".")
		stripped = append(stripped, base)
	}
	return stripped
}
stripVersion(new_accs) 110 | } else { 111 | // If accessions have a .version and AssemblyVersion==Default, use All_assemblies 112 | // to return the specifically requested version 113 | *avf.AssemblyVersion = All_assemblies 114 | } 115 | } else { 116 | // If accessions do not have '.version' and user did not request 'All_assemblies', return current version 117 | // (regardless of whether user picked 'Current' or 'Default') 118 | *avf.AssemblyVersion = Current 119 | } 120 | } 121 | 122 | return RemoveDuplicateStrings(new_accs) 123 | } 124 | -------------------------------------------------------------------------------- /client/go/util/UsageConfig.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | 7 | "github.com/spf13/cobra" 8 | ) 9 | 10 | type UsageSection struct { 11 | Section string 12 | SectionText string 13 | Commands []string 14 | } 15 | 16 | type UsageSections []*UsageSection 17 | type UsageCmdMap map[string]*UsageSections 18 | 19 | type FlagAdjustment struct { 20 | RegexpMatch string 21 | Content func(reportName, flagContent string) []string 22 | } 23 | 24 | type UsageConfig struct { 25 | Sections UsageCmdMap 26 | AdjustFlags []*FlagAdjustment 27 | } 28 | 29 | var ( 30 | usageConfig = UsageConfig{Sections: UsageCmdMap{}} 31 | ) 32 | 33 | func getDefaultSections(cmd *cobra.Command) *UsageSections { 34 | defaultSection := &UsageSection{ 35 | SectionText: "Available Commands", 36 | } 37 | for _, c := range cmd.Commands() { 38 | defaultSection.Commands = append(defaultSection.Commands, c.Name()) 39 | } 40 | return &UsageSections{defaultSection} 41 | } 42 | 43 | func AddUsageSections(group string, cmdmap *UsageSections) { 44 | usageConfig.Sections[group] = cmdmap 45 | } 46 | 47 | func AddFlagAdjustment(adjust *FlagAdjustment) { 48 | usageConfig.AdjustFlags = append(usageConfig.AdjustFlags, adjust) 49 | } 50 | 51 | func getUsageSections(cmd *cobra.Command) 
*UsageSections { 52 | if s, ok := usageConfig.Sections[cmd.Name()]; ok { 53 | return s 54 | } else { 55 | return getDefaultSections(cmd) 56 | } 57 | } 58 | 59 | func getUsageCommands(commands []*cobra.Command, section *UsageSection) (retval []*cobra.Command) { 60 | for _, sectionCommand := range section.Commands { 61 | for _, c := range commands { 62 | if sectionCommand == c.Name() { 63 | retval = append(retval, c) 64 | } 65 | } 66 | } 67 | return 68 | } 69 | 70 | func expandFields(cmd *cobra.Command, flags string) (retval string) { 71 | if len(usageConfig.AdjustFlags) == 0 { 72 | retval = flags 73 | return 74 | } 75 | for _, aflag := range usageConfig.AdjustFlags { 76 | re := regexp.MustCompile(aflag.RegexpMatch) 77 | lines := strings.Split(flags, "\n") 78 | retval = "" 79 | for _, line := range lines { 80 | retval += line + "\n" 81 | if locs := re.FindStringIndex(line); len(locs) == 2 { 82 | prefix := strings.Repeat(" ", locs[1]+4) + "- " 83 | content := aflag.Content(cmd.Use, flags) 84 | for _, cline := range content { 85 | retval += prefix + cline + "\n" 86 | } 87 | } 88 | } 89 | } 90 | return 91 | } 92 | 93 | func getUsageTemplate() string { 94 | return ` 95 | {{- "Usage" }} 96 | {{- if .Runnable}} 97 | {{- "\n "}}{{.UseLine}} 98 | {{- end}} 99 | {{- if .HasAvailableSubCommands}} 100 | {{- "\n "}}{{.CommandPath}} [command] 101 | {{- end}} 102 | {{- if gt (len .Aliases) 0}} 103 | {{- "\n\n"}}Aliases 104 | {{- "\n "}}{{.NameAndAliases}} 105 | {{- end}} 106 | {{- if .HasExample -}} 107 | {{- "\n\n"}}Examples 108 | {{.Example}} 109 | {{- end}} 110 | {{- $cmd := .}} 111 | {{- if .HasAvailableSubCommands}} 112 | {{- range (usageSections $cmd) -}} 113 | {{- $section := .}} 114 | {{- "\n\n"}}{{$section.SectionText}} 115 | {{- range (usageCommands $cmd.Commands $section) -}} 116 | {{- if (or .IsAvailableCommand (eq .Name "help")) -}} 117 | {{- "\n " }}{{rpad .Name .NamePadding}} {{.Short}} 118 | {{- end}} 119 | {{- end}} 120 | {{- end}} 121 | {{- end}} 122 | {{- if 
// InitRootCommand sets the usage template of cmd to the template text returned
// by getUsageTemplate.
func InitRootCommand(cmd *cobra.Command) {
	cmd.SetUsageTemplate(getUsageTemplate())
}
openapi.V2VIRALSEQUENCETYPE_GENOME, 32 | CDS: openapi.V2VIRALSEQUENCETYPE_CDS, 33 | PROTEIN: openapi.V2VIRALSEQUENCETYPE_PROTEIN, 34 | NONE: openapi.V2VIRALSEQUENCETYPE_NONE, 35 | } 36 | 37 | var VirusReportSelectionOpenapi = map[VirusDownloadInclude]openapi.V2VirusDatasetReportType{ 38 | VIRUS_ANNOTATION: openapi.V2VIRUSDATASETREPORTTYPE_ANNOTATION, 39 | VIRUS_BIOSAMPLE: openapi.V2VIRUSDATASETREPORTTYPE_BIOSAMPLE_REPORT, 40 | } 41 | 42 | type VirusDownloadIncludeFlag struct { 43 | FlagInterface 44 | argsVirusSequence []VirusDownloadInclude 45 | incSeqLongDesc string 46 | } 47 | 48 | var GenomeDefault []VirusDownloadInclude = []VirusDownloadInclude{GENOME} 49 | 50 | var ProteinDefault []VirusDownloadInclude = []VirusDownloadInclude{PROTEIN} 51 | 52 | func NewVirusDownloadIncludeFlag(longDesc string) *VirusDownloadIncludeFlag { 53 | var defaultSeq []VirusDownloadInclude = GenomeDefault 54 | if longDesc == IncludeSequenceLongDescProtein { 55 | defaultSeq = ProteinDefault 56 | } 57 | vsf := &VirusDownloadIncludeFlag{ 58 | argsVirusSequence: defaultSeq, 59 | incSeqLongDesc: longDesc, 60 | } 61 | return vsf 62 | } 63 | 64 | const IncludeSequenceLongDescGenome string = `Specify virus genome sequence types to download 65 | * genome: genomic sequences 66 | * cds: nucleotide coding sequences 67 | * protein: amino acid sequences 68 | * annotation: annotation report 69 | * biosample: biosample report 70 | * none: no sequence data, only primary data report 71 | ` 72 | 73 | const IncludeSequenceLongDescProtein string = `Specify virus genome sequence types to download 74 | * cds: nucleotide coding sequences 75 | * protein: amino acid sequences 76 | * annotation: annotation report 77 | * biosample: biosample report 78 | * none: no sequence data, only primary data report 79 | ` 80 | 81 | func (vsf *VirusDownloadIncludeFlag) RegisterFlags(flags *pflag.FlagSet) { 82 | flags.Var( 83 | enumflag.NewSlice(&vsf.argsVirusSequence, "string(,string)", VirusDownloadIncludeIds, 
enumflag.EnumCaseInsensitive), 84 | "include", 85 | vsf.incSeqLongDesc, 86 | ) 87 | } 88 | 89 | func (vsf *VirusDownloadIncludeFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 90 | 91 | return nil 92 | 93 | } 94 | 95 | func (vsf *VirusDownloadIncludeFlag) PrepareDownloadRequest(request *openapi.V2VirusDatasetRequest) { 96 | var virusSeqOptions []openapi.V2ViralSequenceType 97 | var virusReportOptions []openapi.V2VirusDatasetReportType 98 | 99 | for _, v := range vsf.argsVirusSequence { 100 | if v == VIRUS_ANNOTATION || v == VIRUS_BIOSAMPLE { 101 | virusReportOptions = append(virusReportOptions, VirusReportSelectionOpenapi[v]) 102 | } else { 103 | virusSeqOptions = append(virusSeqOptions, VirusDownloadIncludeOpenapi[v]) 104 | } 105 | } 106 | request.SetIncludeSequence(virusSeqOptions) 107 | request.SetAuxReport(virusReportOptions) 108 | } 109 | 110 | func (vsf *VirusDownloadIncludeFlag) PrepareSarsProteinDownloadRequest(request *openapi.V2Sars2ProteinDatasetRequest) { 111 | var virusSeqOptions []openapi.V2ViralSequenceType 112 | var virusReportOptions []openapi.V2VirusDatasetReportType 113 | 114 | for _, v := range vsf.argsVirusSequence { 115 | if v == VIRUS_ANNOTATION || v == VIRUS_BIOSAMPLE { 116 | virusReportOptions = append(virusReportOptions, VirusReportSelectionOpenapi[v]) 117 | } else { 118 | virusSeqOptions = append(virusSeqOptions, VirusDownloadIncludeOpenapi[v]) 119 | } 120 | } 121 | request.SetIncludeSequence(virusSeqOptions) 122 | request.SetAuxReport(virusReportOptions) 123 | } 124 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/ReleasedDateFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | "github.com/araddon/dateparse" 8 | "github.com/spf13/cobra" 9 | "github.com/spf13/pflag" 10 | ) 11 | 12 | const dateFormat = "YYYY-MM-DD" 13 | const dateDescr = " a 
specified date (free format, ISO 8601 " + dateFormat + " recommended)" 14 | 15 | func CheckDateTime(dt string, flag string) (time.Time, string, error) { 16 | if dt == "" { 17 | return time.Time{}, "", nil 18 | } 19 | t, dateErr := dateparse.ParseAny(dt) 20 | if dateErr != nil { 21 | return time.Time{}, "", fmt.Errorf("--%s requires a valid date, ISO 8601 standard "+dateFormat+" is recommended", flag) 22 | } 23 | // Golang format accepts this special 'layout', equivalent to MM/DD/YYYY 24 | return t, t.Format("01/02/2006"), nil 25 | } 26 | 27 | const ( 28 | VirusProteinReleasedAfterDesc string = "Limit to coronavirus genomes released on or after" + dateDescr 29 | VirusProteinUpdatedAfterDesc string = "Limit to coronavirus genomes updated on or after" + dateDescr 30 | VirusGenomeUpdatedAfterDesc string = "Limit to genomes updated on or after" + dateDescr 31 | GenomeReleasedBeforeDesc string = "Limit to genomes released on or before" + dateDescr 32 | GenomeReleasedAfterDesc string = "Limit to genomes released on or after" + dateDescr 33 | ) 34 | 35 | // released-before 36 | type ReleasedBeforeFlag struct { 37 | FlagInterface 38 | beforeFlagDesc string 39 | releasedBeforeFlag string 40 | releasedBeforeSanitized string 41 | releasedBeforeDateSanitized time.Time 42 | } 43 | 44 | func NewReleasedBeforeFlag(beforeDesc string) *ReleasedBeforeFlag { 45 | rbf := &ReleasedBeforeFlag{ 46 | beforeFlagDesc: beforeDesc, 47 | } 48 | return rbf 49 | } 50 | 51 | func (rbf *ReleasedBeforeFlag) ReleasedBeforeFlagDate() string { 52 | return rbf.releasedBeforeSanitized 53 | } 54 | 55 | func (uaf *ReleasedBeforeFlag) ReleasedBeforeFlagDateAsTime() time.Time { 56 | return uaf.releasedBeforeDateSanitized 57 | } 58 | 59 | func (rbf *ReleasedBeforeFlag) RegisterFlags(flags *pflag.FlagSet) { 60 | flags.StringVar(&rbf.releasedBeforeFlag, "released-before", "", rbf.beforeFlagDesc) 61 | } 62 | 63 | func (rbf *ReleasedBeforeFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 64 | 
rbf.releasedBeforeDateSanitized, rbf.releasedBeforeSanitized, err = CheckDateTime(rbf.releasedBeforeFlag, "released-before") 65 | return 66 | } 67 | 68 | // released-after 69 | type ReleasedAfterFlag struct { 70 | FlagInterface 71 | afterFlagDesc string 72 | releasedAfterFlag string 73 | releasedAfterSanitized string 74 | releasedAfterDateSanitized time.Time 75 | } 76 | 77 | func NewReleasedAfterFlag(afterDesc string) *ReleasedAfterFlag { 78 | raf := &ReleasedAfterFlag{ 79 | afterFlagDesc: afterDesc, 80 | } 81 | return raf 82 | } 83 | 84 | func (raf *ReleasedAfterFlag) ReleasedAfterFlagDate() string { 85 | return raf.releasedAfterSanitized 86 | } 87 | 88 | func (uaf *ReleasedAfterFlag) ReleasedAfterFlagDateAsTime() time.Time { 89 | return uaf.releasedAfterDateSanitized 90 | } 91 | 92 | func (raf *ReleasedAfterFlag) RegisterFlags(flags *pflag.FlagSet) { 93 | flags.StringVar(&raf.releasedAfterFlag, "released-after", "", raf.afterFlagDesc) 94 | } 95 | 96 | func (raf *ReleasedAfterFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 97 | raf.releasedAfterDateSanitized, raf.releasedAfterSanitized, err = CheckDateTime(raf.releasedAfterFlag, "released-after") 98 | return 99 | } 100 | 101 | // updated-after 102 | type UpdatedAfterFlag struct { 103 | FlagInterface 104 | updatedAfterFlagDesc string 105 | updatedAfterFlag string 106 | updatedAfterSanitized string 107 | updatedAfterDateSanitized time.Time 108 | } 109 | 110 | func NewUpdatedAfterFlag(updatedAfterDesc string) *UpdatedAfterFlag { 111 | uaf := &UpdatedAfterFlag{ 112 | updatedAfterFlagDesc: updatedAfterDesc, 113 | } 114 | return uaf 115 | } 116 | 117 | func (uaf *UpdatedAfterFlag) UpdatedAfterFlagDate() string { 118 | return uaf.updatedAfterSanitized 119 | } 120 | 121 | func (uaf *UpdatedAfterFlag) UpdatedAfterFlagDateAsTime() time.Time { 122 | return uaf.updatedAfterDateSanitized 123 | } 124 | 125 | func (uaf *UpdatedAfterFlag) RegisterFlags(flags *pflag.FlagSet) { 126 | 
flags.StringVar(&uaf.updatedAfterFlag, "updated-after", "", uaf.updatedAfterFlagDesc) 127 | } 128 | 129 | func (uaf *UpdatedAfterFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 130 | uaf.updatedAfterDateSanitized, uaf.updatedAfterSanitized, err = CheckDateTime(uaf.updatedAfterFlag, "updated-after") 131 | return 132 | } 133 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/flags/InputFileFlag.go: -------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "os" 10 | "strconv" 11 | "strings" 12 | 13 | "github.com/spf13/cobra" 14 | "github.com/spf13/pflag" 15 | ) 16 | 17 | const ( 18 | AsIntegerTrue bool = true 19 | AsIntegerFalse bool = false 20 | ) 21 | 22 | const ( 23 | InputFileListTypeVirusAcc string = "nucleotide accessions" 24 | InputFileListTypeGenomeAcc string = "NCBI Assembly or BioProject accessions" 25 | InputFileListTypeGeneAcc string = "NCBI Gene Accessions" 26 | InputFileListTypeGeneId string = "NCBI Gene IDs" 27 | InputFileListTypeGeneSymbol string = "NCBI Gene Symbols" 28 | InputFileListTypeLocusTag string = "NCBI Gene Locus Tags" 29 | InputFileListTypeTaxon string = "NCBI taxonomy identifiers (taxid or current scientific name)" 30 | ) 31 | 32 | type InputFileFlag struct { 33 | InputIDArgs []string 34 | AsInt32List []int32 35 | inputType string 36 | inputFile *string 37 | asInt bool 38 | limit int 39 | } 40 | 41 | // Option defines a type for functional options. 
42 | type Option func(*InputFileFlag) 43 | 44 | func NewInputFileFlag(inputType string, asInt bool, options ...Option) *InputFileFlag { 45 | iff := &InputFileFlag{ 46 | inputType: inputType, 47 | inputFile: new(string), 48 | asInt: asInt, 49 | limit: 0, 50 | } 51 | for _, opt := range options { 52 | opt(iff) 53 | } 54 | 55 | return iff 56 | } 57 | 58 | func WithLimit(limit int) Option { 59 | return func(iff *InputFileFlag) { 60 | iff.limit = limit 61 | } 62 | } 63 | 64 | func (iff *InputFileFlag) RegisterFlags(flags *pflag.FlagSet) { 65 | if iff.limit > 0 { 66 | flags.StringVar(iff.inputFile, "inputfile", *iff.inputFile, fmt.Sprintf("Read a list of %s from a file to use as input (max: %d)", iff.inputType, iff.limit)) 67 | } else { 68 | flags.StringVar(iff.inputFile, "inputfile", *iff.inputFile, fmt.Sprintf("Read a list of %s from a file to use as input", iff.inputType)) 69 | } 70 | } 71 | 72 | func (iff *InputFileFlag) PreRunE(cmd *cobra.Command, args []string) (err error) { 73 | iff.InputIDArgs, err = getArgsFromListOrFile(args, *iff.inputFile, cmd, iff.inputType) 74 | if err != nil { 75 | return err 76 | } 77 | if iff.asInt { 78 | iff.AsInt32List, err = strToInt32ListErr(iff.InputIDArgs) 79 | } 80 | if iff.limit > 0 && len(iff.InputIDArgs) > iff.limit { 81 | return fmt.Errorf("too many %s provided, limit is %d", iff.inputType, iff.limit) 82 | } 83 | return err 84 | } 85 | 86 | func (iff *InputFileFlag) AsStringList() []string { 87 | return iff.InputIDArgs 88 | } 89 | 90 | // Helper functions 91 | func strToInt32ListErr(strs []string) (geneInts []int32, err error) { 92 | hasError := false 93 | lastBadInput := "" 94 | for _, idFullStr := range strs { 95 | for _, idStr := range strings.Split(idFullStr, ",") { 96 | geneInt64, e := strconv.ParseInt(idStr, 10, 32) 97 | geneInt32 := int32(geneInt64) 98 | if e != nil { 99 | hasError = true 100 | lastBadInput = idStr 101 | } else { 102 | geneInts = append(geneInts, geneInt32) 103 | } 104 | } 105 | } 106 | if hasError { 
// readLines reads fp line by line and returns every non-blank line with
// leading and trailing whitespace removed. It returns nil when there are no
// such lines.
//
// Lines of up to 1 MiB are accepted. Previously the scanner was capped at
// 64 KiB per line and its error was never inspected, so a single over-long
// line silently discarded that line and everything after it. A scan failure
// (read error or over-long line) is now reported on stderr; the lines read
// before the failure are still returned, because the signature has no error
// result.
func readLines(fp io.Reader) []string {
	const maxLineBytes = 1024 * 1024

	var lines []string
	// Scanner buffers internally; no extra bufio.Reader wrapper is needed.
	scanner := bufio.NewScanner(fp)
	scanner.Buffer(make([]byte, 0, 1024), maxLineBytes)
	for scanner.Scan() {
		if line := strings.TrimSpace(scanner.Text()); line != "" {
			lines = append(lines, line)
		}
	}
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "Warning: stopped reading input after %d line(s): %v\n", len(lines), err)
	}
	return lines
}
-------------------------------------------------------------------------------- 1 | package flags 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | "github.com/spf13/cobra" 6 | "github.com/spf13/pflag" 7 | ) 8 | 9 | type VirusFilterFlags struct { 10 | FlagInterface 11 | annotatedOnly bool 12 | completeOnly bool 13 | geoLocation string 14 | geoUsaState string 15 | lineage string 16 | refseqOnly bool 17 | hostTaxonFlag *HostTaxonFilterFlag 18 | releasedAfterFlag *ReleasedAfterFlag 19 | updatedAfterFlag *UpdatedAfterFlag 20 | } 21 | 22 | func NewVirusFilterFlags(releasedAfterDesc string, updatedAfterDesc string) *VirusFilterFlags { 23 | vff := &VirusFilterFlags{ 24 | hostTaxonFlag: NewHostTaxonFilterFlag(), 25 | releasedAfterFlag: NewReleasedAfterFlag(releasedAfterDesc), 26 | updatedAfterFlag: NewUpdatedAfterFlag(updatedAfterDesc), 27 | } 28 | return vff 29 | } 30 | 31 | func (vff *VirusFilterFlags) RegisterFlags(flags *pflag.FlagSet) { 32 | flags.BoolVar(&vff.annotatedOnly, "annotated", false, "Limit to annotated genomes") 33 | 34 | flags.BoolVar(&vff.completeOnly, "complete-only", false, "Limit to complete sequences, as defined by submitter") 35 | flags.StringVar(&vff.geoLocation, "geo-location", "", "Limit to genomes isolated from a specified geographic location (continent or country)") 36 | flags.StringVar(&vff.geoUsaState, "usa-state", "", "Limit to genomes isolated from a specified U.S. 
state (two-letter abbreviation)") 37 | flags.StringVar(&vff.lineage, "lineage", "", "Limit results by Pango lineage (only SARS-CoV-2)") 38 | flags.BoolVar(&vff.refseqOnly, "refseq", false, "Limit to RefSeq genomes") 39 | 40 | vff.releasedAfterFlag.RegisterFlags(flags) 41 | vff.updatedAfterFlag.RegisterFlags(flags) 42 | vff.hostTaxonFlag.RegisterFlags(flags) 43 | } 44 | 45 | func (vff *VirusFilterFlags) PreRunE(cmd *cobra.Command, args []string) (err error) { 46 | for _, f := range []FlagInterface{vff.releasedAfterFlag, vff.updatedAfterFlag, vff.hostTaxonFlag} { 47 | err := f.PreRunE(cmd, args) 48 | if err != nil { 49 | return err 50 | } 51 | } 52 | return nil 53 | } 54 | 55 | // Use for summary of data_reports 56 | func (vff *VirusFilterFlags) PrepareDownloadRequest(request *openapi.V2VirusDatasetRequest) { 57 | request.SetRefseqOnly(vff.refseqOnly) 58 | request.SetAnnotatedOnly(vff.annotatedOnly) 59 | request.SetPangolinClassification(vff.lineage) 60 | request.SetGeoLocation(vff.geoLocation) 61 | request.SetUsaState(vff.geoUsaState) 62 | request.SetCompleteOnly(vff.completeOnly) 63 | 64 | request.SetReleasedSince(vff.releasedAfterFlag.ReleasedAfterFlagDateAsTime()) 65 | request.SetUpdatedSince(vff.updatedAfterFlag.UpdatedAfterFlagDateAsTime()) 66 | request.SetHost(vff.hostTaxonFlag.HostTaxIdValue()) 67 | } 68 | 69 | func (vff *VirusFilterFlags) PrepareSarsProteinDownloadRequest(request *openapi.V2Sars2ProteinDatasetRequest) { 70 | request.SetRefseqOnly(vff.refseqOnly) 71 | request.SetAnnotatedOnly(vff.annotatedOnly) 72 | request.SetPangolinClassification(vff.lineage) 73 | request.SetGeoLocation(vff.geoLocation) 74 | request.SetUsaState(vff.geoUsaState) 75 | request.SetCompleteOnly(vff.completeOnly) 76 | 77 | request.SetReleasedSince(vff.releasedAfterFlag.ReleasedAfterFlagDateAsTime()) 78 | request.SetUpdatedSince(vff.updatedAfterFlag.UpdatedAfterFlagDateAsTime()) 79 | request.SetHost(vff.hostTaxonFlag.HostTaxIdValue()) 80 | } 81 | 82 | func (vff *VirusFilterFlags) 
PrepareAnnotationReportRequest(accs []string, taxons []string) *openapi.V2VirusAnnotationReportRequest { 83 | filter := *openapi.NewV2VirusAnnotationFilter() 84 | filter.SetRefseqOnly(vff.refseqOnly) 85 | filter.SetAnnotatedOnly(vff.annotatedOnly) 86 | filter.SetPangolinClassification(vff.lineage) 87 | filter.SetGeoLocation(vff.geoLocation) 88 | filter.SetUsaState(vff.geoUsaState) 89 | filter.SetCompleteOnly(vff.completeOnly) 90 | 91 | filter.SetReleasedSince(vff.releasedAfterFlag.ReleasedAfterFlagDateAsTime()) 92 | filter.SetUpdatedSince(vff.updatedAfterFlag.UpdatedAfterFlagDateAsTime()) 93 | filter.SetHost(vff.hostTaxonFlag.HostTaxIdValue()) 94 | 95 | filter.SetAccessions(accs) 96 | filter.SetTaxons(taxons) 97 | 98 | request := openapi.NewV2VirusAnnotationReportRequest() 99 | request.SetFilter(filter) 100 | request.SetPageSize(1000) 101 | return request 102 | } 103 | 104 | func (vff *VirusFilterFlags) PrepareDatasetReportRequest(accs []string, taxons []string) *openapi.V2VirusDataReportRequest { 105 | filter := *openapi.NewV2VirusDatasetFilter() 106 | filter.SetRefseqOnly(vff.refseqOnly) 107 | filter.SetAnnotatedOnly(vff.annotatedOnly) 108 | filter.SetPangolinClassification(vff.lineage) 109 | filter.SetGeoLocation(vff.geoLocation) 110 | filter.SetUsaState(vff.geoUsaState) 111 | filter.SetCompleteOnly(vff.completeOnly) 112 | 113 | filter.SetReleasedSince(vff.releasedAfterFlag.ReleasedAfterFlagDateAsTime()) 114 | filter.SetUpdatedSince(vff.updatedAfterFlag.UpdatedAfterFlagDateAsTime()) 115 | filter.SetHost(vff.hostTaxonFlag.HostTaxIdValue()) 116 | 117 | filter.SetAccessions(accs) 118 | filter.SetTaxons(taxons) 119 | 120 | request := openapi.NewV2VirusDataReportRequest() 121 | request.SetFilter(filter) 122 | request.SetPageSize(1000) 123 | return request 124 | } 125 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/VirusDownloader.go: 
-------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | _nethttp "net/http" 8 | "strings" 9 | 10 | cmdflags "datasets_cli/v2/datasets/flags" 11 | 12 | openapi "datasets/openapi/v2" 13 | ) 14 | 15 | type VirusDownloader struct { 16 | request *openapi.V2VirusDatasetRequest 17 | cli *openapi.APIClient 18 | } 19 | 20 | const VIRAL_TAXID = "10239" 21 | const VIRAL_TAXID_COMP = int32(10239) 22 | const SARS2_TAXID = "2697049" 23 | 24 | func (vd *VirusDownloader) Download(argSkipZipVal bool) (err error) { 25 | 26 | _, resp, err := vd.cli.VirusAPI.VirusGenomeDownloadPost(context.TODO()).V2VirusDatasetRequest(*vd.request).Execute() 27 | if err != nil { 28 | return err 29 | } 30 | if err = handleHTTPResponse(resp, err); err != nil { 31 | return err 32 | } 33 | length := int64(-1) // unknown length 34 | err = downloadDataForFile(resp, err, argDownloadFilename, length, argSkipZipVal) 35 | return 36 | 37 | } 38 | 39 | func (vd *VirusDownloader) checkTaxonWithinScope(taxons []string) bool { 40 | contains := func(s []int32, e int32) bool { 41 | for _, a := range s { 42 | if a == e { 43 | return true 44 | } 45 | } 46 | return false 47 | } 48 | 49 | // Shortcut. 
Also needed in clouldbuild, which does not load taxonomy service 50 | if len(taxons) == 1 && cmdflags.Contains(KNOWN_VIRUS_TAXONS, strings.TrimSpace(taxons[0])) { 51 | return true 52 | } 53 | org, resp, err := vd.cli.TaxonomyAPI.TaxonomyMetadataPost(context.TODO()).V2TaxonomyMetadataRequest( 54 | openapi.V2TaxonomyMetadataRequest{ 55 | Taxons: taxons, 56 | }, 57 | ).Execute() 58 | if err = handleHTTPResponse(resp, err); err == nil { 59 | taxNodes := org.GetTaxonomyNodes() 60 | if len(taxNodes) < 1 { 61 | return false 62 | } 63 | for _, taxNode := range taxNodes { 64 | taxonomy := taxNode.GetTaxonomy() 65 | lineage := taxonomy.GetLineage() 66 | if !(contains(lineage, VIRAL_TAXID_COMP) || taxonomy.GetTaxId() == VIRAL_TAXID_COMP) { 67 | return false 68 | } 69 | } 70 | } 71 | return true 72 | } 73 | 74 | func (vd *VirusDownloader) getVirusAvailability(argIDs []string) (*openapi.V2VirusAvailability, *_nethttp.Response, error) { 75 | virusAvailabilityRequest := openapi.NewV2VirusAvailabilityRequest() 76 | virusAvailabilityRequest.SetAccessions(argIDs) 77 | return vd.cli.VirusAPI.VirusAccessionAvailabilityPost(context.TODO()).V2VirusAvailabilityRequest(*virusAvailabilityRequest).Execute() 78 | } 79 | 80 | type VirusDownloaderIdOption func(vd *VirusDownloader) (string, error) 81 | 82 | func VirusDownloadWithAccession(accessions []string, dvf DownloadVirusFlag) VirusDownloaderIdOption { 83 | return func(vd *VirusDownloader) (string, error) { 84 | var warning = "" 85 | virusAvailability, _, err := vd.getVirusAvailability(accessions) 86 | if err != nil { 87 | warning = "There was a problem validating your requested accessions" 88 | return warning, err 89 | } 90 | request, invAccessionWarning, err := GetVirusAccessionRequest(*virusAvailability) 91 | if invAccessionWarning != "" { 92 | warning += invAccessionWarning 93 | } 94 | if err != nil { 95 | return "", err 96 | } 97 | dvf.addFiltersAndOptionsTo(request) 98 | vd.request = request 99 | return warning, err 100 | } 101 | } 
102 | 103 | func VirusDownloadWithTaxon(taxons []string, dvf DownloadVirusFlag) VirusDownloaderIdOption { 104 | return func(vd *VirusDownloader) (string, error) { 105 | if !vd.checkTaxonWithinScope(taxons) { 106 | var warning = "The download virus genome taxon command only supports virus taxa.\nFor data on other organisms, please use the download genome taxon command.\n" 107 | return warning, fmt.Errorf("taxa %s is out of scope", taxons) 108 | } 109 | request := GetVirusTaxonRequest(taxons) 110 | /// Imagine we send in the vff here, and it produces the filter request for us. 111 | dvf.addFiltersAndOptionsTo(request) 112 | vd.request = request 113 | return "", nil 114 | } 115 | } 116 | 117 | func GetVirusAccessionRequest(virusAvailability openapi.V2VirusAvailability) (request *openapi.V2VirusDatasetRequest, warning string, err error) { 118 | if len(virusAvailability.GetInvalidAccessions()) > 0 { 119 | warning += virusAvailability.GetMessage() 120 | } 121 | validAccessions := virusAvailability.GetValidAccessions() 122 | if len(validAccessions) == 0 { 123 | err := errors.New("No valid accessions were passed") 124 | return nil, "", err 125 | } 126 | request = openapi.NewV2VirusDatasetRequest() 127 | request.SetAccessions(validAccessions) 128 | 129 | return request, warning, err 130 | } 131 | 132 | func GetVirusTaxonRequest(taxons []string) *openapi.V2VirusDatasetRequest { 133 | 134 | request := openapi.NewV2VirusDatasetRequest() 135 | request.SetTaxons(taxons) 136 | 137 | return request 138 | } 139 | 140 | func NewVirusDownloader(setIdOption VirusDownloaderIdOption) (vd *VirusDownloader, warning string, err error) { 141 | cli, err := createOAClient() 142 | if err != nil { 143 | return nil, "", err 144 | } 145 | vd = &VirusDownloader{ 146 | request: openapi.NewV2VirusDatasetRequest(), 147 | cli: cli, 148 | } 149 | 150 | warning, idErr := setIdOption(vd) 151 | if idErr != nil { 152 | return nil, warning, idErr 153 | } 154 | 155 | return vd, warning, err 156 | 157 | } 158 
| -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NCBI Datasets 2 | 3 | NCBI Datasets is a resource that lets you easily gather data from across NCBI databases. You can use it to find and download sequence, annotation, and metadata for genes and genomes using our command-line interface (CLI) tools or [NCBI Datasets](https://www.ncbi.nlm.nih.gov/datasets/) web interface. 4 | 5 | NCBI Datasets tools are under active development. To submit feedback, please create a [GitHub issue](https://github.com/ncbi/datasets/issues/new/choose) or [contact NCBI](mailto:info@ncbi.nlm.nih.gov) directly with your questions, comments or feature requests. 6 | 7 | ## Install the NCBI Datasets command-line tools 8 | 9 | [![Anaconda.org badge](https://anaconda.org/conda-forge/ncbi-datasets-cli/badges/version.svg)](https://anaconda.org/conda-forge/ncbi-datasets-cli) 10 | [![Platforms badge](https://anaconda.org/conda-forge/ncbi-datasets-cli/badges/platforms.svg)](https://anaconda.org/conda-forge/ncbi-datasets-cli) 11 | [![Total downloads badge](https://anaconda.org/conda-forge/ncbi-datasets-cli/badges/downloads.svg)](https://anaconda.org/conda-forge/ncbi-datasets-cli) 12 | 13 | Install the latest version (CLI v16.x) of the NCBI Datasets CLI tools, *datasets* and *dataformat*, using conda: 14 | 15 | `conda install -c conda-forge ncbi-datasets-cli` 16 | 17 | For other installation options, see our CLI tools [download and install](https://www.ncbi.nlm.nih.gov/datasets/docs/download-and-install/) instructions. 18 | 19 | ## Use the NCBI Datasets command-line tools 20 | 21 | Use *datasets* to download biological sequence data across all domains of life from NCBI. 22 | 23 | Use *dataformat* to convert metadata included as part of the data package from JSON Lines format to other formats. 
24 | 25 | ### Examples: 26 | Use *datasets* to download a genome data package for the human reference genome GRCh38: 27 | 28 | `datasets download genome taxon human --reference --filename human-reference.zip` 29 | 30 | Use *dataformat* to extract selected fields of metadata from the downloaded data package for the human reference genome, GRCh38: 31 | ``` 32 | dataformat tsv genome --package human-reference.zip --fields organism-name,assminfo-name,accession,assminfo-submitter 33 | Organism name Assembly Name Assembly Accession Assembly Submitter 34 | Homo sapiens GRCh38.p14 GCF_000001405.40 Genome Reference Consortium 35 | ``` 36 | 37 | The Datasets CLI schematic below also outlines the available commands for the *datasets* CLI. 38 | ![Datasets CLI schematic](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/datasets_schema_taxonomy.png) 39 | 40 | ### Download large numbers of genomes 41 | 42 | Download large numbers of genomes by first downloading a dehydrated zip archive and then accessing the data in three steps. 43 | 44 | 1. Download the dehydrated zip archive 45 | 1. Unzip the downloaded zip archive 46 | 1. Rehydrate to access the data 47 | 48 | 49 | Try this example for the human reference genome: 50 | 51 | 1. Download the dehydrated zip archive: 52 | `datasets download genome accession GCF_000001405.40 --dehydrated --filename human_GRCh38_dataset.zip` 53 | 54 | 1. Unzip the downloaded zip archive: 55 | `unzip human_GRCh38_dataset.zip -d my_human_dataset` 56 | 57 | 1. Rehydrate to access the data: 58 | `datasets rehydrate --directory my_human_dataset/` 59 | 60 | For more information, see [how to download large genome data packages](https://www.ncbi.nlm.nih.gov/datasets/docs/how-tos/genomes/large-download/). 61 | 62 | ### Use your API key with the NCBI Datasets command-line tools 63 | NCBI Datasets API and command-line tool requests are rate-limited. By default, this rate limit is set at 5 requests per second (rps). 
By using your API key, you can increase this rate limit to 10 rps. For more information, see our documentation on [how to get an API key](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/api/api-keys/#get-your-api-key) and [how to use your API key.](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/api/api-keys/#use-your-api-key-with-the-ncbi-datasets-command-line-tools) 64 | 65 | ## NCBI Datasets data packages 66 | NCBI Datasets provides sequence, annotation, metadata and other biological data as [NCBI Datasets Data Package zip archives](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/reference-docs/data-packages/). 67 | 68 | We currently offer four types of data package: 69 | 1. An [NCBI Datasets Gene Data Package](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/reference-docs/data-packages/gene-package/) 70 | 1. An [NCBI Datasets Genome Data Package](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/reference-docs/data-packages/genome/) 71 | 1. A specialized [NCBI Datasets Virus Data Package](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/reference-docs/data-packages/virus-genome/). 72 | 1. An [NCBI Datasets Taxonomy Data Package](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/reference-docs/data-packages/taxonomy/) 73 | 74 | ## NCBI Datasets data reports 75 | NCBI Datasets data packages include data report files that contain metadata about the requested records. [Data report schemas](https://www.ncbi.nlm.nih.gov/datasets/docs/reference-docs/data-reports/) describe each type of data report, including available fields, with descriptions and examples. 76 | 77 | ## Citing NCBI Datasets 78 | ### Exploring and retrieving sequence and metadata for species across the tree of life with NCBI Datasets 79 | 80 | O'Leary NA, Cox E, Holmes JB, Anderson WR, Falk R, Hem V, Tsuchiya MTN, Schuler GD, Zhang X, Torcivia J, Ketter A, Breen L, Cothran J, Bajwa H, Tinne J, Meric PA, Hlavina W, Schneider VA. 
[Exploring and retrieving sequence and metadata for species across the tree of life with NCBI Datasets.](https://www.nature.com/articles/s41597-024-03571-y) Sci Data. 2024 Jul 5;11(1):732. doi: 10.1038/s41597-024-03571-y. PMID: 38969627; PMCID: PMC11226681. 81 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/SummaryVirusGenomeAccession.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | _nethttp "net/http" 5 | 6 | "github.com/spf13/cobra" 7 | 8 | openapi "datasets/openapi/v2" 9 | cmdflags "datasets_cli/v2/datasets/flags" 10 | ) 11 | 12 | type VirusDatasetApi struct { 13 | virusApi *openapi.VirusAPIService 14 | } 15 | 16 | func (apiService *VirusDatasetApi) GetPage(request *openapi.V2VirusDataReportRequest) (*openapi.V2reportsVirusDataReportPage, *_nethttp.Response, error) { 17 | apiRequest := openapi.ApiVirusReportsByPostRequest{ 18 | ApiService: apiService.virusApi, 19 | } 20 | return apiRequest.V2VirusDataReportRequest(*request).Execute() 21 | 22 | } 23 | 24 | func (apiService *VirusDatasetApi) GetPagePtr(page openapi.V2reportsVirusDataReportPage) *openapi.V2reportsVirusDataReportPage { 25 | return &page 26 | } 27 | 28 | type VirusAnnotationApi struct { 29 | virusApi *openapi.VirusAPIService 30 | } 31 | 32 | func (apiService *VirusAnnotationApi) GetPage(request *openapi.V2VirusAnnotationReportRequest) (*openapi.V2reportsVirusAnnotationReportPage, *_nethttp.Response, error) { 33 | apiRequest := openapi.ApiVirusAnnotationReportsByPostRequest{ 34 | ApiService: apiService.virusApi, 35 | } 36 | return apiRequest.V2VirusAnnotationReportRequest(*request).Execute() 37 | } 38 | 39 | func (apiService *VirusAnnotationApi) GetPagePtr(page openapi.V2reportsVirusAnnotationReportPage) *openapi.V2reportsVirusAnnotationReportPage { 40 | return &page 41 | } 42 | 43 | func NewVirusGenomeAccessionRequestIterator(request 
*openapi.V2VirusDataReportRequest) *DefaultPagedRequestIterator[*openapi.V2VirusDataReportRequest, string] { 44 | accRequester := NewDefaultPagedRequestIterator[*openapi.V2VirusDataReportRequest, string](request, 45 | func(request *openapi.V2VirusDataReportRequest, accessions []string) { 46 | request.Filter.SetAccessions(accessions) 47 | }, 48 | ) 49 | 50 | if request.Filter.HasAccessions() && len(request.Filter.GetAccessions()) > PAGE_ITER_THRESHOLD { 51 | accRequester.ids = request.Filter.GetAccessions() 52 | request.Filter.SetAccessions([]string{}) 53 | } 54 | 55 | return accRequester 56 | } 57 | 58 | func NewVirusAnnotAccessionRequestIterator(request *openapi.V2VirusAnnotationReportRequest) *DefaultPagedRequestIterator[*openapi.V2VirusAnnotationReportRequest, string] { 59 | accRequester := NewDefaultPagedRequestIterator[*openapi.V2VirusAnnotationReportRequest, string](request, 60 | func(request *openapi.V2VirusAnnotationReportRequest, accessions []string) { 61 | request.Filter.SetAccessions(accessions) 62 | }, 63 | ) 64 | 65 | if request.Filter.HasAccessions() && len(request.Filter.GetAccessions()) > PAGE_ITER_THRESHOLD { 66 | accRequester.ids = request.Filter.GetAccessions() 67 | request.Filter.SetAccessions([]string{}) 68 | } 69 | 70 | return accRequester 71 | } 72 | 73 | func executeSummaryVirusGenomeCmd(taxons []string, svf SummaryVirusFlag, iff *cmdflags.InputFileFlag) error { 74 | 75 | var accs []string 76 | if iff != nil { 77 | accs = iff.InputIDArgs 78 | } 79 | 80 | cli, cliErr := createOAClient() 81 | if cliErr != nil { 82 | return cliErr 83 | } 84 | 85 | if cmdflags.ArgsVirusSummaryReportMode == cmdflags.ANNOTATION { 86 | api := VirusAnnotationApi{virusApi: cli.VirusAPI} 87 | pagePrinter := NewPagePrinter[openapi.V2reportsVirusAnnotationReport, *openapi.V2reportsVirusAnnotationReportPage]( 88 | "virus", 89 | svf.jsonLinesLimitFlag.JsonLines(), 90 | ) 91 | request := svf.PrepareAnnotationReportRequest(accs, taxons) 92 | _, err := 
ProcessPages[*openapi.V2VirusAnnotationReportRequest, 93 | openapi.V2reportsVirusAnnotationReportPage, 94 | openapi.V2reportsVirusAnnotationReport, 95 | *openapi.V2reportsVirusAnnotationReportPage](NewVirusAnnotAccessionRequestIterator(request), &api, &pagePrinter, svf.jsonLinesLimitFlag.RetrievalCount(), svf.jsonLinesLimitFlag.CountOnly()) 96 | return err 97 | } else { 98 | api := VirusDatasetApi{virusApi: cli.VirusAPI} 99 | pagePrinter := NewPagePrinter[openapi.V2reportsVirusAssembly, *openapi.V2reportsVirusDataReportPage]( 100 | "virus", 101 | svf.jsonLinesLimitFlag.JsonLines(), 102 | ) 103 | request := svf.PrepareDatasetReportRequest(accs, taxons) 104 | _, err := ProcessPages[*openapi.V2VirusDataReportRequest, 105 | openapi.V2reportsVirusDataReportPage, 106 | openapi.V2reportsVirusAssembly, 107 | *openapi.V2reportsVirusDataReportPage](NewVirusGenomeAccessionRequestIterator(request), &api, &pagePrinter, svf.jsonLinesLimitFlag.RetrievalCount(), svf.jsonLinesLimitFlag.CountOnly()) 108 | return err 109 | } 110 | 111 | } 112 | 113 | func initInputFlagSummaryVirusAccession() (iff *cmdflags.InputFileFlag, flagSets []cmdflags.FlagInterface) { 114 | iff = cmdflags.NewInputFileFlag(cmdflags.InputFileListTypeVirusAcc, cmdflags.AsIntegerFalse) 115 | flagSets = []cmdflags.FlagInterface{iff} 116 | 117 | return iff, flagSets 118 | } 119 | 120 | func createSummaryVirusGenomeAccCmd(vsf SummaryVirusFlag) *cobra.Command { 121 | iff, flagSets := initInputFlagSummaryVirusAccession() 122 | cmd := &cobra.Command{ 123 | Use: "accession", 124 | Short: "Print a data report containing virus genome metadata by accession", 125 | Long: ` 126 | Print a data report containing virus genome metadata by nucleotide accession. The data report is returned in JSON format. 
127 | 128 | Refer to NCBI's [download and install](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/download-and-install/) documentation for information about getting started with the command-line tools.`, 129 | Example: " datasets summary virus genome accession NC_045512.2", 130 | PreRunE: cmdflags.ExecutePreRunEFor(flagSets), 131 | RunE: func(cmd *cobra.Command, args []string) (err error) { 132 | 133 | var NoTaxons []string 134 | err = executeSummaryVirusGenomeCmd(NoTaxons, vsf, iff) 135 | 136 | return err 137 | }, 138 | } 139 | cmdflags.RegisterAllFlags(flagSets, cmd.Flags()) 140 | return cmd 141 | } 142 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/TaxonomyDownloader.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | openapi "datasets/openapi/v2" 5 | cmdflags "datasets_cli/v2/datasets/flags" 6 | _nethttp "net/http" 7 | 8 | "context" 9 | "fmt" 10 | "strconv" 11 | ) 12 | 13 | type TaxonomyBase struct { 14 | cli *openapi.APIClient 15 | } 16 | 17 | type TaxonomyDownloader struct { 18 | TaxonomyBase 19 | request *openapi.V2TaxonomyDatasetRequest 20 | } 21 | 22 | type taxonRelatedIdsApi struct { 23 | taxonApi *openapi.TaxonomyAPIService 24 | } 25 | 26 | func (apiService *taxonRelatedIdsApi) GetPage(request *openapi.V2TaxonomyRelatedIdRequest) (*openapi.V2reportsTaxonomyDataReportPage, *_nethttp.Response, error) { 27 | apiRequest := openapi.ApiTaxonomyRelatedIdsPostRequest{ 28 | ApiService: apiService.taxonApi, 29 | } 30 | reportPage := openapi.NewV2reportsTaxonomyDataReportPage() 31 | taxIdsPage, http_resp, err := apiRequest.V2TaxonomyRelatedIdRequest(*request).Execute() 32 | if err == nil { 33 | // Switch page result types since the simpler TaxonomyTaxIdsPage does not have the fields required by the ProcessPages api 34 | taxCount := len(taxIdsPage.GetTaxIds()) 35 | 
reportPage.SetNextPageToken(taxIdsPage.GetNextPageToken()) 36 | reportPage.SetTotalCount(int32(taxCount)) 37 | // Yes, golang requires a two-step assignment here 38 | reports := make([]openapi.V2reportsTaxonomyReportMatch, taxCount) 39 | reportPage.SetReports(reports) 40 | for i := 0; i < taxCount; i++ { 41 | reportMatch := openapi.NewV2reportsTaxonomyReportMatch() 42 | reportMatch.SetTaxonomy(*openapi.NewV2reportsTaxonomyNode()) 43 | reportMatch.Taxonomy.SetTaxId(taxIdsPage.GetTaxIds()[i]) 44 | reportPage.Reports[i] = *reportMatch 45 | } 46 | } 47 | return reportPage, http_resp, err 48 | } 49 | 50 | func (apiService *taxonRelatedIdsApi) GetPagePtr(page openapi.V2reportsTaxonomyDataReportPage) *openapi.V2reportsTaxonomyDataReportPage { 51 | return &page 52 | } 53 | 54 | func (td *TaxonomyBase) getFilteredTaxids(taxIds []int32, taxidRetriever *TaxonomyIdRetriever) error { 55 | request := openapi.NewV2TaxonomyMetadataRequest() 56 | request.SetTaxons(GetTaxonsFromTaxIds(taxIds)) 57 | request.SetRanks(taxidRetriever.GetRanks()) 58 | // Just gathering taxids here - minimize amount returned for performance 59 | request.SetReturnedContent(openapi.V2TAXONOMYMETADATAREQUESTCONTENTTYPE_TAXIDS) 60 | 61 | api := TaxonApi{taxonApi: td.cli.TaxonomyAPI} 62 | 63 | _, err := ProcessAllPages[*openapi.V2TaxonomyMetadataRequest, 64 | openapi.V2reportsTaxonomyDataReportPage, 65 | openapi.V2reportsTaxonomyReportMatch, 66 | *openapi.V2reportsTaxonomyDataReportPage](NewDefaultRequestIterator(request), &api, taxidRetriever) 67 | 68 | return err 69 | } 70 | 71 | func (td *TaxonomyBase) getChildTaxIds(taxId int32, taxidRetriever *TaxonomyIdRetriever, getParents bool, getChildren bool) error { 72 | related_ids_request := openapi.NewV2TaxonomyRelatedIdRequest() 73 | related_ids_request.SetTaxId(taxId) 74 | related_ids_request.SetIncludeSubtree(getChildren) 75 | related_ids_request.SetIncludeLineage(getParents) 76 | related_ids_request.SetRanks(taxidRetriever.GetRanks()) 77 | 78 | api := 
taxonRelatedIdsApi{taxonApi: td.cli.TaxonomyAPI} 79 | _, err := ProcessAllPages[*openapi.V2TaxonomyRelatedIdRequest, 80 | openapi.V2reportsTaxonomyDataReportPage, 81 | openapi.V2reportsTaxonomyReportMatch, 82 | *openapi.V2reportsTaxonomyDataReportPage](NewDefaultRequestIterator(related_ids_request), &api, taxidRetriever) 83 | 84 | return err 85 | } 86 | 87 | func GetTaxonsFromTaxIds(taxIds []int32) []string { 88 | taxons := make([]string, len(taxIds)) 89 | for idx, taxId := range taxIds { 90 | taxons[idx] = strconv.Itoa(int(taxId)) 91 | } 92 | 93 | return taxons 94 | } 95 | 96 | func (td *TaxonomyBase) retrieveChildAndParentTaxIds(taxIds []int32, ranks []openapi.V2reportsRankType, getParents bool, getChildren bool) (error, []int32) { 97 | // taxid(s) passed in may not be of requested rank, but if lineage or children are requested, we still get 98 | // the parents/children and test Those against the rank(s) 99 | 100 | // We only need to filter taxids here if ranks is provided (there are no other filters) 101 | taxidRetriever := NewTaxonomyIdRetriever() 102 | if len(ranks) > 0 { 103 | taxidRetriever.SetRanks(ranks) 104 | err := td.getFilteredTaxids(taxIds, &taxidRetriever) 105 | if err != nil { 106 | return err, taxidRetriever.GetTaxIds() 107 | } 108 | } else { 109 | taxidRetriever.AddTaxIds(taxIds) 110 | } 111 | 112 | // Get all children. 
Note the caller should have already checked that taxIds had length==1 113 | if getChildren || getParents { 114 | err := td.getChildTaxIds(taxIds[0], &taxidRetriever, getParents, getChildren) 115 | if err != nil { 116 | return err, taxidRetriever.GetTaxIds() 117 | } 118 | } 119 | 120 | return nil, taxidRetriever.GetTaxIds() 121 | } 122 | 123 | func (td *TaxonomyDownloader) setAllTaxidsForRequest(taxIds []int32, downloadFlags DownloadTaxonomyFlag) error { 124 | td.request.SetAuxReports([]openapi.V2TaxonomyDatasetRequestTaxonomyReportType{openapi.V2TAXONOMYDATASETREQUESTTAXONOMYREPORTTYPE_TAXONOMY_SUMMARY}) 125 | for _, rpt := range downloadFlags.rptFlag.IncludeReports { 126 | td.request.SetAuxReports(append(td.request.GetAuxReports(), cmdflags.TaxonomyDownloadIncludeFlagOpenapi[rpt])) 127 | } 128 | 129 | // If the user supplied ranks, the retrieval of children and parents is automatically set to true - 130 | // otherwise it would just be filtering the command line taxons against the rank which would have little value 131 | if len(downloadFlags.rankFlag.GetRanks()) > 0 { 132 | downloadFlags.childrenFlag.SetChildren(true) 133 | downloadFlags.parentsFlag.SetParents(true) 134 | } 135 | 136 | err, taxIds := td.retrieveChildAndParentTaxIds(taxIds, downloadFlags.rankFlag.GetRanks(), downloadFlags.parentsFlag.GetParents(), downloadFlags.childrenFlag.GetChildren()) 137 | if err != nil { 138 | return err 139 | } 140 | td.request.SetTaxIds(taxIds) 141 | 142 | return nil 143 | } 144 | 145 | func NewTaxonomyDownloader(taxIds []int32, downloadFlags DownloadTaxonomyFlag) (*TaxonomyDownloader, string, error) { 146 | cli, err := createOAClient() 147 | if err != nil { 148 | return nil, "", err 149 | } 150 | td := &TaxonomyDownloader{ 151 | TaxonomyBase: TaxonomyBase{ 152 | cli: cli, 153 | }, 154 | request: openapi.NewV2TaxonomyDatasetRequest(), 155 | } 156 | optErr := td.setAllTaxidsForRequest(taxIds, downloadFlags) 157 | if optErr != nil { 158 | return nil, "", optErr 159 | } 160 | 
if len(td.request.GetTaxIds()) == 0 { 161 | return nil, "", fmt.Errorf("No taxons found that match selection\n") 162 | } 163 | 164 | return td, "", nil 165 | } 166 | 167 | func (td *TaxonomyDownloader) Download(argSkipZipVal bool) (err error) { 168 | _, resp, err := td.cli.TaxonomyAPI.DownloadTaxonomyPackageByPost(context.TODO()).V2TaxonomyDatasetRequest(*td.request).Execute() 169 | if err = handleHTTPResponse(resp, err); err != nil { 170 | return 171 | } 172 | length := int64(-1) // unknown length 173 | err = downloadDataForFile(resp, err, argDownloadFilename, length, argSkipZipVal) 174 | return 175 | } 176 | -------------------------------------------------------------------------------- /client/apps/public/Datasets/v2/datasets/Download.go: -------------------------------------------------------------------------------- 1 | package datasets 2 | 3 | import ( 4 | "archive/zip" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "io/ioutil" 10 | _nethttp "net/http" 11 | "os" 12 | 13 | openapi "datasets/openapi/v2" 14 | "github.com/gosuri/uiprogress" 15 | "github.com/spf13/afero" 16 | "github.com/spf13/cobra" 17 | ) 18 | 19 | var ( 20 | argDownloadFilename string 21 | argJsonInputFilename string 22 | ) 23 | 24 | func downloadDataForFile(resp *_nethttp.Response, inError error, filename string, length int64, argSkipZipVal bool) (err error) { 25 | f, e := afs.Create(filename) 26 | if e != nil { 27 | err = fmt.Errorf("'%s' opening output file: %s", e, filename) 28 | return 29 | } 30 | defer f.Close() 31 | return downloadData(&f, resp, err, filename, length, argSkipZipVal) 32 | } 33 | 34 | func downloadData(f *afero.File, resp *_nethttp.Response, inError error, filename string, length int64, argSkipZipVal bool) (err error) { 35 | if inError != nil { 36 | err = fmt.Errorf("Error connecting to service: %s", inError) 37 | return 38 | } 39 | if resp.StatusCode == 200 { 40 | defer resp.Body.Close() 41 | progressBar := ©ProgressBar{} 42 | progressBar.filename = filename 43 
| if _, e := progressBar.Copy(*f, resp.Body); e != nil { 44 | progressBar.status = "error" 45 | err = fmt.Errorf("Download error: %s", e) 46 | return 47 | } 48 | if !isValidZip(filename, argSkipZipVal) { 49 | afs.Remove(filename) //nolint:errcheck 50 | err = errors.New("Internal error (invalid zip archive). Please try again") 51 | if !argNoProgress { 52 | progressBar.status = "invalid zip archive" 53 | } 54 | return 55 | } 56 | if !argNoProgress { 57 | if argSkipZipVal { 58 | progressBar.status = "valid zip structure -- files not checked" 59 | } else { 60 | progressBar.status = "valid data package" 61 | } 62 | } 63 | } else if resp.StatusCode == 404 { 64 | err = fmt.Errorf("request does not match any items in our database") 65 | } else if resp.StatusCode == 429 { 66 | msg := ` 67 | Selected items are too large for direct download. Please add '--dehydrated' to the 68 | command to download a zip archive with links to the required data. The full dataset can then 69 | be retrieved by unzipping that file and then executing the command: 70 | datasets rehydrate ncbi_dataset` 71 | err = errors.New(msg) 72 | } else { 73 | err = fmt.Errorf("Unexpected Error: %s", resp.Status) 74 | } 75 | return 76 | } 77 | 78 | // downloadCmd represents the download command 79 | var downloadCmd = &cobra.Command{ 80 | Use: "download", 81 | Short: "Download a gene, genome or virus dataset as a zip file", 82 | Long: ` 83 | Download genome, gene and virus data packages, including sequence, annotation, and metadata, as a zip file. 
84 | 85 | Refer to NCBI's [download and install](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/download-and-install/) documentation for information about getting started with the command-line tools.`, 86 | Example: ` datasets download genome accession GCF_000001405.40 --chromosomes X,Y --include cds,genome,protein,seq-report,gbff,gff3 87 | datasets download genome taxon "bos taurus" --reference 88 | datasets download gene gene-id 672 89 | datasets download gene symbol brca1 --taxon "mus musculus" 90 | datasets download gene accession NP_000483.3 91 | datasets download taxonomy taxon human,sars-cov-2 92 | datasets download virus genome taxon sars-cov-2 --host dog 93 | datasets download virus protein S --host dog --filename SARS2-spike-dog.zip`, 94 | Args: cobra.NoArgs, 95 | PersistentPreRunE: func(cmd *cobra.Command, args []string) (err error) { 96 | if !argNoProgress { 97 | progress.Start() 98 | } 99 | return nil 100 | }, 101 | PersistentPostRun: func(cmd *cobra.Command, args []string) { 102 | if !argNoProgress { 103 | progress.Stop() 104 | } 105 | }, 106 | RunE: func(cmd *cobra.Command, args []string) error { 107 | if argJsonInputFilename == "" { 108 | return errors.New("Must provide a valid json input file") 109 | } 110 | 111 | content, err := ioutil.ReadFile(argJsonInputFilename) 112 | if err != nil { 113 | return fmt.Errorf("Opening json request file: %s: \"%s\"", argJsonInputFilename, err) 114 | } 115 | 116 | // Convert json string to request structure 117 | req := openapi.NewV2DatasetRequest() 118 | err = json.Unmarshal([]byte(content), &req) 119 | if err != nil { 120 | return fmt.Errorf("Parsing JSON request file %s: \"%s\"", argJsonInputFilename, err) 121 | } 122 | 123 | if req.GenomeV2 != nil { 124 | downloader, err := NewGenomeRequestDownloader(req.GenomeV2, initAssemblyRequestFlag()) 125 | if err != nil { 126 | return err 127 | } 128 | return downloader.Download(false) 129 | } else if req.GeneV2 != nil { 130 | downloader, err := 
NewGeneRequestDownloader(req.GeneV2) 131 | if err != nil { 132 | return err 133 | } 134 | return downloader.Download(false) 135 | } else { 136 | return errors.New("request did not have a valid gene or genome request object") 137 | } 138 | }, 139 | } 140 | 141 | func isValidZip(filename string, argSkipZipVal bool) bool { 142 | var progressBar *uiprogress.Bar 143 | progressStatus := "Validating package" 144 | if !argNoProgress { 145 | progressBar = progress.AddBar(1) 146 | progressBar.Width = 2 147 | progressBar.PrependFunc(func(b *uiprogress.Bar) string { return progressStatus }) 148 | } 149 | fileInfo, err := afs.Stat(filename) 150 | if err != nil { 151 | return false 152 | } 153 | 154 | file, err := afs.OpenFile(filename, os.O_RDONLY, os.FileMode(0644)) 155 | if err != nil { 156 | return false 157 | } 158 | 159 | zipfile, err := zip.NewReader(file, fileInfo.Size()) 160 | if err != nil { 161 | return false 162 | } 163 | 164 | if !argNoProgress && progressBar != nil && len(zipfile.File) > 0 { 165 | progressBar.Total = len(zipfile.File) 166 | progressBar.Width = 50 167 | progressBar.AppendCompleted() 168 | progressBar.AppendFunc(func(b *uiprogress.Bar) string { return fmt.Sprintf("%d/%d", b.Current(), b.Total) }) 169 | } 170 | for _, zippedfile := range zipfile.File { 171 | // fmt.Printf("Name=%s, size=%d, crc=%d\n", zippedfile.Name, zippedfile.UncompressedSize64, zippedfile.CRC32) 172 | r, err := zippedfile.Open() 173 | if err != nil { 174 | return false 175 | } 176 | if !argSkipZipVal { 177 | progressStatus = "Validating package files" 178 | if _, err := io.Copy(io.Discard, r); err != nil { 179 | return false 180 | } 181 | } 182 | if !argNoProgress && progressBar != nil { 183 | progressBar.Incr() 184 | } 185 | } 186 | 187 | return true 188 | } 189 | 190 | func init() { 191 | downloadCmd.AddCommand(createGeneCmd()) 192 | downloadCmd.AddCommand(createGenomeCmd()) 193 | downloadCmd.AddCommand(createTaxonomyCmd()) 194 | downloadCmd.AddCommand(createVirusCmd()) 195 | 
196 | pflags := downloadCmd.PersistentFlags() 197 | lflags := downloadCmd.Flags() 198 | pflags.StringVar(&argDownloadFilename, "filename", "ncbi_dataset.zip", "Specify a custom file name for the downloaded data package") 199 | pflags.BoolVar(&argNoProgress, "no-progressbar", false, "Hide progress bar") 200 | lflags.StringVar(&argJsonInputFilename, "input-json", "", "a file that contains a valid json request object for genome or gene queries") 201 | if err := lflags.MarkHidden("input-json"); err != nil { 202 | defaultLogger.Fatalln("Invalid attempt to create hidden flag") 203 | } 204 | } 205 | --------------------------------------------------------------------------------