├── .Rbuildignore ├── .gitattributes ├── .github ├── .gitignore ├── CONTRIBUTING.md ├── issue_template.md ├── pull_request_template.md └── workflows │ ├── R-CMD-check.yaml │ ├── cran-checks.yaml │ ├── flir.yaml │ ├── recheck.yml │ ├── rhub.yaml │ └── test-coverage.yaml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── GSODR-package.R ├── get_GSOD.R ├── get_inventory.R ├── get_isd_history.R ├── get_updates.R ├── globals.R ├── internal_functions.R ├── nearest_stations.R ├── process_csv.R ├── reformat_GSOD.R ├── update_station_list.R └── zzz.R ├── README.Rmd ├── README.md ├── air.toml ├── codecov.yml ├── codemeta.json ├── cran-comments.md ├── data-raw ├── README.md ├── fetch_isd-history.Rmd └── fetch_isd-history.md ├── flir ├── config.yml └── rules │ └── builtin │ ├── T_and_F_symbol.yml │ ├── absolute_path.yml │ ├── any_duplicated.yml │ ├── any_is_na.yml │ ├── class_equals.yml │ ├── condition_message.yml │ ├── double_assignment.yml │ ├── duplicate_argument.yml │ ├── empty_assignment.yml │ ├── equal_assignment.yml │ ├── equals_na.yml │ ├── expect_comparison.yml │ ├── expect_identical.yml │ ├── expect_length.yml │ ├── expect_named.yml │ ├── expect_not.yml │ ├── expect_null.yml │ ├── expect_true_false.yml │ ├── expect_type.yml │ ├── for_loop_index.yml │ ├── function_return.yml │ ├── implicit_assignment.yml │ ├── is_numeric.yml │ ├── length_levels.yml │ ├── length_test.yml │ ├── lengths.yml │ ├── library_call.yml │ ├── list_comparison.yml │ ├── literal_coercion.yml │ ├── matrix_apply.yml │ ├── missing_argument.yml │ ├── nested_ifelse.yml │ ├── numeric_leading_zero.yml │ ├── outer_negation.yml │ ├── package_hooks.yml │ ├── paste.yml │ ├── redundant_equals.yml │ ├── redundant_ifelse.yml │ ├── rep_len.yml │ ├── right_assignment.yml │ ├── sample_int.yml │ ├── semicolon.yml │ ├── seq.yml │ ├── sort.yml │ ├── stopifnot_all.yml │ ├── todo_comment.yml │ ├── undesirable_function.yml │ ├── undesirable_operator.yml │ ├── 
unnecessary_nesting.yml │ ├── unreachable_code.yml │ └── which_grepl.yml ├── inst ├── CITATION ├── WORDLIST ├── extdata │ ├── isd_diff.rda │ └── isd_history.rda ├── paper │ ├── paper.bib │ ├── paper.md │ └── paper.pdf └── vector │ └── GSODR-hex.svg ├── man ├── GSODR-package.Rd ├── dot-agroclimatology_list.Rd ├── dot-apply_process_csv.Rd ├── dot-check_url_exists.Rd ├── dot-download_files.Rd ├── dot-subset_country_list.Rd ├── dot-untar_files.Rd ├── dot-validate_country.Rd ├── dot-validate_missing_days.Rd ├── dot-validate_station_data_years.Rd ├── dot-validate_station_id.Rd ├── dot-validate_years.Rd ├── figures │ └── logo.png ├── get_GSOD.Rd ├── get_inventory.Rd ├── get_isd_history.Rd ├── get_updates.Rd ├── nearest_stations.Rd ├── print.GSODR.Info.Rd ├── reformat_GSOD.Rd └── update_station_list.Rd ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── test-get_GSOD.R │ ├── test-get_inventory.R │ ├── test-get_updates.R │ ├── test-nearest_stations.R │ ├── test-reformat_GSOD.R │ └── test-update_station_list.R └── vignettes ├── Ex5-1.png ├── GSODR.Rmd ├── GSODR.Rmd.orig ├── GSOD_Station_locations-1.png ├── precompile.R └── references.bib /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^CRAN-RELEASE$ 2 | ^\.github$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^cran-comments\.md$ 6 | ^revdep$ 7 | ^data-raw$ 8 | ^README\.Rmd$ 9 | ^README-.*\.png$ 10 | ^paper$ 11 | vignettes/figure 12 | ^codecov\.yml$ 13 | ^docs$ 14 | ^codemeta\.json$ 15 | ^_pkgdown\.yml$ 16 | ^doc$ 17 | ^Meta$ 18 | ^\.ccache$ 19 | ^CODE_OF_CONDUCT\.md$ 20 | ^CRAN-SUBMISSION$ 21 | ^air\.toml$ 22 | ^flir$ 23 | ^man/dot-validate_years\.Rd$ 24 | ^man/dot-validate_station_id\.Rd$ 25 | ^man/dot-validate_station_data_years\.Rd$ 26 | ^man/dot-validate_country\.Rd$ 27 | ^man/dot-validate_missing_days\.Rd$ 28 | ^man/dot-download_files\.Rd$ 29 | ^man/dot-agroclimatology_list\.Rd$ 30 | ^man/dot-subset_country_list\.Rd$ 31 | ^man/dot-apply_process_csv\.Rd$ 32 | 
^man/dot-check_url_exists\.Rd$ 33 | ^man/dot-untar_files\.Rd$ 34 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | data/* binary 3 | src/* text=lf 4 | R/* text=lf 5 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING 2 | 3 | ### Fixing typos 4 | 5 | Small typos or grammatical errors in documentation may be edited directly using 6 | the GitHub web interface, so long as the changes are made in the _source_ file. 7 | 8 | - YES: you edit a roxygen comment in a `.R` file below `R/`. 9 | - NO: you edit an `.Rd` file below `man/`. 10 | 11 | ### Prerequisites 12 | 13 | Before you make a substantial pull request, you should always file an issue and 14 | make sure someone from the team agrees that it’s a problem. If you’ve found a 15 | bug, create an associated issue and illustrate the bug with a minimal 16 | [reprex](https://www.tidyverse.org/help/#reprex). 17 | 18 | ### Pull request process 19 | 20 | - We recommend that you create a Git branch for each pull request (PR). 21 | - Look at the GitHub Actions build status before and after making changes. 22 | The `README` should contain badges for any continuous integration services used 23 | by the package. 24 | - We recommend the tidyverse [style guide](http://style.tidyverse.org). 
25 | You can use the [styler](https://CRAN.R-project.org/package=styler) package or 26 | the [Air formatter](https://posit-dev.github.io/air/formatter.html) to apply 27 | these styles, but please don't restyle code that has nothing to do with your PR. 28 | - We use [roxygen2](https://cran.r-project.org/package=roxygen2). 29 | - We use [testthat](https://cran.r-project.org/package=testthat). Contributions 30 | with test cases included are easier to accept. 31 | - For user-facing changes, add a bullet to the top of `NEWS.md` below the 32 | current development version header describing the changes made followed by your 33 | GitHub username, and links to relevant issue(s)/PR(s). 34 | 35 | ### Code of Conduct 36 | 37 | Please note that the GSODR project is released with a 38 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this 39 | project you agree to abide by its terms. 40 | 41 | ### See rOpenSci [contributing guide](https://ropensci.github.io/dev_guide/contributingguide.html) 42 | 43 | for further details. 44 | 45 | ### Discussion forum 46 | 47 | Check out our [discussion forum](https://discuss.ropensci.org) if you think your issue requires a longer form discussion. 48 | 49 | ### Prefer to Email? 50 | 51 | Email the person listed as maintainer in the `DESCRIPTION` file of this repo. 52 | 53 | Though note that private discussions over email don't help others - of course email is totally warranted if it's a sensitive problem of any kind. 54 | 55 | ### Thanks for contributing 56 | 57 | This contributing guide is adapted from the tidyverse contributing guide available at 58 | -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |
Session Info 6 | 7 | ```r 8 | 9 | ``` 10 |
11 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | ## Description 8 | 9 | 10 | ## Related Issue 11 | 14 | 15 | ## Example 16 | 18 | 19 | 21 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check.yaml 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: any::rcmdcheck 47 | needs: check 48 | 49 | - uses: r-lib/actions/check-r-package@v2 50 | with: 51 | upload-snapshots: true 52 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 53 | 
-------------------------------------------------------------------------------- /.github/workflows/cran-checks.yaml: -------------------------------------------------------------------------------- 1 | name: Check CRAN status 2 | 3 | on: 4 | schedule: 5 | # Runs daily at 4:00 PM UTC (9:00 AM PST) 6 | - cron: '0 16 * * *' 7 | # allows for manually running of the check 8 | workflow_dispatch: 9 | 10 | jobs: 11 | check_cran_status: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - name: Get CRAN checks 16 | uses: ricochet-rs/cran-checks/check-pkg@main 17 | with: 18 | pkg: GSODR 19 | -------------------------------------------------------------------------------- /.github/workflows/flir.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: flir 13 | 14 | jobs: 15 | flir: 16 | runs-on: macOS-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: flir-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | permissions: 23 | contents: write 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | 29 | - name: Install flir 30 | run: install.packages("flir", repos = c("https://etiennebacher.r-universe.dev/", getOption("repos"))) 31 | shell: Rscript {0} 32 | 33 | - name: Run flir 34 | run: flir::lint() 35 | shell: Rscript {0} 36 | env: 37 | FLIR_ERROR_ON_LINT: true 38 | -------------------------------------------------------------------------------- /.github/workflows/recheck.yml: -------------------------------------------------------------------------------- 1 | on: 
2 | workflow_dispatch: 3 | inputs: 4 | which: 5 | type: choice 6 | description: Which dependents to check 7 | options: 8 | - strong 9 | - most 10 | 11 | name: Reverse dependency check 12 | 13 | jobs: 14 | revdep_check: 15 | name: Reverse check ${{ inputs.which }} dependents 16 | uses: r-devel/recheck/.github/workflows/recheck.yml@v1 17 | with: 18 | which: ${{ inputs.which }} 19 | -------------------------------------------------------------------------------- /.github/workflows/rhub.yaml: -------------------------------------------------------------------------------- 1 | # R-hub's generic GitHub Actions workflow file. It's canonical location is at 2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml 3 | # You can update this file to a newer version using the rhub2 package: 4 | # 5 | # rhub::rhub_setup() 6 | # 7 | # It is unlikely that you need to modify this file manually. 8 | 9 | name: R-hub 10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | config: 16 | description: 'A comma separated list of R-hub platforms to use.' 17 | type: string 18 | default: 'linux,windows,macos' 19 | name: 20 | description: 'Run name. You can leave this empty now.' 21 | type: string 22 | id: 23 | description: 'Unique ID. You can leave this empty now.' 
24 | type: string 25 | 26 | jobs: 27 | 28 | setup: 29 | runs-on: ubuntu-latest 30 | outputs: 31 | containers: ${{ steps.rhub-setup.outputs.containers }} 32 | platforms: ${{ steps.rhub-setup.outputs.platforms }} 33 | 34 | steps: 35 | # NO NEED TO CHECKOUT HERE 36 | - uses: r-hub/actions/setup@v1 37 | with: 38 | config: ${{ github.event.inputs.config }} 39 | id: rhub-setup 40 | 41 | linux-containers: 42 | needs: setup 43 | if: ${{ needs.setup.outputs.containers != '[]' }} 44 | runs-on: ubuntu-latest 45 | name: ${{ matrix.config.label }} 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | config: ${{ fromJson(needs.setup.outputs.containers) }} 50 | container: 51 | image: ${{ matrix.config.container }} 52 | 53 | steps: 54 | - uses: r-hub/actions/checkout@v1 55 | - uses: r-hub/actions/platform-info@v1 56 | with: 57 | token: ${{ secrets.RHUB_TOKEN }} 58 | job-config: ${{ matrix.config.job-config }} 59 | - uses: r-hub/actions/setup-deps@v1 60 | with: 61 | token: ${{ secrets.RHUB_TOKEN }} 62 | job-config: ${{ matrix.config.job-config }} 63 | - uses: r-hub/actions/run-check@v1 64 | with: 65 | token: ${{ secrets.RHUB_TOKEN }} 66 | job-config: ${{ matrix.config.job-config }} 67 | 68 | other-platforms: 69 | needs: setup 70 | if: ${{ needs.setup.outputs.platforms != '[]' }} 71 | runs-on: ${{ matrix.config.os }} 72 | name: ${{ matrix.config.label }} 73 | strategy: 74 | fail-fast: false 75 | matrix: 76 | config: ${{ fromJson(needs.setup.outputs.platforms) }} 77 | 78 | steps: 79 | - uses: r-hub/actions/checkout@v1 80 | - uses: r-hub/actions/setup-r@v1 81 | with: 82 | job-config: ${{ matrix.config.job-config }} 83 | token: ${{ secrets.RHUB_TOKEN }} 84 | - uses: r-hub/actions/platform-info@v1 85 | with: 86 | token: ${{ secrets.RHUB_TOKEN }} 87 | job-config: ${{ matrix.config.job-config }} 88 | - uses: r-hub/actions/setup-deps@v1 89 | with: 90 | job-config: ${{ matrix.config.job-config }} 91 | token: ${{ secrets.RHUB_TOKEN }} 92 | - uses: r-hub/actions/run-check@v1 93 | with: 94 | 
job-config: ${{ matrix.config.job-config }} 95 | token: ${{ secrets.RHUB_TOKEN }} 96 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage.yaml 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: any::covr, any::xml2 29 | needs: coverage 30 | 31 | - name: Test coverage 32 | run: | 33 | cov <- covr::package_coverage( 34 | quiet = FALSE, 35 | clean = FALSE, 36 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 37 | ) 38 | covr::to_cobertura(cov) 39 | shell: Rscript {0} 40 | 41 | - uses: codecov/codecov-action@v4 42 | with: 43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} 44 | file: ./cobertura.xml 45 | plugin: noop 46 | disable_search: true 47 | token: ${{ secrets.CODECOV_TOKEN }} 48 | 49 | - name: Show testthat output 50 | if: always() 51 | run: | 52 | ## -------------------------------------------------------------------- 53 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 54 | shell: bash 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | .RData 4 | 5 | .Rhistory 6 | 7 | *.Rproj 8 | 9 | .Rproj.user 10 | 11 | fetch_isd-history_cache 12 | 13 | inst/doc 14 | 15 | .Rbuildignore~ 16 | 17 | *.kml 18 | 19 | *.kmz 20 | 21 | *.rds 22 | 23 | generate.rb 24 | *.code-workspace 25 | doc 26 | Meta 27 | /doc/ 28 | /Meta/ 29 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards 42 | of acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies 54 | when an individual is officially representing the community in public spaces. 
55 | Examples of representing our community include using an official e-mail 56 | address, posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at adamhsparks@gmail.com. 63 | All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series of 85 | actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or permanent 92 | ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 
98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within the 112 | community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.0, 118 | available at . 119 | 120 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 121 | enforcement ladder](https://github.com/mozilla/diversity). 122 | 123 | [homepage]: https://www.contributor-covenant.org 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | . Translations are available at . 
127 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Type: Package 2 | Package: GSODR 3 | Title: Global Surface Summary of the Day ('GSOD') Weather Data Client 4 | Version: 4.1.3.9000 5 | Authors@R: c( 6 | person("Adam H.", "Sparks", , "adamhsparks@gmail.com", role = c("aut", "cre"), 7 | comment = c(ORCID = "0000-0002-0061-8359")), 8 | person("Tomislav", "Hengl", , "tom.hengl@isric.org", role = "aut", 9 | comment = c(ORCID = "0000-0002-9921-5129")), 10 | person("Andrew", "Nelson", , "dr.andy.nelson@gmail.com", role = "aut", 11 | comment = c(ORCID = "0000-0002-7249-3778")), 12 | person("Hugh", "Parsonage", , "hugh.parsonage@gmail.com", role = c("cph", "ctb"), 13 | comment = c(ORCID = "0000-0003-4055-0835")), 14 | person("Taras", "Kaduk", , "taras.kaduk@gmail.com", role = "ctb", 15 | comment = "Suggestion for handling bulk station downloads more\n efficiently"), 16 | person("Gwenael", "Giboire", , "gwenael.giboire@oda-groupe.com", role = "ctb", 17 | comment = "Several bug reports in early versions and testing feedback"), 18 | person("Łukasz", "Pawlik", , "lukpawlik@gmail.com", role = "ctb", 19 | comment = "Reported bug in windspeed conversion calculation"), 20 | person("Ross", "Darnell", , "Ross.Darnell@data61.csiro.au", role = "ctb", 21 | comment = c("Reported bug in 'Windows OS' versions causing 'GSOD' data untarring to\n fail", ORCID = "0000-0002-7973-6322")), 22 | person("Tyler", "Widdison", , "Tyler.Widdison@usav.org", role = "ctb", 23 | comment = "Reported bug where\n `nearest_stations()` did not return stations in order of nearest to\n farthest"), 24 | person("Curtin University", role = "cph", 25 | comment = "Supported the development of 'GSODR' through\n Adam H. 
Sparks's time.") 26 | ) 27 | Description: Provides automated downloading, parsing, cleaning, unit 28 | conversion and formatting of Global Surface Summary of the Day 29 | ('GSOD') weather data from the USA National Centers for 30 | Environmental Information ('NCEI'). Units are converted from 31 | United States Customary System ('USCS') units to International System 32 | of Units ('SI'). Stations may be individually checked for number of 33 | missing days defined by the user, where stations with too many missing 34 | observations are omitted. Only stations with valid reported latitude 35 | and longitude values are permitted in the final data. Additional 36 | useful elements, saturation vapour pressure ('es'), actual vapour 37 | pressure ('ea') and relative humidity ('RH') are calculated from the 38 | original data using the improved August-Roche-Magnus approximation 39 | (Alduchov & Eskridge 1996) and included in the final data set. The 40 | resulting metadata include station identification information, 41 | country, state, latitude, longitude, elevation, weather observations 42 | and associated flags. For information on the 'GSOD' data from 'NCEI', 43 | please see the 'GSOD' 'readme.txt' file available from, 44 | . 
45 | License: MIT + file LICENSE 46 | URL: https://docs.ropensci.org/GSODR/, https://github.com/ropensci/GSODR 47 | BugReports: https://github.com/ropensci/GSODR/issues 48 | Depends: 49 | R (>= 3.5.0) 50 | Imports: 51 | countrycode, 52 | curl, 53 | data.table (>= 1.15.4), 54 | R.utils, 55 | stats, 56 | utils, 57 | withr 58 | Suggests: 59 | bit64, 60 | dplyr, 61 | ggplot2, 62 | gridExtra, 63 | knitr, 64 | mapproj, 65 | maps, 66 | rmarkdown, 67 | roxyglobals, 68 | spelling, 69 | testthat, 70 | tidyr 71 | VignetteBuilder: 72 | knitr 73 | ByteCompile: TRUE 74 | Config/Needs/build: moodymudskipper/devtag 75 | Config/roxyglobals/filename: globals.R 76 | Config/roxyglobals/unique: FALSE 77 | Config/testthat/edition: 3 78 | Config/testthat/parallel: true 79 | Encoding: UTF-8 80 | Language: en-US 81 | NeedsCompilation: no 82 | Repository: CRAN 83 | Roxygen: list(roclets = c("collate", "namespace", "rd", 84 | "roxyglobals::global_roclet", "devtag::dev_roclet"), markdown = TRUE) 85 | RoxygenNote: 7.3.2 86 | X-schema.org-applicationCategory: Tools 87 | X-schema.org-isPartOf: https://ropensci.org 88 | X-schema.org-keywords: US-NCEI, meteorological-data, global-weather, 89 | weather, weather-data, meteorology, station-data, surface-weather, 90 | data-access, US-NCDC 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2017 2 | COPYRIGHT HOLDER: Adam Sparks, Tomislav Hengl and Andrew Nelson 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,GSODR.Info) 4 | export(get_GSOD) 5 | export(get_inventory) 6 | export(get_isd_history) 7 | export(get_updates) 8 | export(nearest_stations) 9 | export(reformat_GSOD) 10 | importFrom(data.table,":=") 11 | 
importFrom(data.table,.BY) 12 | importFrom(data.table,.EACHI) 13 | importFrom(data.table,.GRP) 14 | importFrom(data.table,.I) 15 | importFrom(data.table,.N) 16 | importFrom(data.table,.NGRP) 17 | importFrom(data.table,.SD) 18 | importFrom(data.table,CJ) 19 | importFrom(data.table,data.table) 20 | importFrom(data.table,fifelse) 21 | importFrom(data.table,fread) 22 | importFrom(data.table,rbindlist) 23 | importFrom(data.table,set) 24 | importFrom(data.table,setDT) 25 | importFrom(data.table,setcolorder) 26 | importFrom(data.table,setkey) 27 | importFrom(data.table,setkeyv) 28 | importFrom(data.table,setnames) 29 | importFrom(data.table,setorder) 30 | -------------------------------------------------------------------------------- /R/GSODR-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | # The following block is used by usethis to automatically manage 5 | # roxygen namespace tags. Modify with care! 6 | ## usethis namespace: start 7 | #' @importFrom data.table := 8 | #' @importFrom data.table .BY 9 | #' @importFrom data.table .EACHI 10 | #' @importFrom data.table .GRP 11 | #' @importFrom data.table .I 12 | #' @importFrom data.table .N 13 | #' @importFrom data.table .NGRP 14 | #' @importFrom data.table .SD 15 | #' @importFrom data.table data.table 16 | #' @importFrom data.table CJ 17 | #' @importFrom data.table fifelse 18 | #' @importFrom data.table fread 19 | #' @importFrom data.table set 20 | #' @importFrom data.table setcolorder 21 | #' @importFrom data.table setorder 22 | #' @importFrom data.table setkey 23 | #' @importFrom data.table setkeyv 24 | #' @importFrom data.table setnames 25 | #' @importFrom data.table setDT 26 | #' @importFrom data.table rbindlist 27 | 28 | ## usethis namespace: end 29 | NULL 30 | -------------------------------------------------------------------------------- /R/get_GSOD.R: 
-------------------------------------------------------------------------------- 1 | #' Download and Return a data.table Object of GSOD Weather Data 2 | #' 3 | #' @description 4 | #' Automates downloading, cleaning, reformatting of data from the Global Surface 5 | #' Summary of the Day (\acronym{GSOD}) data provided by the 6 | #' [US National Centers for Environmental Information (NCEI)](https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.ncdc:C00516). 7 | #' Three additional useful elements: saturation vapour pressure (es), actual 8 | #' vapour pressure (ea) and relative humidity (RH) are calculated and returned 9 | #' in the final data frame using the improved August-Roche-Magnus approximation 10 | #' (Alduchov and Eskridge 1996). 11 | #' 12 | #' 13 | #' @details 14 | #' All units are converted to International System of Units (SI), *e.g.*, 15 | #' Fahrenheit to Celsius and inches to millimetres. 16 | #' 17 | #' Data summarise each year by station, which include vapour pressure and 18 | #' relative humidity elements calculated from existing data in \acronym{GSOD}. 19 | #' 20 | #' All missing values in resulting files are represented as `NA` regardless of 21 | #' which field they occur in. 22 | #' 23 | #' For a complete list of the fields and description of the contents and units, 24 | #' please refer to Appendix 1 in the \CRANpkg{GSODR} vignette, 25 | #' `vignette("GSODR", package = "GSODR")`. 26 | #' 27 | #' For more information see the description of the data provided by 28 | #' \acronym{NCEI}, . 29 | #' 30 | #' @param years Year(s) of weather data to download. 31 | #' @param station Optional. Specify a station or multiple stations for which to 32 | #' retrieve, check and clean weather data using \var{STATION}. The 33 | #' \acronym{NCEI} reports years for which the data are available. This function 34 | #' checks against these years. 
However, not all cases are properly documented 35 | #' and in some cases files may not exist for download even though it is 36 | #' indicated that data was recorded for the station for a particular year. If a 37 | #' station is specified that does not have an existing file on the server, this 38 | #' function will silently fail and move on to existing files for download and 39 | #' cleaning. 40 | #' @param country Optional. Specify a country for which to retrieve weather 41 | #' data; full name, 2 or 3 letter \acronym{ISO} or 2 letter \acronym{FIPS} codes 42 | #' can be used. All stations within the specified country will be returned. 43 | #' @param max_missing Optional. The maximum number of days allowed to be 44 | #' missing from a station's data before it is excluded from final file output. 45 | #' @param agroclimatology Optional. Logical. Only clean data for stations 46 | #' between latitudes 60 and -60 for agroclimatology work, defaults to `FALSE`. 47 | #' Set to `TRUE` to include only stations within the confines of these 48 | #' latitudes. 49 | #' 50 | #' @note \CRANpkg{GSODR} attempts to validate year and station combination 51 | #' requests, however, in certain cases the start and end date may encompass 52 | #' years where no data is available. In these cases no data will be returned. 53 | #' It is suggested that the user check the latest data availability for the 54 | #' station(s) desired using [get_inventory()] as this list is frequently 55 | #' updated by the \acronym{NCEI} and is not shipped with \CRANpkg{GSODR}. 56 | #' 57 | #' @note While \CRANpkg{GSODR} does not distribute GSOD weather data, users of 58 | #' the data should note the conditions that the U.S. \acronym{NCEI} places upon 59 | #' the \acronym{GSOD} data. 60 | #' \dQuote{The following data and products may have conditions placed on their 61 | #' international commercial use. They can be used within the U.S. or for non- 62 | #' commercial international activities without restriction. 
get_GSOD <- function(
  years,
  station = NULL,
  country = NULL,
  max_missing = NULL,
  agroclimatology = FALSE
) {
  # Entry point: validate the request, download the matching GSOD CSV files
  # into `tempdir()`, optionally narrow them down (agroclimatology latitudes,
  # country, maximum missing days) and return the processed records as a single
  # data.table.

  # Validate user inputs -------------------------------------------------------
  .validate_years(years)

  # `max_missing`, when given, must be a non-missing value of at least 1.
  # `stop()` pastes its arguments together without a separator, so the first
  # message fragment must end with a space.
  if (!is.null(max_missing) && (is.na(max_missing) || max_missing < 1L)) {
    stop(
      call. = FALSE,
      "The `max_missing` parameter must be a positive ",
      "value larger than 1."
    )
  }

  # The current year is still being recorded, so a missing-days threshold
  # cannot be applied to it.
  if (!is.null(max_missing) && (format(Sys.Date(), "%Y") %in% years)) {
    stop(
      call. = FALSE,
      "You cannot use `max_missing` with the current, incomplete year."
    )
  }

  # Explicit stations and the agroclimatology filter are mutually exclusive.
  if (isTRUE(agroclimatology) && !is.null(station)) {
    stop(
      call. = FALSE,
      "You cannot specify a single station along with agroclimatology."
    )
  }

  # Load the bundled station list; this creates `isd_history`, used below
  load(system.file("extdata", "isd_history.rda", package = "GSODR")) # nocov

  if (!is.null(station)) {
    # Validate user entered stations for existence in stations list from NCEI
    invisible(lapply(
      X = station,
      FUN = .validate_station_id,
      isd_history = isd_history
    ))

    # Validate station data against years available. If years are requested
    # w/o data, a warning is issued and an `NA` is returned, which is removed
    # here before passing the modified vector to `.download_files()`
    station <- lapply(
      X = station,
      FUN = .validate_station_data_years,
      isd_history = isd_history,
      years = years
    )
    station <- station[!is.na(station)]
  }

  # Download files from server -------------------------------------------------
  file_list <- .download_files(station, years)

  # Subset file_list for agroclimatology only stations -------------------------
  if (isTRUE(agroclimatology)) {
    file_list <-
      .agroclimatology_list(file_list, isd_history, years)
  }

  # Subset file_list for specified country -------------------------------------
  if (!is.null(country)) {
    # Normalise the user's country (name, ISO or FIPS code) before subsetting
    country <- .validate_country(country, isd_history)

    file_list <-
      .subset_country_list(
        country = country,
        isd_history = isd_history,
        file_list = file_list,
        years = years
      )
  }

  # Validate stations for missing days -----------------------------------------
  if (!is.null(max_missing)) {
    file_list <-
      .validate_missing_days(max_missing, file_list)
    if (length(file_list) == 0L) {
      stop(
        call. = FALSE,
        "There were no stations that had a max of ",
        max_missing,
        " days."
      )
    }
  }

  GSOD <- .apply_process_csv(file_list, isd_history)

  # remove any leftover files from download to prevent polluting a new run;
  # the dot is escaped so that only a literal ".csv" extension matches
  file.remove(list.files(tempdir(), pattern = "\\.csv$", full.names = TRUE))

  return(GSOD)
}
get_inventory <- function() {
  # Downloads the NCEI ISD inventory file, parses the per-station monthly
  # observation counts, joins them to the bundled station metadata and returns
  # the result as a `GSODR.Info` object carrying a printable header attribute.

  # creates `isd_history` in this function's environment
  load(system.file("extdata", "isd_history.rda", package = "GSODR")) # nocov
  setkeyv(isd_history, "STNID")

  inventory_file <- file.path(tempdir(), "inventory.txt")
  # Remove the temporary download however the function exits, so a failed
  # download or parse does not leave the file behind in `tempdir()`.
  on.exit(unlink(inventory_file), add = TRUE)

  tryCatch(
    {
      curl::curl_download(
        "https://www1.ncdc.noaa.gov/pub/data/noaa/isd-inventory.txt.z",
        destfile = inventory_file,
        quiet = TRUE
      )

      # the first 8 lines are skipped; the first 6 are read as the header below
      main_body <-
        fread(
          inventory_file,
          skip = 8L,
          col.names = c(
            "USAF",
            "WBAN",
            "YEAR",
            "JAN",
            "FEB",
            "MAR",
            "APR",
            "MAY",
            "JUN",
            "JUL",
            "AUG",
            "SEP",
            "OCT",
            "NOV",
            "DEC"
          )
        )

      # build the station identifier in the same "USAF-WBAN" form as
      # `isd_history$STNID`, then drop the two source columns
      main_body[, STNID := paste(main_body$USAF, main_body$WBAN, sep = "-")]
      setkeyv(main_body, "STNID")

      main_body[, c("USAF", "WBAN") := NULL]

      setcolorder(main_body, "STNID")

      header <-
        readLines(inventory_file, n = 6L)

      # sift out the year and month: locate the header line holding a
      # four-digit number and strip everything else around the date
      year_month <- grep("[0-9]{4}", header)

      year_month <-
        tools::toTitleCase(tolower(gsub(
          "^([^\\D]*\\d+).*",
          "\\1",
          header[[year_month]]
        )))
      year_month <- gsub("Through ", "", year_month, fixed = TRUE)
      year_month <- gsub("\\..*", "", year_month)

      # attach station metadata to every inventory row
      main_body <- isd_history[main_body, on = "STNID"]

      class(main_body) <- c("GSODR.Info", class(main_body))

      # add attributes for printing df
      attr(main_body, "GSODR.Inventory") <- c(
        " *** FEDERAL CLIMATE COMPLEX INTEGRATED SURFACE DATA INVENTORY *** \n",
        " This inventory provides the number of weather observations by \n",
        " STATION-YEAR-MONTH for beginning of record through",
        year_month,
        " \n"
      )
    },
    error = function(cond) {
      stop(
        "There was a problem retrieving the inventory file. Perhaps \n",
        "the server is not responding currently or there is no \n",
        "Internet connection. Please try again later.",
        call. = FALSE
      )
    }
  )

  return(main_body)
}
isd_history <- isd_history[, c( 33 | "STNID", 34 | "NAME", 35 | "LAT", 36 | "LON", 37 | "ELEV(M)", 38 | "CTRY", 39 | "STATE", 40 | "BEGIN", 41 | "END", 42 | "country.name.en", 43 | "iso2c", 44 | "iso3c" 45 | )] 46 | 47 | # clean data 48 | isd_history[isd_history == -999L] <- NA 49 | isd_history[isd_history == -999.9] <- NA 50 | isd_history <- 51 | isd_history[ 52 | !is.na(isd_history$LAT) & 53 | !is.na(isd_history$LON), 54 | ] 55 | isd_history <- 56 | isd_history[ 57 | isd_history$LAT != 0.0 & 58 | isd_history$LON != 0.0, 59 | ] 60 | isd_history <- 61 | isd_history[ 62 | isd_history$LAT > -90.0 & 63 | isd_history$LAT < 90.0, 64 | ] 65 | isd_history <- 66 | isd_history[ 67 | isd_history$LON > -180.0 & 68 | isd_history$LON < 180.0, 69 | ] 70 | 71 | # set colnames to upper case 72 | names(isd_history) <- toupper(names(isd_history)) 73 | setnames(isd_history, old = "COUNTRY.NAME.EN", new = "COUNTRY_NAME") 74 | 75 | # set country names to be upper case for easier internal verifications 76 | isd_history[, COUNTRY_NAME := toupper(COUNTRY_NAME)] 77 | 78 | # set key for joins when processing CSV files 79 | setkeyv(isd_history, "STNID") 80 | 81 | # select only the cols of interest 82 | x <- c( 83 | "STNID", 84 | "NAME", 85 | "LAT", 86 | "LON", 87 | "ELEV(M)", 88 | "CTRY", 89 | "STATE", 90 | "BEGIN", 91 | "END", 92 | "COUNTRY_NAME", 93 | "ISO2C", 94 | "ISO3C" 95 | ) 96 | 97 | isd_history <- isd_history[, ..x] 98 | 99 | return(isd_history[]) 100 | } 101 | -------------------------------------------------------------------------------- /R/get_updates.R: -------------------------------------------------------------------------------- 1 | #' Get updates.txt With Information on Updates to the GSOD Data Set 2 | #' 3 | #' Gets and imports the 'updates.txt' file that has a change log of GSOD data. 4 | #' Changes are shown in order from most recent to oldest changes by the "DATE" 5 | #' field. Column names follow \CRANpkg{GSODR} naming conventions. 
get_updates <- function() {
  # Downloads NCEI's fixed-width 'updates.txt' change log (once per R session,
  # cached in `tempdir()`), parses it and returns a data.table ordered from the
  # most recent change to the oldest.

  # allow a generous timeout for the download, restoring the user's option on
  # exit
  op <- options(timeout = 600L)
  on.exit(options(op), add = TRUE)

  file_in <- file.path(tempdir(), "updates.txt")
  if (!file.exists(file_in)) {
    tryCatch(
      {
        utils::download.file(
          url = "https://www1.ncdc.noaa.gov/pub/data/noaa/updates.txt",
          destfile = file_in,
          mode = "wb",
          quiet = TRUE
        )
      },
      error = function(x) {
        # `file.exists()` above doubles as the cache check, so make sure a
        # failed attempt cannot leave a file behind that the next call would
        # mistake for a complete download
        unlink(file_in)
        stop(
          "The NCEI server with the update information is not responding. ",
          "Please retry again later.\n",
          call. = FALSE
        )
      }
    )
  }

  # every field is read as character and converted explicitly below
  x <- data.table::setDT(
    utils::read.fwf(
      file = file_in,
      widths = c(7L, 5L, 5L, 11L, 25L),
      header = FALSE,
      comment.char = "",
      allowEscapes = TRUE,
      strip.white = TRUE,
      colClasses = "character",
      col.names = c("STATION", "WBAN", "YEAR", "DATE", "COMMENT")
    )
  )

  # combine the two station fields into a single "STATION-WBAN" identifier
  x[, STNID := sprintf("%s-%s", STATION, WBAN)]
  x[, c("STATION", "WBAN") := NULL]
  x[, YEAR := as.integer(YEAR)]
  x[, DATE := as.Date(DATE)]
  setorder(x, -DATE)
  setcolorder(x, c("STNID"))
  return(x[])
}
.validate_years <- function(years) {
  # Stops with an informative error when `years` is not usable for a GSOD
  # request: character input, missing values, non-positive years, years before
  # the first GSOD year (1929) or years after the current one. Returns `NULL`
  # invisibly when every year is acceptable.
  if (inherits(years, what = "character")) {
    stop(
      call. = FALSE,
      "Years must be entered as a numeric value."
    )
  }
  # without this check an `NA` reaches the comparisons below and fails with
  # the unhelpful "missing value where TRUE/FALSE needed"
  if (anyNA(years)) {
    stop(
      call. = FALSE,
      "Years must not contain missing (`NA`) values."
    )
  }
  this_year <- 1900L + as.POSIXlt(Sys.Date())$year
  for (i in years) {
    if (i <= 0L) {
      stop(
        call. = FALSE,
        "\nThis is not a valid year.\n"
      )
    } else if (i < 1929L) {
      # message fragments are kept on separate lines of code so that no raw
      # line break or source indentation ends up inside the message itself
      stop(
        call. = FALSE,
        "\nThe GSOD data files start at 1929, you have entered a year prior ",
        "to 1929.\n"
      )
    } else if (i > this_year) {
      stop(
        call. = FALSE,
        "\nThe year cannot be greater than current year.\n"
      )
    }
  }
  return(invisible(NULL))
}
.validate_country <- function(country, isd_history) {
  # Translates a user supplied country (full name, 2 or 3 letter ISO code, or
  # 2 letter FIPS code) into the `CTRY` code stored in `isd_history`.
  # `NULL` is passed straight through; an unknown value raises an error.
  if (is.null(country)) {
    return(country)
  }

  # `CTRY` of the first row whose value in `field_values` equals `value`
  first_ctry <- function(field_values, value) {
    hits <- which(value == field_values)
    as.character(isd_history[hits, "CTRY"][1L])
  }

  # only the first element is considered, compared case-insensitively
  country <- toupper(trimws(country[1L]))
  n_chars <- nchar(country)

  # three letters: ISO3C only
  if (n_chars == 3L) {
    if (!(country %in% isd_history$ISO3C)) {
      stop(
        call. = FALSE,
        "\nPlease provide a valid name or 2 or 3 ",
        "letter ISO country code\n"
      )
    }
    return(first_ctry(isd_history$ISO3C, country))
  }

  # two letters: ISO2C takes precedence over the FIPS code held in CTRY
  if (n_chars == 2L) {
    if (country %in% isd_history$ISO2C) {
      return(first_ctry(isd_history$ISO2C, country))
    }
    if (country %in% isd_history$CTRY) {
      return(first_ctry(isd_history$CTRY, country))
    }
    stop(
      call. = FALSE,
      "\nPlease provide a valid name or 2 or 3 ",
      "\nletter ISO country code"
    )
  }

  # any other length: must be a full (upper-cased) country name
  if (!(country %in% isd_history$COUNTRY_NAME)) {
    stop(
      call. = FALSE,
      "\nPlease provide a valid name or 2 or 3 ",
      "letter ISO country code\n"
    )
  }
  return(first_ctry(isd_history$COUNTRY_NAME, country))
}
.validate_missing_days <- function(max_missing, file_list) {
  # Keeps only the files whose line count is at least the number of days in
  # the year minus `max_missing`. Returns the retained paths as a plain
  # vector (possibly empty).

  # nothing to check; also avoids deriving a year from a missing first path
  if (length(file_list) == 0L) {
    return(file_list)
  }

  # NOTE(review): this is the raw line count of each file; if the files carry
  # a header line it is counted as a record too -- confirm that is intended.
  records <-
    unlist(lapply(
      X = paste0(file_list),
      FUN = R.utils::countLines
    ))

  # The year is read from the first path only, at fixed offsets from its end
  # (the four characters that start 20 characters before the end), so all
  # files are judged against that one year's length.
  first_file <- file_list[1L]
  year <- as.numeric(substr(
    first_file,
    start = nchar(first_file) - 19L,
    stop = nchar(first_file) - 16L
  ))

  # one second before 1 March falls on 29 February only in a leap year
  is_leap_year <-
    format(as.POSIXct(paste0(year, "-03-01")) - 1L, "%d") == "29"
  days_in_year <- if (is_leap_year) 366L else 365L
  allow <- days_in_year - max_missing

  return(file_list[records >= allow])
}
= FALSE, 207 | "\nA file download has failed.\n" 208 | ) 209 | } 210 | ) 211 | # create a list of files that have been downloaded and untar them 212 | tar_files <- 213 | list.files(tempdir(), pattern = "*\\.tar.gz$", full.names = TRUE) 214 | 215 | withr::with_dir(tempdir(), .untar_files(tar_files)) 216 | 217 | GSOD_list <- 218 | list.files( 219 | tempdir(), 220 | pattern = "*\\.csv$", 221 | full.names = TRUE, 222 | recursive = TRUE 223 | ) 224 | 225 | if (is.null(station)) { 226 | return(GSOD_list) 227 | } else { 228 | # Get a Cartesian join of all stations of interest and all years 229 | files_stations <- 230 | CJ(years, station, sorted = FALSE)[, paste0( 231 | tempdir(), 232 | "/", 233 | years, 234 | "/", 235 | gsub("-", "", station, fixed = TRUE), 236 | ".csv" 237 | )] 238 | 239 | GSOD_list <- 240 | subset(GSOD_list, GSOD_list %in% files_stations) 241 | 242 | return(GSOD_list) 243 | } 244 | } 245 | 246 | # if a station is provided, download its files ----------------------------- 247 | if (!is.null(station)) { 248 | station <- gsub("-", "", station, fixed = TRUE) 249 | url_list <- 250 | CJ(years, station, sorted = FALSE)[, paste0( 251 | "https://www.ncei.noaa.gov/data/global-summary-of-the-day/access/", 252 | years, 253 | "/", 254 | station, 255 | ".csv" 256 | )] 257 | 258 | tryCatch( 259 | for (i in url_list) { 260 | # check for an http error b4 proceeding' 261 | if (.check_url_exists(x = i)) { 262 | curl::curl_download( 263 | url = i, 264 | destfile = paste0( 265 | tempdir(), 266 | "/", 267 | substr(i, nchar(i) - 20L, nchar(i) - 16L), 268 | # year 269 | "-", 270 | basename(i) # filename 271 | ) 272 | ) 273 | } 274 | }, 275 | error = function(x) { 276 | stop( 277 | call. = FALSE, 278 | "\nThe file downloads have failed. 
.agroclimatology_list <- function(file_list, isd_history, years) {
  # Restricts `file_list` to files of stations located between 60 degrees
  # south and 60 degrees north (inclusive), for every requested year.
  # Files are expected at "<tempdir>/<year>/<station id without dash>.csv".
  station_list <- isd_history[
    isd_history$LAT >= -60L &
      isd_history$LAT <= 60L,
  ]$STNID
  station_list <- gsub("-", "", station_list, fixed = TRUE)

  # Build the full year x station product. Pasting `years` directly against
  # `station_list` would recycle the shorter vector and pair each station with
  # just one of the years, silently dropping files for multi-year requests.
  year_of_pair <- rep(years, times = length(station_list))
  station_of_pair <- rep(station_list, each = length(years))
  wanted_files <- paste0(
    tempdir(),
    "/",
    year_of_pair,
    "/",
    station_of_pair,
    ".csv"
  )

  file_list <- file_list[file_list %in% wanted_files]
  return(file_list)
}
.subset_country_list <- function(country, file_list, isd_history, years) {
  # Restricts `file_list` to files of stations whose `CTRY` code equals
  # `country`, for every requested year. Files are expected at
  # "<tempdir>/<year>/<station id without dash>.csv".
  station_list <-
    isd_history[isd_history$CTRY == country, ]$STNID
  station_list <- gsub("-", "", station_list, fixed = TRUE)

  # Build the full year x station product. Pasting `years` directly against
  # `station_list` would recycle the shorter vector and pair each station with
  # just one of the years, silently dropping files for multi-year requests.
  year_of_pair <- rep(years, times = length(station_list))
  station_of_pair <- rep(station_list, each = length(years))
  wanted_files <- paste0(
    tempdir(),
    "/",
    year_of_pair,
    "/",
    station_of_pair,
    ".csv"
  )

  file_list <- file_list[file_list %in% wanted_files]
  return(file_list)
}
.untar_files <- function(tar_files) {
  # Extracts every archive into a directory named after the four characters
  # that precede the 7-character ".tar.gz" suffix of its path (the year for
  # files named "<year>.tar.gz"). The directory is relative to the current
  # working directory. Called for its side effect; returns `NULL` invisibly.
  for (k in seq_along(tar_files)) {
    archive <- tar_files[[k]]
    n_chars <- nchar(archive)
    utils::untar(
      archive,
      exdir = substr(archive, n_chars - 10L, n_chars - 7L)
    )
  }
  invisible(NULL)
}
nearest_stations <- function(LAT, LON, distance) {
  # Returns the station metadata rows lying closer than `distance` kilometres
  # to the point (`LAT`, `LON`), ordered from nearest to farthest, with the
  # computed distance in a `distance_km` column.

  # bundled station metadata; creates `isd_history` in this function's frame
  load(system.file("extdata", "isd_history.rda", package = "GSODR"))

  # Keep the query point under separate names: the distance is computed inside
  # the `isd_history` table, which has its own `LAT`/`LON` columns (assumed to
  # take precedence over the arguments there -- see data.table scoping).
  user_LAT <- LAT
  user_LON <- LON

  # Great-circle (haversine) distance in kilometres between two points given
  # in decimal degrees. Reasonable approximation.
  # From @HughParsonage in the (now retired) {bomrang} package,
  # https://github.com/ropensci/bomrang/blob/master/R/internal_functions.R
  great_circle_km <- function(lat1, lon1, lat2, lon2) {
    deg_to_rad <- 0.01745329 # `pi / 180`, pre-calculated
    earth_radius_km <- 6371.0

    lat1 <- lat1 * deg_to_rad
    lat2 <- lat2 * deg_to_rad
    lon1 <- lon1 * deg_to_rad
    lon2 <- lon2 * deg_to_rad

    half_dlat <- abs(lat1 - lat2) / 2.0
    half_dlon <- abs(lon1 - lon2) / 2.

    hav <- (sin(half_dlat))^2.0 + cos(lat1) * cos(lat2) * (sin(half_dlon))^2.

    earth_radius_km * 2.0 * asin(sqrt(hav))
  }

  # distance from every station to the query point, rounded to 0.1 km
  isd_history[,
    distance_km := round(
      great_circle_km(
        lat1 = LAT,
        lon1 = LON,
        lat2 = user_LAT,
        lon2 = user_LON
      ),
      1L
    )
  ]

  # first column (the station id) of the stations inside the radius, nearest
  # first
  by_distance <- isd_history[order(distance_km)]
  nearby_ids <- subset(by_distance, distance_km < distance)[[1L]]
  subset_stns <- data.table(STNID = nearby_ids)

  # join back to pick up the full metadata in nearest-first order
  return(isd_history[subset_stns, on = "STNID"])
}
with NA 22 | set(DT, i = which(DT[["PRCP"]] == "99.99"), j = "PRCP", value = NA) 23 | 24 | # Replace 999.9 with NA 25 | for (col in names(DT)[ 26 | names(DT) %in% 27 | c( 28 | "VISIB", 29 | "WDSP", 30 | "MXSPD", 31 | "GUST", 32 | "SNDP" 33 | ) 34 | ]) { 35 | set(DT, i = which(DT[[col]] == "999.9"), j = col, value = NA) 36 | } 37 | 38 | # Replace 9999.99 with NA 39 | for (col in names(DT)[ 40 | names(DT) %in% 41 | c( 42 | "TEMP", 43 | "DEWP", 44 | "SLP", 45 | "STP", 46 | "MAX", 47 | "MIN" 48 | ) 49 | ]) { 50 | set(DT, i = which(DT[[col]] == "9999.9"), j = col, value = NA) 51 | } 52 | 53 | # Replace " " with NA 54 | for (col in names(DT)[ 55 | names(DT) %in% 56 | c( 57 | "PRCP_ATTRIBUTES", 58 | "MIN_ATTRIBUTES", 59 | "MAX_ATTRIBUTES" 60 | ) 61 | ]) { 62 | set(DT, i = which(DT[[col]] == " "), j = col, value = NA) 63 | } 64 | 65 | # Add STNID col -------------------------------------------------------------- 66 | DT[, STNID := gsub("^(.{6})(.*)$", "\\1-\\2", DT$STATION)] 67 | 68 | # Correct STP values --------------------------------------------------------- 69 | # The NCEI supplied CSV files are broken, they lop off the "1" in values >1000 70 | # See https://github.com/ropensci/GSODR/issues/117 71 | DT[, 72 | STP := fifelse( 73 | startsWith(x = STP, prefix = "0"), 74 | sprintf("%s%s", 1L, DT$STP), 75 | STP, 76 | na = NA 77 | ) 78 | ] 79 | 80 | DT[, STP := fifelse(STP_ATTRIBUTES == " 0", NA, STP)] 81 | 82 | # Add and convert date related columns --------------------------------------- 83 | DT[, YEARMODA := as.Date(DATE, format = "%Y-%m-%d")] 84 | DT[, YEAR := as.integer(substr(DATE, 1L, 4L))] 85 | DT[, MONTH := as.integer(substr(DATE, 6L, 7L))] 86 | DT[, DAY := as.integer(substr(DATE, 9L, 10L))] 87 | DT[, YDAY := as.integer(strftime(as.Date(DATE), format = "%j"))] 88 | 89 | # Convert *_ATTRIBUTES cols to integer --------------------------------------- 90 | for (col in names(DT)[ 91 | names(DT) %in% 92 | c( 93 | "TEMP_ATTRIBUTES", 94 | "DEWP_ATTRIBUTES", 95 | 
"SLP_ATTRIBUTES", 96 | "STP_ATTRIBUTES", 97 | "VISIB_ATTRIBUTES", 98 | "WDSP_ATTRIBUTES" 99 | ) 100 | ]) { 101 | set(DT, j = col, value = as.integer(DT[[col]])) 102 | } 103 | 104 | # Drop unnecessary columns --------------------------------------------------- 105 | DT[, c("DATE", "STATION") := NULL] 106 | 107 | # Convert numeric cols to be numeric ----------------------------------------- 108 | for (col in c( 109 | "TEMP", 110 | "DEWP", 111 | "SLP", 112 | "STP", 113 | "WDSP", 114 | "MXSPD", 115 | "GUST", 116 | "VISIB", 117 | "WDSP", 118 | "MAX", 119 | "MIN", 120 | "PRCP", 121 | "SNDP" 122 | )) { 123 | set(DT, j = col, value = as.numeric(DT[[col]])) 124 | } 125 | 126 | # Convert data to Metric units ----------------------------------------------- 127 | DT[, TEMP := round(0.5556 * (TEMP - 32.0), 1L)] 128 | DT[, DEWP := round(0.5556 * (DEWP - 32.0), 1L)] 129 | DT[, WDSP := round(WDSP * 0.514444444, 1L)] 130 | DT[, MXSPD := round(MXSPD * 0.514444444, 1L)] 131 | DT[, GUST := round(GUST * 0.514444444, 1L)] 132 | DT[, VISIB := round(VISIB * 1.60934, 1L)] 133 | DT[, MAX := round((MAX - 32.0) * 0.5556, 1L)] 134 | DT[, MIN := round((MIN - 32.0) * 0.5556, 1L)] 135 | DT[, PRCP := round(PRCP * 25.4, 2L)] 136 | DT[, SNDP := round(SNDP * 25.4, 1L)] 137 | 138 | # Calculate EA, ES and RH using August-Roche-Magnus approximation ------------ 139 | # Oleg A. Alduchov and Robert E. 
Eskridge 1995 140 | # https://doi.org/10.1175/1520-0450(1996)035<0601:IMFAOS>2.0.CO;2 141 | # EA derived from dew point 142 | DT[, 143 | EA := round( 144 | 0.61094 * 145 | exp( 146 | (17.625 * (DEWP)) / 147 | ((DEWP) + 243.04) 148 | ), 149 | 1L 150 | ) 151 | ] 152 | # ES derived from average temperature 153 | DT[, 154 | ES := round( 155 | 0.61094 * 156 | exp( 157 | (17.625 * (TEMP)) / 158 | ((TEMP) + 243.04) 159 | ), 160 | 1L 161 | ) 162 | ] 163 | DT[, 164 | RH := round( 165 | 100L * 166 | (exp((17.625 * DEWP) / (243.04 + DEWP)) / 167 | exp((17.625 * (TEMP)) / (243.04 + (TEMP)))), 168 | 1L 169 | ) 170 | ] 171 | 172 | # Split FRSHTT into separate columns ----------------------------------------- 173 | DT[, 174 | I_FOG := fifelse( 175 | DT$FRSHTT != 0L, 176 | as.numeric(substr( 177 | x = DT$FRSHTT, 178 | start = 1L, 179 | stop = 1L 180 | )), 181 | 0L 182 | ) 183 | ] 184 | DT[, 185 | I_RAIN_DRIZZLE := fifelse( 186 | DT$FRSHTT != 0L, 187 | as.numeric(substr( 188 | x = DT$FRSHTT, 189 | start = 2L, 190 | stop = 2L 191 | )), 192 | 0L 193 | ) 194 | ] 195 | DT[, 196 | I_SNOW_ICE := fifelse( 197 | DT$FRSHTT != 0L, 198 | as.numeric(substr( 199 | x = DT$FRSHTT, 200 | start = 3L, 201 | stop = 3L 202 | )), 203 | 0L 204 | ) 205 | ] 206 | DT[, 207 | I_HAIL := fifelse( 208 | DT$FRSHTT != 0L, 209 | as.numeric(substr( 210 | x = DT$FRSHTT, 211 | start = 4L, 212 | stop = 4L 213 | )), 214 | 0L 215 | ) 216 | ] 217 | DT[, 218 | I_THUNDER := fifelse( 219 | DT$FRSHTT != 0L, 220 | as.numeric(substr( 221 | DT$FRSHTT, 222 | start = 5L, 223 | stop = 5L 224 | )), 225 | 0L 226 | ) 227 | ] 228 | DT[, 229 | I_TORNADO_FUNNEL := fifelse( 230 | DT$FRSHTT != 0L, 231 | as.numeric(substr( 232 | x = DT$FRSHTT, 233 | start = 6L, 234 | stop = 6L 235 | )), 236 | 0L 237 | ) 238 | ] 239 | DT[, FRSHTT := NULL] 240 | 241 | # Join with internal isd-history for CTRY column ----------------------------- 242 | setkey(DT, STNID) 243 | DT <- isd_history[DT] 244 | 245 | # drop extra cols 246 | DT[, c("i.NAME", 
"LATITUDE", "LONGITUDE", "ELEV(M)") := NULL] 247 | setnames(DT, c("LAT", "LON"), c("LATITUDE", "LONGITUDE")) 248 | 249 | # setcolorder ---------------------------------------------------------------- 250 | setcolorder( 251 | DT, 252 | c( 253 | "STNID", 254 | "NAME", 255 | "CTRY", 256 | "COUNTRY_NAME", 257 | "ISO2C", 258 | "ISO3C", 259 | "STATE", 260 | "LATITUDE", 261 | "LONGITUDE", 262 | "ELEVATION", 263 | "BEGIN", 264 | "END", 265 | "YEARMODA", 266 | "YEAR", 267 | "MONTH", 268 | "DAY", 269 | "YDAY", 270 | "TEMP", 271 | "TEMP_ATTRIBUTES", 272 | "DEWP", 273 | "DEWP_ATTRIBUTES", 274 | "SLP", 275 | "SLP_ATTRIBUTES", 276 | "STP", 277 | "STP_ATTRIBUTES", 278 | "VISIB", 279 | "VISIB_ATTRIBUTES", 280 | "WDSP", 281 | "WDSP_ATTRIBUTES", 282 | "MXSPD", 283 | "GUST", 284 | "MAX", 285 | "MAX_ATTRIBUTES", 286 | "MIN", 287 | "MIN_ATTRIBUTES", 288 | "PRCP", 289 | "PRCP_ATTRIBUTES", 290 | "SNDP", 291 | "I_FOG", 292 | "I_RAIN_DRIZZLE", 293 | "I_SNOW_ICE", 294 | "I_HAIL", 295 | "I_THUNDER", 296 | "I_TORNADO_FUNNEL", 297 | "EA", 298 | "ES", 299 | "RH" 300 | ) 301 | ) 302 | return(DT) 303 | } 304 | -------------------------------------------------------------------------------- /R/reformat_GSOD.R: -------------------------------------------------------------------------------- 1 | #' Tidy and Return a data.table Object of GSOD Data From Local Storage 2 | #' 3 | #' This function automates cleaning and reformatting of \acronym{GSOD} station 4 | #' files in\cr \dQuote{YEAR.tar.gz}, provided that they have been untarred or 5 | #' \dQuote{STATION.csv} format that have been downloaded from the United States 6 | #' National Center for Environmental Information's (\acronym{NCEI}) 7 | #' download page. Three additional useful elements: saturation vapour pressure 8 | #' (es), actual vapour pressure (ea) and relative humidity (RH) are calculated 9 | #' and returned in the final data frame using the improved August-Roche-Magnus 10 | #' approximation (Alduchov and Eskridge 1996). 
All units are converted to 11 | #' International System of Units (SI), *e.g.*, Fahrenheit to Celsius and 12 | #' inches to millimetres. 13 | #' 14 | #' @param dsn User supplied full file path to location of data files on local 15 | #' disk for tidying. 16 | #' @param file_list User supplied list of file paths to individual files of data 17 | #' on local disk for tidying. Ignored if `dsn` is set. Use if there are other 18 | #' files in the `dsn` that you do not wish to reformat. 19 | #' 20 | #' @details 21 | #' 22 | #' If multiple stations are given, data are summarised for each year by station, 23 | #' which include vapour pressure and relative humidity elements calculated from 24 | #' existing data in \acronym{GSOD}. Else, a single station is tidied and a data 25 | #' frame is returned. 26 | #' 27 | #' All missing values in resulting files are represented as `NA` regardless 28 | #' of which field they occur in. 29 | #' 30 | #' Only station files in the original \dQuote{csv} file format are supported by 31 | #' this function. If you have downloaded the full annual (\dQuote{YYYY.tar.gz}) 32 | #' file you will need to extract the individual station files from the tar file 33 | #' first to use this function. 34 | #' 35 | #' Note that [reformat_GSOD()] will attempt to reformat any \dQuote{.csv} 36 | #' files found in the `dsn` that you provide. If there are non-\acronym{GSOD} 37 | #' files present this will lead to errors. 38 | #' 39 | #' For a complete list of the fields and description of the contents and units, 40 | #' please refer to Appendix 1 in the \CRANpkg{GSODR} vignette, 41 | #' \code{vignette("GSODR", package = "GSODR")}. 42 | #' 43 | #' @note While \CRANpkg{GSODR} does not distribute \acronym{GSOD} weather data, 44 | #' users of the data should note the conditions that the U.S. \acronym{NCEI} 45 | #' places upon the \acronym{GSOD} data. 46 | #' \dQuote{The following data and products may have conditions placed on their 47 | #' international commercial use. 
They can be used within the U.S. or for non- 48 | #' commercial international activities without restriction. The non-U.S. data 49 | #' cannot be redistributed for commercial purposes. Re-distribution of these 50 | #' data by others must provide this same notification. A log of IP addresses 51 | #' accessing these data and products will be maintained and may be made 52 | #' available to data providers.} 53 | #' 54 | #' @seealso 55 | #' For automated downloading and tidying see the [get_GSOD()] function, which 56 | #' provides expanded functionality for automatically downloading and expanding 57 | #' annual \acronym{GSOD} files and cleaning station files. 58 | #' 59 | #' @section References: 60 | #' 61 | #' Alduchov, O.A. and Eskridge, R.E., 1996. Improved Magnus form approximation 62 | #' of saturation vapor pressure. Journal of Applied Meteorology and Climatology, 63 | #' 35(4), pp.601-609. DOI: 64 | #' <10.1175%2F1520-0450%281996%29035%3C0601%3AIMFAOS%3E2.0.CO%3B2>. 65 | #' 66 | #' @examplesIf interactive() 67 | #' 68 | #' # Download data to 'tempdir()' 69 | #' download.file( 70 | #' url = 71 | #' "https://www.ncei.noaa.gov/data/global-summary-of-the-day/access/2010/95551099999.csv", 72 | #' destfile = file.path(tempdir(), "95551099999.csv"), 73 | #' mode = "wb" 74 | #' ) 75 | #' 76 | #' # Reformat station data files in R's tempdir() directory 77 | #' tbar <- reformat_GSOD(dsn = tempdir()) 78 | #' 79 | #' tbar 80 | #' 81 | #' @author Adam H. Sparks, \email{adamhsparks@@gmail.com} 82 | #' 83 | #' @returns A data frame as a [data.table::data.table] object of 84 | #' \acronym{GSOD} data. 
85 | #' @seealso [get_GSOD()] 86 | #' @autoglobal 87 | #' @export reformat_GSOD 88 | 
reformat_GSOD <- function(dsn = NULL, file_list = NULL) {
  # Loads the bundled `isd_history` object into this function's environment
  load(system.file("extdata", "isd_history.rda", package = "GSODR")) # nocov

  # If both `dsn` and `file_list` are set, emit message that only `dsn` is used.
  # `&&` is used because the condition of `if ()` is a single TRUE/FALSE.
  if (!is.null(dsn) && !is.null(file_list)) {
    message(
      "\nYou have specified both `file_list` and `dsn`.\n",
      "Proceeding with using only the value from `dsn`.\n",
      "See `?reformat_GSOD` if this behaviour was not expected.\n"
    )
  }

  # If `dsn` is not NULL, build the list of files to reformat from every
  # ".csv" file found directly in that directory; this overrides `file_list`
  if (!is.null(dsn)) {
    file_list <- list.files(
      path = dsn,
      pattern = "^.*\\.csv$",
      full.names = TRUE
    )
    if (length(file_list) == 0L) {
      stop("No files were found, please check your file location.")
    }
  }

  # When `dsn` is NULL, `file_list` is handed on exactly as the user gave it
  GSOD_XY <- .apply_process_csv(file_list, isd_history)
  return(GSOD_XY)
}

-------------------------------------------------------------------------------- /R/update_station_list.R: -------------------------------------------------------------------------------- 1 | #' Download Latest isd-history.csv File and Update an Internal Database 2 | #' 3 | #' This function downloads the latest station list (isd-history.csv) from the 4 | #' \acronym{NCEI} server and updates the data distributed with \CRANpkg{GSODR} 5 | #' to the latest stations available. These data provide unique identifiers, 6 | #' country, state (if in U.S.) and when weather observations begin and end. 7 | #' 8 | #' Care should be taken when using this function if reproducibility is necessary 9 | #' as different machines with the same version of \CRANpkg{GSODR} can end up 10 | #' with different versions of the 'isd_history.csv' file internally. 
11 | #' 12 | #' There is no need to use this unless you know that a station exists in the 13 | #' isd_history.csv file that is not available in the self-contained 14 | #' database distributed with \CRANpkg{GSODR}. 15 | #' 16 | #' To directly access these data, use: \cr 17 | #' `load(system.file("extdata", "isd_history.rda", package = "GSODR"))` 18 | #' 19 | #' To see the latest version available from the \acronym{NCEI} server, please 20 | #' refer to [get_isd_history()]. 21 | #' 22 | #' @examples 23 | #' \dontrun{ 24 | #' update_station_list() 25 | #' } 26 | #' 27 | #' @seealso [get_isd_history()] 28 | #' @author Adam H. Sparks, \email{adamhsparks@@gmail.com} 29 | #' @autoglobal 30 | 
update_station_list <- function() {
  message(
    "This will overwrite GSODR's current internal list of GSOD stations. \n",
    "If reproducibility is necessary, you may not wish to proceed. \n",
    "Do you understand and wish to proceed (Y/n)?\n"
  )

  # The connection to read the answer from is stored in the "GSODR_connection"
  # option by `.onAttach()`. Fall back to `stdin()` when the option is unset,
  # e.g. when this is called as `GSODR::update_station_list()` without the
  # package being attached.
  answer <- readLines(
    con = getOption("GSODR_connection", default = stdin()),
    n = 1L
  )

  # `readLines()` returns `character(0)` at end of input. `isTRUE()` makes
  # that case, like any answer other than "Y" or "YES" (case-insensitive), a
  # refusal instead of an "argument is of length zero" error.
  answer <- toupper(answer)
  if (!isTRUE(answer %in% c("Y", "YES"))) {
    stop("Station list was not updated.", call. = FALSE)
  }

  isd_history <- get_isd_history()

  # write rda file to disk for use with GSODR package; this overwrites the
  # copy inside the installed package
  fname <-
    system.file("extdata", "isd_history.rda", package = "GSODR")
  save(isd_history, file = fname, compress = "bzip2")
}

-------------------------------------------------------------------------------- /R/zzz.R: --------------------------------------------------------------------------------
# Store the connection that `update_station_list()` reads the user's answer
# from in an option -- presumably so that another connection can be
# substituted, e.g. in tests (TODO confirm).
.onAttach <- function(libname, pkgname) {
  options(GSODR_connection = stdin())
}

-------------------------------------------------------------------------------- /air.toml: -------------------------------------------------------------------------------- 1 | [format] 2 | line-width = 80 3 | indent-width = 2 4 | indent-style = "space" 5 | line-ending = "auto" 6 | persistent-line-breaks = true 7 | exclude = [] 8 | default-exclude = true 9 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", 3 | "@type": "SoftwareSourceCode", 4 | "identifier": "GSODR", 5 | "description": "Provides automated downloading, parsing, cleaning, unit conversion and formatting of Global Surface Summary of the Day ('GSOD') weather data from the from the USA National Centers for Environmental Information ('NCEI'). Units are converted from from United States Customary System ('USCS') units to International System of Units ('SI'). Stations may be individually checked for number of missing days defined by the user, where stations with too many missing observations are omitted. 
Only stations with valid reported latitude and longitude values are permitted in the final data. Additional useful elements, saturation vapour pressure ('es'), actual vapour pressure ('ea') and relative humidity ('RH') are calculated from the original data using the improved August-Roche-Magnus approximation (Alduchov & Eskridge 1996) and included in the final data set. The resulting metadata include station identification information, country, state, latitude, longitude, elevation, weather observations and associated flags. For information on the 'GSOD' data from 'NCEI', please see the 'GSOD' 'readme.txt' file available from, .", 6 | "name": "GSODR: Global Surface Summary of the Day ('GSOD') Weather Data Client", 7 | "relatedLink": ["https://docs.ropensci.org/GSODR/", "https://CRAN.R-project.org/package=GSODR"], 8 | "codeRepository": "https://github.com/ropensci/GSODR", 9 | "issueTracker": "https://github.com/ropensci/GSODR/issues", 10 | "license": "https://spdx.org/licenses/MIT", 11 | "version": "4.1.3.9000", 12 | "programmingLanguage": { 13 | "@type": "ComputerLanguage", 14 | "name": "R", 15 | "url": "https://r-project.org" 16 | }, 17 | "runtimePlatform": "R version 4.5.0 (2025-04-11)", 18 | "provider": { 19 | "@id": "https://cran.r-project.org", 20 | "@type": "Organization", 21 | "name": "Comprehensive R Archive Network (CRAN)", 22 | "url": "https://cran.r-project.org" 23 | }, 24 | "author": [ 25 | { 26 | "@type": "Person", 27 | "givenName": "Adam H.", 28 | "familyName": "Sparks", 29 | "email": "adamhsparks@gmail.com", 30 | "@id": "https://orcid.org/0000-0002-0061-8359" 31 | }, 32 | { 33 | "@type": "Person", 34 | "givenName": "Tomislav", 35 | "familyName": "Hengl", 36 | "email": "tom.hengl@isric.org", 37 | "@id": "https://orcid.org/0000-0002-9921-5129" 38 | }, 39 | { 40 | "@type": "Person", 41 | "givenName": "Andrew", 42 | "familyName": "Nelson", 43 | "email": "dr.andy.nelson@gmail.com", 44 | "@id": "https://orcid.org/0000-0002-7249-3778" 45 | } 46 | ], 47 | 
"contributor": [ 48 | { 49 | "@type": "Person", 50 | "givenName": "Hugh", 51 | "familyName": "Parsonage", 52 | "email": "hugh.parsonage@gmail.com", 53 | "@id": "https://orcid.org/0000-0003-4055-0835" 54 | }, 55 | { 56 | "@type": "Person", 57 | "givenName": "Taras", 58 | "familyName": "Kaduk", 59 | "email": "taras.kaduk@gmail.com" 60 | }, 61 | { 62 | "@type": "Person", 63 | "givenName": "Gwenael", 64 | "familyName": "Giboire", 65 | "email": "gwenael.giboire@oda-groupe.com" 66 | }, 67 | { 68 | "@type": "Person", 69 | "givenName": "Łukasz", 70 | "familyName": "Pawlik", 71 | "email": "lukpawlik@gmail.com" 72 | }, 73 | { 74 | "@type": "Person", 75 | "givenName": "Ross", 76 | "familyName": "Darnell", 77 | "email": "Ross.Darnell@data61.csiro.au", 78 | "@id": "https://orcid.org/0000-0002-7973-6322" 79 | }, 80 | { 81 | "@type": "Person", 82 | "givenName": "Tyler", 83 | "familyName": "Widdison", 84 | "email": "Tyler.Widdison@usav.org" 85 | } 86 | ], 87 | "copyrightHolder": [ 88 | { 89 | "@type": "Person", 90 | "givenName": "Hugh", 91 | "familyName": "Parsonage", 92 | "email": "hugh.parsonage@gmail.com", 93 | "@id": "https://orcid.org/0000-0003-4055-0835" 94 | }, 95 | { 96 | "@type": "Organization", 97 | "name": "Curtin University" 98 | } 99 | ], 100 | "maintainer": [ 101 | { 102 | "@type": "Person", 103 | "givenName": "Adam H.", 104 | "familyName": "Sparks", 105 | "email": "adamhsparks@gmail.com", 106 | "@id": "https://orcid.org/0000-0002-0061-8359" 107 | } 108 | ], 109 | "softwareSuggestions": [ 110 | { 111 | "@type": "SoftwareApplication", 112 | "identifier": "bit64", 113 | "name": "bit64", 114 | "provider": { 115 | "@id": "https://cran.r-project.org", 116 | "@type": "Organization", 117 | "name": "Comprehensive R Archive Network (CRAN)", 118 | "url": "https://cran.r-project.org" 119 | }, 120 | "sameAs": "https://CRAN.R-project.org/package=bit64" 121 | }, 122 | { 123 | "@type": "SoftwareApplication", 124 | "identifier": "dplyr", 125 | "name": "dplyr", 126 | "provider": { 
127 | "@id": "https://cran.r-project.org", 128 | "@type": "Organization", 129 | "name": "Comprehensive R Archive Network (CRAN)", 130 | "url": "https://cran.r-project.org" 131 | }, 132 | "sameAs": "https://CRAN.R-project.org/package=dplyr" 133 | }, 134 | { 135 | "@type": "SoftwareApplication", 136 | "identifier": "ggplot2", 137 | "name": "ggplot2", 138 | "provider": { 139 | "@id": "https://cran.r-project.org", 140 | "@type": "Organization", 141 | "name": "Comprehensive R Archive Network (CRAN)", 142 | "url": "https://cran.r-project.org" 143 | }, 144 | "sameAs": "https://CRAN.R-project.org/package=ggplot2" 145 | }, 146 | { 147 | "@type": "SoftwareApplication", 148 | "identifier": "gridExtra", 149 | "name": "gridExtra", 150 | "provider": { 151 | "@id": "https://cran.r-project.org", 152 | "@type": "Organization", 153 | "name": "Comprehensive R Archive Network (CRAN)", 154 | "url": "https://cran.r-project.org" 155 | }, 156 | "sameAs": "https://CRAN.R-project.org/package=gridExtra" 157 | }, 158 | { 159 | "@type": "SoftwareApplication", 160 | "identifier": "knitr", 161 | "name": "knitr", 162 | "provider": { 163 | "@id": "https://cran.r-project.org", 164 | "@type": "Organization", 165 | "name": "Comprehensive R Archive Network (CRAN)", 166 | "url": "https://cran.r-project.org" 167 | }, 168 | "sameAs": "https://CRAN.R-project.org/package=knitr" 169 | }, 170 | { 171 | "@type": "SoftwareApplication", 172 | "identifier": "mapproj", 173 | "name": "mapproj", 174 | "provider": { 175 | "@id": "https://cran.r-project.org", 176 | "@type": "Organization", 177 | "name": "Comprehensive R Archive Network (CRAN)", 178 | "url": "https://cran.r-project.org" 179 | }, 180 | "sameAs": "https://CRAN.R-project.org/package=mapproj" 181 | }, 182 | { 183 | "@type": "SoftwareApplication", 184 | "identifier": "maps", 185 | "name": "maps", 186 | "provider": { 187 | "@id": "https://cran.r-project.org", 188 | "@type": "Organization", 189 | "name": "Comprehensive R Archive Network (CRAN)", 190 | "url": 
"https://cran.r-project.org" 191 | }, 192 | "sameAs": "https://CRAN.R-project.org/package=maps" 193 | }, 194 | { 195 | "@type": "SoftwareApplication", 196 | "identifier": "rmarkdown", 197 | "name": "rmarkdown", 198 | "provider": { 199 | "@id": "https://cran.r-project.org", 200 | "@type": "Organization", 201 | "name": "Comprehensive R Archive Network (CRAN)", 202 | "url": "https://cran.r-project.org" 203 | }, 204 | "sameAs": "https://CRAN.R-project.org/package=rmarkdown" 205 | }, 206 | { 207 | "@type": "SoftwareApplication", 208 | "identifier": "roxyglobals", 209 | "name": "roxyglobals", 210 | "provider": { 211 | "@id": "https://cran.r-project.org", 212 | "@type": "Organization", 213 | "name": "Comprehensive R Archive Network (CRAN)", 214 | "url": "https://cran.r-project.org" 215 | }, 216 | "sameAs": "https://CRAN.R-project.org/package=roxyglobals" 217 | }, 218 | { 219 | "@type": "SoftwareApplication", 220 | "identifier": "spelling", 221 | "name": "spelling", 222 | "provider": { 223 | "@id": "https://cran.r-project.org", 224 | "@type": "Organization", 225 | "name": "Comprehensive R Archive Network (CRAN)", 226 | "url": "https://cran.r-project.org" 227 | }, 228 | "sameAs": "https://CRAN.R-project.org/package=spelling" 229 | }, 230 | { 231 | "@type": "SoftwareApplication", 232 | "identifier": "testthat", 233 | "name": "testthat", 234 | "provider": { 235 | "@id": "https://cran.r-project.org", 236 | "@type": "Organization", 237 | "name": "Comprehensive R Archive Network (CRAN)", 238 | "url": "https://cran.r-project.org" 239 | }, 240 | "sameAs": "https://CRAN.R-project.org/package=testthat" 241 | }, 242 | { 243 | "@type": "SoftwareApplication", 244 | "identifier": "tidyr", 245 | "name": "tidyr", 246 | "provider": { 247 | "@id": "https://cran.r-project.org", 248 | "@type": "Organization", 249 | "name": "Comprehensive R Archive Network (CRAN)", 250 | "url": "https://cran.r-project.org" 251 | }, 252 | "sameAs": "https://CRAN.R-project.org/package=tidyr" 253 | } 254 | ], 255 
| "softwareRequirements": { 256 | "1": { 257 | "@type": "SoftwareApplication", 258 | "identifier": "R", 259 | "name": "R", 260 | "version": ">= 3.5.0" 261 | }, 262 | "2": { 263 | "@type": "SoftwareApplication", 264 | "identifier": "countrycode", 265 | "name": "countrycode", 266 | "provider": { 267 | "@id": "https://cran.r-project.org", 268 | "@type": "Organization", 269 | "name": "Comprehensive R Archive Network (CRAN)", 270 | "url": "https://cran.r-project.org" 271 | }, 272 | "sameAs": "https://CRAN.R-project.org/package=countrycode" 273 | }, 274 | "3": { 275 | "@type": "SoftwareApplication", 276 | "identifier": "curl", 277 | "name": "curl", 278 | "provider": { 279 | "@id": "https://cran.r-project.org", 280 | "@type": "Organization", 281 | "name": "Comprehensive R Archive Network (CRAN)", 282 | "url": "https://cran.r-project.org" 283 | }, 284 | "sameAs": "https://CRAN.R-project.org/package=curl" 285 | }, 286 | "4": { 287 | "@type": "SoftwareApplication", 288 | "identifier": "data.table", 289 | "name": "data.table", 290 | "version": ">= 1.15.4", 291 | "provider": { 292 | "@id": "https://cran.r-project.org", 293 | "@type": "Organization", 294 | "name": "Comprehensive R Archive Network (CRAN)", 295 | "url": "https://cran.r-project.org" 296 | }, 297 | "sameAs": "https://CRAN.R-project.org/package=data.table" 298 | }, 299 | "5": { 300 | "@type": "SoftwareApplication", 301 | "identifier": "R.utils", 302 | "name": "R.utils", 303 | "provider": { 304 | "@id": "https://cran.r-project.org", 305 | "@type": "Organization", 306 | "name": "Comprehensive R Archive Network (CRAN)", 307 | "url": "https://cran.r-project.org" 308 | }, 309 | "sameAs": "https://CRAN.R-project.org/package=R.utils" 310 | }, 311 | "6": { 312 | "@type": "SoftwareApplication", 313 | "identifier": "stats", 314 | "name": "stats" 315 | }, 316 | "7": { 317 | "@type": "SoftwareApplication", 318 | "identifier": "utils", 319 | "name": "utils" 320 | }, 321 | "8": { 322 | "@type": "SoftwareApplication", 323 | 
"identifier": "withr", 324 | "name": "withr", 325 | "provider": { 326 | "@id": "https://cran.r-project.org", 327 | "@type": "Organization", 328 | "name": "Comprehensive R Archive Network (CRAN)", 329 | "url": "https://cran.r-project.org" 330 | }, 331 | "sameAs": "https://CRAN.R-project.org/package=withr" 332 | }, 333 | "SystemRequirements": null 334 | }, 335 | "applicationCategory": "Tools", 336 | "isPartOf": "https://ropensci.org", 337 | "keywords": ["US-NCEI", "meteorological-data", "global-weather", "weather", "weather-data", "meteorology", "station-data", "surface-weather", "data-access", "US-NCDC", "r", "gsod", "ncdc", "weather-stations", "global-data", "ncei", "weather-information", "historical-weather", "historical-data", "daily-data", "daily-weather", "rstats", "r-package"], 338 | "fileSize": "4366.75KB", 339 | "citation": [ 340 | { 341 | "@type": "SoftwareSourceCode", 342 | "author": [ 343 | { 344 | "@type": "Person", 345 | "givenName": "Adam H.", 346 | "familyName": "Sparks" 347 | }, 348 | { 349 | "@type": "Person", 350 | "givenName": "Tomislav", 351 | "familyName": "Hengl" 352 | }, 353 | { 354 | "@type": "Person", 355 | "givenName": "Andrew", 356 | "familyName": "Nelson" 357 | } 358 | ], 359 | "name": "{GSODR}: Global Summary Daily Weather Data in R", 360 | "identifier": "10.5281/zenodo.1040727", 361 | "url": "https://CRAN.R-project.org/package=nasapower", 362 | "description": "R package version 4.1.3.9000", 363 | "@id": "https://doi.org/10.5281/zenodo.1040727", 364 | "sameAs": "https://doi.org/10.5281/zenodo.1040727" 365 | }, 366 | { 367 | "@type": "ScholarlyArticle", 368 | "datePublished": "2018", 369 | "author": [ 370 | { 371 | "@type": "Person", 372 | "givenName": "Adam H.", 373 | "familyName": "Sparks" 374 | } 375 | ], 376 | "name": "nasapower: A NASA POWER Global Meteorology, Surface Solar Energy and Climatology Data Client for R", 377 | "identifier": "10.21105/joss.01035", 378 | "pagination": "1035", 379 | "@id": 
"https://doi.org/10.21105/joss.01035", 380 | "sameAs": "https://doi.org/10.21105/joss.01035", 381 | "isPartOf": { 382 | "@type": "PublicationIssue", 383 | "issueNumber": "30", 384 | "datePublished": "2018", 385 | "isPartOf": { 386 | "@type": ["PublicationVolume", "Periodical"], 387 | "volumeNumber": "3", 388 | "name": "The Journal of Open Source Software" 389 | } 390 | } 391 | } 392 | ], 393 | "releaseNotes": "https://github.com/ropensci/GSODR/blob/master/NEWS.md", 394 | "readme": "https://github.com/ropensci/GSODR/blob/main/README.md", 395 | "contIntegration": ["https://github.com/ropensci/GSODR/actions/workflows/R-CMD-check.yaml", "https://app.codecov.io/gh/ropensci/GSODR"], 396 | "developmentStatus": "https://www.repostatus.org/#active", 397 | "review": { 398 | "@type": "Review", 399 | "url": "https://github.com/ropensci/software-review/issues/79", 400 | "provider": "https://ropensci.org" 401 | } 402 | } 403 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | # R CMD check results 2 | 3 | 0 errors | 0 warnings | 1 note 4 | 5 | - This is a new patch release that fixes issues with tests on CRAN servers. 6 | -------------------------------------------------------------------------------- /data-raw/README.md: -------------------------------------------------------------------------------- 1 | 2 | # GSODR data-raw contents 3 | 4 | ## Fetch isd-history 5 | 6 | This document details how the GSOD station history data file, 7 | ["isd-history.csv"](https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv), 8 | is fetched from the NCEI server and saved for inclusion in the _GSODR_ 9 | package in /data/isd_history.rda. These data are used for determining the years 10 | that a station reported data for filtering user requests before sending them to 11 | the server to reduce failed requests. 
12 | 13 | [fetch_isd-history.md](fetch_isd-history.md) 14 | -------------------------------------------------------------------------------- /data-raw/fetch_isd-history.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fetch and Clean 'isd_history.csv' File" 3 | author: "Adam H. Sparks" 4 | date: "`r Sys.Date()`" 5 | output: github_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE) 10 | ``` 11 | 12 | ```{r color, echo = FALSE, results='asis'} 13 | # crayon needs to be explicitly activated in Rmd 14 | options(crayon.enabled = TRUE) 15 | # Hooks needs to be set to deal with outputs 16 | # thanks to fansi logic 17 | old_hooks <- fansi::set_knit_hooks(knitr::knit_hooks, 18 | which = c("output", "message", "error")) 19 | ``` 20 | 21 | # Introduction 22 | 23 | The "isd_history.csv" file details GSOD station metadata. 24 | These data include the start and stop years used by {GSODR} to pre-check requests before querying the server for download and the country code used by {GSODR} when sub-setting for requests by country. 25 | The following checks are performed on the raw data file before inclusion in {GSODR}, 26 | 27 | - Check for valid lon and lat values; 28 | 29 | - isd_history where latitude or longitude are `NA` or both 0 are removed leaving only properly georeferenced stations, 30 | 31 | - isd_history where latitude is < -90˚ or > 90˚ are removed, 32 | 33 | - isd_history where longitude is < -180˚ or > 180˚ are removed. 34 | 35 | - A new field, STNID, a concatenation of the USAF and WBAN fields, is added. 
36 | 37 | # Data Processing 38 | 39 | ## Set up workspace 40 | 41 | ```{r load_libs, echo=TRUE, message=FALSE, output=FALSE, warning=FALSE} 42 | library("sessioninfo") 43 | library("skimr") 44 | library("countrycode") 45 | library("data.table") 46 | ``` 47 | 48 | ## Download and clean data 49 | 50 | ```{r download_NE_data, echo=TRUE, message=FALSE, warning=FALSE} 51 | # download data 52 | new_isd_history <- fread("https://www1.ncdc.noaa.gov/pub/data/noaa/isd-history.csv") 53 | ``` 54 | 55 | ## Add/drop columns and save to disk 56 | 57 | ```{r clean_and_reformat, echo=TRUE, cache=FALSE} 58 | # pad WBAN where necessary 59 | new_isd_history[, WBAN := sprintf("%05d", WBAN)] 60 | 61 | # add STNID column 62 | new_isd_history[, STNID := paste(USAF, WBAN, sep = "-")] 63 | setcolorder(new_isd_history, "STNID") 64 | setnames(new_isd_history, "STATION NAME", "NAME") 65 | 66 | # remove stations where LAT or LON is NA 67 | new_isd_history <- na.omit(new_isd_history, cols = c("LAT", "LON")) 68 | 69 | # remove extra columns 70 | new_isd_history[, c("USAF", "WBAN", "ICAO") := NULL] 71 | ``` 72 | 73 | ## Add country names based on FIPS 74 | 
```{r merge-country, eval=TRUE, message=FALSE}
# attach the country names and ISO codes that match each station's FIPS code
new_isd_history <-
  new_isd_history[setDT(countrycode::codelist), on = c("CTRY" = "fips")]

new_isd_history <- new_isd_history[, c(
  "STNID",
  "NAME",
  "LAT",
  "LON",
  "ELEV(M)",
  "CTRY",
  "STATE",
  "BEGIN",
  "END",
  "country.name.en",
  "iso2c",
  "iso3c"
)]

# clean data
new_isd_history[new_isd_history == -999] <- NA
new_isd_history[new_isd_history == -999.9] <- NA

# keep only stations that have both coordinates
new_isd_history <-
  new_isd_history[!is.na(new_isd_history$LAT) &
                    !is.na(new_isd_history$LON),]

# remove stations where latitude and longitude are *both* 0, as described in
# the Introduction; a station with only one coordinate equal to 0 is kept
new_isd_history <-
  new_isd_history[!(new_isd_history$LAT == 0 &
                      new_isd_history$LON == 0),]

# remove stations with latitude < -90 or > 90; the bounds themselves are valid
new_isd_history <-
  new_isd_history[new_isd_history$LAT >= -90 &
                    new_isd_history$LAT <= 90,]

# remove stations with longitude < -180 or > 180; the bounds themselves are
# valid
new_isd_history <-
  new_isd_history[new_isd_history$LON >= -180 &
                    new_isd_history$LON <= 180,]

# set colnames to upper case
names(new_isd_history) <- toupper(names(new_isd_history))
setnames(new_isd_history,
         old = "COUNTRY.NAME.EN",
         new = "COUNTRY_NAME")

# set country names to be upper case for easier internal verifications
new_isd_history[, COUNTRY_NAME := toupper(COUNTRY_NAME)]

# set key for joins when processing CSV files
setkeyv(new_isd_history, "STNID")
```
122 | 123 | ## Show changes from last release 124 | 125 | ```{r diff-codes} 126 | # ensure we aren't using a locally installed dev version 127 | install.packages("GSODR", repos = "https://cloud.r-project.org/") 128 | load(system.file("extdata", "isd_history.rda", package = "GSODR")) 129 | 130 | # select only the cols of interest 131 | x <- names(isd_history) 132 | new_isd_history <- new_isd_history[, ..x] 133 | 134 | (isd_diff <- diffobj::diffPrint(new_isd_history, isd_history)) 135 | 136 | rm(isd_history) 137 | 138 | isd_history <- new_isd_history 139 | ``` 140 | 141 | ## View and save the data 142 | 143 | ```{r view-and-save} 144 | str(isd_history) 145 | 146 | # write rda file to disk for use with GSODR package 147 | save(isd_history, 148 | file = "../inst/extdata/isd_history.rda", 149 | compress = "bzip2") 150 | 151 | save(isd_diff, 152 | file = "../inst/extdata/isd_diff.rda", 153 | compress = "bzip2") 154 | ``` 155 | 156 | # Notes 157 | 158 | ## NOAA policy 159 | 160 | Users of these data should take into account the following (from the [NCEI website](https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.ncdc:C00516)): 161 | 162 | > The following data and products may have conditions placed on their international commercial use. They can be used within the U.S. or for non-commercial international activities without restriction. The non-U.S. data cannot be redistributed for commercial purposes. 
Re-distribution of these data by others must provide this same notification. A log of IP addresses accessing these data and products will be maintained and may be made available to data providers. 163 | > For details, please consult: [WMO Resolution 40. NOAA Policy](https://community.wmo.int/resolution-40) 164 | 165 | ## R System Information 166 | 167 | ```{r system_information, echo=FALSE} 168 | session_info() 169 | ``` 170 | -------------------------------------------------------------------------------- /flir/config.yml: -------------------------------------------------------------------------------- 1 | keep: 2 | - any_duplicated 3 | - any_is_na 4 | - class_equals 5 | - condition_message 6 | - double_assignment 7 | - duplicate_argument 8 | - empty_assignment 9 | - equal_assignment 10 | - equals_na 11 | - expect_comparison 12 | - expect_identical 13 | - expect_length 14 | - expect_named 15 | - expect_not 16 | - expect_null 17 | - expect_true_false 18 | - expect_type 19 | - for_loop_index 20 | - function_return 21 | - implicit_assignment 22 | - is_numeric 23 | - length_levels 24 | - length_test 25 | - lengths 26 | - library_call 27 | - list_comparison 28 | - literal_coercion 29 | - matrix_apply 30 | - missing_argument 31 | - nested_ifelse 32 | - numeric_leading_zero 33 | - outer_negation 34 | - package_hooks 35 | - paste 36 | - redundant_equals 37 | - redundant_ifelse 38 | - rep_len 39 | - right_assignment 40 | - sample_int 41 | - semicolon 42 | - seq 43 | - sort 44 | - stopifnot_all 45 | - T_and_F_symbol 46 | - todo_comment 47 | - undesirable_function 48 | - undesirable_operator 49 | - unnecessary_nesting 50 | - unreachable_code 51 | - which_grepl 52 | -------------------------------------------------------------------------------- /flir/rules/builtin/T_and_F_symbol.yml: -------------------------------------------------------------------------------- 1 | id: true_false_symbol 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: T 6 | kind: identifier 7 | 
not: 8 | any: 9 | - precedes: 10 | any: 11 | - pattern: <- 12 | - pattern: = 13 | - regex: ^~$ 14 | - follows: 15 | any: 16 | - pattern: $ 17 | - regex: ^~$ 18 | - inside: 19 | any: 20 | - kind: parameter 21 | - kind: call 22 | - kind: binary_operator 23 | follows: 24 | regex: ^~$ 25 | stopBy: end 26 | stopBy: 27 | kind: 28 | argument 29 | fix: TRUE 30 | message: Use TRUE instead of the symbol T. 31 | 32 | --- 33 | 34 | id: true_false_symbol-2 35 | language: r 36 | severity: warning 37 | rule: 38 | pattern: F 39 | kind: identifier 40 | not: 41 | any: 42 | - precedes: 43 | any: 44 | - pattern: <- 45 | - pattern: = 46 | - regex: ^~$ 47 | - follows: 48 | any: 49 | - pattern: $ 50 | - regex: ^~$ 51 | - inside: 52 | any: 53 | - kind: parameter 54 | - kind: call 55 | - kind: binary_operator 56 | follows: 57 | regex: ^~$ 58 | stopBy: end 59 | stopBy: 60 | kind: 61 | argument 62 | fix: FALSE 63 | message: Use FALSE instead of the symbol F. 64 | 65 | --- 66 | 67 | id: true_false_symbol-3 68 | language: r 69 | severity: warning 70 | rule: 71 | pattern: T 72 | kind: identifier 73 | precedes: 74 | any: 75 | - pattern: <- 76 | - pattern: = 77 | not: 78 | inside: 79 | kind: argument 80 | message: Don't use T as a variable name, as it can break code relying on T being TRUE. 81 | 82 | --- 83 | 84 | id: true_false_symbol-4 85 | language: r 86 | severity: warning 87 | rule: 88 | pattern: F 89 | kind: identifier 90 | precedes: 91 | any: 92 | - pattern: <- 93 | - pattern: = 94 | not: 95 | inside: 96 | kind: argument 97 | message: Don't use F as a variable name, as it can break code relying on F being FALSE. 
98 | -------------------------------------------------------------------------------- /flir/rules/builtin/absolute_path.yml: -------------------------------------------------------------------------------- 1 | # id: absolute_path-1 2 | # language: r 3 | # severity: warning 4 | # rule: 5 | # kind: string_content 6 | # any: 7 | # - regex: '^~[[:alpha:]]' 8 | # - regex: '^~/[[:alpha:]]' 9 | # - regex: '^[[:alpha:]]:' 10 | # - regex: '^(/|~)$' 11 | # - regex: '^/[[:alpha:]]' 12 | # - regex: '^\\' 13 | # message: Do not use absolute paths. 14 | -------------------------------------------------------------------------------- /flir/rules/builtin/any_duplicated.yml: -------------------------------------------------------------------------------- 1 | id: any_duplicated-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: any($$$ duplicated($MYVAR) $$$) 6 | fix: anyDuplicated(~~MYVAR~~) > 0 7 | message: anyDuplicated(x, ...) > 0 is better than any(duplicated(x), ...). 8 | 9 | --- 10 | 11 | id: any_duplicated-2 12 | language: r 13 | severity: warning 14 | rule: 15 | any: 16 | - pattern: length(unique($MYVAR)) == length($MYVAR) 17 | - pattern: length($MYVAR) == length(unique($MYVAR)) 18 | fix: anyDuplicated(~~MYVAR~~) == 0L 19 | message: anyDuplicated(x) == 0L is better than length(unique(x)) == length(x). 20 | 21 | --- 22 | 23 | id: any_duplicated-3 24 | language: r 25 | severity: warning 26 | rule: 27 | pattern: length(unique($MYVAR)) != length($MYVAR) 28 | fix: anyDuplicated(~~MYVAR~~) != 0L 29 | message: | 30 | Use anyDuplicated(x) != 0L (or > or <) instead of length(unique(x)) != length(x) 31 | (or > or <). 
32 | 33 | --- 34 | 35 | id: any_duplicated-4 36 | language: r 37 | severity: warning 38 | rule: 39 | any: 40 | - pattern: nrow($DATA) != length(unique($DATA$µCOL)) 41 | - pattern: length(unique($DATA$µCOL)) != nrow($DATA) 42 | fix: anyDuplicated(~~DATA~~$~~COL~~) != 0L 43 | message: | 44 | anyDuplicated(DF$col) != 0L is better than length(unique(DF$col)) != nrow(DF) 45 | 46 | --- 47 | 48 | # id: any_duplicated-5 49 | # language: r 50 | # severity: warning 51 | # rule: 52 | # any: 53 | # - pattern: 54 | # context: nrow($DATA) != length(unique($DATA[["µCOL"]])) 55 | # strictness: ast 56 | # - pattern: 57 | # context: length(unique($DATA[["µCOL"]])) != nrow($DATA) 58 | # strictness: ast 59 | # fix: anyDuplicated(~~DATA~~[["~~COL~~"]]) != 0L 60 | # message: | 61 | # anyDuplicated(DF[["col"]]) != 0L is better than length(unique(DF[["col"]])) != nrow(DF) 62 | # 63 | # --- 64 | 65 | id: any_duplicated-6 66 | language: r 67 | severity: warning 68 | rule: 69 | any: 70 | - pattern: nrow($DATA) == length(unique($DATA$µCOL)) 71 | - pattern: length(unique($DATA$µCOL)) == nrow($DATA) 72 | fix: anyDuplicated(~~DATA~~$~~COL~~) == 0L 73 | message: | 74 | anyDuplicated(DF$col) == 0L is better than length(unique(DF$col)) == nrow(DF) 75 | 76 | # --- 77 | # 78 | # id: any_duplicated-7 79 | # language: r 80 | # severity: warning 81 | # rule: 82 | # any: 83 | # - pattern: 84 | # context: nrow($DATA) == length(unique($DATA[["µCOL"]])) 85 | # strictness: ast 86 | # - pattern: 87 | # context: length(unique($DATA[["µCOL"]])) == nrow($DATA) 88 | # strictness: ast 89 | # fix: anyDuplicated(~~DATA~~[["~~COL~~"]]) == 0L 90 | # message: | 91 | # anyDuplicated(DF[["col"]]) == 0L is better than length(unique(DF[["col"]])) == nrow(DF) 92 | -------------------------------------------------------------------------------- /flir/rules/builtin/any_is_na.yml: -------------------------------------------------------------------------------- 1 | id: any_na-1 2 | language: r 3 | severity: warning 4 | rule: 5 
| any: 6 | - pattern: any(is.na($MYVAR)) 7 | - pattern: any(na.rm = $NARM, is.na($MYVAR)) 8 | - pattern: any(is.na($MYVAR), na.rm = $NARM) 9 | fix: anyNA(~~MYVAR~~) 10 | message: anyNA(x) is better than any(is.na(x)). 11 | 12 | --- 13 | 14 | id: any_na-2 15 | language: r 16 | severity: warning 17 | rule: 18 | any: 19 | - pattern: NA %in% $ELEM 20 | - pattern: NA_real_ %in% $ELEM 21 | - pattern: NA_logical_ %in% $ELEM 22 | - pattern: NA_character_ %in% $ELEM 23 | - pattern: NA_complex_ %in% $ELEM 24 | fix: anyNA(~~ELEM~~) 25 | message: anyNA(x) is better than NA %in% x. 26 | -------------------------------------------------------------------------------- /flir/rules/builtin/class_equals.yml: -------------------------------------------------------------------------------- 1 | id: class_equals-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: class($VAR) == $CLASSNAME 7 | - pattern: $CLASSNAME == class($VAR) 8 | not: 9 | inside: 10 | kind: argument 11 | fix: inherits(~~VAR~~, ~~CLASSNAME~~) 12 | message: Instead of comparing class(x) with ==, use inherits(x, 'class-name') or is.<class> or is(x, 'class') 13 | 14 | --- 15 | 16 | id: class_equals-2 17 | language: r 18 | severity: warning 19 | rule: 20 | any: 21 | - pattern: class($VAR) != $CLASSNAME 22 | - pattern: $CLASSNAME != class($VAR) 23 | not: 24 | inside: 25 | kind: argument 26 | fix: "!inherits(~~VAR~~, ~~CLASSNAME~~)" 27 | message: "Instead of comparing class(x) with !=, use !inherits(x, 'class-name') or is.<class> or is(x, 'class')" 28 | 29 | --- 30 | 31 | id: class_equals-3 32 | language: r 33 | severity: warning 34 | rule: 35 | any: 36 | - pattern: $CLASSNAME %in% class($VAR) 37 | - pattern: class($VAR) %in% $CLASSNAME 38 | constraints: 39 | CLASSNAME: 40 | kind: string 41 | fix: inherits(~~VAR~~, ~~CLASSNAME~~) 42 | message: Instead of comparing class(x) with %in%, use inherits(x, 'class-name') or is.<class>
or is(x, 'class') 43 | -------------------------------------------------------------------------------- /flir/rules/builtin/condition_message.yml: -------------------------------------------------------------------------------- 1 | id: condition_message-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: $FUN($$$ paste0($$$MSG) $$$) 6 | kind: call 7 | not: 8 | any: 9 | - has: 10 | kind: extract_operator 11 | - has: 12 | stopBy: end 13 | kind: argument 14 | has: 15 | field: name 16 | regex: "^(collapse|recycle0)$" # grouped so both anchors apply to each alternative 17 | stopBy: end 18 | constraints: 19 | FUN: 20 | regex: "^(packageStartupMessage|stop|warning)$" 21 | fix: ~~FUN~~(~~MSG~~) 22 | message: | 23 | ~~FUN~~(paste0(...)) can be rewritten as ~~FUN~~(...). 24 | -------------------------------------------------------------------------------- /flir/rules/builtin/double_assignment.yml: -------------------------------------------------------------------------------- 1 | id: right_double_assignment 2 | language: r 3 | severity: hint 4 | rule: 5 | pattern: $RHS ->> $LHS 6 | has: 7 | field: rhs 8 | kind: identifier 9 | message: ->> can have hard-to-predict behavior; prefer assigning to a 10 | specific environment instead (with assign() or <-). 11 | 12 | --- 13 | 14 | id: left_double_assignment 15 | language: r 16 | severity: hint 17 | rule: 18 | pattern: $LHS <<- $RHS 19 | has: 20 | field: lhs 21 | kind: identifier 22 | message: <<- can have hard-to-predict behavior; prefer assigning to a 23 | specific environment instead (with assign() or <-). 24 | -------------------------------------------------------------------------------- /flir/rules/builtin/duplicate_argument.yml: -------------------------------------------------------------------------------- 1 | id: duplicate_argument-1 2 | language: r 3 | severity: warning 4 | rule: 5 | # Look for a function argument...
6 | kind: argument 7 | any: 8 | - has: 9 | kind: identifier 10 | field: name 11 | pattern: $OBJ 12 | - has: 13 | kind: string_content 14 | pattern: $OBJ 15 | stopBy: end 16 | 17 | # ... that follows other argument(s) with the same name... 18 | follows: 19 | kind: argument 20 | stopBy: end 21 | has: 22 | stopBy: end 23 | kind: identifier 24 | field: name 25 | pattern: $OBJ 26 | 27 | # ... inside a function call (or a subset environment for data.table)... 28 | inside: 29 | kind: arguments 30 | follows: 31 | any: 32 | - kind: identifier 33 | pattern: $FUN 34 | - kind: string 35 | inside: 36 | any: 37 | - kind: call 38 | - kind: subset 39 | 40 | # ... that is not a function listed below. 41 | constraints: 42 | FUN: 43 | not: 44 | regex: ^(mutate|transmute)$ 45 | 46 | message: Avoid duplicate arguments in function calls. 47 | -------------------------------------------------------------------------------- /flir/rules/builtin/empty_assignment.yml: -------------------------------------------------------------------------------- 1 | id: empty_assignment-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: $OBJ <- {} 7 | - pattern: $OBJ <- {$CONTENT} 8 | - pattern: $OBJ = {} 9 | - pattern: $OBJ = {$CONTENT} 10 | constraints: 11 | CONTENT: 12 | regex: ^\s+$ 13 | message: | 14 | Assign NULL explicitly or, whenever possible, allocate the empty object with 15 | the right type and size. 16 | -------------------------------------------------------------------------------- /flir/rules/builtin/equal_assignment.yml: -------------------------------------------------------------------------------- 1 | id: equal_assignment 2 | language: r 3 | severity: hint 4 | rule: 5 | pattern: $LHS = $RHS 6 | has: 7 | field: lhs 8 | kind: identifier 9 | fix: ~~LHS~~ <- ~~RHS~~ 10 | message: Use <-, not =, for assignment. 
11 | -------------------------------------------------------------------------------- /flir/rules/builtin/equals_na.yml: -------------------------------------------------------------------------------- 1 | id: equals_na 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: $MYVAR == NA 7 | - pattern: $MYVAR == NA_integer_ 8 | - pattern: $MYVAR == NA_real_ 9 | - pattern: $MYVAR == NA_complex_ 10 | - pattern: $MYVAR == NA_character_ 11 | - pattern: NA == $MYVAR 12 | - pattern: NA_integer_ == $MYVAR 13 | - pattern: NA_real_ == $MYVAR 14 | - pattern: NA_complex_ == $MYVAR 15 | - pattern: NA_character_ == $MYVAR 16 | fix: is.na(~~MYVAR~~) 17 | message: Use is.na for comparisons to NA (not == or !=). 18 | 19 | --- 20 | 21 | id: equals_na-2 22 | language: r 23 | severity: warning 24 | rule: 25 | any: # every pattern below is a != comparison, so the fix must negate is.na() 26 | - pattern: $MYVAR != NA 27 | - pattern: $MYVAR != NA_integer_ 28 | - pattern: $MYVAR != NA_real_ 29 | - pattern: $MYVAR != NA_complex_ 30 | - pattern: $MYVAR != NA_character_ 31 | - pattern: NA != $MYVAR 32 | - pattern: NA_integer_ != $MYVAR 33 | - pattern: NA_real_ != $MYVAR 34 | - pattern: NA_complex_ != $MYVAR 35 | - pattern: NA_character_ != $MYVAR 36 | fix: "!is.na(~~MYVAR~~)" 37 | message: Use is.na for comparisons to NA (not == or !=). 38 | -------------------------------------------------------------------------------- /flir/rules/builtin/expect_comparison.yml: -------------------------------------------------------------------------------- 1 | id: expect_comparison-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: expect_true($X > $Y) 6 | fix: expect_gt(~~X~~, ~~Y~~) 7 | message: expect_gt(x, y) is better than expect_true(x > y). 8 | 9 | --- 10 | 11 | id: expect_comparison-2 12 | language: r 13 | severity: warning 14 | rule: 15 | pattern: expect_true($X >= $Y) 16 | fix: expect_gte(~~X~~, ~~Y~~) 17 | message: expect_gte(x, y) is better than expect_true(x >= y).
18 | 19 | --- 20 | 21 | id: expect_comparison-3 22 | language: r 23 | severity: warning 24 | rule: 25 | pattern: expect_true($X < $Y) 26 | fix: expect_lt(~~X~~, ~~Y~~) 27 | message: expect_lt(x, y) is better than expect_true(x < y). 28 | 29 | --- 30 | 31 | id: expect_comparison-4 32 | language: r 33 | severity: warning 34 | rule: 35 | pattern: expect_true($X <= $Y) 36 | fix: expect_lte(~~X~~, ~~Y~~) 37 | message: expect_lte(x, y) is better than expect_true(x <= y). 38 | -------------------------------------------------------------------------------- /flir/rules/builtin/expect_identical.yml: -------------------------------------------------------------------------------- 1 | id: expect_identical-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: expect_true(identical($VAL1, $VAL2)) 6 | fix: expect_identical(~~VAL1~~, ~~VAL2~~) 7 | message: Use expect_identical(x, y) instead of expect_true(identical(x, y)). 8 | 9 | --- 10 | 11 | id: expect_identical-2 12 | language: r 13 | severity: warning 14 | rule: 15 | pattern: expect_equal($VAL1, $VAL2) 16 | fix: expect_identical(~~VAL1~~, ~~VAL2~~) 17 | constraints: 18 | VAL1: 19 | all: 20 | - not: 21 | has: 22 | stopBy: end 23 | kind: float 24 | regex: \. 25 | - not: 26 | regex: ^typeof 27 | - not: 28 | pattern: NULL 29 | VAL2: 30 | all: 31 | - not: 32 | has: 33 | stopBy: end 34 | kind: float 35 | regex: \. 36 | - not: 37 | regex: ^typeof 38 | - not: 39 | pattern: NULL 40 | message: | 41 | Use expect_identical(x, y) by default; resort to expect_equal() only when 42 | needed, e.g. when setting ignore_attr= or tolerance=. 
43 | -------------------------------------------------------------------------------- /flir/rules/builtin/expect_length.yml: -------------------------------------------------------------------------------- 1 | id: expect_length-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: $FUN(length($OBJ), $VALUE) 7 | - pattern: $FUN($VALUE, length($OBJ)) 8 | constraints: 9 | FUN: 10 | regex: ^(expect_identical|expect_equal)$ 11 | VALUE: 12 | not: 13 | regex: length\( 14 | fix: expect_length(~~OBJ~~, ~~VALUE~~) 15 | message: expect_length(x, n) is better than ~~FUN~~(length(x), n). 16 | -------------------------------------------------------------------------------- /flir/rules/builtin/expect_named.yml: -------------------------------------------------------------------------------- 1 | id: expect_named-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: 7 | context: expect_identical(names($OBJ), $VALUES) 8 | strictness: ast 9 | - pattern: 10 | context: expect_identical($VALUES, names($OBJ)) 11 | strictness: ast 12 | constraints: 13 | VALUES: 14 | not: 15 | regex: ^(colnames\(|rownames\(|dimnames\(|NULL|names\() 16 | has: 17 | kind: null 18 | fix: expect_named(~~OBJ~~, ~~VALUES~~) 19 | message: expect_named(x, n) is better than expect_identical(names(x), n). 20 | 21 | --- 22 | 23 | id: expect_named-2 24 | language: r 25 | severity: warning 26 | rule: 27 | any: 28 | - pattern: 29 | context: expect_equal(names($OBJ), $VALUES) 30 | strictness: ast 31 | - pattern: 32 | context: expect_equal($VALUES, names($OBJ)) 33 | strictness: ast 34 | constraints: 35 | VALUES: 36 | not: 37 | regex: ^(colnames\(|rownames\(|dimnames\(|NULL|names\() 38 | fix: expect_named(~~OBJ~~, ~~VALUES~~) 39 | message: expect_named(x, n) is better than expect_equal(names(x), n). 
40 | 41 | --- 42 | 43 | id: expect_named-3 44 | language: r 45 | severity: warning 46 | rule: 47 | any: 48 | - pattern: 49 | context: testthat::expect_identical(names($OBJ), $VALUES) 50 | strictness: ast 51 | - pattern: 52 | context: testthat::expect_identical($VALUES, names($OBJ)) 53 | strictness: ast 54 | constraints: 55 | VALUES: 56 | not: 57 | regex: ^(colnames\(|rownames\(|dimnames\(|NULL|names\() 58 | fix: testthat::expect_named(~~OBJ~~, ~~VALUES~~) 59 | message: expect_named(x, n) is better than expect_identical(names(x), n). 60 | 61 | --- 62 | 63 | id: expect_named-4 64 | language: r 65 | severity: warning 66 | rule: 67 | any: 68 | - pattern: 69 | context: testthat::expect_equal(names($OBJ), $VALUES) 70 | strictness: ast 71 | - pattern: 72 | context: testthat::expect_equal($VALUES, names($OBJ)) 73 | strictness: ast 74 | constraints: 75 | VALUES: 76 | not: 77 | regex: ^(colnames\(|rownames\(|dimnames\(|NULL|names\() 78 | fix: testthat::expect_named(~~OBJ~~, ~~VALUES~~) 79 | message: expect_named(x, n) is better than expect_equal(names(x), n). 80 | -------------------------------------------------------------------------------- /flir/rules/builtin/expect_not.yml: -------------------------------------------------------------------------------- 1 | id: expect_not-1 2 | language: r 3 | severity: warning 4 | rule: 5 | all: 6 | - pattern: expect_true(!$COND) 7 | - not: 8 | regex: '^expect_true\(!!' 9 | fix: expect_false(~~COND~~) 10 | message: expect_false(x) is better than expect_true(!x), and vice versa. 11 | 12 | --- 13 | 14 | id: expect_not-2 15 | language: r 16 | severity: warning 17 | rule: 18 | all: 19 | - pattern: expect_false(!$COND) 20 | - not: 21 | regex: '^expect_false\(!!' 22 | fix: expect_true(~~COND~~) 23 | message: expect_false(x) is better than expect_true(!x), and vice versa. 
24 | -------------------------------------------------------------------------------- /flir/rules/builtin/expect_null.yml: -------------------------------------------------------------------------------- 1 | id: expect_null-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: $FUN(NULL, $VALUES) 7 | - pattern: $FUN($VALUES, NULL) 8 | constraints: 9 | FUN: 10 | regex: ^(expect_identical|expect_equal)$ 11 | fix: expect_null(~~VALUES~~) 12 | message: expect_null(x) is better than ~~FUN~~(x, NULL). 13 | 14 | --- 15 | 16 | id: expect_null-2 17 | language: r 18 | severity: warning 19 | rule: 20 | pattern: expect_true(is.null($VALUES)) 21 | fix: expect_null(~~VALUES~~) 22 | message: expect_null(x) is better than expect_true(is.null(x)). 23 | -------------------------------------------------------------------------------- /flir/rules/builtin/expect_true_false.yml: -------------------------------------------------------------------------------- 1 | id: expect_true_false-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: $FUN(TRUE, $VALUES) 7 | - pattern: $FUN($VALUES, TRUE) 8 | constraints: 9 | FUN: 10 | regex: ^(expect_identical|expect_equal)$ 11 | fix: expect_true(~~VALUES~~) 12 | message: expect_true(x) is better than ~~FUN~~(x, TRUE). 13 | 14 | --- 15 | 16 | id: expect_true_false-2 17 | language: r 18 | severity: warning 19 | rule: 20 | any: 21 | - pattern: $FUN(FALSE, $VALUES) 22 | - pattern: $FUN($VALUES, FALSE) 23 | constraints: 24 | FUN: 25 | regex: ^(expect_identical|expect_equal)$ 26 | fix: expect_false(~~VALUES~~) 27 | message: expect_false(x) is better than ~~FUN~~(x, FALSE). 
28 | 29 | -------------------------------------------------------------------------------- /flir/rules/builtin/expect_type.yml: -------------------------------------------------------------------------------- 1 | id: expect_type-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: 7 | context: expect_identical(typeof($OBJ), $VALUES) 8 | strictness: ast 9 | - pattern: 10 | context: expect_identical($VALUES, typeof($OBJ)) 11 | strictness: ast 12 | constraints: 13 | VALUES: 14 | not: 15 | regex: typeof 16 | fix: expect_type(~~OBJ~~, ~~VALUES~~) 17 | message: expect_type(x, t) is better than expect_identical(typeof(x), t). 18 | 19 | --- 20 | 21 | id: expect_type-2 22 | language: r 23 | severity: warning 24 | rule: 25 | any: 26 | - pattern: 27 | context: expect_equal(typeof($OBJ), $VALUES) 28 | strictness: ast 29 | - pattern: 30 | context: expect_equal($VALUES, typeof($OBJ)) 31 | strictness: ast 32 | constraints: 33 | VALUES: 34 | not: 35 | regex: typeof 36 | fix: expect_type(~~OBJ~~, ~~VALUES~~) 37 | message: expect_type(x, t) is better than expect_equal(typeof(x), t). 38 | 39 | --- 40 | 41 | id: expect_type-3 42 | language: r 43 | severity: warning 44 | rule: 45 | pattern: expect_true($FUN($OBJ)) 46 | constraints: 47 | FUN: 48 | regex: ^is\. 49 | not: 50 | regex: data\.frame$ 51 | message: expect_type(x, t) is better than expect_true(is.<t>(x)). 52 | -------------------------------------------------------------------------------- /flir/rules/builtin/for_loop_index.yml: -------------------------------------------------------------------------------- 1 | id: for_loop_index-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: for ($IDX in $IDX) 6 | message: Don't re-use any sequence symbols as the index symbol in a for loop.
7 | 8 | --- 9 | 10 | id: for_loop_index-2 11 | language: r 12 | severity: warning 13 | rule: 14 | pattern: for ($IDX in $SEQ) 15 | constraints: 16 | SEQ: 17 | kind: call 18 | has: 19 | kind: arguments 20 | has: 21 | kind: argument 22 | stopBy: end 23 | has: 24 | kind: identifier 25 | field: value 26 | pattern: $IDX 27 | message: Don't re-use any sequence symbols as the index symbol in a for loop. 28 | -------------------------------------------------------------------------------- /flir/rules/builtin/function_return.yml: -------------------------------------------------------------------------------- 1 | id: function_return-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: return($OBJ <- $VAL) 7 | - pattern: return($OBJ <<- $VAL) 8 | - pattern: return($VAL -> $OBJ) 9 | - pattern: return($VAL ->> $OBJ) 10 | message: | 11 | Move the assignment outside of the return() clause, or skip assignment altogether. 12 | -------------------------------------------------------------------------------- /flir/rules/builtin/implicit_assignment.yml: -------------------------------------------------------------------------------- 1 | id: implicit_assignment-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: $RECEIVER <- $VALUE 7 | - pattern: $RECEIVER <<- $VALUE 8 | - pattern: $VALUE -> $RECEIVER 9 | - pattern: $VALUE ->> $RECEIVER 10 | inside: 11 | any: 12 | - kind: if_statement 13 | - kind: while_statement 14 | field: condition 15 | stopBy: end 16 | strictness: cst 17 | message: | 18 | Avoid implicit assignments in function calls. For example, instead of 19 | `if (x <- 1L) { ... }`, write `x <- 1L; if (x) { ... }`. 
20 | 21 | --- 22 | 23 | id: implicit_assignment-2 24 | language: r 25 | severity: warning 26 | rule: 27 | any: 28 | - pattern: $RECEIVER <- $VALUE 29 | - pattern: $RECEIVER <<- $VALUE 30 | - pattern: $VALUE -> $RECEIVER 31 | - pattern: $VALUE ->> $RECEIVER 32 | inside: 33 | kind: for_statement 34 | field: sequence 35 | stopBy: end 36 | strictness: cst 37 | message: | 38 | Avoid implicit assignments in function calls. For example, instead of 39 | `if (x <- 1L) { ... }`, write `x <- 1L; if (x) { ... }`. 40 | 41 | # --- 42 | # 43 | # id: implicit_assignment-3 44 | # language: r 45 | # severity: warning 46 | # rule: 47 | # any: 48 | # - pattern: $RECEIVER <- $VALUE 49 | # - pattern: $RECEIVER <<- $VALUE 50 | # - pattern: $VALUE -> $RECEIVER 51 | # - pattern: $VALUE ->> $RECEIVER 52 | # inside: 53 | # kind: argument 54 | # field: value 55 | # strictness: cst 56 | # stopBy: end 57 | # not: 58 | # inside: 59 | # kind: call 60 | # field: function 61 | # has: 62 | # kind: identifier 63 | # regex: ^(lapply)$ 64 | # stopBy: end 65 | # strictness: cst 66 | # message: | 67 | # Avoid implicit assignments in function calls. For example, instead of 68 | # `if (x <- 1L) { ... }`, write `x <- 1L; if (x) { ... }`. 69 | 70 | -------------------------------------------------------------------------------- /flir/rules/builtin/is_numeric.yml: -------------------------------------------------------------------------------- 1 | id: is_numeric-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: is.numeric($VAR) || is.integer($VAR) 7 | - pattern: is.integer($VAR) || is.numeric($VAR) 8 | message: is.numeric(x) || is.integer(x) can be simplified to is.numeric(x). Use 9 | is.double(x) to test for objects stored as 64-bit floating point. 
10 | 11 | --- 12 | 13 | id: is_numeric-2 14 | language: r 15 | severity: warning 16 | rule: 17 | any: 18 | - pattern: 19 | context: class($VAR) %in% c("numeric", "integer") 20 | strictness: ast 21 | - pattern: 22 | context: class($VAR) %in% c("integer", "numeric") 23 | strictness: ast 24 | message: class(x) %in% c("numeric", "integer") can be simplified to is.numeric(x). Use 25 | is.double(x) to test for objects stored as 64-bit floating point. 26 | -------------------------------------------------------------------------------- /flir/rules/builtin/length_levels.yml: -------------------------------------------------------------------------------- 1 | id: length_levels-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: length(levels($VAR)) 6 | fix: nlevels(~~VAR~~) 7 | message: nlevels(x) is better than length(levels(x)). 8 | -------------------------------------------------------------------------------- /flir/rules/builtin/length_test.yml: -------------------------------------------------------------------------------- 1 | # Strangely, having something like pattern: length($VAR $OP $VAR2) doesn't work 2 | 3 | id: length_test-1 4 | language: r 5 | severity: warning 6 | rule: 7 | pattern: length($VAR == $VAR2) 8 | fix: length(~~VAR~~) == ~~VAR2~~ 9 | message: Checking the length of a logical vector is likely a mistake. 10 | 11 | --- 12 | 13 | id: length_test-2 14 | language: r 15 | severity: warning 16 | rule: 17 | pattern: length($VAR != $VAR2) 18 | fix: length(~~VAR~~) != ~~VAR2~~ 19 | message: Checking the length of a logical vector is likely a mistake. 20 | 21 | --- 22 | 23 | id: length_test-3 24 | language: r 25 | severity: warning 26 | rule: 27 | pattern: length($VAR > $VAR2) 28 | fix: length(~~VAR~~) > ~~VAR2~~ 29 | message: Checking the length of a logical vector is likely a mistake.
30 | 31 | --- 32 | 33 | id: length_test-4 34 | language: r 35 | severity: warning 36 | rule: 37 | pattern: length($VAR >= $VAR2) 38 | fix: length(~~VAR~~) >= ~~VAR2~~ 39 | message: Checking the length of a logical vector is likely a mistake. 40 | 41 | --- 42 | 43 | id: length_test-5 44 | language: r 45 | severity: warning 46 | rule: 47 | pattern: length($VAR < $VAR2) 48 | fix: length(~~VAR~~) < ~~VAR2~~ 49 | message: Checking the length of a logical vector is likely a mistake. 50 | 51 | --- 52 | 53 | id: length_test-6 54 | language: r 55 | severity: warning 56 | rule: 57 | pattern: length($VAR <= $VAR2) 58 | fix: length(~~VAR~~) <= ~~VAR2~~ 59 | message: Checking the length of a logical vector is likely a mistake. 60 | -------------------------------------------------------------------------------- /flir/rules/builtin/lengths.yml: -------------------------------------------------------------------------------- 1 | id: sapply_lengths-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: sapply($MYVAR, length) 7 | - pattern: sapply(FUN = length, $MYVAR) 8 | - pattern: sapply($MYVAR, FUN = length) 9 | - pattern: vapply($MYVAR, length $$$) 10 | 11 | - pattern: map_dbl($MYVAR, length) 12 | - pattern: map_dbl($MYVAR, .f = length) 13 | - pattern: map_dbl(.f = length, $MYVAR) 14 | - pattern: map_int($MYVAR, length) 15 | - pattern: map_int($MYVAR, .f = length) 16 | - pattern: map_int(.f = length, $MYVAR) 17 | 18 | - pattern: purrr::map_dbl($MYVAR, length) 19 | - pattern: purrr::map_dbl($MYVAR, .f = length) 20 | - pattern: purrr::map_dbl(.f = length, $MYVAR) 21 | - pattern: purrr::map_int($MYVAR, length) 22 | - pattern: purrr::map_int($MYVAR, .f = length) 23 | - pattern: purrr::map_int(.f = length, $MYVAR) 24 | fix: lengths(~~MYVAR~~) 25 | message: Use lengths() to find the length of each element in a list. 
26 | 27 | --- 28 | 29 | id: sapply_lengths-2 30 | language: r 31 | severity: warning 32 | rule: 33 | any: 34 | - pattern: $MYVAR |> sapply(length) 35 | - pattern: $MYVAR |> sapply(FUN = length) 36 | - pattern: $MYVAR |> vapply(length $$$) 37 | - pattern: $MYVAR |> map_int(length) 38 | - pattern: $MYVAR |> map_int(length $$$) 39 | - pattern: $MYVAR |> purrr::map_int(length) 40 | - pattern: $MYVAR |> purrr::map_int(length $$$) 41 | fix: ~~MYVAR~~ |> lengths() 42 | message: Use lengths() to find the length of each element in a list. 43 | 44 | --- 45 | 46 | id: sapply_lengths-3 47 | language: r 48 | severity: warning 49 | rule: 50 | any: 51 | - pattern: $MYVAR %>% sapply(length) 52 | - pattern: $MYVAR %>% sapply(FUN = length) 53 | - pattern: $MYVAR %>% vapply(length $$$) 54 | - pattern: $MYVAR %>% map_int(length) 55 | - pattern: $MYVAR %>% map_int(length $$$) 56 | - pattern: $MYVAR %>% purrr::map_int(length) 57 | - pattern: $MYVAR %>% purrr::map_int(length $$$) 58 | fix: ~~MYVAR~~ %>% lengths() 59 | message: Use lengths() to find the length of each element in a list. 60 | -------------------------------------------------------------------------------- /flir/rules/builtin/library_call.yml: -------------------------------------------------------------------------------- 1 | id: library_call 2 | language: r 3 | severity: warning 4 | rule: 5 | kind: call 6 | has: 7 | regex: ^(library|require)$ # grouped so only the exact names library/require match 8 | kind: identifier 9 | follows: 10 | not: 11 | any: 12 | - kind: call 13 | has: 14 | regex: ^(library|require)$ 15 | kind: identifier 16 | - kind: comment 17 | not: 18 | inside: 19 | stopBy: end 20 | any: 21 | - kind: function_definition 22 | - kind: call 23 | has: 24 | pattern: suppressPackageStartupMessages 25 | kind: identifier 26 | message: Move all library/require calls to the top of the script.
27 | -------------------------------------------------------------------------------- /flir/rules/builtin/list_comparison.yml: -------------------------------------------------------------------------------- 1 | id: list_comparison-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: $FUN($$$) > $$$ 7 | - pattern: $FUN($$$) >= $$$ 8 | - pattern: $FUN($$$) < $$$ 9 | - pattern: $FUN($$$) <= $$$ 10 | - pattern: $FUN($$$) == $$$ 11 | - pattern: $FUN($$$) != $$$ 12 | constraints: 13 | FUN: 14 | regex: ^(lapply|map|Map|\.mapply)$ 15 | message: | 16 | The output of ~~FUN~~(), a list, is being coerced for comparison. 17 | Instead, use a mapper that generates a vector with the correct type directly, 18 | for example vapply(x, FUN, character(1L)) if the output is a string. 19 | -------------------------------------------------------------------------------- /flir/rules/builtin/literal_coercion.yml: -------------------------------------------------------------------------------- 1 | id: literal_coercion-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: $FUN($VALUE) 6 | constraints: 7 | VALUE: 8 | kind: argument 9 | has: 10 | kind: float 11 | not: 12 | regex: 'e' 13 | FUN: 14 | regex: ^(int|as\.integer)$ 15 | fix: ~~VALUE~~L 16 | message: | 17 | Use ~~VALUE~~L instead of ~~FUN~~(~~VALUE~~), i.e., use literals directly 18 | where possible, instead of coercion. 19 | 20 | --- 21 | 22 | id: literal_coercion-2 23 | language: r 24 | severity: warning 25 | rule: 26 | pattern: as.character(NA) 27 | fix: NA_character_ 28 | message: | 29 | Use NA_character_ instead of as.character(NA), i.e., use literals directly 30 | where possible, instead of coercion. 
31 | 32 | --- 33 | 34 | id: literal_coercion-3 35 | language: r 36 | severity: warning 37 | rule: 38 | pattern: as.logical($VAR) 39 | constraints: 40 | VAR: 41 | kind: argument 42 | has: 43 | any: 44 | - regex: ^1L$ 45 | - regex: ^1$ 46 | - regex: 'true' 47 | fix: TRUE 48 | message: Use TRUE instead of as.logical(~~VAR~~). 49 | 50 | --- 51 | 52 | id: literal_coercion-4 53 | language: r 54 | severity: warning 55 | rule: 56 | pattern: $FUN($VAR) 57 | constraints: 58 | VAR: 59 | kind: argument 60 | has: 61 | kind: float 62 | FUN: 63 | regex: ^(as\.numeric|as\.double)$ 64 | fix: ~~VAR~~ 65 | message: Use ~~VAR~~ instead of ~~FUN~~(~~VAR~~). 66 | 67 | --- 68 | 69 | id: literal_coercion-5 70 | language: r 71 | severity: warning 72 | rule: 73 | pattern: as.integer(NA) 74 | fix: NA_integer_ 75 | message: Use NA_integer_ instead of as.integer(NA). 76 | 77 | --- 78 | 79 | id: literal_coercion-6 80 | language: r 81 | severity: warning 82 | rule: 83 | pattern: $FUN(NA) 84 | constraints: 85 | FUN: 86 | regex: ^(as\.numeric|as\.double)$ 87 | fix: NA_real_ 88 | message: Use NA_real_ instead of ~~FUN~~(NA). 
89 | 90 | -------------------------------------------------------------------------------- /flir/rules/builtin/matrix_apply.yml: -------------------------------------------------------------------------------- 1 | id: matrix_apply-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: apply($INPUT, $MARG, sum) 7 | - pattern: apply($INPUT, MARGIN = $MARG, sum) 8 | - pattern: apply($INPUT, $MARG, FUN = sum) 9 | - pattern: apply($INPUT, MARGIN = $MARG, FUN = sum) 10 | constraints: 11 | MARG: 12 | has: 13 | regex: ^(2|2L)$ 14 | fix: colSums(~~INPUT~~) 15 | message: Use colSums(x) rather than apply(x, 2, sum) 16 | 17 | --- 18 | 19 | id: matrix_apply-2 20 | language: r 21 | severity: warning 22 | rule: 23 | any: 24 | - pattern: apply($INPUT, $MARG, sum, na.rm = $NARM) 25 | - pattern: apply($INPUT, MARGIN = $MARG, sum, na.rm = $NARM) 26 | - pattern: apply($INPUT, $MARG, FUN = sum, na.rm = $NARM) 27 | - pattern: apply($INPUT, MARGIN = $MARG, FUN = sum, na.rm = $NARM) 28 | constraints: 29 | MARG: 30 | has: 31 | regex: ^(2|2L)$ 32 | fix: colSums(~~INPUT~~, na.rm = ~~NARM~~) 33 | message: Use colSums(x, na.rm = ~~NARM~~) rather than apply(x, 2, sum, na.rm = ~~NARM~~). 
34 | 35 | --- 36 | 37 | id: matrix_apply-3 38 | language: r 39 | severity: warning 40 | rule: 41 | any: 42 | - pattern: apply($INPUT, $MARG, sum) 43 | - pattern: apply($INPUT, MARGIN = $MARG, sum) 44 | - pattern: apply($INPUT, $MARG, FUN = sum) 45 | - pattern: apply($INPUT, MARGIN = $MARG, FUN = sum) 46 | constraints: 47 | MARG: 48 | has: 49 | regex: ^(1|1L)$ 50 | fix: rowSums(~~INPUT~~) 51 | message: Use rowSums(x) rather than apply(x, 1, sum) 52 | 53 | --- 54 | 55 | id: matrix_apply-4 56 | language: r 57 | severity: warning 58 | rule: 59 | any: 60 | - pattern: apply($INPUT, $MARG, sum, na.rm = $NARM) 61 | - pattern: apply($INPUT, MARGIN = $MARG, sum, na.rm = $NARM) 62 | - pattern: apply($INPUT, $MARG, FUN = sum, na.rm = $NARM) 63 | - pattern: apply($INPUT, MARGIN = $MARG, FUN = sum, na.rm = $NARM) 64 | constraints: 65 | MARG: 66 | has: 67 | regex: ^(1|1L)$ 68 | fix: rowSums(~~INPUT~~, na.rm = ~~NARM~~) 69 | message: Use rowSums(x, na.rm = ~~NARM~~) rather than apply(x, 1, sum, na.rm = ~~NARM~~). 70 | 71 | --- 72 | 73 | id: matrix_apply-5 74 | language: r 75 | severity: warning 76 | rule: 77 | any: 78 | - pattern: apply($INPUT, $MARG, mean) 79 | - pattern: apply($INPUT, MARGIN = $MARG, mean) 80 | - pattern: apply($INPUT, $MARG, FUN = mean) 81 | - pattern: apply($INPUT, MARGIN = $MARG, FUN = mean) 82 | constraints: 83 | MARG: 84 | has: 85 | regex: ^(1|1L)$ 86 | fix: rowMeans(~~INPUT~~) 87 | message: Use rowMeans(x) rather than apply(x, 1, mean). 
88 | 89 | --- 90 | 91 | id: matrix_apply-6 92 | language: r 93 | severity: warning 94 | rule: 95 | any: 96 | - pattern: apply($INPUT, $MARG, mean, na.rm = $NARM) 97 | - pattern: apply($INPUT, MARGIN = $MARG, mean, na.rm = $NARM) 98 | - pattern: apply($INPUT, $MARG, FUN = mean, na.rm = $NARM) 99 | - pattern: apply($INPUT, MARGIN = $MARG, FUN = mean, na.rm = $NARM) 100 | constraints: 101 | MARG: 102 | has: 103 | regex: ^(1|1L)$ 104 | fix: rowMeans(~~INPUT~~, na.rm = ~~NARM~~) 105 | message: Use rowMeans(x, na.rm = ~~NARM~~) rather than apply(x, 1, mean, na.rm = ~~NARM~~). 106 | 107 | --- 108 | 109 | id: matrix_apply-7 110 | language: r 111 | severity: warning 112 | rule: 113 | any: 114 | - pattern: apply($INPUT, $MARG, mean) 115 | - pattern: apply($INPUT, MARGIN = $MARG, mean) 116 | - pattern: apply($INPUT, $MARG, FUN = mean) 117 | - pattern: apply($INPUT, MARGIN = $MARG, FUN = mean) 118 | constraints: 119 | MARG: 120 | has: 121 | regex: ^(2|2L)$ 122 | fix: colMeans(~~INPUT~~) 123 | message: Use colMeans(x) rather than apply(x, 2, mean). 124 | 125 | --- 126 | 127 | id: matrix_apply-8 128 | language: r 129 | severity: warning 130 | rule: 131 | any: 132 | - pattern: apply($INPUT, $MARG, mean, na.rm = $NARM) 133 | - pattern: apply($INPUT, MARGIN = $MARG, mean, na.rm = $NARM) 134 | - pattern: apply($INPUT, $MARG, FUN = mean, na.rm = $NARM) 135 | - pattern: apply($INPUT, MARGIN = $MARG, FUN = mean, na.rm = $NARM) 136 | constraints: 137 | MARG: 138 | has: 139 | regex: ^(2|2L)$ 140 | fix: colMeans(~~INPUT~~, na.rm = ~~NARM~~) 141 | message: Use colMeans(x, na.rm = ~~NARM~~) rather than apply(x, 2, mean, na.rm = ~~NARM~~). 
142 | 143 | -------------------------------------------------------------------------------- /flir/rules/builtin/missing_argument.yml: -------------------------------------------------------------------------------- 1 | id: missing_argument-1 2 | language: r 3 | severity: warning 4 | rule: 5 | kind: arguments 6 | has: 7 | kind: comma 8 | any: 9 | - precedes: 10 | stopBy: neighbor 11 | any: 12 | - regex: '^\)$' 13 | - kind: comma 14 | - follows: 15 | any: 16 | - regex: '^\($' 17 | - kind: argument 18 | regex: '=$' 19 | follows: 20 | kind: identifier 21 | not: 22 | regex: '^(quote|switch|alist)$' 23 | inside: 24 | kind: call 25 | message: Missing argument in function call. 26 | 27 | --- 28 | 29 | id: missing_argument-2 30 | language: r 31 | severity: warning 32 | rule: 33 | kind: arguments 34 | regex: '=(\s+|)\)$' 35 | follows: 36 | any: 37 | - kind: identifier 38 | - kind: extract_operator 39 | - kind: namespace_operator 40 | not: 41 | regex: '^(quote|switch|alist)$' 42 | inside: 43 | kind: call 44 | message: Missing argument in function call. 45 | -------------------------------------------------------------------------------- /flir/rules/builtin/nested_ifelse.yml: -------------------------------------------------------------------------------- 1 | id: nested_ifelse-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: $FUN($COND, $TRUE, $FALSE) 6 | constraints: 7 | FALSE: 8 | regex: ^(ifelse|if_else|fifelse) 9 | FUN: 10 | regex: ^(ifelse|if_else|fifelse) 11 | message: | 12 | Don't use nested ~~FUN~~() calls; instead, try (1) data.table::fcase; 13 | (2) dplyr::case_when; or (3) using a lookup table. 
14 | 15 | --- 16 | 17 | id: nested_ifelse-2 18 | language: r 19 | severity: warning 20 | rule: 21 | pattern: $FUN($COND, $TRUE, $FALSE) 22 | constraints: 23 | TRUE: 24 | regex: ^(ifelse|if_else|fifelse) 25 | FUN: 26 | regex: ^(ifelse|if_else|fifelse) 27 | message: | 28 | Don't use nested ~~FUN~~() calls; instead, try (1) data.table::fcase; 29 | (2) dplyr::case_when; or (3) using a lookup table. 30 | -------------------------------------------------------------------------------- /flir/rules/builtin/numeric_leading_zero.yml: -------------------------------------------------------------------------------- 1 | id: numeric_leading_zero-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: $VALUE 6 | any: 7 | - kind: float 8 | - kind: identifier 9 | regex: ^\.[0-9] 10 | fix: 0~~VALUE~~ 11 | message: Include the leading zero for fractional numeric constants. 12 | -------------------------------------------------------------------------------- /flir/rules/builtin/outer_negation.yml: -------------------------------------------------------------------------------- 1 | id: outer_negation-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: all(!$VAR) 6 | constraints: 7 | VAR: 8 | not: 9 | regex: '^!' 10 | fix: '!any(~~VAR~~)' 11 | message: | 12 | !any(x) is better than all(!x). The former applies negation only once after 13 | aggregation instead of many times for each element of x. 14 | 15 | --- 16 | 17 | id: outer_negation-2 18 | language: r 19 | severity: warning 20 | rule: 21 | pattern: any(! $VAR) 22 | constraints: 23 | VAR: 24 | not: 25 | regex: '^!' 26 | fix: '!all(~~VAR~~)' 27 | message: | 28 | !all(x) is better than any(!x). The former applies negation only once after 29 | aggregation instead of many times for each element of x. 
30 | -------------------------------------------------------------------------------- /flir/rules/builtin/package_hooks.yml: -------------------------------------------------------------------------------- 1 | id: package_hooks-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: packageStartupMessage($$$) 6 | inside: 7 | stopBy: end 8 | kind: binary_operator 9 | has: 10 | stopBy: end 11 | field: lhs 12 | pattern: .onLoad 13 | message: Put packageStartupMessage() calls in .onAttach(), not .onLoad(). 14 | 15 | --- 16 | 17 | id: package_hooks-2 18 | language: r 19 | severity: warning 20 | rule: 21 | pattern: library.dynam($$$) 22 | inside: 23 | stopBy: end 24 | kind: binary_operator 25 | has: 26 | stopBy: end 27 | field: lhs 28 | pattern: .onAttach 29 | message: Put library.dynam() calls in .onLoad(), not .onAttach(). 30 | 31 | --- 32 | 33 | id: package_hooks-3 34 | language: r 35 | severity: warning 36 | rule: 37 | pattern: $FN($$$) 38 | inside: 39 | stopBy: end 40 | kind: binary_operator 41 | has: 42 | stopBy: end 43 | field: lhs 44 | pattern: .onLoad 45 | constraints: 46 | FN: 47 | regex: '^(cat|installed.packages|message|packageStartupMessage|print|writeLines)$' 48 | message: Don't use ~~FN~~() in .onLoad(). 49 | 50 | --- 51 | 52 | id: package_hooks-4 53 | language: r 54 | severity: warning 55 | rule: 56 | pattern: $FN($$$) 57 | inside: 58 | stopBy: end 59 | kind: binary_operator 60 | has: 61 | stopBy: end 62 | field: lhs 63 | pattern: .onAttach 64 | constraints: 65 | FN: 66 | # library.dynam already has its own linter 67 | regex: '^(cat|installed.packages|message|print|writeLines)$' 68 | message: Don't use ~~FN~~() in .onAttach(). 
69 | 70 | --- 71 | 72 | id: package_hooks-5 73 | language: r 74 | severity: warning 75 | rule: 76 | pattern: $FN($$$) 77 | inside: 78 | stopBy: end 79 | kind: binary_operator 80 | has: 81 | stopBy: end 82 | field: lhs 83 | pattern: $LOAD 84 | constraints: 85 | LOAD: 86 | regex: '^(\.onAttach|\.onLoad)$' 87 | FN: 88 | regex: '^(require|library)$' 89 | message: Don't alter the search() path in ~~LOAD~~() by calling ~~FN~~(). 90 | 91 | --- 92 | 93 | id: package_hooks-6 94 | language: r 95 | severity: warning 96 | rule: 97 | pattern: installed.packages($$$) 98 | inside: 99 | stopBy: end 100 | kind: binary_operator 101 | has: 102 | stopBy: end 103 | field: lhs 104 | pattern: $LOAD 105 | constraints: 106 | LOAD: 107 | regex: '^(\.onAttach|\.onLoad)$' 108 | message: Don't slow down package load by running installed.packages() in ~~LOAD~~(). 109 | 110 | --- 111 | 112 | id: package_hooks-7 113 | language: r 114 | severity: warning 115 | rule: 116 | pattern: library.dynam.unload($$$) 117 | inside: 118 | stopBy: end 119 | kind: binary_operator 120 | has: 121 | stopBy: end 122 | field: lhs 123 | pattern: $LOAD 124 | constraints: 125 | LOAD: 126 | regex: '^(\.onDetach|\.Last\.lib)$' 127 | message: Use library.dynam.unload() calls in .onUnload(), not ~~LOAD~~(). 128 | -------------------------------------------------------------------------------- /flir/rules/builtin/paste.yml: -------------------------------------------------------------------------------- 1 | id: paste-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: 6 | context: paste($$$CONTENT sep = "" $$$CONTENT2) 7 | strictness: ast 8 | # fix: paste0($$$CONTENT) 9 | message: paste0(...) is better than paste(..., sep = ""). 
10 | 11 | --- 12 | 13 | id: paste-2 14 | language: r 15 | severity: warning 16 | rule: 17 | any: 18 | - pattern: 19 | context: paste($CONTENT, collapse = ", ") 20 | strictness: ast 21 | - pattern: 22 | context: paste(collapse = ", ", $CONTENT) 23 | strictness: ast 24 | # fix: paste0($$$CONTENT) 25 | message: toString(.) is more expressive than paste(., collapse = ", "). 26 | 27 | --- 28 | 29 | id: paste-3 30 | language: r 31 | severity: warning 32 | rule: 33 | pattern: 34 | context: paste0($$$CONTENT sep = $USELESS $$$CONTENT2) 35 | strictness: ast 36 | # fix: paste0($$$CONTENT) 37 | message: | 38 | sep= is not a formal argument to paste0(); did you mean to use paste(), or 39 | collapse=? 40 | 41 | --- 42 | 43 | id: paste-4 44 | language: r 45 | severity: warning 46 | rule: 47 | any: 48 | - pattern: 49 | context: paste0($CONTENT, collapse = $FOO) 50 | strictness: ast 51 | - pattern: 52 | context: paste0(collapse = $FOO, $CONTENT) 53 | strictness: ast 54 | not: 55 | has: 56 | regex: sep 57 | kind: argument 58 | # fix: paste0($$$CONTENT) 59 | message: | 60 | Use paste(), not paste0(), to collapse a character vector when sep= is not used. 61 | 62 | # --- 63 | # 64 | # id: paste-5 65 | # language: r 66 | # severity: warning 67 | # rule: 68 | # pattern: 69 | # context: paste0(rep($VAR, $TIMES), collapse = "") 70 | # strictness: ast 71 | # constraints: 72 | # VAR: 73 | # kind: string 74 | # fix: strrep(~~VAR~~, ~~TIMES~~) 75 | # message: strrep(x, times) is better than paste0(rep(x, times), collapse = ""). 76 | -------------------------------------------------------------------------------- /flir/rules/builtin/redundant_equals.yml: -------------------------------------------------------------------------------- 1 | id: redundant_equals-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: $VAR == TRUE 7 | - pattern: TRUE == $VAR 8 | - pattern: $VAR == FALSE 9 | - pattern: FALSE == $VAR 10 | message: | 11 | Using == on a logical vector is redundant. 
Well-named logical vectors can be 12 | used directly in filtering. For data.table's `i` argument, wrap the column 13 | name in (), like `DT[(is_treatment)]`. 14 | 15 | --- 16 | 17 | id: redundant_equals-2 18 | language: r 19 | severity: warning 20 | rule: 21 | any: 22 | - pattern: $VAR != TRUE 23 | - pattern: TRUE != $VAR 24 | - pattern: $VAR != FALSE 25 | - pattern: FALSE != $VAR 26 | message: | 27 | Using != on a logical vector is redundant. Well-named logical vectors can be 28 | used directly in filtering. For data.table's `i` argument, wrap the column 29 | name in (), like `DT[(is_treatment)]`. 30 | -------------------------------------------------------------------------------- /flir/rules/builtin/redundant_ifelse.yml: -------------------------------------------------------------------------------- 1 | id: redundant_ifelse-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: $FUN($COND, $VAL1, $VAL2) 6 | constraints: 7 | VAL1: 8 | regex: ^TRUE$ 9 | VAL2: 10 | regex: ^FALSE$ 11 | FUN: 12 | regex: ^(ifelse|fifelse|if_else)$ 13 | fix: ~~COND~~ 14 | message: | 15 | Use ~~COND~~ directly instead of calling ~~FUN~~(~~COND~~, TRUE, FALSE). 16 | 17 | --- 18 | 19 | id: redundant_ifelse-2 20 | language: r 21 | severity: warning 22 | rule: 23 | pattern: $FUN($COND, $VAL1, $VAL2) 24 | constraints: 25 | VAL1: 26 | regex: ^FALSE$ 27 | VAL2: 28 | regex: ^TRUE$ 29 | FUN: 30 | regex: ^(ifelse|fifelse|if_else)$ 31 | fix: '!(~~COND~~)' 32 | message: | 33 | Use !(~~COND~~) directly instead of calling ~~FUN~~(~~COND~~, FALSE, TRUE). 34 | 35 | --- 36 | 37 | id: redundant_ifelse-3 38 | language: r 39 | severity: warning 40 | rule: 41 | pattern: $FUN($COND, $VAL1, $VAL2) 42 | constraints: 43 | VAL1: 44 | regex: ^(1|1L)$ 45 | VAL2: 46 | regex: ^(0|0L)$ 47 | FUN: 48 | regex: ^(ifelse|fifelse|if_else)$ 49 | fix: as.integer(~~COND~~) 50 | message: Prefer as.integer(~~COND~~) to ~~FUN~~(~~COND~~, ~~VAL1~~, ~~VAL2~~). 
51 | 52 | --- 53 | 54 | id: redundant_ifelse-4 55 | language: r 56 | severity: warning 57 | rule: 58 | pattern: $FUN($COND, $VAL1, $VAL2) 59 | constraints: 60 | VAL1: 61 | regex: ^(0|0L)$ 62 | VAL2: 63 | regex: ^(1|1L)$ 64 | FUN: 65 | regex: ^(ifelse|fifelse|if_else)$ 66 | fix: as.integer(!(~~COND~~)) 67 | message: Prefer as.integer(!(~~COND~~)) to ~~FUN~~(~~COND~~, ~~VAL1~~, ~~VAL2~~). 68 | -------------------------------------------------------------------------------- /flir/rules/builtin/rep_len.yml: -------------------------------------------------------------------------------- 1 | id: rep_len-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: rep($OBJ, length.out = $LEN) 6 | fix: rep_len(~~OBJ~~, ~~LEN~~) 7 | message: Use rep_len(x, n) instead of rep(x, length.out = n). 8 | -------------------------------------------------------------------------------- /flir/rules/builtin/right_assignment.yml: -------------------------------------------------------------------------------- 1 | id: right_assignment 2 | language: r 3 | severity: hint 4 | rule: 5 | pattern: $RHS -> $LHS 6 | has: 7 | field: rhs 8 | kind: identifier 9 | fix: ~~LHS~~ <- ~~RHS~~ 10 | message: Use <-, not ->, for assignment. 11 | -------------------------------------------------------------------------------- /flir/rules/builtin/sample_int.yml: -------------------------------------------------------------------------------- 1 | id: sample_int-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: sample(1:$N, $$$OTHER) 7 | - pattern: sample(1L:$N, $$$OTHER) 8 | fix: sample.int(~~N~~, ~~OTHER~~) 9 | message: sample.int(n, m, ...) is preferable to sample(1:n, m, ...). 10 | 11 | --- 12 | 13 | id: sample_int-2 14 | language: r 15 | severity: warning 16 | rule: 17 | pattern: sample(seq($N), $$$OTHER) 18 | fix: sample.int(~~N~~, ~~OTHER~~) 19 | message: sample.int(n, m, ...) is preferable to sample(seq(n), m, ...).
20 | 21 | --- 22 | 23 | id: sample_int-3 24 | language: r 25 | severity: warning 26 | rule: 27 | pattern: sample(seq_len($N), $$$OTHER) 28 | fix: sample.int(~~N~~, ~~OTHER~~) 29 | message: sample.int(n, m, ...) is preferable to sample(seq_len(n), m, ...). 30 | 31 | --- 32 | 33 | # Strangely this panics if I rename FIRST to N, so the fix must reference ~~FIRST~~ 34 | id: sample_int-4 35 | language: r 36 | severity: warning 37 | rule: 38 | pattern: sample($FIRST, $$$OTHER) 39 | constraints: 40 | FIRST: 41 | regex: ^\d+(L|)$ 42 | fix: sample.int(~~FIRST~~, ~~OTHER~~) 43 | message: sample.int(n, m, ...) is preferable to sample(n, m, ...). 44 | -------------------------------------------------------------------------------- /flir/rules/builtin/semicolon.yml: -------------------------------------------------------------------------------- 1 | id: semicolon-1 2 | language: r 3 | severity: warning 4 | rule: 5 | regex: ;\s+$ 6 | not: 7 | inside: 8 | kind: string 9 | stopBy: end 10 | message: Trailing semicolons are not needed. 11 | -------------------------------------------------------------------------------- /flir/rules/builtin/seq.yml: -------------------------------------------------------------------------------- 1 | id: seq-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: seq(length($VAR)) 6 | fix: seq_along(~~VAR~~) 7 | message: | 8 | seq(length(...)) is likely to be wrong in the empty edge case. Use seq_along(...) instead. 9 | 10 | --- 11 | 12 | id: seq-2 13 | language: r 14 | severity: warning 15 | rule: 16 | any: 17 | - pattern: 1:nrow($VAR) 18 | - pattern: 1L:nrow($VAR) 19 | regex: ^1 20 | fix: seq_len(nrow(~~VAR~~)) 21 | message: | 22 | 1:nrow(...) is likely to be wrong in the empty edge case. Use seq_len(nrow(...)) instead. 23 | 24 | --- 25 | 26 | id: seq-3 27 | language: r 28 | severity: warning 29 | rule: 30 | any: 31 | - pattern: 1:n() 32 | - pattern: 1L:n() 33 | regex: ^1 34 | fix: seq_len(n()) 35 | message: | 36 | 1:n() is likely to be wrong in the empty edge case.
Use seq_len(n()) instead. 37 | 38 | --- 39 | 40 | id: seq-4 41 | language: r 42 | severity: warning 43 | rule: 44 | pattern: seq(nrow($VAR)) 45 | fix: seq_len(nrow(~~VAR~~)) 46 | message: | 47 | seq(nrow(...)) is likely to be wrong in the empty edge case. Use seq_len(nrow(...)) instead. 48 | 49 | --- 50 | 51 | id: seq-5 52 | language: r 53 | severity: warning 54 | rule: 55 | any: 56 | - pattern: 1:length($VAR) 57 | - pattern: 1L:length($VAR) 58 | regex: ^1 59 | fix: seq_along(~~VAR~~) 60 | message: | 61 | 1:length(...) is likely to be wrong in the empty edge case. Use seq_along(...) instead. 62 | 63 | --- 64 | 65 | id: seq-6 66 | language: r 67 | severity: warning 68 | rule: 69 | any: 70 | - pattern: 1:ncol($VAR) 71 | - pattern: 1L:ncol($VAR) 72 | regex: ^1 73 | fix: seq_len(ncol(~~VAR~~)) 74 | message: | 75 | 1:ncol(...) is likely to be wrong in the empty edge case. Use seq_len(ncol(...)) instead. 76 | 77 | --- 78 | 79 | id: seq-7 80 | language: r 81 | severity: warning 82 | rule: 83 | any: 84 | - pattern: 1:NCOL($VAR) 85 | - pattern: 1L:NCOL($VAR) 86 | regex: ^1 87 | fix: seq_len(NCOL(~~VAR~~)) 88 | message: | 89 | 1:NCOL(...) is likely to be wrong in the empty edge case. Use seq_len(NCOL(...)) instead. 90 | 91 | --- 92 | 93 | id: seq-8 94 | language: r 95 | severity: warning 96 | rule: 97 | any: 98 | - pattern: 1:NROW($VAR) 99 | - pattern: 1L:NROW($VAR) 100 | regex: ^1 101 | fix: seq_len(NROW(~~VAR~~)) 102 | message: | 103 | 1:NROW(...) is likely to be wrong in the empty edge case. Use seq_len(NROW(...)) instead. 104 | 105 | 106 | --- 107 | 108 | id: seq-9 109 | language: r 110 | severity: warning 111 | rule: 112 | pattern: seq(1, $VAL) 113 | not: 114 | pattern: seq(1, 0) 115 | constraints: 116 | VAL: 117 | regex: ^\d+(|L)$ 118 | fix: seq_len(~~VAL~~) 119 | message: seq_len(~~VAL~~) is more efficient than seq(1, ~~VAL~~). 
120 | 121 | 122 | -------------------------------------------------------------------------------- /flir/rules/builtin/sort.yml: -------------------------------------------------------------------------------- 1 | id: sort-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: $OBJ[order($OBJ)] 6 | fix: sort(~~OBJ~~, na.last = TRUE) 7 | message: sort(~~OBJ~~, na.last = TRUE) is better than ~~OBJ~~[order(~~OBJ~~)]. 8 | 9 | --- 10 | 11 | id: sort-2 12 | language: r 13 | severity: warning 14 | rule: 15 | any: 16 | - pattern: $OBJ[order($OBJ, decreasing = $DECREASING)] 17 | - pattern: $OBJ[order(decreasing = $DECREASING, $OBJ)] 18 | constraints: 19 | DECREASING: 20 | regex: ^(TRUE|FALSE)$ 21 | fix: sort(~~OBJ~~, decreasing = ~~DECREASING~~, na.last = TRUE) 22 | message: | 23 | sort(~~OBJ~~, decreasing = ~~DECREASING~~, na.last = TRUE) is better than 24 | ~~OBJ~~[order(~~OBJ~~, decreasing = ~~DECREASING~~)]. 25 | 26 | --- 27 | 28 | id: sort-3 29 | language: r 30 | severity: warning 31 | rule: 32 | any: 33 | - pattern: $OBJ[order($OBJ, na.last = $NALAST)] 34 | - pattern: $OBJ[order(na.last = $NALAST, $OBJ)] 35 | constraints: 36 | NALAST: 37 | regex: ^(TRUE|FALSE)$ 38 | fix: sort(~~OBJ~~, na.last = ~~NALAST~~) 39 | message: | 40 | sort(~~OBJ~~, na.last = ~~NALAST~~) is better than 41 | ~~OBJ~~[order(~~OBJ~~, na.last = ~~NALAST~~)].
42 | 43 | --- 44 | 45 | id: sort-4 46 | language: r 47 | severity: warning 48 | rule: 49 | any: 50 | - pattern: $OBJ[order($OBJ, decreasing = TRUE, na.last = FALSE)] 51 | - pattern: $OBJ[order($OBJ, na.last = FALSE, decreasing = TRUE)] 52 | - pattern: $OBJ[order(decreasing = TRUE, $OBJ, na.last = FALSE)] 53 | - pattern: $OBJ[order(decreasing = TRUE, na.last = FALSE, $OBJ)] 54 | - pattern: $OBJ[order(na.last = FALSE, decreasing = TRUE, $OBJ)] 55 | - pattern: $OBJ[order(na.last = FALSE, $OBJ, decreasing = TRUE)] 56 | fix: sort(~~OBJ~~, decreasing = TRUE, na.last = FALSE) 57 | message: | 58 | sort(~~OBJ~~, decreasing = TRUE, na.last = FALSE) is better than 59 | ~~OBJ~~[order(~~OBJ~~, na.last = FALSE, decreasing = TRUE)]. 60 | 61 | --- 62 | 63 | id: sort-5 64 | language: r 65 | severity: warning 66 | rule: 67 | any: 68 | - pattern: sort($OBJ) == $OBJ 69 | - pattern: $OBJ == sort($OBJ) 70 | fix: '!is.unsorted(~~OBJ~~)' 71 | message: | 72 | Use !is.unsorted(~~OBJ~~) to test the sortedness of a vector. 73 | 74 | --- 75 | 76 | id: sort-6 77 | language: r 78 | severity: warning 79 | rule: 80 | any: 81 | - pattern: sort($OBJ) != $OBJ 82 | - pattern: $OBJ != sort($OBJ) 83 | fix: is.unsorted(~~OBJ~~) 84 | message: | 85 | Use is.unsorted(~~OBJ~~) to test the unsortedness of a vector. 86 | -------------------------------------------------------------------------------- /flir/rules/builtin/stopifnot_all.yml: -------------------------------------------------------------------------------- 1 | id: stopifnot_all-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: stopifnot(all($$$CODE)) 6 | fix: stopifnot(~~CODE~~) 7 | message: | 8 | Use stopifnot(x) instead of stopifnot(all(x)). stopifnot(x) runs all() 'under 9 | the hood' and provides a better error message in case of failure.
10 | 11 | --- 12 | 13 | id: stopifnot_all-2 14 | language: r 15 | severity: warning 16 | rule: 17 | pattern: stopifnot(exprs = { all($$$CODE) }) 18 | fix: | 19 | stopifnot(exprs = { 20 | ~~CODE~~ 21 | }) 22 | message: | 23 | Use stopifnot(x) instead of stopifnot(all(x)). stopifnot(x) runs all() 'under 24 | the hood' and provides a better error message in case of failure. 25 | -------------------------------------------------------------------------------- /flir/rules/builtin/todo_comment.yml: -------------------------------------------------------------------------------- 1 | id: todo_comment-1 2 | language: r 3 | severity: warning 4 | rule: 5 | kind: comment 6 | regex: '(?i)#(|\s+)\b(todo|fixme)\b' 7 | message: Remove TODO comments. 8 | -------------------------------------------------------------------------------- /flir/rules/builtin/undesirable_function.yml: -------------------------------------------------------------------------------- 1 | id: undesirable_function-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: $FUN 6 | kind: identifier 7 | not: 8 | inside: 9 | kind: argument 10 | constraints: 11 | FUN: 12 | regex: ^(\.libPaths|attach|browser|debug|debugcall|debugonce|detach|par|setwd|structure|Sys\.setenv|Sys\.setlocale|trace|undebug|untrace)$ 13 | message: Function "~~FUN~~()" is undesirable. 14 | -------------------------------------------------------------------------------- /flir/rules/builtin/undesirable_operator.yml: -------------------------------------------------------------------------------- 1 | id: undesirable_operator-1 2 | language: r 3 | severity: warning 4 | rule: 5 | any: 6 | - pattern: $X <<- $Y 7 | - pattern: $X ->> $Y 8 | message: | 9 | Avoid undesirable operators `<<-` and `->>`. They assign outside the current 10 | environment in a way that can be hard to reason about. Prefer fully-encapsulated 11 | functions wherever possible, or, if necessary, assign to a specific environment 12 | with assign(). 
Recall that you can create an environment at the desired scope 13 | with new.env(). 14 | 15 | --- 16 | 17 | id: undesirable_operator-2 18 | language: r 19 | severity: warning 20 | rule: 21 | kind: namespace_operator 22 | has: 23 | pattern: ':::' 24 | message: | 25 | Operator `:::` is undesirable. It accesses non-exported functions inside 26 | packages. Code relying on these is likely to break in future versions of the 27 | package because the functions are not part of the public interface and may be 28 | changed or removed by the maintainers without notice. Use public functions 29 | via :: instead. 30 | -------------------------------------------------------------------------------- /flir/rules/builtin/unnecessary_nesting.yml: -------------------------------------------------------------------------------- 1 | id: unnecessary_nesting-1 2 | language: r 3 | severity: warning 4 | rule: 5 | kind: if_statement 6 | any: 7 | - has: 8 | kind: 'braced_expression' 9 | field: consequence 10 | has: 11 | kind: if_statement 12 | stopBy: neighbor 13 | not: 14 | has: 15 | kind: 'braced_expression' 16 | field: alternative 17 | stopBy: end 18 | not: 19 | any: 20 | - has: 21 | nthChild: 2 22 | - precedes: 23 | regex: "^else$" 24 | - has: 25 | kind: if_statement 26 | field: consequence 27 | stopBy: neighbor 28 | # Can be in if(), but not else if() 29 | not: 30 | inside: 31 | field: alternative 32 | kind: if_statement 33 | message: | 34 | Don't use nested `if` statements, where a single `if` with the combined 35 | conditional expression will do. For example, instead of `if (x) { if (y) { ... }}`, 36 | use `if (x && y) { ... }`. 
37 | -------------------------------------------------------------------------------- /flir/rules/builtin/unreachable_code.yml: -------------------------------------------------------------------------------- 1 | id: unreachable_code-1 2 | language: r 3 | severity: warning 4 | rule: 5 | regex: '[^}]+' 6 | not: 7 | regex: 'else' 8 | follows: 9 | any: 10 | - pattern: return($$$A) 11 | - pattern: stop($$$A) 12 | not: 13 | precedes: 14 | regex: 'else' 15 | stopBy: end 16 | message: Code and comments coming after a return() or stop() should be removed. 17 | 18 | --- 19 | 20 | id: unreachable_code-2 21 | language: r 22 | severity: warning 23 | rule: 24 | regex: '[^}]+' 25 | not: 26 | regex: 'else' 27 | follows: 28 | any: 29 | - pattern: next 30 | - pattern: break 31 | stopBy: end 32 | message: Remove code and comments coming after `next` or `break`. 33 | 34 | --- 35 | 36 | id: unreachable_code-3 37 | language: r 38 | severity: warning 39 | rule: 40 | inside: 41 | any: 42 | - kind: if_statement 43 | pattern: if (FALSE) 44 | - kind: while_statement 45 | pattern: while (FALSE) 46 | stopBy: end 47 | message: Remove code inside a conditional loop with a deterministically false condition. 48 | 49 | --- 50 | 51 | id: unreachable_code-4 52 | language: r 53 | severity: warning 54 | rule: 55 | inside: 56 | any: 57 | - kind: if_statement 58 | pattern: if (TRUE) 59 | - kind: while_statement 60 | pattern: while (TRUE) 61 | stopBy: end 62 | message: | 63 | One branch has a deterministically true condition. The other branches can 64 | be removed. 65 | -------------------------------------------------------------------------------- /flir/rules/builtin/which_grepl.yml: -------------------------------------------------------------------------------- 1 | id: which_grepl-1 2 | language: r 3 | severity: warning 4 | rule: 5 | pattern: which(grepl($$$ARGS)) 6 | fix: grep(~~ARGS~~) 7 | message: grep(pattern, x) is better than which(grepl(pattern, x)).
8 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | # Citation entries for GSODR; `meta` is the package DESCRIPTION metadata that R supplies when it reads this file. 2 | bibentry( 3 | bibtype = "Manual", 4 | title = "{GSODR}: Global Summary Daily Weather Data in R", 5 | author = c(person("Adam H.", "Sparks"), 6 | person("Tomislav", "Hengl"), 7 | person("Andrew", "Nelson")), 8 | year = sub("-.*", "", meta$Date), 9 | note = sprintf("R package version %s", meta$Version), 10 | doi = "10.5281/zenodo.1040727", # NOTE(review): confirm this Zenodo DOI is the GSODR archive and not another package's 11 | url = "https://CRAN.R-project.org/package=GSODR") 12 | # JOSS software paper describing GSODR. NOTE(review): confirm doi, month, volume, number and pages against the published record. 13 | bibentry( 14 | bibtype = "Article", 15 | author = c(person("Adam H.", "Sparks"), person("Tomislav", "Hengl"), person("Andrew", "Nelson")), 16 | title = "{GSODR}: Global Summary Daily Weather Data in R", 17 | doi = "10.21105/joss.00177", 18 | year = 2017, 19 | month = "feb", 20 | publisher = "The Open Journal", 21 | volume = 2, 22 | number = 10, 23 | pages = 177, 24 | journal = "The Journal of Open Source Software") 25 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | AIMFAOS 2 | ASOS 3 | Agroclimatology 4 | Alduchov 5 | BOM 6 | Bigouette 7 | CCI 8 | CGIAR 9 | CHELSA 10 | CMD 11 | CRAN's 12 | CRU 13 | CSI 14 | CSIRO 15 | CTRY 16 | CliFlo 17 | Curtin 18 | DATSAV 19 | DEWP 20 | DOI 21 | DPIRD 22 | Detsch 23 | ECCC 24 | ESA's 25 | ESACCI 26 | Eskridge 27 | FRSHTT 28 | FTPing 29 | Florian 30 | GPKG 31 | GSDOR 32 | GSOD 33 | GSODR's 34 | GSODRdata 35 | GSODTools 36 | GSODdata 37 | GeoPackage 38 | ICAO 39 | IMFAOS 40 | ISD 41 | Iannone 42 | JOSS 43 | MODCF 44 | MXSPD 45 | Magnus 46 | Mesonet 47 | NCDC 48 | NCEI 49 | NCEI's 50 | NCEI’s 51 | NOAA 52 | ORCID 53 | PRCP 54 | README 55 | Rda 56 | Rds 57 | Redoc 58 | Reefton 59 | Repatch 60 | Rmd 61 | SLP 62 | SNDP 63 | SRTM 64 | STNID 65 | STP 66 | Standardise 67 | Toowoomba 68 | USCS 69 | VISIB 70 | WBAN 71 |
WDPS 72 | WDSP 73 | WGS 74 | WMO 75 | WSPD 76 | WorldClim 77 | YDAY 78 | YEARMODA 79 | YYYY 80 | YYYYMMDD 81 | agroclimatology 82 | al 83 | capitalisation 84 | changelog 85 | clifro 86 | codecov 87 | countries’ 88 | csv 89 | dplyr 90 | et 91 | gsod 92 | gz 93 | https 94 | httr 95 | isd 96 | iso 97 | joss 98 | juba 99 | kilometres 100 | licence 101 | locations’ 102 | lon 103 | md 104 | metres 105 | millimetres 106 | mistyped 107 | modelling 108 | nasapower 109 | ncdc 110 | ncei 111 | noaa 112 | onLoad 113 | openair 114 | optimised 115 | plyr 116 | png 117 | pre 118 | purrr 119 | rOpenSci 120 | rda 121 | readme 122 | readr 123 | reformats 124 | reproducibility 125 | rgdal 126 | riem 127 | rnoaa 128 | ropensci 129 | roxyglobals 130 | snuck 131 | sp 132 | stationaRy 133 | stations’ 134 | summarise 135 | summarised 136 | summarising 137 | tidyr 138 | untar 139 | untarred 140 | untarring 141 | vapour 142 | visualisation 143 | weatherOz 144 | weathercan 145 | windspeed 146 | worldmet 147 | www 148 | zzz 149 | -------------------------------------------------------------------------------- /inst/extdata/isd_diff.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/GSODR/4cefe1fa79148a14282b8b2a59def6dc66b2a5d7/inst/extdata/isd_diff.rda -------------------------------------------------------------------------------- /inst/extdata/isd_history.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/GSODR/4cefe1fa79148a14282b8b2a59def6dc66b2a5d7/inst/extdata/isd_history.rda -------------------------------------------------------------------------------- /inst/paper/paper.bib: -------------------------------------------------------------------------------- 1 | @Manual{R-base, 2 | title = {R: A Language and Environment for Statistical Computing}, 3 | author = {{R Core Team}}, 4 | organization = {R Foundation for Statistical 
Computing}, 5 | address = {Vienna, Austria}, 6 | year = {2016}, 7 | url = {https://www.R-project.org/} 8 | } 9 | 10 | @Misc{NCDC, 11 | author = {{United States National Oceanic and Atmospheric Administration National Climatic Data Center}}, 12 | title = {{Global Surface Summary of Day (GSOD)}}, 13 | month = {August}, 14 | year = {2016}, 15 | howpublished = {\url{https://data.noaa.gov/dataset/global-surface-summary-of-the-day-gsod}}, 16 | lastchecked = {10/08/2016}, 17 | } 18 | 19 | @Manual{rnoaa, 20 | title = {rnoaa: 'NOAA' Weather Data from R}, 21 | author = {Scott Chamberlain}, 22 | year = {2016}, 23 | note = {R package version 0.5.6}, 24 | url = {https://CRAN.R-project.org/package=rnoaa}, 25 | } 26 | 27 | @Misc{Jarvis2008, 28 | title = {{Hole-filled SRTM for the globe Version 4, available from the CGIAR-CSI SRTM 90m Database}}, 29 | author = {Andy Jarvis and Hannes I Reuter and Andy Nelson and Edward Guevara}, 30 | year = {2008}, 31 | url = {http://srtm.csi.cgiar.org} 32 | } 33 | 34 | @Manual{GSODR, 35 | title = {{GSODR}: Global Summary Daily Weather Data in R}, 36 | author = {Adam Sparks and Tomislav Hengl and Andrew Nelson}, 37 | year = {2017}, 38 | note = {R package version 1.0.1}, 39 | url = {http://ropensci.github.io/GSODR/}, 40 | } 41 | 42 | @Misc{geopackage, 43 | author = {{Open Geospatial Consortium}}, 44 | title = {GeoPackage Encoding Standard}, 45 | url = {http://www.opengeospatial.org/standards/geopackage}, 46 | year = {2014} 47 | } 48 | 49 | @Manual{readr, 50 | title = {readr: Read Tabular Data}, 51 | author = {Hadley Wickham and Jim Hester and Romain Francois}, 52 | year = {2016}, 53 | note = {R package version 1.0.0}, 54 | url = {https://CRAN.R-project.org/package=readr}, 55 | } 56 | 57 | @Manual{data.table, 58 | title = {data.table: Extension of Data.frame}, 59 | author = {M Dowle and A Srinivasan and T Short and S Lianoglou with contributions from R Saporta and E Antonyan}, 60 | year = {2015}, 61 | note = {R package version 1.9.6}, 62 | url =
{https://CRAN.R-project.org/package=data.table}, 63 | } 64 | 65 | @Article{plyr, 66 | title = {The Split-Apply-Combine Strategy for Data Analysis}, 67 | author = {Hadley Wickham}, 68 | journal = {Journal of Statistical Software}, 69 | year = {2011}, 70 | volume = {40}, 71 | number = {1}, 72 | pages = {1--29}, 73 | url = {http://www.jstatsoft.org/v40/i01/}, 74 | } 75 | -------------------------------------------------------------------------------- /inst/paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'GSODR: Global Summary Daily Weather Data in R' 3 | authors: 4 | - affiliation: 1 5 | name: Adam H Sparks 6 | orcid: 0000-0002-0061-8359 7 | - affiliation: 2 8 | name: Tomislav Hengl 9 | orcid: 0000-0002-9921-5129 10 | - affiliation: 3 11 | name: Andrew Nelson 12 | orcid: 0000-0002-7249-3778 13 | date: "27 January 2017" 14 | output: pdf_document 15 | bibliography: paper.bib 16 | tags: 17 | - Global Surface Summary of the Day 18 | - GSOD 19 | - meteorology 20 | - climatology 21 | - weather data 22 | - R 23 | affiliations: 24 | - index: 1 25 | name: Centre for Crop Health, University of Southern Queensland, Toowoomba Queensland 26 | 4350, Australia 27 | - index: 2 28 | name: ISRIC - World Soil Information, P.O. Box 353, 6700 AJ Wageningen, The Netherlands 29 | - index: 3 30 | name: Faculty of Geo-Information and Earth Observation (ITC), University of Twente, 31 | Enschede 7500 AE, The Netherlands 32 | --- 33 | 34 | # Summary 35 | 36 | The GSODR package [@GSODR] is an R package [@R-base] providing automated 37 | downloading, parsing and cleaning of Global Surface Summary of the 38 | Day (GSOD) [@NCDC] weather data for use in R or saving as local files in either 39 | a Comma Separated Values (CSV) or GeoPackage (GPKG) [@geopackage] file. It 40 | builds on or complements several other scripts and packages. 
We take advantage 41 | of modern techniques in R to make more efficient use of available computing 42 | resources to complete the process, e.g., data.table [@data.table], plyr [@plyr] 43 | and readr [@readr], which allow the data cleaning, conversions and disk 44 | input/output processes to function quickly and efficiently. The rnoaa [@rnoaa] 45 | package already offers an excellent suite of tools for interacting with and 46 | downloading weather data from the United States National Oceanic and 47 | Atmospheric Administration, but lacks options for GSOD data retrieval. Several 48 | other APIs and R packages exist to access weather data, but most are region or 49 | continent specific, whereas GSOD is global. This package was developed to 50 | provide: 51 | 52 | * two functions that simplify downloading GSOD data and formatting it to 53 | easily be used in research; and 54 | 55 | * a function to help identify stations within a given radius of a point of 56 | interest. 57 | 58 | Alternative elevation data based on a 200 metre buffer of 59 | elevation values derived from the CGIAR-CSI SRTM 90m Database [@Jarvis2008] 60 | are included. These data are useful to help address possible inaccuracies and, 61 | in many cases, fill in for missing elevation values in the reported station 62 | elevations. 63 | 64 | When using this package, GSOD stations are checked for inaccurate longitude and 65 | latitude values and any stations that have missing or incorrect values are 66 | omitted from the final data set. Users may set a threshold for station files 67 | with too many missing observations for omission from the final output to help 68 | ensure data quality. All units are converted from the United States Customary 69 | System (USCS) to the International System of Units (SI), e.g., inches to 70 | millimetres and Fahrenheit to Celsius. Wind speed is also converted from knots 71 | to metres per second.
Additional useful values, actual vapour pressure, 72 | saturated water vapour pressure, and relative humidity are calculated and 73 | included in the final output. Station metadata are merged with weather data for 74 | the final data set. 75 | 76 | # References 77 | -------------------------------------------------------------------------------- /inst/paper/paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/GSODR/4cefe1fa79148a14282b8b2a59def6dc66b2a5d7/inst/paper/paper.pdf -------------------------------------------------------------------------------- /man/GSODR-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GSODR-package.R 3 | \docType{package} 4 | \name{GSODR-package} 5 | \alias{GSODR} 6 | \alias{GSODR-package} 7 | \title{GSODR: Global Surface Summary of the Day ('GSOD') Weather Data Client} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Provides automated downloading, parsing, cleaning, unit conversion and formatting of Global Surface Summary of the Day ('GSOD') weather data from the USA National Centers for Environmental Information ('NCEI'). Units are converted from United States Customary System ('USCS') units to International System of Units ('SI'). Stations may be individually checked for number of missing days defined by the user, where stations with too many missing observations are omitted. Only stations with valid reported latitude and longitude values are permitted in the final data. Additional useful elements, saturation vapour pressure ('es'), actual vapour pressure ('ea') and relative humidity ('RH') are calculated from the original data using the improved August-Roche-Magnus approximation (Alduchov & Eskridge 1996) and included in the final data set.
The resulting metadata include station identification information, country, state, latitude, longitude, elevation, weather observations and associated flags. For information on the 'GSOD' data from 'NCEI', please see the 'GSOD' 'readme.txt' file available from, \url{https://www1.ncdc.noaa.gov/pub/data/gsod/readme.txt}. 12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://docs.ropensci.org/GSODR/} 17 | \item \url{https://github.com/ropensci/GSODR} 18 | \item Report bugs at \url{https://github.com/ropensci/GSODR/issues} 19 | } 20 | 21 | } 22 | \author{ 23 | \strong{Maintainer}: Adam H. Sparks \email{adamhsparks@gmail.com} (\href{https://orcid.org/0000-0002-0061-8359}{ORCID}) 24 | 25 | Authors: 26 | \itemize{ 27 | \item Tomislav Hengl \email{tom.hengl@isric.org} (\href{https://orcid.org/0000-0002-9921-5129}{ORCID}) 28 | \item Andrew Nelson \email{dr.andy.nelson@gmail.com} (\href{https://orcid.org/0000-0002-7249-3778}{ORCID}) 29 | } 30 | 31 | Other contributors: 32 | \itemize{ 33 | \item Hugh Parsonage \email{hugh.parsonage@gmail.com} (\href{https://orcid.org/0000-0003-4055-0835}{ORCID}) [copyright holder, contributor] 34 | \item Taras Kaduk \email{taras.kaduk@gmail.com} (Suggestion for handling bulk station downloads more 35 | efficiently) [contributor] 36 | \item Gwenael Giboire \email{gwenael.giboire@oda-groupe.com} (Several bug reports in early versions and testing feedback) [contributor] 37 | \item Łukasz Pawlik \email{lukpawlik@gmail.com} (Reported bug in windspeed conversion calculation) [contributor] 38 | \item Ross Darnell \email{Ross.Darnell@data61.csiro.au} (\href{https://orcid.org/0000-0002-7973-6322}{ORCID}) (Reported bug in 'Windows OS' versions causing 'GSOD' data untarring to 39 | fail) [contributor] 40 | \item Tyler Widdison \email{Tyler.Widdison@usav.org} (Reported bug where 41 | `nearest_stations()` did not return stations in order of nearest to 42 | farthest) [contributor] 43 | \item Curtin University (Supported the 
development of 'GSODR' through 44 | Adam H. Sparks's time.) [copyright holder] 45 | } 46 | 47 | } 48 | \keyword{internal} 49 | -------------------------------------------------------------------------------- /man/dot-agroclimatology_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.agroclimatology_list} 4 | \alias{.agroclimatology_list} 5 | \title{Agroclimatology List} 6 | \usage{ 7 | .agroclimatology_list(file_list, isd_history, years) 8 | } 9 | \arguments{ 10 | \item{isd_history}{isd_history file from NCEI} 11 | 12 | \item{years}{Years being requested} 13 | 14 | \item{x}{A \code{data.table} of GSOD data from .download_data} 15 | } 16 | \value{ 17 | A list of GSOD stations suitable for agroclimatology work. 18 | } 19 | \description{ 20 | Agroclimatology List 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/dot-apply_process_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.apply_process_csv} 4 | \alias{.apply_process_csv} 5 | \title{Process .gz files} 6 | \usage{ 7 | .apply_process_csv(file_list, isd_history) 8 | } 9 | \arguments{ 10 | \item{file_list}{List of GSOD files} 11 | 12 | \item{isd_history}{isd_history.csv file from NCEI provided by GSODR} 13 | } 14 | \value{ 15 | A \code{data.table} of GSOD weather data. 
16 | } 17 | \description{ 18 | Process .gz files 19 | } 20 | \keyword{internal} 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/dot-check_url_exists.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.check_url_exists} 4 | \alias{.check_url_exists} 5 | \title{Check That a URL Exists Before Downloading} 6 | \usage{ 7 | .check_url_exists(x) 8 | } 9 | \arguments{ 10 | \item{x}{a URL for checking} 11 | } 12 | \value{ 13 | A numeric value representing the HTTP response. 14 | } 15 | \description{ 16 | Check That a URL Exists Before Downloading 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/dot-download_files.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.download_files} 4 | \alias{.download_files} 5 | \title{Download GSOD files from NCEI server} 6 | \usage{ 7 | .download_files(station, years) 8 | } 9 | \arguments{ 10 | \item{station}{Station ID being requested. Optional} 11 | 12 | \item{years}{Years being requested. Mandatory} 13 | } 14 | \value{ 15 | A list of data for processing before returning to user. 
16 | } 17 | \description{ 18 | Download GSOD files from NCEI server 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/dot-subset_country_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.subset_country_list} 4 | \alias{.subset_country_list} 5 | \title{Subset country list} 6 | \usage{ 7 | .subset_country_list(country, file_list, isd_history, years) 8 | } 9 | \arguments{ 10 | \item{country}{Country of interest to subset on} 11 | 12 | \item{isd_history}{isd_history.csv file from NCEI provided by GSODR} 13 | 14 | \item{years}{Years being requested} 15 | 16 | \item{file_list}{List of GSOD files to be subset} 17 | } 18 | \value{ 19 | A list of stations in the requested country.
15 | } 16 | \description{ 17 | Untar GSOD Tar Archive Files 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/dot-validate_country.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.validate_country} 4 | \alias{.validate_country} 5 | \title{Validate country requests} 6 | \usage{ 7 | .validate_country(country, isd_history) 8 | } 9 | \arguments{ 10 | \item{country}{User requested country name} 11 | 12 | \item{isd_history}{Data provided from NCEI on stations' locations and years} 13 | } 14 | \value{ 15 | A validated country name. 16 | } 17 | \description{ 18 | Validate country requests 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/dot-validate_missing_days.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.validate_missing_days} 4 | \alias{.validate_missing_days} 5 | \title{Validate data for missing days} 6 | \usage{ 7 | .validate_missing_days(max_missing, file_list) 8 | } 9 | \arguments{ 10 | \item{max_missing}{User entered maximum permissible missing days} 11 | 12 | \item{GSOD_list}{A list of GSOD files that have been downloaded from NCEI} 13 | } 14 | \value{ 15 | A validated \code{list()} of GSOD files that meet requirements for 16 | missing days. 
17 | } 18 | \description{ 19 | Validate data for missing days 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/dot-validate_station_data_years.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.validate_station_data_years} 4 | \alias{.validate_station_data_years} 5 | \title{Validate Station Data for Years Available} 6 | \usage{ 7 | .validate_station_data_years(station, isd_history, years) 8 | } 9 | \arguments{ 10 | \item{station}{User entered station ID} 11 | 12 | \item{isd_history}{isd_history.csv from NCEI provided by GSODR} 13 | 14 | \item{years}{User entered years for query} 15 | } 16 | \value{ 17 | \code{station_id} value, "station", \code{NA} if no match with available 18 | data. 19 | } 20 | \description{ 21 | Validate Station Data for Years Available 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/dot-validate_station_id.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.validate_station_id} 4 | \alias{.validate_station_id} 5 | \title{Validate Station IDs} 6 | \usage{ 7 | .validate_station_id(station, isd_history) 8 | } 9 | \arguments{ 10 | \item{station}{User entered station ID} 11 | 12 | \item{isd_history}{isd_history.csv from NCEI provided by GSODR} 13 | } 14 | \value{ 15 | None unless an error with the years or invalid station ID. 
16 | } 17 | \description{ 18 | Validate Station IDs 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/dot-validate_years.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.R 3 | \name{.validate_years} 4 | \alias{.validate_years} 5 | \title{Validate Years} 6 | \usage{ 7 | .validate_years(years) 8 | } 9 | \arguments{ 10 | \item{years}{User entered years for request} 11 | } 12 | \value{ 13 | None unless error in years being requested by users. 14 | } 15 | \description{ 16 | Validate Years 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/GSODR/4cefe1fa79148a14282b8b2a59def6dc66b2a5d7/man/figures/logo.png -------------------------------------------------------------------------------- /man/get_GSOD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_GSOD.R 3 | \name{get_GSOD} 4 | \alias{get_GSOD} 5 | \title{Download and Return a data.table Object of GSOD Weather Data} 6 | \usage{ 7 | get_GSOD( 8 | years, 9 | station = NULL, 10 | country = NULL, 11 | max_missing = NULL, 12 | agroclimatology = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{years}{Year(s) of weather data to download.} 17 | 18 | \item{station}{Optional. Specify a station or multiple stations for which to 19 | retrieve, check and clean weather data using \var{STATION}. The 20 | \acronym{NCEI} reports years for which the data are available. This function 21 | checks against these years. 
However, not all cases are properly documented 22 | and in some cases files may not exist for download even though it is 23 | indicated that data was recorded for the station for a particular year. If a 24 | station is specified that does not have an existing file on the server, this 25 | function will silently fail and move on to existing files for download and 26 | cleaning.} 27 | 28 | \item{country}{Optional. Specify a country for which to retrieve weather 29 | data; full name, 2 or 3 letter \acronym{ISO} or 2 letter \acronym{FIPS} codes 30 | can be used. All stations within the specified country will be returned.} 31 | 32 | \item{max_missing}{Optional. The maximum number of days allowed to be 33 | missing from a station's data before it is excluded from final file output.} 34 | 35 | \item{agroclimatology}{Optional. Logical. Only clean data for stations 36 | between latitudes 60 and -60 for agroclimatology work, defaults to \code{FALSE}. 37 | Set to \code{TRUE} to include only stations within the confines of these 38 | latitudes.} 39 | } 40 | \value{ 41 | A \code{\link[data.table:data.table]{data.table::data.table()}} object of \acronym{GSOD} weather data. 42 | } 43 | \description{ 44 | Automates downloading, cleaning and reformatting of data from the Global Surface 45 | Summary of the Day (\acronym{GSOD}) data provided by the 46 | \href{https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.ncdc:C00516}{US National Centers for Environmental Information (NCEI)}. 47 | Three additional useful elements: saturation vapour pressure (es), actual 48 | vapour pressure (ea) and relative humidity (RH) are calculated and returned 49 | in the final data frame using the improved August-Roche-Magnus approximation 50 | (Alduchov and Eskridge 1996). 51 | } 52 | \details{ 53 | All units are converted to International System of Units (SI), \emph{e.g.}, 54 | Fahrenheit to Celsius and inches to millimetres.
55 | 56 | Data summarise each year by station, which include vapour pressure and 57 | relative humidity elements calculated from existing data in \acronym{GSOD}. 58 | 59 | All missing values in resulting files are represented as \code{NA} regardless of 60 | which field they occur in. 61 | 62 | For a complete list of the fields and description of the contents and units, 63 | please refer to Appendix 1 in the \CRANpkg{GSODR} vignette, 64 | \code{vignette("GSODR", package = "GSODR")}. 65 | 66 | For more information see the description of the data provided by 67 | \acronym{NCEI}, \url{https://www.ncei.noaa.gov/data/global-summary-of-the-day/doc/readme.txt}. 68 | } 69 | \note{ 70 | \CRANpkg{GSODR} attempts to validate year and station combination 71 | requests, however, in certain cases the start and end date may encompass 72 | years where no data is available. In these cases no data will be returned. 73 | It is suggested that the user check the latest data availability for the 74 | station(s) desired using \code{\link[=get_inventory]{get_inventory()}} as this list is frequently 75 | updated by the \acronym{NCEI} and is not shipped with \CRANpkg{GSODR}. 76 | 77 | While \CRANpkg{GSODR} does not distribute GSOD weather data, users of 78 | the data should note the conditions that the U.S. \acronym{NCEI} places upon 79 | the \acronym{GSOD} data. 80 | \dQuote{The following data and products may have conditions placed on their 81 | international commercial use. They can be used within the U.S. or for non- 82 | commercial international activities without restriction. The non-U.S. data 83 | cannot be redistributed for commercial purposes. Re-distribution of these 84 | data by others must provide this same notification. A log of IP addresses 85 | accessing these data and products will be maintained and may be made 86 | available to data providers.} 87 | } 88 | \section{References}{ 89 | 90 | 91 | Alduchov, O.A. and Eskridge, R.E., 1996. 
Improved Magnus form approximation 92 | of saturation vapor pressure. Journal of Applied Meteorology and Climatology, 93 | 35(4), pp.601-609. DOI: 94 | <10.1175\%2F1520-0450\%281996\%29035\%3C0601\%3AIMFAOS\%3E2.0.CO\%3B2>. 95 | } 96 | 97 | \examples{ 98 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 99 | # Download weather station data for Toowoomba, Queensland for 2010 100 | tbar <- get_GSOD(years = 2010, station = "955510-99999") 101 | 102 | # Download weather data for the year 1929 103 | w_1929 <- get_GSOD(years = 1929) 104 | 105 | # Download weather data for the year 1929 for Ireland 106 | ie_1929 <- get_GSOD(years = 1929, country = "Ireland") 107 | \dontshow{\}) # examplesIf} 108 | } 109 | \seealso{ 110 | \code{\link[=reformat_GSOD]{reformat_GSOD()}} 111 | } 112 | \author{ 113 | Adam H. Sparks, \email{adamhsparks@gmail.com} 114 | } 115 | -------------------------------------------------------------------------------- /man/get_inventory.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_inventory.R 3 | \name{get_inventory} 4 | \alias{get_inventory} 5 | \title{Download and Return a data.table Object of GSOD Weather Station Data Inventories} 6 | \usage{ 7 | get_inventory() 8 | } 9 | \value{ 10 | A \code{GSODR.info} object, which inherits from \link[data.table:data.table]{data.table::data.table}. 11 | } 12 | \description{ 13 | The \acronym{NCEI} maintains a document, 14 | \url{https://www1.ncdc.noaa.gov/pub/data/noaa/isd-inventory.txt}, which lists 15 | the number of weather observations by station-year-month from the beginning 16 | of the stations' records. This function retrieves that document and prints 17 | an information header displaying the last update time with a data frame of 18 | the inventory information for each station-year-month. 
19 | } 20 | \note{ 21 | While \CRANpkg{GSODR} does not distribute GSOD weather data, users of 22 | the data should note the conditions that the U.S. \acronym{NCEI} places upon 23 | the \acronym{GSOD} data. 24 | \dQuote{The following data and products may have conditions placed on their 25 | international commercial use. They can be used within the U.S. or for non- 26 | commercial international activities without restriction. The non-U.S. data 27 | cannot be redistributed for commercial purposes. Re-distribution of these 28 | data by others must provide this same notification. A log of IP addresses 29 | accessing these data and products will be maintained and may be made 30 | available to data providers.} 31 | } 32 | \examples{ 33 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 34 | inventory <- get_inventory() 35 | inventory 36 | \dontshow{\}) # examplesIf} 37 | } 38 | \seealso{ 39 | Other metadata: 40 | \code{\link{get_isd_history}()}, 41 | \code{\link{get_updates}()} 42 | } 43 | \author{ 44 | Adam H. 
Sparks, \email{adamhsparks@gmail.com} 45 | } 46 | \concept{metadata} 47 | -------------------------------------------------------------------------------- /man/get_isd_history.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_isd_history.R 3 | \name{get_isd_history} 4 | \alias{get_isd_history} 5 | \title{Get the Most Recent isd_history File} 6 | \usage{ 7 | get_isd_history() 8 | } 9 | \value{ 10 | A \link[data.table:data.table]{data.table::data.table} object 11 | } 12 | \description{ 13 | Get the Most Recent isd_history File 14 | } 15 | \examples{ 16 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 17 | get_isd_history() 18 | \dontshow{\}) # examplesIf} 19 | } 20 | \seealso{ 21 | Other metadata: 22 | \code{\link{get_inventory}()}, 23 | \code{\link{get_updates}()} 24 | } 25 | \concept{metadata} 26 | -------------------------------------------------------------------------------- /man/get_updates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_updates.R 3 | \name{get_updates} 4 | \alias{get_updates} 5 | \title{Get updates.txt With Information on Updates to the GSOD Data Set} 6 | \usage{ 7 | get_updates() 8 | } 9 | \value{ 10 | A \code{\link[data.table:data.table]{data.table::data.table()}} object 11 | } 12 | \description{ 13 | Gets and imports the 'updates.txt' file that has a change log of GSOD data. 14 | Changes are shown in order from most recent to oldest changes by the "DATE" 15 | field. Column names follow \CRANpkg{GSODR} naming conventions. 
16 | } 17 | \examples{ 18 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 19 | get_updates() 20 | \dontshow{\}) # examplesIf} 21 | } 22 | \seealso{ 23 | Other metadata: 24 | \code{\link{get_inventory}()}, 25 | \code{\link{get_isd_history}()} 26 | } 27 | \concept{metadata} 28 | -------------------------------------------------------------------------------- /man/nearest_stations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/nearest_stations.R 3 | \name{nearest_stations} 4 | \alias{nearest_stations} 5 | \title{Find Nearest GSOD Stations to a Specified Latitude and Longitude} 6 | \usage{ 7 | nearest_stations(LAT, LON, distance) 8 | } 9 | \arguments{ 10 | \item{LAT}{Latitude expressed as decimal degrees (DD) (WGS84)} 11 | 12 | \item{LON}{Longitude expressed as decimal degrees (DD) (WGS84)} 13 | 14 | \item{distance}{Distance in kilometres from point for which stations are to 15 | be returned.} 16 | } 17 | \value{ 18 | A \link[data.table:data.table]{data.table::data.table} with full station metadata including the 19 | distance from the user-specified coordinates, ordered from nearest to farthest. 20 | } 21 | \description{ 22 | Given latitude and longitude values entered as decimal degrees (DD), this 23 | function returns the metadata of the nearest stations, including their station ID 24 | values, which can be used in \code{\link[=get_GSOD]{get_GSOD()}} to query for specific stations as an 25 | argument in the \code{station} parameter of that function. 26 | } 27 | \note{ 28 | The \acronym{GSOD} data, which are downloaded and manipulated by 29 | \CRANpkg{GSODR}, stipulate that the following notice should be given. 30 | \dQuote{The following data and products may have conditions placed on their 31 | international commercial use. They can be used within the U.S. 
or for non- 32 | commercial international activities without restriction. The non-U.S. data 33 | cannot be redistributed for commercial purposes. Re-distribution of these 34 | data by others must provide this same notification.} 35 | } 36 | \examples{ 37 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 38 | # Find stations within a 100 km radius of Toowoomba, QLD, AUS 39 | 40 | n <- nearest_stations(LAT = -27.5598, LON = 151.9507, distance = 100) 41 | n 42 | \dontshow{\}) # examplesIf} 43 | } 44 | \author{ 45 | Adam H. Sparks, \email{adamhsparks@gmail.com} 46 | } 47 | -------------------------------------------------------------------------------- /man/print.GSODR.Info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_inventory.R 3 | \name{print.GSODR.Info} 4 | \alias{print.GSODR.Info} 5 | \title{Prints a GSODR.Info object} 6 | \usage{ 7 | \method{print}{GSODR.Info}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{A GSODR.Info object} 11 | 12 | \item{...}{ignored} 13 | } 14 | \description{ 15 | Prints a GSODR.Info object 16 | } 17 | -------------------------------------------------------------------------------- /man/reformat_GSOD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reformat_GSOD.R 3 | \name{reformat_GSOD} 4 | \alias{reformat_GSOD} 5 | \title{Tidy and Return a data.table Object of GSOD Data From Local Storage} 6 | \usage{ 7 | reformat_GSOD(dsn = NULL, file_list = NULL) 8 | } 9 | \arguments{ 10 | \item{dsn}{User supplied full file path to location of data files on local 11 | disk for tidying.} 12 | 13 | \item{file_list}{User supplied list of file paths to individual files of data 14 | on local disk for tidying. Ignored if \code{dsn} is set. 
Use if there are other 15 | files in the \code{dsn} that you do not wish to reformat.} 16 | } 17 | \value{ 18 | A data frame as a \link[data.table:data.table]{data.table::data.table} object of 19 | \acronym{GSOD} data. 20 | } 21 | \description{ 22 | This function automates cleaning and reformatting of \acronym{GSOD} station 23 | files, either extracted from an untarred \dQuote{YEAR.tar.gz} archive or in 24 | \dQuote{STATION.csv} format, that have been downloaded from the United States 25 | National Centers for Environmental Information's (\acronym{NCEI}) 26 | download page. Three additional useful elements, saturation vapour pressure 27 | (es), actual vapour pressure (ea) and relative humidity (RH), are calculated 28 | and returned in the final data frame using the improved August-Roche-Magnus 29 | approximation (Alduchov and Eskridge 1996). All units are converted to 30 | International System of Units (SI), \emph{e.g.}, Fahrenheit to Celsius and 31 | inches to millimetres. 32 | } 33 | \details{ 34 | If multiple stations are given, data are summarised for each year by station, 35 | which include vapour pressure and relative humidity elements calculated from 36 | existing data in \acronym{GSOD}. Else, a single station is tidied and a data 37 | frame is returned. 38 | 39 | All missing values in resulting files are represented as \code{NA} regardless 40 | of which field they occur in. 41 | 42 | Only station files in the original \dQuote{csv} file format are supported by 43 | this function. If you have downloaded the full annual (\dQuote{YYYY.tar.gz}) 44 | file you will need to extract the individual station files from the tar file 45 | first to use this function. 46 | 47 | Note that \code{\link[=reformat_GSOD]{reformat_GSOD()}} will attempt to reformat any \dQuote{.csv} 48 | files found in the \code{dsn} that you provide. If there are non-\acronym{GSOD} 49 | files present this will lead to errors. 
50 | 51 | For a complete list of the fields and description of the contents and units, 52 | please refer to Appendix 1 in the \CRANpkg{GSODR} vignette, 53 | \code{vignette("GSODR", package = "GSODR")}. 54 | } 55 | \note{ 56 | While \CRANpkg{GSODR} does not distribute \acronym{GSOD} weather data, 57 | users of the data should note the conditions that the U.S. \acronym{NCEI} 58 | places upon the \acronym{GSOD} data. 59 | \dQuote{The following data and products may have conditions placed on their 60 | international commercial use. They can be used within the U.S. or for non- 61 | commercial international activities without restriction. The non-U.S. data 62 | cannot be redistributed for commercial purposes. Re-distribution of these 63 | data by others must provide this same notification. A log of IP addresses 64 | accessing these data and products will be maintained and may be made 65 | available to data providers.} 66 | } 67 | \section{References}{ 68 | 69 | 70 | Alduchov, O.A. and Eskridge, R.E., 1996. Improved Magnus form approximation 71 | of saturation vapor pressure. Journal of Applied Meteorology and Climatology, 72 | 35(4), pp.601-609. DOI: 73 | <10.1175\%2F1520-0450\%281996\%29035\%3C0601\%3AIMFAOS\%3E2.0.CO\%3B2>. 
74 | } 75 | 76 | \examples{ 77 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 78 | 79 | # Download data to 'tempdir()' 80 | download.file( 81 | url = 82 | "https://www.ncei.noaa.gov/data/global-summary-of-the-day/access/2010/95551099999.csv", 83 | destfile = file.path(tempdir(), "95551099999.csv"), 84 | mode = "wb" 85 | ) 86 | 87 | # Reformat station data files in R's tempdir() directory 88 | tbar <- reformat_GSOD(dsn = tempdir()) 89 | 90 | tbar 91 | \dontshow{\}) # examplesIf} 92 | } 93 | \seealso{ 94 | For automated downloading and tidying see the \code{\link[=get_GSOD]{get_GSOD()}} function, which 95 | provides expanded functionality for automatically downloading and expanding 96 | annual \acronym{GSOD} files and cleaning station files. 97 | 98 | \code{\link[=get_GSOD]{get_GSOD()}} 99 | } 100 | \author{ 101 | Adam H. Sparks, \email{adamhsparks@gmail.com} 102 | } 103 | -------------------------------------------------------------------------------- /man/update_station_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/update_station_list.R 3 | \name{update_station_list} 4 | \alias{update_station_list} 5 | \title{Download Latest isd-history.csv File and Update an Internal Database} 6 | \usage{ 7 | update_station_list() 8 | } 9 | \description{ 10 | This function downloads the latest station list (isd-history.csv) from the 11 | \acronym{NCEI} server and updates the data distributed with \CRANpkg{GSODR} 12 | to the latest stations available. These data provide unique identifiers, 13 | country, state (if in U.S.) and when weather observations begin and end. 
14 | } 15 | \details{ 16 | Care should be taken when using this function if reproducibility is necessary 17 | as different machines with the same version of \CRANpkg{GSODR} can end up 18 | with different versions of the 'isd_history.csv' file internally. 19 | 20 | There is no need to use this unless you know that a station exists in the 21 | isd_history.csv file that is not available in the self-contained 22 | database distributed with \CRANpkg{GSODR}. 23 | 24 | To directly access these data, use: \cr 25 | \code{load(system.file("extdata", "isd_history.rda", package = "GSODR"))} 26 | 27 | To see the latest version available from the \acronym{NCEI} server, please 28 | refer to \code{\link[=get_isd_history]{get_isd_history()}}. 29 | } 30 | \examples{ 31 | \dontrun{ 32 | update_station_list() 33 | } 34 | 35 | } 36 | \seealso{ 37 | \code{\link[=get_isd_history]{get_isd_history()}} 38 | } 39 | \author{ 40 | Adam H. Sparks, \email{adamhsparks@gmail.com} 41 | } 42 | -------------------------------------------------------------------------------- /tests/spelling.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("spelling", quietly = TRUE)) { 2 | spelling::spell_check_test( 3 | vignettes = TRUE, 4 | error = FALSE, 5 | skip_on_cran = TRUE 6 | ) 7 | } 8 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(GSODR) 3 | 4 | test_check("GSODR") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-get_GSOD.R: -------------------------------------------------------------------------------- 1 | load(system.file("extdata", "isd_history.rda", package = "GSODR")) 2 | 3 | # Check that invalid years are handled gracefully ----------------------------- 4 | 5 | test_that("invalid years are handled gracefully", { 6 | 
skip_if_offline() 7 | expect_error(get_GSOD(years = "")) 8 | expect_error(get_GSOD(years = "nineteen ninety two")) 9 | expect_error(get_GSOD(years = "1923")) 10 | expect_error(get_GSOD(years = 1923)) 11 | expect_error(get_GSOD(years = 1901 + as.POSIXlt(Sys.Date())$year)) 12 | expect_error(get_GSOD(years = 0)) 13 | expect_error(get_GSOD(years = -1)) 14 | }) 15 | 16 | # Some of these tests exercise the sub-functions directly to avoid downloading files 17 | # Check that .validate_years handles valid years ------------------------------- 18 | test_that(".validate_years handles valid years", { 19 | skip_if_offline() 20 | expect_silent(.validate_years(years = 1929:2016)) 21 | expect_silent(.validate_years(years = 2016)) 22 | }) 23 | 24 | # Check that invalid stations are handled -------------------------------------- 25 | test_that("invalid stations are handled", { 26 | skip_if_offline() 27 | expect_error(.validate_station_id( 28 | station = "aaa-bbbbbb", 29 | isd_history = isd_history 30 | )) 31 | }) 32 | 33 | # Check that station validation for years available on server works properly --- 34 | test_that("Station validations for years available emit a warning", { 35 | skip_if_offline() 36 | expect_warning(.validate_station_data_years( 37 | station = "949999-00170", 38 | isd_history = isd_history, 39 | years = 2010 40 | )) 41 | }) 42 | 43 | test_that("Station validations for years available are silent for a valid station", { 44 | skip_if_offline() 45 | expect_silent(.validate_station_data_years( 46 | years = 2010, 47 | station = "955510-99999", 48 | isd_history = isd_history 49 | )) 50 | }) 51 | 52 | # Check missing days in non-leap years ----------------------------------------- 53 | test_that("missing days check allows stations with permissible days missing, 54 | non-leap year", { 55 | skip_if_offline() 56 | max_missing <- 5 57 | td <- tempdir() 58 | just_right_2015 <- 59 | data.frame(c(rep(12, 360)), c(rep("X", 360))) 60 | too_short_2015 <- 61 | data.frame(c(rep(12, 300)), c(rep("X", 
300))) 62 | df_list <- list(just_right_2015, too_short_2015) 63 | dir.create(path = file.path(td, "2015")) 64 | 65 | filenames <- c("just_right0", "too_short00") 66 | sapply( 67 | seq_along(df_list), 68 | function(x) { 69 | write.csv( 70 | df_list[[x]], 71 | file = paste0( 72 | td, 73 | "/2015/", 74 | filenames[x], 75 | ".csv" 76 | ) 77 | ) 78 | } 79 | ) 80 | GSOD_list <- 81 | list.files( 82 | path = file.path(td, "2015"), 83 | pattern = "\\.csv$", 84 | full.names = TRUE 85 | ) 86 | 87 | if (!is.null(max_missing)) { 88 | GSOD_list_filtered <- .validate_missing_days( 89 | max_missing, 90 | GSOD_list 91 | ) 92 | } 93 | expect_length(GSOD_list, 2) 94 | expect_match( 95 | basename(GSOD_list_filtered), 96 | "just_right0.csv" 97 | ) 98 | rm_files <- 99 | list.files(file.path(td, "2015"), full.names = TRUE) 100 | file.remove(rm_files) 101 | unlink(file.path(td, "2015"), recursive = TRUE) # file.remove() cannot delete a directory on Windows 102 | }) 103 | 104 | # Check missing days in leap years --------------------------------------------- 105 | test_that("missing days check allows stations with permissible days missing, 106 | leap year", { 107 | skip_if_offline() 108 | max_missing <- 5 109 | td <- tempdir() 110 | just_right_2016 <- 111 | data.frame(c(rep(12, 361)), c(rep("X", 361))) 112 | too_short_2016 <- 113 | data.frame(c(rep(12, 300)), c(rep("X", 300))) 114 | df_list <- list(just_right_2016, too_short_2016) 115 | dir.create(path = file.path(td, "2016")) 116 | 117 | filenames <- c("just_right0", "too_short00") 118 | sapply( 119 | seq_along(df_list), 120 | function(x) { 121 | write.csv( 122 | df_list[[x]], 123 | file = paste0( 124 | td, 125 | "/2016/", 126 | filenames[x], 127 | ".csv" 128 | ) 129 | ) 130 | } 131 | ) 132 | GSOD_list <- 133 | list.files( 134 | path = file.path(td, "2016"), 135 | pattern = "\\.csv$", 136 | full.names = TRUE 137 | ) 138 | if (!is.null(max_missing)) { 139 | GSOD_list_filtered <- .validate_missing_days( 140 | max_missing, 141 | GSOD_list 142 | ) 143 | } 144 | 145 | expect_length(GSOD_list, 2) 146 | 
expect_match(basename(GSOD_list_filtered), "just_right0.csv") 147 | rm_files <- 148 | list.files(file.path(td, "2016"), full.names = TRUE) 149 | file.remove(rm_files) 150 | unlink(file.path(td, "2016"), recursive = TRUE) # file.remove() cannot delete a directory on Windows 151 | }) 152 | 153 | # Check that max_missing only accepts positive values -------------------------- 154 | test_that("The 'max_missing' parameter will not accept NA values", { 155 | skip_if_offline() 156 | expect_error(get_GSOD(years = 2010, max_missing = NA)) 157 | }) 158 | 159 | test_that("The 'max_missing' parameter will not accept values < 1", { 160 | skip_if_offline() 161 | expect_error(get_GSOD(years = 2010, max_missing = 0.1)) 162 | }) 163 | 164 | # Check validate country returns a two letter code ----------------------------- 165 | test_that("Check validate country returns a two letter code", { 166 | skip_if_offline() 167 | # Load country list 168 | # CRAN NOTE avoidance 169 | 170 | country <- "Philippines" 171 | Philippines <- .validate_country(country, isd_history) 172 | expect_match(Philippines, "RP") 173 | 174 | country <- "PHL" 175 | PHL <- .validate_country(country, isd_history) 176 | expect_match(PHL, "RP") 177 | 178 | country <- "PH" 179 | PH <- .validate_country(country, isd_history) 180 | expect_match(PH, "RP") 181 | }) 182 | 183 | # Check validate country returns an error on invalid entry---------------------- 184 | test_that("Check validate country returns an error on invalid entry when 185 | misspelled", { 186 | skip_if_offline() 187 | country <- "Philipines" 188 | expect_error(.validate_country(country, isd_history)) 189 | }) 190 | 191 | test_that("Check validate country returns an error on invalid entry when 192 | two characters are used that are not in the list", { 193 | skip_if_offline() 194 | country <- "RZ" 195 | expect_error(.validate_country(country, isd_history)) 196 | }) 197 | 198 | test_that("Check validate country returns an error on invalid entry when 199 | three characters are used that are not in the list", { 200 | 
skip_if_offline() 201 | country <- "RPS" 202 | expect_error(.validate_country(country, isd_history)) 203 | }) 204 | 205 | # Check that max_missing is not allowed for current year ----------------------- 206 | test_that("max_missing is not allowed for current year", { 207 | skip_if_offline() 208 | years <- 1983:format(Sys.Date(), "%Y") 209 | expect_error(get_GSOD(years = years, max_missing = 5)) 210 | }) 211 | 212 | # Check that only unique stations returned, tempdir() is cleaned up on exit ---- 213 | test_that("unique stations are returned, tempdir() is cleaned up on exit", { 214 | skip_if_offline() 215 | a <- get_GSOD(years = 1929, station = "039800-99999") 216 | b <- get_GSOD(years = 1929, station = "039730-99999") 217 | expect_length(list.files( # isTRUE() of a character vector is always FALSE, so assert emptiness directly 218 | tempdir(), 219 | pattern = "\\.csv$", 220 | full.names = TRUE 221 | ), 0) 222 | expect_length(unique(b$STNID), 1) 223 | }) 224 | 225 | # Check that agroclimatology is returned when requested ------------------------ 226 | test_that("agroclimatology data is returned as requested", { 227 | skip_if_offline() 228 | a <- get_GSOD(years = 1929, agroclimatology = TRUE) 229 | expect_lt(max(a$LATITUDE), 60) 230 | expect_gt(min(a$LATITUDE), -60) 231 | }) 232 | 233 | # Check that agroclimatology and station cannot be specified concurrently ------ 234 | test_that("agroclimatology and station cannot be specified concurrently", { 235 | skip_if_offline() 236 | expect_error(get_GSOD( 237 | years = 2010, 238 | agroclimatology = TRUE, 239 | station = "489300-99999" 240 | )) 241 | }) 242 | 243 | # Check the structure of the data.table and contents --------------------------- 244 | # this also provides tests for `.process_csv()` 245 | # Check that when specifying a country only that country is returned ----------- 246 | test_that("get_GSOD works properly and for one country only", { 247 | skip_if_offline() 248 | a <- get_GSOD(years = 1929, country = "UK") 249 | expect_equal(a$CTRY[1], "UK") 250 | expect_s3_class(a, 
"data.table") 251 | expect_equal(a$STNID[[1]], "030050-99999") 252 | expect_equal(a$NAME[[1]], "LERWICK") 253 | expect_equal(a$CTRY[[1]], "UK") 254 | expect_equal(a$COUNTRY_NAME[[1]], "UNITED KINGDOM") 255 | expect_equal(a$ISO2C[[1]], "GB") 256 | expect_equal(a$ISO3C[[1]], "GBR") 257 | expect_equal(a$STATE[[1]], "") 258 | expect_equal(a$LATITUDE[[1]], 60.133) 259 | expect_equal(a$LONGITUDE[[1]], -1.183) 260 | expect_equal(a$ELEVATION[[1]], 84) 261 | expect_equal(a$BEGIN[[1]], 19291001) 262 | expect_equal( 263 | lapply(a, class), 264 | list( 265 | STNID = "character", 266 | NAME = "character", 267 | CTRY = "character", 268 | COUNTRY_NAME = "character", 269 | ISO2C = "character", 270 | ISO3C = "character", 271 | STATE = "character", 272 | LATITUDE = "numeric", 273 | LONGITUDE = "numeric", 274 | ELEVATION = "numeric", 275 | BEGIN = "integer", 276 | END = "integer", 277 | YEARMODA = "Date", 278 | YEAR = "integer", 279 | MONTH = "integer", 280 | DAY = "integer", 281 | YDAY = "integer", 282 | TEMP = "numeric", 283 | TEMP_ATTRIBUTES = "integer", 284 | DEWP = "numeric", 285 | DEWP_ATTRIBUTES = "integer", 286 | SLP = "numeric", 287 | SLP_ATTRIBUTES = "integer", 288 | STP = "numeric", 289 | STP_ATTRIBUTES = "integer", 290 | VISIB = "numeric", 291 | VISIB_ATTRIBUTES = "integer", 292 | WDSP = "numeric", 293 | WDSP_ATTRIBUTES = "integer", 294 | MXSPD = "numeric", 295 | GUST = "numeric", 296 | MAX = "numeric", 297 | MAX_ATTRIBUTES = "character", 298 | MIN = "numeric", 299 | MIN_ATTRIBUTES = "character", 300 | PRCP = "numeric", 301 | PRCP_ATTRIBUTES = "character", 302 | SNDP = "numeric", 303 | I_FOG = "numeric", 304 | I_RAIN_DRIZZLE = "numeric", 305 | I_SNOW_ICE = "numeric", 306 | I_HAIL = "numeric", 307 | I_THUNDER = "numeric", 308 | I_TORNADO_FUNNEL = "numeric", 309 | EA = "numeric", 310 | ES = "numeric", 311 | RH = "numeric" 312 | ) 313 | ) 314 | }) 315 | 316 | test_that("only specified country is returned using 2 letter ISO codes", { 317 | skip_if_offline() 318 | a <- 
get_GSOD(years = 1929, country = "GB") 319 | expect_equal(a$CTRY[1], "UK") 320 | }) 321 | 322 | test_that("only specified country is returned using 3 letter ISO codes", { 323 | skip_if_offline() 324 | a <- get_GSOD(years = 1929, country = "GBR") 325 | expect_equal(a$CTRY[1], "UK") 326 | }) 327 | -------------------------------------------------------------------------------- /tests/testthat/test-get_inventory.R: -------------------------------------------------------------------------------- 1 | # Check that get_inventory functions properly ---------------------------------- 2 | test_that("get_inventory fetches the inventory doc and returns a data frame", { 3 | skip_if_offline() 4 | x <- get_inventory() 5 | expect_length(x, 25) 6 | expect_s3_class(x, "data.frame") 7 | expect_type(x$STNID, "character") 8 | expect_type(x$NAME, "character") 9 | expect_type(x$LAT, "double") 10 | expect_type(x$LON, "double") 11 | expect_type(x$`ELEV(M)`, "double") 12 | expect_type(x$CTRY, "character") 13 | expect_type(x$STATE, "character") 14 | expect_type(x$BEGIN, "integer") 15 | expect_type(x$END, "integer") 16 | expect_type(x$COUNTRY_NAME, "character") 17 | expect_type(x$ISO2C, "character") 18 | expect_type(x$ISO3C, "character") 19 | expect_type(x$YEAR, "integer") 20 | expect_type(x$JAN, "integer") 21 | expect_type(x$FEB, "integer") 22 | expect_type(x$MAR, "integer") 23 | expect_type(x$APR, "integer") 24 | expect_type(x$MAY, "integer") 25 | expect_type(x$JUN, "integer") 26 | expect_type(x$JUL, "integer") 27 | expect_type(x$AUG, "integer") 28 | expect_type(x$SEP, "integer") 29 | expect_type(x$OCT, "integer") 30 | expect_type(x$NOV, "integer") 31 | expect_type(x$DEC, "integer") 32 | 33 | y <- capture.output(x) 34 | expect_type(y, "character") 35 | expect_equal( 36 | y[[1]], 37 | " *** FEDERAL CLIMATE COMPLEX INTEGRATED SURFACE DATA INVENTORY *** " 38 | ) 39 | expect_equal( 40 | y[[2]], 41 | " This inventory provides the number of weather observations by " 42 | ) 43 | }) 44 | 45 | 
test_that("inventory file is removed after download", { 46 | skip_if_offline() 47 | expect_false(file.exists(file.path(tempdir(), "inventory.txt"))) # expect_false() reports a clearer failure than expect_true(!x) 48 | }) 49 | -------------------------------------------------------------------------------- /tests/testthat/test-get_updates.R: -------------------------------------------------------------------------------- 1 | test_that("get_updates() returns a data.table", { 2 | skip_if_offline() 3 | x <- get_updates() 4 | expect_s3_class(x, "data.table") 5 | expect_named(x, c("STNID", "YEAR", "DATE", "COMMENT")) 6 | expect_type(x$STNID, "character") 7 | expect_type(x$YEAR, "integer") 8 | expect_s3_class(x$DATE, "Date") 9 | expect_type(x$COMMENT, "character") 10 | }) 11 | -------------------------------------------------------------------------------- /tests/testthat/test-nearest_stations.R: -------------------------------------------------------------------------------- 1 | # Check that nearest stations functions properly ------------------------------- 2 | test_that("nearest stations returns station IDs nearest to farthest", { 3 | skip_if_offline() 4 | n <- 5 | nearest_stations( 6 | LAT = -27.5598, 7 | LON = 151.9507, 8 | distance = 100 9 | ) 10 | expect_equal(dim(n), c(17, 13)) 11 | expect_s3_class(n, "data.table") 12 | expect_equal( 13 | n$STNID, 14 | c( 15 | "945510-99999", 16 | "955510-99999", 17 | "945520-99999", 18 | "949999-00170", 19 | "949999-00183", 20 | "945620-99999", 21 | "749459-99999", 22 | "945550-99999", 23 | "949999-00186", 24 | "955550-99999", 25 | "945951-99999", 26 | "949999-00172", 27 | "945420-99999", 28 | "949999-00179", 29 | "949999-00185", 30 | "949999-00176", 31 | "949999-00180" 32 | ) 33 | ) 34 | rm(n) 35 | }) 36 | -------------------------------------------------------------------------------- /tests/testthat/test-reformat_GSOD.R: -------------------------------------------------------------------------------- 1 | # Check that reformat_GSOD functions properly ---------------------------------- 2 | 
test_that("reformat_GSOD file_list parameter reformats data properly", { 3 | skip_if_offline() 4 | do.call( 5 | file.remove, 6 | list(list.files( 7 | tempdir(), 8 | pattern = "\\.csv$", 9 | full.names = TRUE 10 | )) 11 | ) 12 | 13 | # download two 1960 station files into tempdir() 14 | 15 | url_base <- 16 | "https://www.ncei.noaa.gov/data/global-summary-of-the-day/access/1960/" 17 | test_files <- 18 | c("06600099999.csv", "06620099999.csv") 19 | destinations <- file.path(tempdir(), test_files) 20 | 21 | Map( 22 | function(u, d) { 23 | curl::curl_download(u, d, mode = "wb", quiet = TRUE) 24 | }, 25 | paste0(url_base, test_files), 26 | destinations 27 | ) 28 | 29 | file_list <- list.files( 30 | path = tempdir(), 31 | pattern = "^.*\\.csv$", 32 | full.names = TRUE 33 | ) 34 | expect_length(file_list, 2) 35 | expect_equal( 36 | basename(file_list), 37 | c( 38 | "06600099999.csv", 39 | "06620099999.csv" 40 | ) 41 | ) 42 | 43 | # check that provided a file list, the function works properly 44 | x <- reformat_GSOD(file_list = file_list) 45 | expect_equal(nrow(x), 722) 46 | expect_length(x, 47) 47 | expect_s3_class(x, "data.table") 48 | 49 | # check that provided a dsn only, the function works properly 50 | x <- reformat_GSOD(dsn = tempdir()) 51 | expect_equal(nrow(x), 722) 52 | expect_length(x, 47) 53 | expect_s3_class(x, "data.table") 54 | 55 | # Check that a message is emitted when both dsn and file_list are set -------- 56 | expect_message( 57 | reformat_GSOD( 58 | dsn = tempdir(), 59 | file_list = file_list 60 | ), 61 | regexp = "\nYou have specified both `file_list` and `dsn`. 
*" 62 | ) 63 | 64 | unlink(destinations) 65 | }) 66 | 67 | 68 | # Check that reformat_GSOD stops if no files are found ------------------------- 69 | test_that("reformat_GSOD stops if no files are found", { 70 | skip_if_offline() 71 | expect_error(reformat_GSOD(dsn = "/dev/NULL")) 72 | }) 73 | -------------------------------------------------------------------------------- /tests/testthat/test-update_station_list.R: -------------------------------------------------------------------------------- 1 | load(system.file("extdata", "isd_history.rda", package = "GSODR")) 2 | 3 | test_that("If user selects no, database not updated", { 4 | skip_if_offline() 5 | f <- file() 6 | options(GSODR_connection = f) 7 | ans <- "no" 8 | write(ans, f) 9 | expect_error(update_station_list()) 10 | options(GSODR_connection = stdin()) 11 | close(f) 12 | }) 13 | 14 | # update_station_list() downloads, imports file and resets timeout on exit ----- 15 | test_that("update_station_list() downloads and imports proper file", { 16 | skip_if_offline() 17 | f <- file() 18 | options(GSODR_connection = f) 19 | ans <- "yes" 20 | write(ans, f) 21 | expect_message(update_station_list()) 22 | expect_equal(ncol(isd_history), 12) 23 | expect_named( 24 | isd_history, 25 | c( 26 | "STNID", 27 | "NAME", 28 | "LAT", 29 | "LON", 30 | "ELEV(M)", 31 | "CTRY", 32 | "STATE", 33 | "BEGIN", 34 | "END", 35 | "COUNTRY_NAME", 36 | "ISO2C", 37 | "ISO3C" 38 | ) 39 | ) 40 | expect_equal(options("timeout")[[1]], 60) 41 | options(GSODR_connection = stdin()) 42 | close(f) 43 | }) 44 | -------------------------------------------------------------------------------- /vignettes/Ex5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/GSODR/4cefe1fa79148a14282b8b2a59def6dc66b2a5d7/vignettes/Ex5-1.png -------------------------------------------------------------------------------- /vignettes/GSOD_Station_locations-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/GSODR/4cefe1fa79148a14282b8b2a59def6dc66b2a5d7/vignettes/GSOD_Station_locations-1.png -------------------------------------------------------------------------------- /vignettes/precompile.R: -------------------------------------------------------------------------------- 1 | # vignettes that depend on Internet access need to be pre-compiled 2 | library("knitr") 3 | knit("vignettes/GSODR.Rmd.orig", "vignettes/GSODR.Rmd") 4 | 5 | # remove file path such that vignettes will build with figures 6 | replace <- readLines("vignettes/GSODR.Rmd") 7 | replace <- gsub("