├── .Rbuildignore
├── .covrignore
├── .github
    ├── .gitignore
    ├── CONTRIBUTING.md
    └── workflows
    │   ├── R-CMD-check.yaml
    │   ├── pkgdown.yaml
    │   └── test-coverage.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── DESCRIPTION
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
    ├── after.R
    ├── alphord.R
    ├── arg-match.R
    ├── before.R
    ├── c-wrappers.R
    ├── camel-case.R
    ├── can-be-num.R
    ├── currency.R
    ├── detect.R
    ├── elem.R
    ├── extract-non-nums.R
    ├── extract-nums.R
    ├── give-ext.R
    ├── locate.R
    ├── num-after.R
    ├── num-before.R
    ├── remove.R
    ├── singleize.R
    ├── split-by-nums.R
    ├── strex-package.R
    ├── to-vec.R
    ├── trim.R
    └── utils.R
├── README.Rmd
├── README.md
├── codecov.yml
├── codemeta.json
├── cran-comments.md
├── index.Rmd
├── index.md
├── inst
    ├── CODE_OF_CONDUCT.md
    └── WORDLIST
├── junk
    ├── R
    │   └── utils.R
    ├── braces.cpp
    ├── currency.cpp
    ├── elem.cpp
    ├── extract-non-nums.cpp
    ├── junk.R
    ├── junk.cpp
    ├── pasting.cpp
    ├── pasting.h
    └── strings.cpp
├── man
    ├── before-and-after.Rd
    ├── currency.Rd
    ├── figures
    │   ├── README-pressure-1.png
    │   └── logo.png
    ├── str_alphord_nums.Rd
    ├── str_before_last_dot.Rd
    ├── str_can_be_numeric.Rd
    ├── str_detect_all.Rd
    ├── str_elem.Rd
    ├── str_elems.Rd
    ├── str_extract_non_numerics.Rd
    ├── str_extract_numbers.Rd
    ├── str_give_ext.Rd
    ├── str_locate_braces.Rd
    ├── str_locate_nth.Rd
    ├── str_match_arg.Rd
    ├── str_nth_non_numeric.Rd
    ├── str_nth_number.Rd
    ├── str_nth_number_after_mth.Rd
    ├── str_nth_number_before_mth.Rd
    ├── str_paste_elems.Rd
    ├── str_remove_quoted.Rd
    ├── str_singleize.Rd
    ├── str_split_by_numbers.Rd
    ├── str_split_camel_case.Rd
    ├── str_to_vec.Rd
    ├── str_trim_anything.Rd
    └── strex.Rd
├── pkgdown
    └── favicon
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-60x60.png
    │   ├── apple-touch-icon-76x76.png
    │   ├── apple-touch-icon.png
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   └── favicon.ico
├── src
    ├── .gitignore
    ├── arg-match.c
    ├── detect.c
    ├── fullocate.c
    ├── init.c
    ├── interleave.c
    ├── list-utils.c
    ├── list-utils.h
    ├── matrix-utils.c
    ├── pairlist-utils.c
    ├── pairlist-utils.h
    ├── stringi-imports.h
    └── vec-utils.c
├── strex.Rproj
├── tests
    ├── spelling.R
    ├── testthat.R
    └── testthat
    │   ├── _snaps
    │       ├── after.md
    │       ├── alphord.md
    │       ├── arg-match.md
    │       ├── currency.md
    │       ├── elem.md
    │       ├── extract-non-numerics.md
    │       ├── extract-nums.md
    │       ├── locate.md
    │       └── num-after.md
    │   ├── test-after.R
    │   ├── test-alphord.R
    │   ├── test-arg-match.R
    │   ├── test-before.R
    │   ├── test-c-wrappers.R
    │   ├── test-camel-case.R
    │   ├── test-can-be-num.R
    │   ├── test-currency.R
    │   ├── test-detect.R
    │   ├── test-elem.R
    │   ├── test-extract-non-numerics.R
    │   ├── test-extract-nums.R
    │   ├── test-give-ext.R
    │   ├── test-locate.R
    │   ├── test-num-after.R
    │   ├── test-num-before.R
    │   ├── test-remove.R
    │   ├── test-singleize.R
    │   ├── test-split-by-nums.R
    │   ├── test-to-vec.R
    │   ├── test-trim.R
    │   └── test-utils.R
└── vignettes
    ├── .gitignore
    ├── alphordering-numbers.Rmd
    ├── argument-matching.Rmd
    ├── before-and-after.Rmd
    ├── detection.Rmd
    ├── important-miscellany.Rmd
    └── numbers-in-strings.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^CRAN-RELEASE$
 2 | ^revdep$
 3 | ^cran-comments\.md$
 4 | ^codecov\.yml$
 5 | ^appveyor\.yml$
 6 | ^\.github$
 7 | ^docs$
 8 | ^_pkgdown\.yml$
 9 | ^README.+$
10 | ^LICENSE\.md$
11 | ^.*\.Rproj$
12 | ^\.Rproj\.user$
13 | ^index\.Rmd$
14 | ^index\.md$
15 | ^\.travis\.yml$
16 | ^junk$
17 | ^pkgdown$
18 | ^codemeta\.json$
19 | ^\.covrignore$
20 | ^CODE_OF_CONDUCT\.md$
21 | ^\.pre-commit-config\.yaml$
22 | ^CRAN-SUBMISSION$
23 | 


--------------------------------------------------------------------------------
/.covrignore:
--------------------------------------------------------------------------------
1 | R/strex-package.R
2 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to strex
 2 | 
 3 | This outlines how to propose a change to strex. For more detailed
 4 | info about contributing to this, and other packages, please see the
 5 | [**development contributing guide**](https://rstd.io/tidy-contrib).
 6 | 
 7 | ### Fixing typos
 8 | 
 9 | Small typos or grammatical errors in documentation may be edited directly using
10 | the GitHub web interface, so long as the changes are made in the _source_ file.
11 | 
12 | *  YES: you edit a roxygen comment in a `.R` file below `R/`.
13 | *  NO: you edit an `.Rd` file below `man/`.
14 | 
15 | ### Prerequisites
16 | 
17 | Before you make a substantial pull request, you should always file an issue and
18 | make sure someone from the team agrees that it’s a problem. If you’ve found a
19 | bug, create an associated issue and illustrate the bug with a minimal 
20 | [reprex](https://www.tidyverse.org/help/#reprex).
21 | 
22 | ### Pull request process
23 | 
24 | *  We recommend that you create a Git branch for each pull request (PR).  
25 | *  Look at the Travis and AppVeyor build status before and after making changes.
26 | The `README` should contain badges for any continuous integration services used
27 | by the package.  
28 | *  New code should follow the tidyverse [style guide](http://style.tidyverse.org).
29 | You can use the [styler](https://CRAN.R-project.org/package=styler) package to
30 | apply these styles, but please don't restyle code that has nothing to do with 
31 | your PR.  
32 | *  We use [roxygen2](https://cran.r-project.org/package=roxygen2), with
33 | [Markdown syntax](https://cran.r-project.org/web/packages/roxygen2/vignettes/markdown.html), 
34 | for documentation.  
35 | *  We use [testthat](https://cran.r-project.org/package=testthat). Contributions
36 | with test cases included are easier to accept.  
37 | *  For user-facing changes, add a bullet to the top of `NEWS.md` below the
38 | current development version header describing the changes made followed by your
39 | GitHub username, and links to relevant issue(s)/PR(s).
40 | 
41 | ### Code of Conduct
42 | 
43 | Please note that the strex project is released with a
44 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this
45 | project you agree to abide by its terms.
46 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macOS-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v2
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v2
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 | 
47 |       - uses: r-lib/actions/check-r-package@v2
48 |         with:
49 |           upload-snapshots: true
50 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches: [main, master]
 4 | 
 5 | name: pkgdown
 6 | 
 7 | jobs:
 8 |   pkgdown:
 9 |     runs-on: macOS-latest
10 |     env:
11 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
12 |     steps:
13 |       - uses: actions/checkout@v2
14 | 
15 |       - uses: r-lib/actions/setup-r@v2
16 | 
17 |       - uses: r-lib/actions/setup-pandoc@v2
18 | 
19 |       - name: Query dependencies
20 |         run: |
21 |           install.packages('remotes')
22 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
23 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
24 |         shell: Rscript {0}
25 | 
26 |       - name: Cache R packages
27 |         uses: actions/cache@v2
28 |         with:
29 |           path: ${{ env.R_LIBS_USER }}
30 |           key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
31 |           restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
32 | 
33 |       - name: Install dependencies
34 |         run: |
35 |           remotes::install_deps(dependencies = TRUE)
36 |           install.packages("pkgdown", type = "binary")
37 |         shell: Rscript {0}
38 | 
39 |       - name: Install package
40 |         run: R CMD INSTALL .
41 | 
42 |       - name: Deploy package
43 |         run: |
44 |           git config --local user.email "actions@github.com"
45 |           git config --local user.name "GitHub Actions"
46 |           Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)'
47 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: test-coverage
10 | 
11 | jobs:
12 |   test-coverage:
13 |     runs-on: ubuntu-latest
14 |     env:
15 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
16 | 
17 |     steps:
18 |       - uses: actions/checkout@v2
19 | 
20 |       - uses: r-lib/actions/setup-r@v2
21 |         with:
22 |           use-public-rspm: true
23 | 
24 |       - uses: r-lib/actions/setup-r-dependencies@v2
25 |         with:
26 |           extra-packages: any::covr
27 |           needs: coverage
28 | 
29 |       - name: Test coverage
30 |         run: covr::codecov(quiet = FALSE)
31 |         shell: Rscript {0}
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | inst/doc/
5 | docs/
6 | revdep/
7 | CRAN-SUBMISSION
8 | inst/doc
9 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # All available hooks: https://pre-commit.com/hooks.html
 2 | # R specific hooks: https://github.com/lorenzwalthert/precommit
 3 | repos:
 4 | -   repo: https://github.com/lorenzwalthert/precommit
 5 |     rev: v0.1.3
 6 |     hooks:
 7 |     -   id: roxygenize
 8 |     -   id: codemeta-description-updated
 9 |     -   id: use-tidy-description
10 |     -   id: parsable-R
11 |     -   id: no-browser-statement
12 | -   repo: https://github.com/pre-commit/pre-commit-hooks
13 |     rev: v3.4.0
14 |     hooks:
15 |     -   id: check-added-large-files
16 |         args: ['--maxkb=200']
17 |     -   id: end-of-file-fixer
18 |         exclude: '\.Rd|\.md'
19 | -   repo: local
20 |     hooks:
21 |     -   id: forbid-to-commit
22 |         name: Don't commit common R artifacts
23 |         entry: Cannot commit .Rhistory, .RData, .Rds or .rds.
24 |         language: fail
25 |         files: '\.Rhistory|\.RData|\.Rds|\.rds$'
26 |         # `exclude: <regex>` to allow committing specific files.
27 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: strex
 2 | Title: Extra String Manipulation Functions
 3 | Version: 2.0.1
 4 | Authors@R: 
 5 |     person("Rory", "Nolan", , "rorynoolan@gmail.com", role = c("aut", "cre"),
 6 |            comment = c(ORCID = "0000-0002-5239-4043"))
 7 | Description: There are some things that I wish were easier with the
 8 |     'stringr' or 'stringi' packages. The foremost of these is the
 9 |     extraction of numbers from strings. 'stringr' and 'stringi' make you
10 |     figure out the regular expression for yourself; 'strex' takes care of
11 |     this for you. There are many other handy functionalities in 'strex'.
12 |     Contributions to this package are encouraged; it is intended as a
13 |     miscellany of string manipulation functions that cannot be found in
14 |     'stringi' or 'stringr'.
15 | License: GPL-3
16 | URL: https://rorynolan.github.io/strex/,
17 |     https://github.com/rorynolan/strex
18 | BugReports: https://github.com/rorynolan/strex/issues
19 | Depends: 
20 |     R (>= 3.5),
21 |     stringr (>= 1.5)
22 | Imports: 
23 |     checkmate (>= 1.9.3),
24 |     lifecycle,
25 |     magrittr (>= 1.5),
26 |     rlang (>= 1.0),
27 |     stats,
28 |     stringi (>= 1.7.8),
29 |     utils
30 | Suggests: 
31 |     bench,
32 |     covr,
33 |     knitr,
34 |     purrr,
35 |     rmarkdown,
36 |     spelling,
37 |     testthat (>= 3.0)
38 | VignetteBuilder: 
39 |     knitr
40 | Biarch: TRUE
41 | Config/testthat/edition: 3
42 | Encoding: UTF-8
43 | Language: en-US
44 | Roxygen: list(markdown = TRUE)
45 | RoxygenNote: 7.3.2
46 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(match_arg)
 4 | export(str_after_first)
 5 | export(str_after_last)
 6 | export(str_after_nth)
 7 | export(str_alphord_nums)
 8 | export(str_before_first)
 9 | export(str_before_last)
10 | export(str_before_last_dot)
11 | export(str_before_nth)
12 | export(str_can_be_numeric)
13 | export(str_detect_all)
14 | export(str_detect_any)
15 | export(str_elem)
16 | export(str_elems)
17 | export(str_extract_currencies)
18 | export(str_extract_non_numerics)
19 | export(str_extract_numbers)
20 | export(str_first_currency)
21 | export(str_first_non_numeric)
22 | export(str_first_number)
23 | export(str_first_number_after_first)
24 | export(str_first_number_after_last)
25 | export(str_first_number_after_mth)
26 | export(str_first_number_before_first)
27 | export(str_first_number_before_last)
28 | export(str_first_number_before_mth)
29 | export(str_give_ext)
30 | export(str_last_currency)
31 | export(str_last_non_numeric)
32 | export(str_last_number)
33 | export(str_last_number_after_first)
34 | export(str_last_number_after_last)
35 | export(str_last_number_after_mth)
36 | export(str_last_number_before_first)
37 | export(str_last_number_before_last)
38 | export(str_last_number_before_mth)
39 | export(str_locate_braces)
40 | export(str_locate_first)
41 | export(str_locate_last)
42 | export(str_locate_nth)
43 | export(str_match_arg)
44 | export(str_nth_currency)
45 | export(str_nth_non_numeric)
46 | export(str_nth_number)
47 | export(str_nth_number_after_first)
48 | export(str_nth_number_after_last)
49 | export(str_nth_number_after_mth)
50 | export(str_nth_number_before_first)
51 | export(str_nth_number_before_last)
52 | export(str_nth_number_before_mth)
53 | export(str_paste_elems)
54 | export(str_remove_quoted)
55 | export(str_singleize)
56 | export(str_split_by_numbers)
57 | export(str_split_camel_case)
58 | export(str_to_vec)
59 | export(str_trim_anything)
60 | import(stringr)
61 | importFrom(magrittr,'%>%')
62 | importFrom(stats,as.dendrogram)
63 | importFrom(stringi,stri_write_lines)
64 | importFrom(utils,head)
65 | useDynLib(strex, .registration = TRUE)
66 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
  1 | # `strex` 2.0.1
  2 | 
  3 | ## BUG FIXES
  4 | * Was using `str_glue()` where `str_glue_data()` should have been used.
  5 | 
  6 | 
  7 | # `strex` 2.0.0
  8 | 
  9 | ## BREAKING CHANGES
 10 | * The `commas` argument has been replaced by the more flexible `big_mark`.
 11 | 
 12 | 
 13 | # `strex` 1.6.1
 14 | 
 15 | ## BUG FIXES
 16 | * Minor documentation fix for `str_detect_any()` and `str_detect_all()`.
 17 | 
 18 | 
 19 | # `strex` 1.6.0
 20 | 
 21 | ## MINOR IMPROVEMENTS
 22 | * Remove usage of `magrittr`'s `%<>%` operator.
 23 | * Now using the new `rlang::abort()` error message formatting.
 24 | 
 25 | 
 26 | # `strex` 1.5.1
 27 | 
 28 | ## BUG FIXES
 29 | * `purrr` should not have been in `Imports`. It is now in `Suggests`.
 30 | * Some lines in `stringi-imports.h` needed to be uncommented.
 31 | 
 32 | 
 33 | # `strex` 1.5.0
 34 | 
 35 | ## NEW FEATURES
 36 | * `str_detect_all()` and `str_detect_any()`.
 37 | 
 38 | 
 39 | # `strex` 1.4.4
 40 | 
 41 | ## BUG FIXES
 42 | * Fix for dev version of `stringr`.
 43 | 
 44 | 
 45 | # `strex` 1.4.3
 46 | 
 47 | ## BUG FIXES
 48 | * Insist on latest, bug-fixed `stringi` v1.7.8.
 49 | 
 50 | 
 51 | # `strex` 1.4.2
 52 | 
 53 | ## BUG FIXES
 54 | * Remove `LazyData` from `DESCRIPTION` (was causing CRAN note).
 55 | 
 56 | 
 57 | # `strex` 1.4.1
 58 | 
 59 | ## BUG FIXES
 60 | * R version 3.5 or greater is needed because `INTEGER_GET_REGION` is used, which was introduced with R's `ALTREP` stuff.
 61 | 
 62 | 
 63 | # `strex` 1.4.0
 64 | 
 65 | ## MINOR IMPROVEMENTS
 66 | * `str_trim_anything()` got a speedup.
 67 | * `str_match_arg()` now has better error messaging.
 68 | 
 69 | ## BUG FIXES
 70 | * `utils` was wrongly imported.
 71 | 
 72 | 
 73 | # `strex` 1.3.1
 74 | 
 75 | ## BUG FIXES
 76 | * Fix a garbage collection issue.
 77 | 
 78 | 
 79 | # `strex` 1.3.0
 80 | 
 81 | ## MINOR IMPROVEMENTS
 82 | * Move from `Rcpp` to `C`.
 83 | 
 84 | 
 85 | # `strex` 1.2.0
 86 | 
 87 | ## MINOR IMPROVEMENTS
 88 | * Include more examples to cover all common functionality.
 89 | 
 90 | 
 91 | # `strex` 1.1.1
 92 | 
 93 | ## BUG FIXES
 94 | * Insist on necessary versions of `stringr` and `processx`.
 95 | * Fix a C++ sanitizer warning.
 96 | 
 97 | 
 98 | # `strex` 1.1.0
 99 | 
100 | ## MINOR IMPROVEMENTS
101 | * The `pattern` argument of `str_trim_anything()` now supports regular expressions.
102 | 
103 | ## BUG FIXES
104 | * The new `configure` step opens up the package to people with GCC < 4.9; previously installation failed for them.
105 | 
106 | 
107 | # `strex` 1.0.3
108 | 
109 | ## BUG FIXES  
110 | * Make internal function `all_equal()` more consistent.
111 | 
112 | 
113 | # `strex` 1.0.2
114 | 
115 | ## BUG FIXES  
116 | * C++ sanitizer issues.
117 | 
118 | 
119 | # `strex` 1.0.1
120 | 
121 | ## BUG FIXES  
122 | * There was a pervasive bug related to handling of zero-length inputs.
123 | 
124 | 
125 | # `strex` 1.0.0
126 | 
127 | ## BREAKING CHANGES
128 | * `str_split_by_nums()` has been renamed to `str_split_by_numbers()` for consistency with `str_extract_numbers()`.
129 | * `str_get_currencies()` has been renamed to `str_extract_currencies()` and been greatly improved.
130 | * `str_get_currency()` has been replaced by `str_nth_currency()` with siblings `str_first_currency()` and `str_last_currency()`.
131 | * `str_match_arg()` has been updated to behave more like `base::match.arg()`.
132 | 
133 | ## NEW FEATURES
134 | * `str_elems()` has been added. It is a vectorized version of `str_elem()`.
135 | * The number (and non-numeric) extraction functions can now deal with scientific notation and with comma-separated numbers. This makes `str_first_number()` more versatile than `as.numeric()` since `as.numeric("1,000")` returns `NA` but `str_first_number("1,000")` returns the number 1000.
136 | 
137 | 
138 | # `strex` 0.1.3
139 | 
140 | ## BUG FIXES
141 | * Require necessary version of `glue`.
142 | 
143 | 
144 | # `strex` 0.1.2
145 | 
146 | ## BUG FIXES
147 | * Require necessary version of `ore`.
148 | 
149 | 
150 | # `strex` 0.1.1
151 | 
152 | ## BUG FIXES
153 | * Allow for unexpected error message on mac.
154 | 
155 | 
156 | # `strex` 0.1.0
157 | 
158 | * The first incarnation.
159 | 


--------------------------------------------------------------------------------
/R/after.R:
--------------------------------------------------------------------------------
 1 | #' Extract text before or after `n`th occurrence of pattern.
 2 | #'
 3 | #' Extract the part of a string which is before or after the `n`th occurrence of
 4 | #' a specified pattern, vectorized over the string.
 5 | #'
 6 | #' \itemize{ \item `str_after_first(...)` is just `str_after_nth(..., n = 1)`.
 7 | #' \item `str_after_last(...)` is just `str_after_nth(..., n = -1)`. \item
 8 | #' `str_before_first(...)` is just `str_before_nth(..., n = 1)`. \item
 9 | #' `str_before_last(...)` is just `str_before_nth(..., n = -1)`. }
10 | #'
11 | #' @param string A character vector.
12 | #' @param pattern The pattern to look for.
13 | #'
14 | #'   The default interpretation is a regular expression, as described in
15 | #'   [stringi::about_search_regex].
16 | #'
17 | #'   To match without a regular expression (i.e. as a human would), use
18 | #'   [coll()][stringr::coll]. For details see [stringr::regex()].
19 | #'
20 | #' @param n A vector of integerish values. Must be either length 1 or
21 | #'   have length equal to the length of `string`. Negative indices count from
22 | #'   the back: while `n = 1` and `n = 2` correspond to first and second, `n =
23 | #'   -1` and `n = -2` correspond to last and second-last. `n = 0` will return
24 | #'   `NA`.
25 | #'
26 | #' @return A character vector.
27 | #' @examples
28 | #' string <- "abxxcdxxdexxfgxxh"
29 | #' str_after_nth(string, "xx", 3)
30 | #' str_before_nth(string, "e", 1:2)
31 | #' str_before_nth(string, "xx", -3)
32 | #' str_before_nth(string, ".", -3)
33 | #' str_before_nth(rep(string, 2), "..x", -3)
34 | #' str_before_first(string, "d")
35 | #' str_before_last(string, "x")
36 | #' string <- c("abc", "xyz.zyx")
37 | #' str_after_first(string, ".") # using regex
38 | #' str_after_first(string, coll(".")) # using human matching
39 | #' str_after_last(c("xy", "xz"), "x")
40 | #' @name before-and-after
41 | #' @family bisectors
42 | NULL
43 | 
44 | #' @rdname before-and-after
45 | #' @export
46 | str_after_nth <- function(string, pattern, n) {
47 |   # Bail out early when `is_l0_char()` flags the input, skipping validation.
48 |   if (is_l0_char(string)) return(character())
49 |   verify_string_pattern_n(string, pattern, n)
50 |   # Keep everything to the right of where the `n`th match ends.
51 |   match_ends <- str_locate_nth(string, pattern, n)[, "end"]
52 |   str_sub(string, start = match_ends + 1)
53 | }
54 | 
55 | #' @rdname before-and-after
56 | #' @export
57 | str_after_first <- function(string, pattern) {
58 |   str_after_nth(string = string, pattern = pattern, n = 1)
59 | }
60 | 
61 | #' @rdname before-and-after
62 | #' @export
63 | str_after_last <- function(string, pattern) {
64 |   str_after_nth(string = string, pattern = pattern, n = -1)
65 | }
66 | 


--------------------------------------------------------------------------------
/R/alphord.R:
--------------------------------------------------------------------------------
  1 | #' Make string numbers comply with alphabetical order.
  2 | #'
  3 | #' If strings are numbered, their numbers may not *comply* with alphabetical
  4 | #' order, e.g. `"abc2"` comes after `"abc10"` in alphabetical order. We might
  5 | #' (for whatever reason) wish to change them such that they come in the order
  6 | #' *that we would like*. This function alters the strings such that they comply
  7 | #' with alphabetical order, so here `"abc2"` would be renamed to `"abc02"`. It
  8 | #' works on file names with more than one number in them e.g. `"abc01def3"` (a
  9 | #' string with 2 numbers). All the strings in the character vector `string` must
 10 | #' have the same number of numbers, and the non-number bits must be the same.
 11 | #'
 12 | #' @inheritParams str_after_nth
 13 | #'
 14 | #' @return A character vector.
 15 | #'
 16 | #' @examples
 17 | #' string <- paste0("abc", 1:12)
 18 | #' print(string)
 19 | #' str_alphord_nums(string)
 20 | #' str_alphord_nums(c("abc9def55", "abc10def7"))
 21 | #' str_alphord_nums(c("01abc9def55", "5abc10def777", "99abc4def4"))
 22 | #' str_alphord_nums(1:10)
 23 | #' \dontrun{
 24 | #' str_alphord_nums(c("abc9def55", "abc10xyz7")) # error
 25 | #' }
 26 | #'
 27 | #' @family alphorderers
 28 | #' @export
 29 | str_alphord_nums <- function(string) {
 30 |   # Early exit when `is_l0_char()` flags the input (presumably `character(0)`).
 31 |   if (is_l0_char(string)) return(character())
 32 |   # Numeric input is accepted as well; it is converted to character below.
 33 |   checkmate::assert(
 34 |     checkmate::check_numeric(string, min.len = 1),
 35 |     checkmate::check_character(string, min.len = 1)
 36 |   )
 37 |   if (is.numeric(string)) string <- as.character(string)
 38 |   have_nums <- str_detect(string, "\\d")
 39 |   if (!all(have_nums)) {
 40 |     bad_index <- match(FALSE, have_nums)
 41 |     rlang::abort(
 42 |       c("Some of the input strings have no numbers in them.",
 43 |         x = str_glue(
 44 |           "The first bad string is string number {bad_index}, ",
 45 |           "which is '{string[bad_index]}'."
 46 |         )
 47 |       )
 48 |     )
 49 |   }
 50 |   non_nums <- str_extract_non_numerics(string)
 51 |   if (length(unique(non_nums)) > 1) {
 52 |     bad_index <- 2
 53 |     while (isTRUE(all.equal(non_nums[[1]], non_nums[[bad_index]]))) {
 54 |       bad_index <- bad_index + 1
 55 |     }
 56 |     rlang::abort(
 57 |       c("The non-number bits of every string must be the same.",
 58 |         i = str_glue(
 59 |           "The first pair of strings with different non-",
 60 |           "number bits are strings 1 and {bad_index}."
 61 |         ),
 62 |         x = str_glue("They are '{string[1]}' and '{string[bad_index]}'.")
 63 |       )
 64 |     )
 65 |   }
 66 |   nums <- str_extract_numbers(string, leave_as_string = TRUE)
 67 |   nums_lengths <- lengths(nums)
 68 |   if (length(unique(nums_lengths)) > 1) {
 69 |     bad_index <- match(FALSE, nums_lengths == nums_lengths[1])
 70 |     rlang::abort(
 71 |       c("The strings must all have the same number of numbers.",
 72 |         x = str_glue(
 73 |           "Your string number 1 \"{string[1]}\" has ",
 74 |           "{nums_lengths[1]} numbers, whereas your string number ",
 75 |           "{bad_index} '{string[bad_index]}' has ",
 76 |           "{nums_lengths[bad_index]} numbers."
 77 |         )
 78 |       )
 79 |     )
 80 |   }
 81 |   nums <- simplify2array(nums)
 82 |   if (!is.matrix(nums)) nums <- t(nums)
 83 |   # `nums` is now a matrix with one column per string; `ncn` is its widths.
 84 |   ncn <- array(str_length(nums), dim = dim(nums))
 85 |   # Left-pad every number with zeros, then keep only the trailing characters:
 86 |   # `max_lengths[i]` of them for row `i` (assumes one max per row; confirm).
 87 |   max_lengths <- int_mat_row_maxs(ncn)
 88 |   min_length <- min(ncn)
 89 |   to_prefix <- rep("0", max(max_lengths) - min_length) %>% str_c(collapse = "")
 90 |   nums <- str_c(to_prefix, nums)
 91 |   starts <- -rep(max_lengths, ncol(ncn))
 92 |   nums <- str_sub(nums, starts, -1) %>%
 93 |     split(rep(seq_len(ncol(ncn)), each = nrow(ncn)))
 94 |   num_first <- str_elem(string, 1) %>% str_can_be_numeric()
 95 |   if (length(unique(num_first)) > 1) {
 96 |     bad_index <- match(!num_first[1], num_first)
 97 |     rlang::abort(
 98 |       c(
 99 |         paste(
100 |           "It should either be the case that all strings start with",
101 |           "numbers or that none of them do."
102 |         ),
103 |         x = str_glue(
104 |           " String number 1 '{string[1]}' ",
105 |           "{ifelse(num_first[1], 'does', 'does not')} ",
106 |           "start with a number whereas ",
107 |           "string number {bad_index} '{string[bad_index]}' ",
108 |           "{ifelse(num_first[1], 'does not', 'does')} ",
109 |           "start with a number."
110 |         )
111 |       )
112 |     )
113 |   }
114 |   if (num_first[1]) {
115 |     interleaves <- interleave_chr_lsts(nums, non_nums)
116 |   } else {
117 |     interleaves <- interleave_chr_lsts(non_nums, nums)
118 |   }
119 |   stringi::stri_paste_list(interleaves)
120 | }
121 | 


--------------------------------------------------------------------------------
/R/before.R:
--------------------------------------------------------------------------------
 1 | #' @rdname before-and-after
 2 | #' @export
 3 | str_before_nth <- function(string, pattern, n) {
 4 |   # Bail out early when `is_l0_char()` flags the input, skipping validation.
 5 |   if (is_l0_char(string)) return(character())
 6 |   verify_string_pattern_n(string, pattern, n)
 7 |   # Keep everything to the left of where the `n`th match starts.
 8 |   match_starts <- str_locate_nth(string, pattern, n)[, "start"]
 9 |   str_sub(string, start = 1, end = match_starts - 1)
10 | }
11 | 
12 | #' @rdname before-and-after
13 | #' @export
14 | str_before_first <- function(string, pattern) {
15 |   str_before_nth(string, pattern, n = 1)
16 | }
17 | 
18 | #' @rdname before-and-after
19 | #' @export
20 | str_before_last <- function(string, pattern) {
21 |   str_before_nth(string, pattern, n = -1)
22 | }
23 | 
24 | #' Extract the part of a string before the last period.
25 | #'
26 | #' Typically used to drop the extension from a file name. Vectorized over
27 | #' `string`. An element with no period in it is returned unchanged, whereas a
28 | #' bare extension such as `".R"` gives `""`.
29 | #'
30 | #' @inheritParams before-and-after
31 | #'
32 | #' @return A character vector.
33 | #'
34 | #' @examples
35 | #' str_before_last_dot(c("spreadsheet1.csv", "doc2.doc", ".R"))
36 | #' @family bisectors
37 | #' @export
38 | str_before_last_dot <- function(string) {
39 |   if (is_l0_char(string)) return(character())
40 |   checkmate::assert_character(string)
41 |   stem <- tools::file_path_sans_ext(string)
42 |   # Unchanged inputs that start with a dot have nothing before that dot.
43 |   is_bare_ext <- (string == stem) & (str_elem(stem, 1) == ".")
44 |   stem[is_bare_ext] <- ""
45 |   stem
46 | }
47 | 


--------------------------------------------------------------------------------
/R/camel-case.R:
--------------------------------------------------------------------------------
 1 | #' Split a string based on CamelCase.
 2 | #'
 3 | #' Vectorized over `string`.
 4 | #'
 5 | #' @inheritParams str_after_nth
 6 | #' @param lower Do you want the output to be all lower case (or as is)?
 7 | #'
 8 | #' @return A list of character vectors, one list element for each element of
 9 | #'   `string`.
10 | #'
11 | #' @references Adapted from Ramnath Vaidyanathan's answer at
12 | #' http://stackoverflow.com/questions/8406974/splitting-camelcase-in-r.
13 | #'
14 | #' @examples
15 | #' str_split_camel_case(c("RoryNolan", "NaomiFlagg", "DepartmentOfSillyHats"))
16 | #' str_split_camel_case(c("RoryNolan", "NaomiFlagg", "DepartmentOfSillyHats"),
17 | #'   lower = TRUE
18 | #' )
19 | #' @family splitters
20 | #' @export
21 | str_split_camel_case <- function(string, lower = FALSE) {
22 |   if (is_l0_char(string)) return(list())
23 |   checkmate::assert_character(string)
24 |   checkmate::assert_flag(lower)
25 |   # Drop non-alphanumeric padding from both ends of each string.
26 |   trimmed <- gsub("^[^[:alnum:]]+|[^[:alnum:]]+$", "", string)
27 |   # Insert a space before each upper-case letter (except at the start).
28 |   spaced <- gsub("(?!^)(?=[[:upper:]])", " ", trimmed, perl = TRUE)
29 |   if (lower) spaced <- str_to_lower(spaced)
30 |   str_split(spaced, " ")
31 | }
32 | 


--------------------------------------------------------------------------------
/R/can-be-num.R:
--------------------------------------------------------------------------------
 1 | #' Check if a string could be considered as numeric.
 2 | #'
 3 | #' After padding is removed, could the input string be considered to be numeric,
 4 | #' i.e. could it be coerced to numeric. This function is vectorized over its one
 5 | #' argument.
 6 | #'
 7 | #' @inheritParams str_after_nth
 8 | #'
 9 | #' @return A logical vector.
10 | #'
11 | #' @examples
12 | #' str_can_be_numeric("3")
13 | #' str_can_be_numeric("5 ")
14 | #' str_can_be_numeric(c("1a", "abc"))
15 | #' @family type converters
16 | #' @export
17 | str_can_be_numeric <- function(string) {
18 |   checkmate::assert(
19 |     checkmate::check_character(string), checkmate::check_numeric(string)
20 |   )
21 |   coerced <- suppressWarnings(as.numeric(string)) # failed coercion gives NA
22 |   !is.na(coerced)
23 | }
24 | 


--------------------------------------------------------------------------------
/R/currency.R:
--------------------------------------------------------------------------------
  1 | #' Number pattern for currency.
  2 | #'
  3 | #' @return A string.
  4 | #'
  5 | #' @noRd
  6 | curr_pattern <- function() {
  7 |   num_regex(big_mark = ",", decimals = TRUE, negs = TRUE, sci = TRUE)
  8 | }
  9 | 
 10 | #' Helper for currency extraction.
 11 | #'
 12 | #' Given string numbers, strings and amount locations, output the required
 13 | #' data frame.
 14 | #'
 15 | #' @param string_num The string number.
 16 | #' @param string A character vector.
 17 | #' @param locs An integer matrix. The amount locations.
 18 | #'
 19 | #' @return A data frame.
 20 | #'
 21 | #' @noRd
 22 | extract_curr_helper <- function(string_num, string, locs) {
 23 |   starts <- locs[, 1]
 24 |   # Pull out the number text, drop the thousands separators, then convert.
 25 |   amount_chr <- str_sub(string, starts, locs[, 2])
 26 |   amount <- as.numeric(str_replace_all(amount_chr, stringr::coll(","), ""))
 27 |   # The currency symbol is the character just before the number.
 28 |   sym_pos <- starts - 1
 29 |   curr_sym <- str_elem(string, sym_pos)
 30 |   # A "-" just before the symbol (as in "-$2.00") negates the amount.
 31 |   sign_pos <- ifelse(sym_pos == 0, 0, sym_pos - 1)
 32 |   amount <- amount * ifelse(str_elem(string, sign_pos) == "-", -1, 1)
 33 |   data.frame(
 34 |     string_num = string_num, string = string, curr_sym = curr_sym,
 35 |     amount = amount, stringsAsFactors = FALSE
 36 |   )
 37 | }
 38 | 
 39 | #' Extract currency amounts from a string.
 40 | #'
 41 | #' The currency of a number is defined as the character coming before the number
 42 | #' in the string. If nothing comes before (i.e. if the number is the first thing
 43 | #' in the string), the currency is the empty string, similarly the currency can
 44 | #' be a space, comma or any manner of thing.
 45 | #'
 46 | #' These functions are vectorized over `string` and `n`.
 47 | #'
 48 | #' [str_extract_currencies()] extracts all currency amounts.
 49 | #'
 50 | #' `str_nth_currency()` just gets the `n`th currency amount from each string.
 51 | #' `str_first_currency(string)` and `str_last_currency(string)` are just
 52 | #' wrappers for `str_nth_currency(string, n = 1)` and `str_nth_currency(string,
 53 | #' n = -1)`.
 54 | #'
 55 | #' "-$2.00" and "$-2.00" are interpreted as negative two dollars.
 56 | #'
 57 | #' If you request e.g. the 5th currency amount but there are only 3 currency
 58 | #' amounts, you get an amount and currency symbol of `NA`.
 59 | #'
 60 | #' @inheritParams str_after_nth
 61 | #'
 62 | #' @return A data frame with 4 columns: `string_num`, `string`, `curr_sym` and
 63 | #'   `amount`. Every extracted currency amount gets its own row in the data
 64 | #'   frame detailing the string number and string that it was extracted from,
 65 | #'   the currency symbol and the amount.
 66 | #'
 67 | #' @examples
 68 | #' string <- c("ab3 13", "$1", "35.00 $1.14", "abc5 $3.8", "stuff")
 69 | #' str_extract_currencies(string)
 70 | #' str_nth_currency(string, n = 2)
 71 | #' str_nth_currency(string, n = -2)
 72 | #' str_nth_currency(string, c(1, -2, 1, 2, -1))
 73 | #' str_first_currency(string)
 74 | #' str_last_currency(string)
 75 | #' @name currency
 76 | #' @family currency extractors
 77 | NULL
 78 | 
 79 | #' @rdname currency
 80 | #' @export
 81 | str_extract_currencies <- function(string) {
 82 |   if (is_l0_char(string)) {
 83 |     no_locs <- matrix(ncol = 2, nrow = 0)
 84 |     return(extract_curr_helper(integer(), character(), no_locs))
 85 |   }
 86 |   checkmate::assert_character(string)
 87 |   locs_list <- str_locate_all(string, curr_pattern())
 88 |   # Each location matrix has two cells per match, so halve its length.
 89 |   n_matches <- lengths(locs_list) / 2
 90 |   # Repeat each string's index once per match found in it.
 91 |   string_num <- rep(seq_along(string), n_matches)
 92 |   extract_curr_helper(
 93 |     string_num, string[string_num], do.call(rbind, locs_list)
 94 |   )
 95 | }
 96 | 
 97 | 
 98 | #' @rdname currency
 99 | #' @export
100 | str_nth_currency <- function(string, n) {
101 |   if (is_l0_char(string)) { # empty input: still validate `n`, return 0 rows
102 |     checkmate::assert_integerish(n)
103 |     return(extract_curr_helper(
104 |       integer(), character(),
105 |       matrix(ncol = 2, nrow = 0)
106 |     ))
107 |   }
108 |   verify_string_n(string, n)
109 |   abs_n <- abs(n) # taken before negative `n` is rewritten below
110 |   if (length(n) == 1 && abs_n == 1) { # first/last only: no need to locate all
111 |     if (n == 1) {
112 |       locs <- stringi::stri_locate_first_regex(string, curr_pattern())
113 |     } else {
114 |       locs <- stringi::stri_locate_last_regex(string, curr_pattern())
115 |     }
116 |   } else {
117 |     locs <- matrix(NA_integer_, ncol = 2, nrow = length(string)) # NA: no nth
118 |     interim_locs <- str_locate_all(string, curr_pattern())
119 |     interim_locs_n_matches <- lengths(interim_locs) / 2 # 2 cells per match
120 |     good <- interim_locs_n_matches >= abs_n # strings with enough matches
121 |     n_negs <- n < 0
122 |     if (any(n_negs)) { # make from-the-end indices count from the start
123 |       if (length(n) == 1) {
124 |         n <- interim_locs_n_matches + n + 1
125 |       } else {
126 |         n[n_negs] <- interim_locs_n_matches[n_negs] + n[n_negs] + 1
127 |       }
128 |     }
129 |     if (any(good)) {
130 |       if (length(n) > 1) n <- n[good] # keep `n` aligned with the good strings
131 |       locs[good, ] <- interim_locs[good] %>%
132 |         int_mat_lst_rbind_nth_rows(n)
133 |     }
134 |   }
135 |   extract_curr_helper(seq_along(string), string, locs) # one row per string
136 | }
137 | 
138 | #' @rdname currency
139 | #' @export
140 | str_first_currency <- function(string) {
141 |   str_nth_currency(string = string, n = 1) # n = 1 picks the first amount
142 | }
143 | 
144 | #' @rdname currency
145 | #' @export
146 | str_last_currency <- function(string) {
147 |   str_nth_currency(string = string, n = -1) # n = -1 picks the last amount
148 | }
149 | 


--------------------------------------------------------------------------------
/R/detect.R:
--------------------------------------------------------------------------------
 1 | #' Detect any or all patterns.
 2 | #'
 3 | #' Vectorized over `string`.
 4 | #'
 5 | #' @param string A character vector.
 6 | #' @param pattern A character vector. The patterns to look for. Default is
 7 | #'   `stringi`-style regular expression. [stringr::coll()] and
 8 | #'   [stringr::fixed()] are also permissible.
 9 | #' @param negate A flag. If `TRUE`, inverts the result.
10 | #'
11 | #' @return A logical vector.
12 | #'
13 | #' @examples
14 | #' str_detect_all("quick brown fox", c("x", "y", "z"))
15 | #' str_detect_all(c(".", "-"), ".")
16 | #' str_detect_all(c(".", "-"), coll("."))
17 | #' str_detect_all(c(".", "-"), coll("."), negate = TRUE)
18 | #' str_detect_all(c(".", "-"), c(".", ":"))
19 | #' str_detect_all(c(".", "-"), coll(c(".", ":")))
20 | #' str_detect_all("xyzabc", c("a", "c", "z"))
21 | #' str_detect_all(c("xyzabc", "abcxyz"), c(".b", "^x"))
22 | #'
23 | #' @export
24 | str_detect_all <- function(string, pattern, negate = FALSE) {
25 |   checkmate::assert_character(string)
26 |   if (inherits(pattern, "stringr_boundary")) {
27 |     rlang::abort("Function cannot handle a `pattern` of type 'boundary'.")
28 |   }
29 |   checkmate::assert_character(pattern, min.chars = 1)
30 |   checkmate::assert_flag(negate)
31 |   if (inherits(pattern, "stringr_coll") || inherits(pattern, "stringr_fixed")) {
32 |     if (inherits(pattern, "stringr_coll")) {
33 |       out <- str_detect_many_coll(string, pattern)
34 |     } else {
35 |       out <- str_detect_many_fixed(string, pattern)
36 |     }
37 |     out <- Reduce(`&`, out)
38 |   } else {
39 |     # One lookahead per pattern, all anchored at the start of the string. Each
40 |     # pattern sits in a non-capturing group so that a top-level `|` inside it
41 |     # cannot escape the lookahead's leading `.*`.
42 |     pattern <- str_c("^", str_flatten(str_c("(?=.*(?:", pattern, "))")))
43 |     out <- stringr::str_detect(string, pattern)
44 |   }
45 |   if (negate) out <- !out
46 |   out
47 | }
48 | 
49 | #' @rdname str_detect_all
50 | #'
51 | #' @examples
52 | #' str_detect_any("quick brown fox", c("x", "y", "z"))
53 | #' str_detect_any(c(".", "-"), ".")
54 | #' str_detect_any(c(".", "-"), coll("."))
55 | #' str_detect_any(c(".", "-"), coll("."), negate = TRUE)
56 | #' str_detect_any(c(".", "-"), c(".", ":"))
57 | #' str_detect_any(c(".", "-"), coll(c(".", ":")))
58 | #' str_detect_any(c("xyzabc", "abcxyz"), c(".b", "^x"))
59 | #'
60 | #' @export
61 | str_detect_any <- function(string, pattern, negate = FALSE) {
62 |   checkmate::assert_character(string)
63 |   if (inherits(pattern, "stringr_boundary")) {
64 |     rlang::abort("Function cannot handle a `pattern` of type 'boundary'.")
65 |   }
66 |   checkmate::assert_character(pattern, min.chars = 1)
67 |   checkmate::assert_flag(negate)
68 |   if (inherits(pattern, "stringr_coll")) {
69 |     # Literal patterns are checked one by one and the results OR-ed together.
70 |     found <- Reduce(`|`, str_detect_many_coll(string, pattern))
71 |   } else if (inherits(pattern, "stringr_fixed")) {
72 |     found <- Reduce(`|`, str_detect_many_fixed(string, pattern))
73 |   } else {
74 |     # Regular expressions are joined into a single alternation.
75 |     found <- str_detect(string, str_flatten(pattern, "|"))
76 |   }
77 |   # Flip the result when the caller asked for the negation.
78 |   if (negate) found <- !found
79 |   found
80 | }
81 | 


--------------------------------------------------------------------------------
/R/elem.R:
--------------------------------------------------------------------------------
  1 | #' Extract a single character from a string, using its index.
  2 | #'
  3 | #' If the element does not exist, this function returns the empty string. This
  4 | #' is consistent with [stringr::str_sub()]. This function is vectorised over
  5 | #' both arguments.
  6 | #'
  7 | #' @inheritParams str_after_nth
  8 | #' @param index An integer. Negative indexing is allowed as in
  9 | #'   [stringr::str_sub()].
 10 | #'
 11 | #' @return A one-character string.
 12 | #'
 13 | #' @examples
 14 | #' str_elem(c("abcd", "xyz"), 3)
 15 | #' str_elem("abcd", -2)
 16 | #' @family single element extractors
 17 | #' @export
 18 | str_elem <- function(string, index) {
 19 |   if (is_l0_char(string)) return(character())
 20 |   verify_string_n(string, index, "index")
 21 |   # A substring that starts and ends at `index` is the single character
 22 |   # found at that position.
 23 |   str_sub(string, start = index, end = index)
 24 | }
 25 | 
 26 | #' Helper for [str_elems()] and [str_paste_elems()].
 27 | #'
 28 | #' @return A list of elements of strings. Either with one list element per input
 29 | #'   string (orientation: bycol) or one string index (for multiple strings) per
 30 | #'   list element (orientation: byrow).
 31 | #'
 32 | #' @noRd
 33 | str_elems_helper <- function(string, indices, insist_bycol = FALSE) {
 34 |   indices <- as.integer(indices)
 35 |   # These lapply() calls could only be replaced easily and efficiently if Rcpp
 36 |   # started dealing with UTF-8 strings well.
 37 |   by_index <- !insist_bycol && length(indices) > length(string)
 38 |   out <- if (by_index) {
 39 |     lapply(indices, function(i) str_elem(string, i)) # one element per index
 40 |   } else {
 41 |     lapply(string, function(s) str_elem(s, indices)) # one element per string
 42 |   }
 43 |   orientation <- if (by_index) "byrow" else "bycol"
 44 |   structure(out, strex__str_elems_helper__orientation = orientation)
 45 | }
 46 | 
 47 | #' Extract several single elements from a string.
 48 | #'
 49 | #' Efficiently extract several elements from a string. See [str_elem()] for
 50 | #' extracting single elements. This function is vectorized over the first
 51 | #' argument.
 52 | #'
 53 | #' @inheritParams str_after_nth
 54 | #' @param indices A vector of integerish values. Negative indexing is allowed as
 55 | #'   in [stringr::str_sub()].
 56 | #' @param byrow Should the elements be organised in the matrix with one row per
 57 | #'   string (`byrow = TRUE`, the default) or one column per string (`byrow =
 58 | #'   FALSE`). See examples if you don't understand.
 59 | #'
 60 | #' @return A character matrix.
 61 | #'
 62 | #' @examples
 63 | #' string <- c("abc", "def", "ghi", "vwxyz")
 64 | #' str_elems(string, 1:2)
 65 | #' str_elems(string, 1:2, byrow = FALSE)
 66 | #' str_elems(string, c(1, 2, 3, 4, -1))
 67 | #' @family single element extractors
 68 | #' @export
 69 | str_elems <- function(string, indices, byrow = TRUE) {
 70 |   checkmate::assert_flag(byrow)
 71 |   if (is_l0_char(string)) {
 72 |     empty <- matrix(character(), ncol = length(indices))
 73 |     return(if (byrow) empty else t(empty))
 74 |   }
 75 |   checkmate::assert_character(string, min.len = 1)
 76 |   checkmate::assert_integerish(indices, min.len = 1)
 77 |   elems <- str_elems_helper(string, indices)
 78 |   # The helper may have built the list per index rather than per string; if
 79 |   # so, the matrix has to be filled the other way around.
 80 |   flipped <- attr(elems, "strex__str_elems_helper__orientation") == "byrow"
 81 |   if (flipped) byrow <- !byrow
 82 |   stringi::stri_list2matrix(elems, byrow = byrow)
 83 | }
 84 | 
 85 | #' Extract single elements of a string and paste them together.
 86 | #'
 87 | #' This is a quick way around doing a call to [str_elems()] followed by a call
 88 | #' of `apply(..., paste)`.
 89 | #'
 90 | #' Elements that don't exist e.g. element 5 of `"abc"` are ignored.
 91 | #'
 92 | #' @inheritParams str_after_nth
 93 | #' @inheritParams str_elems
 94 | #' @param sep A string. The separator for pasting `string` elements together.
 95 | #'
 96 | #' @return A character vector.
 97 | #'
 98 | #' @examples
 99 | #' string <- c("abc", "def", "ghi", "vwxyz")
100 | #' str_paste_elems(string, 1:2)
101 | #' str_paste_elems(string, c(1, 2, 3, 4, -1))
102 | #' str_paste_elems("abc", c(1, 5, 55, 43, 3))
103 | #' @family single element extractors
104 | #' @export
105 | str_paste_elems <- function(string, indices, sep = "") {
106 |   if (is_l0_char(string)) return(character())
107 |   checkmate::assert_character(string, min.len = 1)
108 |   checkmate::assert_integerish(indices, min.len = 1)
109 |   checkmate::assert_string(sep)
110 |   # Force one list element per input string so that each one can be pasted
111 |   # into a single output string.
112 |   per_string <- str_elems_helper(string, indices, insist_bycol = TRUE)
113 |   stringi::stri_paste_list(per_string, sep = sep)
114 | }
115 | 


--------------------------------------------------------------------------------
/R/give-ext.R:
--------------------------------------------------------------------------------
 1 | #' Ensure a file name has the intended extension.
 2 | #'
 3 | #' Say you want to ensure a name is fit to be the name of a csv file. Then, if
 4 | #' the input doesn't end with ".csv", this function will tack ".csv" onto the
 5 | #' end of it. This is vectorized over the first argument.
 6 | #'
 7 | #' @param string The intended file name.
 8 | #' @param ext The intended file extension (with or without the ".").
 9 | #' @param replace If the file has an extension already, replace it (or append
10 | #'   the new extension name)?
11 | #'
12 | #' @return A string: the file name in your intended form.
13 | #'
14 | #' @examples
15 | #' str_give_ext(c("abc", "abc.csv"), "csv")
16 | #' str_give_ext("abc.csv", "pdf")
17 | #' str_give_ext("abc.csv", "pdf", replace = TRUE)
18 | #' @family appenders
19 | #' @export
20 | str_give_ext <- function(string, ext, replace = FALSE) {
21 |   if (is_l0_char(string)) return(character())
22 |   checkmate::assert_character(string)
23 |   checkmate::assert_string(ext)
24 |   checkmate::assert_flag(replace)
25 |   ext <- str_match(ext, "^\\.*(.*)")[, 2] # `ext` without its leading dot(s)
26 |   if (replace) return(str_c(str_before_last_dot(string), ".", ext))
27 |   # Names that already end in ".<ext>" are returned as they are. The check is
28 |   # literal, so regex metacharacters in `ext` (the "." of "tar.gz", the "+"
29 |   # of "c++") cannot cause false hits or a doubled multi-part extension.
30 |   has_ext <- str_ends(string, stringr::fixed(str_c(".", ext)))
31 |   has_ext[is.na(has_ext)] <- FALSE # `NA` names just stay `NA`
32 |   out <- str_c(string, ".", ext)
33 |   out[has_ext] <- string[has_ext]
34 |   out
35 | }
36 | 


--------------------------------------------------------------------------------
/R/locate.R:
--------------------------------------------------------------------------------
  1 | #' Locate the braces in a string.
  2 | #'
  3 | #' Give the positions of `(`, `)`, `[`, `]`, `\{`, `\}` within a string.
  4 | #'
  5 | #' @param string A character vector
  6 | #'
  7 | #' @return A data frame with 4 columns: `string_num`, `string`, `position` and
  8 | #'   `brace`. Every located brace gets its own row in the data frame
  9 | #'   detailing the string number and string that it was found in, the
 10 | #'   position in its string and the brace.
 11 | #'
 12 | #' @examples
 13 | #' str_locate_braces(c("a{](kkj)})", "ab(]c{}"))
 14 | #' @family locators
 15 | #' @export
 16 | str_locate_braces <- function(string) {
 17 |   checkmate::assert_character(string)
 18 |   if (is_l0_char(string)) {
 19 |     return(data.frame(
 20 |       string_num = integer(), string = character(),
 21 |       position = integer(), brace = character(),
 22 |       stringsAsFactors = FALSE
 23 |     ))
 24 |   }
 25 |   brace_regex <- "[\\(\\)\\[\\]\\{\\}]"
 26 |   # Keep only the first column of each location matrix.
 27 |   positions <- int_mat_lst_nth_cols(str_locate_all(string, brace_regex), 1L)
 28 |   found <- str_extract_all(string, brace_regex)
 29 |   # Repeat each string's index once per brace found in it.
 30 |   string_num <- rep(seq_along(string), lengths(found))
 31 |   data.frame(
 32 |     string_num = string_num,
 33 |     string = string[string_num],
 34 |     position = unlist(positions),
 35 |     brace = unlist(found),
 36 |     stringsAsFactors = FALSE
 37 |   )
 38 | }
 39 | 
 40 | #' Locate the indices of the `n`th instance of a pattern.
 41 | #'
 42 | #' The `n`th instance of a pattern will cover a series of character
 43 | #' indices. These functions tell you which indices those are. These functions
 44 | #' are vectorised over all arguments.
 45 | #'
 46 | #' \itemize{ \item `str_locate_first(...)` is just `str_locate_nth(..., n = 1)`.
 47 | #' \item `str_locate_last(...)` is just `str_locate_nth(..., n = -1)`. }
 48 | #'
 49 | #' @inheritParams str_after_nth
 50 | #'
 51 | #' @return A two-column matrix. The \eqn{i}th row of this matrix gives the start
 52 | #'   and end indices of the \eqn{n}th instance of `pattern` in the \eqn{i}th
 53 | #'   element of `string`.
 54 | #'
 55 | #' @examples
 56 | #' str_locate_nth(c("abcdabcxyz", "abcabc"), "abc", 2)
 57 | #' str_locate_nth(
 58 | #'   c("This old thing.", "That beautiful thing there."),
 59 | #'   "\\w+", c(2, -2)
 60 | #' )
 61 | #' str_locate_nth("abc", "b", c(0, 1, 1, 2))
 62 | #' str_locate_first("abcxyzabc", "abc")
 63 | #' str_locate_last("abcxyzabc", "abc")
 64 | #' @family locators
 65 | #' @export
 66 | str_locate_nth <- function(string, pattern, n) {
 67 |   if (is_l0_char(string)) {
 68 |     out <- matrix(integer(), ncol = 2) %>% # integer, like non-empty results
 69 |       magrittr::set_colnames(c("start", "end"))
 70 |     return(out)
 71 |   }
 72 |   verify_string_pattern_n(string, pattern, n)
 73 |   locs <- str_locate_all(string, pattern)
 74 |   locs_n_matches <- lengths(locs) / 2 # each match fills two matrix cells
 75 |   n_negs <- n < 0
 76 |   if (any(n_negs)) { # make from-the-end indices count from the start
 77 |     if (length(n) == 1) {
 78 |       n <- locs_n_matches + n + 1
 79 |     } else { # recycle the counts: several `n` may share one string/pattern
 80 |       n[n_negs] <- rep_len(locs_n_matches, length(n))[n_negs] + n[n_negs] + 1
 81 |     }
 82 |   }
 83 |   out <- matrix(NA_integer_,
 84 |     nrow = max(lengths(list(string, pattern, n))), ncol = 2
 85 |   ) %>%
 86 |     magrittr::set_colnames(c("start", "end"))
 87 |   # A from-the-end `n` that overshot the first match is now <= 0: not found.
 88 |   good <- (n >= 1) & (n <= locs_n_matches)
 89 |   if (any(good)) {
 90 |     if (length(locs) == 1) {
 91 |       out[good, ] <- int_mat_lst_rbind_nth_rows(locs, n[good])
 92 |     } else {
 93 |       if (length(n) > 1) n <- n[good]
 94 |       out[good, ] <- int_mat_lst_rbind_nth_rows(locs[good], n)
 95 |     }
 96 |   }
 97 |   out
 98 | }
 98 | 
 99 | #' @rdname str_locate_nth
100 | #' @export
101 | str_locate_first <- function(string, pattern) {
102 |   str_locate_nth(string = string, pattern = pattern, n = 1) # first match
103 | }
104 | 
105 | #' @rdname str_locate_nth
106 | #' @export
107 | str_locate_last <- function(string, pattern) {
108 |   str_locate_nth(string = string, pattern = pattern, n = -1) # last match
109 | }
110 | 


--------------------------------------------------------------------------------
/R/remove.R:
--------------------------------------------------------------------------------
 1 | #' Remove the quoted parts of a string.
 2 | #'
 3 | #' If any parts of a string are quoted (between quotation marks), remove those
 4 | #' parts of the string, including the quotes. Run the examples and you'll know
 5 | #' exactly how this function works.
 6 | #'
 7 | #' @param string A character vector.
 8 | #'
 9 | #' @return A character vector.
10 | #' @examples
11 | #' string <- "\"abc\"67a\'dk\'f"
12 | #' cat(string)
13 | #' str_remove_quoted(string)
14 | #' @family removers
15 | #' @export
16 | str_remove_quoted <- function(string) {
17 |   if (is_l0_char(string)) return(character())
18 |   checkmate::assert_character(string)
19 |   # Strip the double-quoted parts first, then the single-quoted ones. The
20 |   # lazy `.*?` stops at the nearest closing quote, so separate quoted parts
21 |   # are removed separately.
22 |   no_double <- str_replace_all(string, "(?:\".*?\")", "")
23 |   str_replace_all(no_double, "(?:\'.*?\')", "")
24 | }
25 | 


--------------------------------------------------------------------------------
/R/singleize.R:
--------------------------------------------------------------------------------
 1 | #' Remove back-to-back duplicates of a pattern in a string.
 2 | #'
 3 | #' If a string contains a given pattern duplicated back-to-back a number of
 4 | #' times, remove that duplication, leaving the pattern appearing once in that
 5 | #' position (works if the pattern is duplicated in different parts of a string,
 6 | #' removing all instances of duplication). This is vectorized over string and
 7 | #' pattern.
 8 | #'
 9 | #' @inheritParams str_after_nth
10 | #'
11 | #' @return A character vector.
12 | #'
13 | #' @examples
14 | #' str_singleize("abc//def", "/")
15 | #' str_singleize("abababcabab", "ab")
16 | #' str_singleize(c("abab", "cdcd"), "cd")
17 | #' str_singleize(c("abab", "cdcd"), c("ab", "cd"))
18 | #' @family removers
19 | #' @export
20 | str_singleize <- function(string, pattern) {
21 |   if (is_l0_char(string)) return(character())
22 |   verify_string_pattern(string, pattern)
23 |   # Match one occurrence followed by exact repeats of the same text, and put
24 |   # back the matched text (`\\1`) rather than the pattern's source, which is
25 |   # not literal text when the pattern contains regex syntax.
26 |   str_replace_all(string, str_c("(", pattern, ")\\1+"), "\\1")
27 | }
28 | 


--------------------------------------------------------------------------------
/R/split-by-nums.R:
--------------------------------------------------------------------------------
 1 | #' Split a string by its numeric characters assuming no number ambiguity.
 2 | #'
 3 | #' Sometimes the strings have ambiguous numbers in them e.g. 2.5.3. These have
 4 | #' to be dealt with by strex (which it does by returning `NA` in those cases).
 5 | #' This helper to `str_split_by_numbers()` assumes that the input has
 6 | #' no such ambiguities.
 7 | #'
 8 | #' @param string A character vector.
 9 | #' @param num_pattern The regex defining a number in the current context.
10 | #'
11 | #' @return A list of character vectors.
12 | #'
13 | #' @noRd
14 | str_split_by_numbers_no_ambigs <- function(string, num_pattern) {
15 |   string_ends <- stringr::str_length(string)
16 |   # NOTE(review): `lst_fullocate()` presumably fills the gaps between numbers.
17 |   pieces <- str_locate_all(string, num_pattern) %>%
18 |     lst_fullocate(start = 1, end = string_ends)
19 |   stringi::stri_sub_all(string, pieces)
20 | }
21 | 
22 | #' Split a string by its numeric characters.
23 | #'
24 | #' Break a string wherever you go from a numeric character to a non-numeric or
25 | #' vice-versa. Keep the whole string, just split it up. Vectorised over
26 | #' `string`.
27 | #'
28 | #' @inheritParams str_extract_numbers
29 | #'
30 | #' @return A list of character vectors.
31 | #'
32 | #' @examples
33 | #' str_split_by_numbers(c("abc123def456.789gh", "a1b2c344"))
34 | #' str_split_by_numbers("abc123def456.789gh", decimals = TRUE)
35 | #' str_split_by_numbers(c("22", "1.2.3"), decimals = TRUE)
36 | #' @family splitters
37 | #' @export
38 | str_split_by_numbers <- function(string, decimals = FALSE,
39 |                                  leading_decimals = FALSE, negs = FALSE,
40 |                                  sci = FALSE, big_mark = "",
41 |                                  commas = FALSE) {
42 |   # The `commas` argument is defunct: any non-default value is an error.
43 |   if (!isFALSE(commas)) {
44 |     lifecycle::deprecate_stop(
45 |       "2.0.0", "strex::str_split_by_numbers(commas)",
46 |       details = "Use the `big_mark` argument instead."
47 |     )
48 |   }
49 |   if (is_l0_char(string)) return(list())
50 |   # Validate the remaining arguments.
51 |   checkmate::assert_character(string)
52 |   checkmate::assert_flag(decimals)
53 |   checkmate::assert_flag(leading_decimals)
54 |   checkmate::assert_flag(negs)
55 |   checkmate::assert_flag(sci)
56 |   checkmate::assert_string(big_mark)
57 |   # Regexes for a number and for an ambiguous number under these options.
58 |   num_pattern <- num_regex(
59 |     decimals = decimals, leading_decimals = leading_decimals,
60 |     negs = negs, sci = sci, big_mark = big_mark
61 |   )
62 |   ambig_pattern <- ambig_num_regex(
63 |     decimals = decimals, leading_decimals = leading_decimals,
64 |     sci = sci, big_mark = big_mark
65 |   )
66 |   is_ambig <- num_ambigs(
67 |     string,
68 |     decimals = decimals, leading_decimals = leading_decimals,
69 |     sci = sci, big_mark = big_mark
70 |   )
71 |   pieces <- vector(mode = "list", length = length(string))
72 |   if (any(is_ambig)) {
73 |     # Ambiguous strings are reported to the user and yield `NA`.
74 |     ambig_warn(string, is_ambig, ambig_pattern)
75 |     pieces[is_ambig] <- NA_character_
76 |   }
77 |   # Every other string is split at the boundaries of its numbers.
78 |   is_clear <- !is_ambig
79 |   pieces[is_clear] <- str_split_by_numbers_no_ambigs(
80 |     string[is_clear], num_pattern
81 |   )
82 |   pieces
83 | }
84 | 


--------------------------------------------------------------------------------
/R/strex-package.R:
--------------------------------------------------------------------------------
 1 | #' @useDynLib strex, .registration = TRUE
 2 | #' @import stringr
 3 | #' @importFrom stringi stri_write_lines
 4 | #' @importFrom magrittr '%>%'
 5 | #' @importFrom stats as.dendrogram
 6 | #' @importFrom utils head
 7 | NULL
 8 | 
 9 | 
10 | ## quiets concerns of R CMD check re: the .'s that appear in pipelines
11 | if (getRversion() >= "2.15.1") utils::globalVariables(".")
12 | 
13 | .onUnload <- function(libpath) { # unload the compiled code with the namespace
14 |   library.dynam.unload("strex", libpath)
15 | }
16 | 
17 | 
18 | #' `strex`: extra string manipulation functions
19 | #'
20 | #' There are some things that I wish were easier with the `stringr` or `stringi`
21 | #' packages. The foremost of these is the extraction of numbers from strings.
22 | #' `stringr` makes you figure out the regex for yourself; `strex` takes care of
23 | #' this for you. There are many more useful functionalities in `strex`. In
24 | #' particular, there's a `match_arg()` function which is more flexible than the
25 | #' base `match.arg()`. Contributions to this package are encouraged: it is
26 | #' intended as a miscellany of string manipulation functions which cannot be
27 | #' found in `stringi` or `stringr`.
28 | #'
29 | #' @name strex
30 | #' @aliases strex-package
31 | #' @references Rory Nolan and Sergi Padilla-Parra (2017). filesstrings: An R
32 | #'   package for file and string manipulation. The Journal of Open Source
33 | #'   Software, 2(14).  \doi{10.21105/joss.00260}.
34 | "_PACKAGE"
35 | 


--------------------------------------------------------------------------------
/R/to-vec.R:
--------------------------------------------------------------------------------
 1 | #' Convert a string to a vector of characters
 2 | #'
 3 | #' Go from a string to a vector whose \eqn{i}th element is the \eqn{i}th
 4 | #' character in the string.
 5 | #'
 6 | #' @inheritParams str_after_nth
 7 | #'
 8 | #' @return A character vector.
 9 | #'
10 | #' @examples
11 | #' str_to_vec("abcdef")
12 | #' @family converters
13 | #' @export
14 | str_to_vec <- function(string) {
15 |   if (is_l0_char(string)) return(character())
16 |   checkmate::assert_character(string)
17 |   # A `NULL` split cuts at every character; only element 1 is returned.
18 |   chars <- strsplit(string, split = NULL)
19 |   chars[[1]]
20 | }
21 | 


--------------------------------------------------------------------------------
/R/trim.R:
--------------------------------------------------------------------------------
  1 | #' Trim something other than whitespace
  2 | #'
  3 | #' The `stringi` and `stringr` packages let you trim whitespace, but
  4 | #' what if you want to trim something else from either (or both) side(s) of a
  5 | #' string? This function lets you select which pattern to trim and from which
  6 | #' side(s).
  7 | #'
  8 | #' @inheritParams str_after_nth
  9 | #' @param side Which side do you want to trim from? `"both"` is the
 10 | #'   default, but you can also have just either `"left"` or `"right"`
 11 | #'   (or optionally the shortened `"b"`, `"l"` and `"r"`).
 12 | #'
 13 | #' @return A string.
 14 | #'
 15 | #' @examples
 16 | #' str_trim_anything("..abcd.", ".", "left")
 17 | #' str_trim_anything("..abcd.", coll("."), "left")
 18 | #' str_trim_anything("-ghi--", "-", "both")
 19 | #' str_trim_anything("-ghi--", "-")
 20 | #' str_trim_anything("-ghi--", "-", "right")
 21 | #' str_trim_anything("-ghi--", "--")
 22 | #' str_trim_anything("-ghi--", "i-+")
 23 | #' @family removers
 24 | #'
 25 | #' @export
 26 | str_trim_anything <- function(string, pattern, side = "both") {
 27 |   if (is_l0_char(string)) {
 28 |     return(character())
 29 |   }
 30 |   verify_string_pattern(string, pattern, boundary_allowed = FALSE)
 31 |   out <- string
 32 |   checkmate::assert_string(side)
 33 |   side <- match_arg(side, c("both", "left", "right"), ignore_case = TRUE)
 34 |   type <- "regex"
 35 |   if (inherits(pattern, "stringr_fixed")) {
 36 |     type <- "fixed"
 37 |   } else if (inherits(pattern, "stringr_coll")) {
 38 |     type <- "coll"
 39 |   } else {
 40 |     bad_starts <- str_starts(pattern, "\\(*\\^")
 41 |     bad_ends <- str_ends(pattern, "\\$\\)*")
 42 |     if (any(bad_starts)) {
 43 |       rlang::abort(
 44 |         c(
 45 |           paste(
 46 |             "In `str_trim_anything()`, don't start your regular expression",
 47 |             "patterns with '^' to match the start of the string.",
 48 |             "The trimming by definition is happening at the edges."
 49 |           ),
 50 |           x = str_glue_data(
 51 |             list(
 52 |               pattern = pattern,
 53 |               first_bad = which.max(bad_starts)
 54 |             ),
 55 |             "Element {first_bad} of your pattern, ",
 56 |             "'{pattern[first_bad]}' is the first offender."
 57 |           )
 58 |         )
 59 |       )
 60 |     } else if (any(bad_ends)) {
 61 |       rlang::abort(
 62 |         c(
 63 |           paste(
 64 |             "In `str_trim_anything()`, don't end your regular expression",
 65 |             "patterns with '$' to match the end of the string.",
 66 |             "The trimming by definition is happening at the edges."
 67 |           ),
 68 |           x = str_glue_data(
 69 |             list(pattern = pattern, first_bad = which.max(bad_ends)),
 70 |             "Element {first_bad} of your pattern, '{pattern[first_bad]}' ",
 71 |             "is the first offender."
 72 |           )
 73 |         )
 74 |       )
 75 |     }
 76 |     # One or more back-to-back matches, anchored to the side being trimmed.
 77 |     pattern <- str_c("(", pattern, ")+")
 78 |     pattern <- switch(side,
 79 |       left = str_c("^", pattern),
 80 |       right = str_c(pattern, "$"),
 81 |       pattern
 82 |     )
 83 |   }
 84 |   if (side == "both") {
 85 |     out <- string %>%
 86 |       str_trim_anything(pattern, "left") %>%
 87 |       str_trim_anything(pattern, "right")
 88 |   } else if (type == "regex") {
 89 |     out <- str_replace(string, pattern, "")
 90 |   } else {
 91 |     # `fixed()`/`coll()` patterns are literal: peel one occurrence off the edge
 92 |     # at a time until no string starts (or ends) with its pattern any more.
 93 |     at_edge <- if (side == "left") str_starts else str_ends
 94 |     peel <- switch(paste(type, side),
 95 |       "fixed left" = stringi::stri_replace_first_fixed,
 96 |       "fixed right" = stringi::stri_replace_last_fixed,
 97 |       "coll left" = stringi::stri_replace_first_coll,
 98 |       "coll right" = stringi::stri_replace_last_coll
 99 |     )
100 |     plain_pattern <- as.vector(pattern) # bare strings, no class or options
101 |     one_pattern <- length(pattern) == 1
102 |     # Recycle a shorter `string` so that element-wise indexing is well defined.
103 |     if (length(pattern) > length(out)) out <- rep_len(out, length(pattern))
104 |     todo <- which(at_edge(out, pattern))
105 |     while (length(todo) > 0) {
106 |       # Each string must be trimmed with its own pattern. (A scalar `ifelse()`
107 |       # returns a single value, so it cannot be used to pick the indices.)
108 |       patt <- if (one_pattern) plain_pattern else plain_pattern[todo]
109 |       out[todo] <- peel(out[todo], patt, "")
110 |       # Put the class and options back so the edge test stays literal.
111 |       patt <- structure(patt,
112 |         options = attr(pattern, "options"), class = class(pattern)
113 |       )
114 |       todo <- todo[at_edge(out[todo], patt)]
115 |     }
116 |   }
117 |   out
118 | }
126 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | #' Assert that two objects have compatible lengths.
  2 | #'
  3 | #' Compatible means that either both have length less than or equal to 1, or
  4 | #' both have the same length.
  5 | #'
  6 | #' @param x, y Objects
  7 | #'
  8 | #' @return `TRUE`, invisibly if the lengths are compatible. Otherwise an error
  9 | #'   is thrown.
 10 | #'
 11 | #' @noRd
 12 | assert_compatible_lengths <- function(x, y) {
 13 |   x_sym <- rlang::ensym(x)
 14 |   y_sym <- rlang::ensym(y)
 15 |   checkmate::assert_vector(x)
 16 |   checkmate::assert_vector(y)
 17 |   if (length(x) > 1 && length(y) > 1) {
 18 |     if (length(x) != length(y)) {
 19 |       rlang::abort(
 20 |         c(
 21 |           str_glue(
 22 |             "If both `{x_sym}` and `{y_sym}` have lengths greater ",
 23 |             "than 1, then their lengths must be equal."
 24 |           ),
 25 |           x = str_glue("`{x_sym}` has length {length(x)}."),
 26 |           x = str_glue("`{y_sym}` has length {length(y)}.")
 27 |         )
 28 |       )
 29 |     }
 30 |   }
 31 |   invisible(TRUE)
 32 | }
 33 | 
 34 | #' Assert that the elements of a list have a common length.
 35 | #'
 36 | #' @param lst A list.
 37 | #'
 38 | #' @return `TRUE` (invisibly) if the elements have a common length. Otherwise,
 39 | #'   an error is thrown.
 40 | #'
 41 | #' @noRd
 42 | assert_lst_elems_common_length <- function(lst) {
 43 |   lst_sym <- rlang::ensym(lst)
 44 |   checkmate::assert_list(lst)
 45 |   l <- length(lst)
 46 |   if (l <= 1) {
 47 |     return(invisible(TRUE))
 48 |   }
 49 |   good <- lst_elems_common_length(lst, as.double(l))
 50 |   if (!good) {
 51 |     rlang::abort(
 52 |       str_glue("Elements of `{lst_sym}` do not have a common length.")
 53 |     )
 54 |   }
 55 |   invisible(TRUE)
 56 | }
 57 | 
 58 | #' Generate an error due to an incompatible combination of arguemnt lengths.
 59 | #'
 60 | #' @param string A character vector.
 61 | #' @param sym Another argument to a strex function.
 62 | #' @param replacement_sym A string to replace sym in the error message.
 63 | #'
 64 | #' @noRd
 65 | err_string_len <- function(string, sym, replacement_sym = NULL) {
 66 |   sym_sym <- rlang::enexpr(sym)
 67 |   if (!is.null(replacement_sym)) {
 68 |     sym_str <- replacement_sym
 69 |   } else {
 70 |     sym_str <- as.character(sym_sym)
 71 |   }
 72 |   rlang::abort(
 73 |     c(
 74 |       str_glue(
 75 |         "When `string` has length greater than 1, `{sym_str}` ",
 76 |         "must either be length 1 or have the same length as `string`."
 77 |       ),
 78 |       x = str_glue("Your `string` has length {length(string)}."),
 79 |       x = str_glue("Your `{sym_str}` has length {length(sym)}.")
 80 |     )
 81 |   )
 82 | }
 83 | 
 84 | verify_string_pattern <- function(string, pattern, boundary_allowed = TRUE) {
 85 |   checkmate::assert_character(string, min.len = 1)
 86 |   checkmate::assert_flag(boundary_allowed)
 87 |   if (boundary_allowed) {
 88 |     if (inherits(pattern, "stringr_boundary")) {
 89 |       checkmate::assert_character(pattern, min.len = 0)
 90 |     } else {
 91 |       checkmate::assert_character(pattern, min.len = 1)
 92 |     }
 93 |   } else if (inherits(pattern, "stringr_boundary")) {
 94 |     rlang::abort("Function cannot handle a `pattern` of type 'boundary'.")
 95 |   } else {
 96 |     checkmate::assert_character(pattern, min.len = 1)
 97 |   }
 98 |   if (length(pattern) > 1 && length(string) > 1 &&
 99 |     length(pattern) != length(string)) {
100 |     err_string_len(string, pattern)
101 |   }
102 |   invisible(TRUE)
103 | }
104 | 
#' Check that `string` and `n` can be used together.
#'
#' `string` must be a character vector and `n` integerish, both of length at
#' least 1. If both are longer than 1 their lengths must match.
#'
#' @param string A character vector.
#' @param n An integerish vector.
#' @param replacement_n_sym Passed on to `err_string_len()` as the name to
#'   show for `n` in the error message.
#'
#' @return `TRUE`, invisibly. Otherwise an error is thrown.
#'
#' @noRd
verify_string_n <- function(string, n, replacement_n_sym = NULL) {
  checkmate::assert_character(string, min.len = 1)
  checkmate::assert_integerish(n, min.len = 1)
  both_long <- length(n) > 1 && length(string) > 1
  if (both_long && length(n) != length(string)) {
    err_string_len(string, n, replacement_n_sym)
  }
  invisible(TRUE)
}
114 | 
#' Check that `string`, `pattern` and `n` can be used together.
#'
#' Runs `verify_string_n()` and `verify_string_pattern()`, then requires that
#' `pattern` and `n` have equal lengths whenever both are longer than 1.
#'
#' @param string A character vector.
#' @param pattern A character vector.
#' @param n An integerish vector.
#' @param replacement_n_sym A string naming `n` in error messages. `NULL`
#'   means the symbol supplied as `n` is used.
#'
#' @return `TRUE`, invisibly. Otherwise an error is thrown.
#'
#' @noRd
verify_string_pattern_n <- function(string, pattern, n,
                                    replacement_n_sym = NULL) {
  n_sym_str <- if (is.null(replacement_n_sym)) {
    as.character(rlang::ensym(n))
  } else {
    replacement_n_sym
  }
  verify_string_n(string, n, replacement_n_sym)
  verify_string_pattern(string, pattern)
  both_long <- length(pattern) > 1 && length(n) > 1
  if (!both_long || length(pattern) == length(n)) {
    return(invisible(TRUE))
  }
  headline <- paste(
    "If `pattern` and `n` both have length greater than 1,",
    "their lengths must be equal."
  )
  rlang::abort(
    c(
      headline,
      x = str_glue("Your `pattern` has length {length(pattern)}."),
      x = str_glue("Your `{n_sym_str}` has length {length(n)}.")
    )
  )
}
139 | 
#' Check that `string`, `pattern`, `n` and `m` can be used together.
#'
#' Applies `verify_string_pattern_n()` to `n` and then to `m` (reported as
#' "m"), and finally requires that `n` and `m` have equal lengths whenever
#' both are longer than 1.
#'
#' @param string A character vector.
#' @param pattern A character vector.
#' @param n,m Integerish vectors.
#'
#' @return `TRUE`, invisibly. Otherwise an error is thrown.
#'
#' @noRd
verify_string_pattern_n_m <- function(string, pattern, n, m) {
  verify_string_pattern_n(string, pattern, n)
  checkmate::assert_integerish(m, min.len = 1)
  verify_string_pattern_n(string, pattern, m, "m")
  both_long <- length(n) > 1 && length(m) > 1
  if (!both_long || length(n) == length(m)) {
    return(invisible(TRUE))
  }
  headline <- paste(
    "If `n` and `m` both have length greater than 1,",
    "their lengths must be equal."
  )
  rlang::abort(
    c(
      headline,
      x = str_glue("Your `n` has length {length(n)}."),
      x = str_glue("Your `m` has length {length(m)}.")
    )
  )
}
159 | 
#' Is `x` a length-zero character vector?
#'
#' @param x An object.
#'
#' @return `TRUE` if `checkmate::check_character(x, max.len = 0)` passes,
#'   `FALSE` otherwise.
#'
#' @noRd
is_l0_char <- function(x) {
  # `check_character()` returns `TRUE` on success and a message otherwise.
  verdict <- checkmate::check_character(x, max.len = 0)
  isTRUE(verdict)
}
161 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | ---
 4 | 
 5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 6 | 
 7 | ```{r setup, include = FALSE}
 8 | knitr::opts_chunk$set(
 9 |   collapse = TRUE,
10 |   comment = "#>",
11 |   fig.path = "man/figures/README-",
12 |   out.width = "100%"
13 | )
14 | ```
15 | 
16 | # strex <img src="man/figures/logo.png" align="right" height=140/>
17 | 
 18 | There are some things that I wish were easier with the `stringr` or `stringi` packages. The foremost of these is the extraction of numbers from strings. `stringr` makes you figure out the regex for yourself; `strex` takes care of this for you. There are many more useful functionalities in `strex`. In particular, there's a `match_arg()` function which is more flexible than the base `match.arg()`. Contributions to this package are encouraged: it is intended as a miscellany of string manipulation functions which cannot be found in `stringi` or `stringr`.
19 | 
20 | [![R-CMD-check](https://github.com/rorynolan/strex/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/rorynolan/strex/actions/workflows/R-CMD-check.yaml)
21 | [![Codecov test coverage](https://codecov.io/gh/rorynolan/strex/branch/master/graph/badge.svg)](https://app.codecov.io/gh/rorynolan/strex?branch=master)
22 | 
23 | ![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)
24 | [![lifecycle](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html)
25 | 
26 | [![CRAN status](https://www.r-pkg.org/badges/version/strex)](https://cran.r-project.org/package=strex)
27 | [![RStudio CRAN downloads](https://cranlogs.r-pkg.org/badges/grand-total/strex)](https://cranlogs.r-pkg.org/badges/grand-total/strex)
28 | [![RStudio CRAN monthly downloads](https://cranlogs.r-pkg.org/badges/strex)](https://cranlogs.r-pkg.org/badges/strex)
29 | 
30 | 
31 | 
32 | 
33 | ## Installation
34 | 
35 | You can install the release version of `strex` from [CRAN](https://CRAN.R-project.org) with:
36 | 
37 | ```{r, eval=FALSE}
38 | install.packages("strex")
39 | ```
40 | 
41 | You can install the development version of `strex` from [GitHub](https://github.com/rorynolan/strex/) with:
42 | 
43 | ```{r, eval=FALSE}
44 | devtools::install_github("rorynolan/strex")
45 | ```
46 | 
47 | 
48 | ## How to use the package
49 | 
50 | See the package website at https://rorynolan.github.io/strex/.
51 | 
52 | 
53 | 
54 | ## Contribution
55 | 
56 | The preferred method of contribution is by GitHub pull request. 
57 | 
58 | Please note that the `strex` project is released with a
59 | [Contributor Code of Conduct](inst/CODE_OF_CONDUCT.md).
60 | By contributing to this project, you agree to abide by its terms.
61 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 3 | 
 4 | # strex <img src="man/figures/logo.png" align="right" height=140/>
 5 | 
 6 | There are some things that I wish were easier with the `stringr` or
 7 | `stringi` packages. The foremost of these is the extraction of numbers
  8 | from strings. `stringr` makes you figure out the regex for yourself;
 9 | `strex` takes care of this for you. There are many more useful
10 | functionalities in `strex`. In particular, there’s a `match_arg()`
11 | function which is more flexible than the base `match.arg()`.
12 | Contributions to this package are encouraged: it is intended as a
13 | miscellany of string manipulation functions which cannot be found in
14 | `stringi` or `stringr`.
15 | 
16 | [![R-CMD-check](https://github.com/rorynolan/strex/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/rorynolan/strex/actions/workflows/R-CMD-check.yaml)
17 | [![Codecov test
18 | coverage](https://codecov.io/gh/rorynolan/strex/branch/master/graph/badge.svg)](https://app.codecov.io/gh/rorynolan/strex?branch=master)
19 | 
20 | ![Project Status: Active – The project has reached a stable, usable
21 | state and is being actively
22 | developed.](https://www.repostatus.org/badges/latest/active.svg)
23 | [![lifecycle](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html)
24 | 
25 | [![CRAN
26 | status](https://www.r-pkg.org/badges/version/strex)](https://cran.r-project.org/package=strex)
27 | [![RStudio CRAN
28 | downloads](https://cranlogs.r-pkg.org/badges/grand-total/strex)](https://cranlogs.r-pkg.org/badges/grand-total/strex)
29 | [![RStudio CRAN monthly
30 | downloads](https://cranlogs.r-pkg.org/badges/strex)](https://cranlogs.r-pkg.org/badges/strex)
31 | 
32 | ## Installation
33 | 
34 | You can install the release version of `strex` from
35 | [CRAN](https://CRAN.R-project.org) with:
36 | 
37 | ``` r
38 | install.packages("strex")
39 | ```
40 | 
41 | You can install the development version of `strex` from
42 | [GitHub](https://github.com/rorynolan/strex/) with:
43 | 
44 | ``` r
45 | devtools::install_github("rorynolan/strex")
46 | ```
47 | 
48 | ## How to use the package
49 | 
50 | See the package website at <https://rorynolan.github.io/strex/>.
51 | 
52 | ## Contribution
53 | 
54 | The preferred method of contribution is by GitHub pull request.
55 | 
56 | Please note that the `strex` project is released with a [Contributor
57 | Code of Conduct](inst/CODE_OF_CONDUCT.md). By contributing to this
58 | project, you agree to abide by its terms.
59 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |         informational: true
10 |     patch:
11 |       default:
12 |         target: auto
13 |         threshold: 1%
14 |         informational: true
15 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## Reverse Dependencies
2 | We checked the reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
3 |  * We saw 0 new problems
4 |  * We failed to check 0 packages
5 | 


--------------------------------------------------------------------------------
/index.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | ---
 4 | 
 5 | <!-- index.md is generated from index.Rmd. Please edit that file -->
 6 | 
 7 | ```{r setup, include = FALSE}
 8 | knitr::opts_chunk$set(
 9 |   collapse = TRUE,
10 |   comment = "#>",
11 |   fig.path = "man/figures/README-",
12 |   out.width = "100%"
13 | )
14 | ```
15 | # strex <img src="man/figures/logo.png" align="right" height=140/>
16 | 
17 | There are some things that I wish were easier with the `stringr` or `stringi` packages. The foremost of these is the extraction of numbers from strings. `stringr` makes you figure out the regex for yourself; `strex` takes care of this for you. There are many more useful functionalities in `strex`. In particular, there's a `match_arg()` function which is more flexible than the base `match.arg()`. Contributions to this package are encouraged: it is intended as a miscellany of string manipulation functions which cannot be found in `stringi` or `stringr`.
18 | 
19 | The github repo of `strex` is at https://github.com/rorynolan/strex.
20 | 
21 | ## Installation
22 | 
23 | You can install the release version of `strex` from [CRAN](https://CRAN.R-project.org) with:
24 | 
25 | ```{r, eval=FALSE}
26 | install.packages("strex")
27 | ```
28 | 
29 | You can install the development version of `strex` from [GitHub](https://github.com/rorynolan/strex/) with:
30 | 
31 | ```{r, eval=FALSE}
32 | devtools::install_github("rorynolan/strex")
33 | ```
34 | 
35 | 
36 | ## How to use the package
37 | 
38 | The following articles contain all you need to get going:
39 | 
40 | * [Alphordering Numbers](https://rorynolan.github.io/strex/articles/alphordering-numbers.html) tells you how to fix the pesky problem of numbers in file names not complying with alphabetical order.
41 | * [Argument Matching](https://rorynolan.github.io/strex/articles/argument-matching.html) showcases `strex::match_arg()`, an improvement on `base::match.arg()` which allows one to ignore case during argument matching.
42 | * [Before and After](https://rorynolan.github.io/strex/articles/before-and-after.html) is for the common problem where you want to get the bit of a string before or after an occurrence of a pattern.
43 | * [Numbers Within Strings](https://rorynolan.github.io/strex/articles/numbers-in-strings.html) shows how to deal with the common problem of extracting numeric information contained within larger strings.
44 | * [And/Or Detection](https://rorynolan.github.io/strex/articles/detection.html) showcases `str_detect_all()` and `str_detect_any()`.
45 | * [Important Miscellany](https://rorynolan.github.io/strex/articles/important-miscellany.html) is the rest, and there's a lot.
46 | 


--------------------------------------------------------------------------------
/index.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- index.md is generated from index.Rmd. Please edit that file -->
 3 | 
 4 | # strex <img src="man/figures/logo.png" align="right" height=140/>
 5 | 
 6 | There are some things that I wish were easier with the `stringr` or
 7 | `stringi` packages. The foremost of these is the extraction of numbers
 8 | from strings. `stringr` makes you figure out the regex for yourself;
 9 | `strex` takes care of this for you. There are many more useful
10 | functionalities in `strex`. In particular, there’s a `match_arg()`
11 | function which is more flexible than the base `match.arg()`.
12 | Contributions to this package are encouraged: it is intended as a
13 | miscellany of string manipulation functions which cannot be found in
14 | `stringi` or `stringr`.
15 | 
16 | The github repo of `strex` is at <https://github.com/rorynolan/strex>.
17 | 
18 | ## Installation
19 | 
20 | You can install the release version of `strex` from
21 | [CRAN](https://CRAN.R-project.org) with:
22 | 
23 | ``` r
24 | install.packages("strex")
25 | ```
26 | 
27 | You can install the development version of `strex` from
28 | [GitHub](https://github.com/rorynolan/strex/) with:
29 | 
30 | ``` r
31 | devtools::install_github("rorynolan/strex")
32 | ```
33 | 
34 | ## How to use the package
35 | 
36 | The following articles contain all you need to get going:
37 | 
38 | - [Alphordering
39 |   Numbers](https://rorynolan.github.io/strex/articles/alphordering-numbers.html)
40 |   tells you how to fix the pesky problem of numbers in file names not
41 |   complying with alphabetical order.
42 | - [Argument
43 |   Matching](https://rorynolan.github.io/strex/articles/argument-matching.html)
44 |   showcases `strex::match_arg()`, an improvement on `base::match.arg()`
45 |   which allows one to ignore case during argument matching.
46 | - [Before and
47 |   After](https://rorynolan.github.io/strex/articles/before-and-after.html)
48 |   is for the common problem where you want to get the bit of a string
49 |   before or after an occurrence of a pattern.
50 | - [Numbers Within
51 |   Strings](https://rorynolan.github.io/strex/articles/numbers-in-strings.html)
52 |   shows how to deal with the common problem of extracting numeric
53 |   information contained within larger strings.
54 | - [And/Or
55 |   Detection](https://rorynolan.github.io/strex/articles/detection.html)
56 |   showcases `str_detect_all()` and `str_detect_any()`.
57 | - [Important
58 |   Miscellany](https://rorynolan.github.io/strex/articles/important-miscellany.html)
59 |   is the rest, and there’s a lot.
60 | 


--------------------------------------------------------------------------------
/inst/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Code of Conduct
 2 | 
 3 | As contributors and maintainers of this project, we pledge to respect all people who 
 4 | contribute through reporting issues, posting feature requests, updating documentation,
 5 | submitting pull requests or patches, and other activities.
 6 | 
 7 | We are committed to making participation in this project a harassment-free experience for
 8 | everyone, regardless of level of experience, gender, gender identity and expression,
 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
10 | 
11 | Examples of unacceptable behavior by participants include the use of sexual language or
12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment,
13 | insults, or other unprofessional conduct.
14 | 
15 | Project maintainers have the right and responsibility to remove, edit, or reject comments,
16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 
17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 
18 | from the project team.
19 | 
20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 
21 | opening an issue or contacting one or more of the project maintainers.
22 | 
23 | This Code of Conduct is adapted from the Contributor Covenant 
24 | (https://www.contributor-covenant.org), version 1.0.0, available at 
25 | https://contributor-covenant.org/version/1/0/0/.
26 | 


--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
 1 | Alphordering
 2 | CMD
 3 | CamelCase
 4 | Parra
 5 | RStudio
 6 | Ramnath
 7 | Sergi
 8 | Vaidyanathan's
 9 | Vectorised
10 | Vectorized
11 | abc
12 | alphord
13 | alphorderers
14 | appenders
15 | behaviour
16 | camelcase
17 | csv
18 | dev
19 | filesstrings
20 | github
21 | http
22 | https
23 | integerish
24 | io
25 | lifecycle
26 | matchers
27 | organised
28 | possiblity
29 | recognise
30 | repo
31 | rorynolan
32 | splitters
33 | stackoverflow
34 | stringi
35 | stringr
36 | th
37 | tibble
38 | uncommented
39 | vectorised
40 | vectorized
41 | 


--------------------------------------------------------------------------------
/junk/braces.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | 
 5 | // [[Rcpp::export]]
 6 | List lst_df_pos_brace(CharacterVector string,
 7 |                       List positions, List braces) {
 8 |   std::size_t n = positions.size();
 9 |   if (braces.size() != n) {
10 |     throw std::invalid_argument("`positions` and `braces` must have the "
11 |                                   "same length.");
12 |   }
13 |   List out(n);
14 |   for (std::size_t i = 0; i != n; ++i) {
15 |     out[i] = DataFrame::create(_["position"] = positions[i],
16 |                                _["brace"] = braces[i]);
17 |   }
18 |   return out;
19 | }
20 | 


--------------------------------------------------------------------------------
/junk/currency.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | #include "stod.h"
 5 | 
 6 | 
 7 | //' Get the character before a located pattern.
 8 | //'
 9 | //' @param s A string.
10 | //' @param locs The output of `stringr::str_locate()`.
11 | //'
12 | //' @return A character vector.
13 | //'
14 | //' @noRd
15 | CharacterVector get_prev_chars(std::string s, IntegerMatrix locs) {
16 |   R_xlen_t n_locs = locs.nrow();
17 |   CharacterVector out(n_locs);
18 |   for (R_xlen_t i = 0; i != n_locs; ++i) {
19 |     R_xlen_t one_indexed_pos = locs(i, 0);
20 |     if (one_indexed_pos > 1) out[i] = s.substr(one_indexed_pos - 2, 1);
21 |   }
22 |   return out;
23 | }
24 | 
25 | //' Get the character two before a located pattern.
26 | //'
27 | //' @param s A string.
28 | //' @param locs The output of `stringr::str_locate()`.
29 | //'
30 | //' @return A character vector.
31 | //'
32 | //' @noRd
33 | CharacterVector get_prevprev_chars(std::string s, IntegerMatrix locs) {
34 |   R_xlen_t n_locs = locs.nrow();
35 |   CharacterVector out(n_locs);
36 |   for (R_xlen_t i = 0; i != n_locs; ++i) {
37 |     R_xlen_t one_indexed_pos = locs(i, 0);
38 |     if (one_indexed_pos > 2) out[i] = s[one_indexed_pos - 3];
39 |   }
40 |   return out;
41 | }
42 | 
43 | //' Get a tibble of currencies from a string.
44 | //'
45 | //' The tibble has two colums: `sym` and `amount`.
46 | //'
47 | //' @param s A string.
48 | //' @param locs The output of `stringr::str_locate()`.
49 | //'
50 | //' @return A tibble.
51 | //'
52 | //' @noRd
53 | DataFrame get_currencies_tbl(std::string s, IntegerMatrix locs) {
54 |   R_xlen_t n_locs = locs.nrow();
55 |   CharacterVector str_amount(n_locs);
56 |   CharacterVector sym = get_prev_chars(s, locs);
57 |   CharacterVector sign = get_prevprev_chars(s, locs);
58 |   for (R_xlen_t i = 0; i != n_locs; ++i)
59 |     str_amount[i] = s.substr(locs(i, 0) - 1, locs(i, 1) - locs(i, 0) + 1);
60 |   NumericVector num_amount = char_to_num(str_amount, true);
61 |   for (R_xlen_t i = 0; i != n_locs; ++i)
62 |     if (sign[i] == "-") num_amount[i] *= -1;
63 |   DataFrame out = DataFrame::create(Named("sym") = sym,
64 |                                     Named("amount") = num_amount,
65 |                                     Named("stringsAsFactors") = false);
66 |   out.attr("class") = CharacterVector::create("tbl_df", "tbl", "data.frame");
67 |   return out;
68 | }
69 | 
70 | //' Get a list of data frames of currencies from a string.
71 | //'
72 | //' The data frames have two colums: `sym` and `amount`.
73 | //'
74 | //' @param strings A character vector.
75 | //' @param locs_lst The output of `stringr::str_locate_all()`.
76 | //'
77 | //' @return A data frame.
78 | //'
79 | // [[Rcpp::export]]
80 | List get_currencies_tbl_lst(CharacterVector strings, List locs_lst) {
81 |   R_xlen_t n = strings.length();
82 |   List out(n);
83 |   for (R_xlen_t i = 0; i != n; ++i) {
84 |     out[i] = get_currencies_tbl(as<std::string>(strings[i]),
85 |                                 as<IntegerMatrix>(locs_lst[i]));
86 |   }
87 |   return(out);
88 | }
89 | 


--------------------------------------------------------------------------------
/junk/elem.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | //' Get the indices that exist in a string of length `str_len`.
 5 | //'
 6 | //' @param str_len The length of a string.
 7 | //' @param indices A sorted, unique vector of integers with no zero elements.
 8 | std::vector<int32_t> get_good_indices(R_xlen_t str_len,
 9 |                                       IntegerVector indices) {
10 |   std::vector<int32_t> out;
11 |   for (R_xlen_t i = 0, n = indices.length(); i != n; ++i)
12 |     if (abs(indices[i]) <= n) out.push_back(indices[i]);
13 |   return out;
14 | }
15 | 
16 | //' Vectorized `get_good_indices`
17 | std::vector<std::vector<int32_t>> get_good_indices_list(IntegerVector str_lens,
18 |                                                         IntegerVector indices) {
19 |   R_xlen_t n = str_lens.length();
20 |   std::vector<std::vector<int32_t>> out(n);
21 |   for (R_xlen_t i = 0; i != n; ++i)
22 |     out[i] = get_good_indices(str_lens[i], indices);
23 |   return out;
24 | }
25 | 
26 | //' Get the regular expression to match the `n`th next character.
27 | std::string nth_next_char_regex(R_xlen_t n) {
28 |   if (n == 1) {
29 |     return "(.)";
30 |   }
31 |   return std::string(".{") + (n - 1) + "}(.)";
32 | }
33 | 
34 | //' Get the regular expression to match this character with `n - 1` unmatched
35 | //' after.
36 | std::string next_char_regex_nm1_after(R_xlen_t n) {
37 |   if (n == 1) {
38 |     return "(.)";
39 |   }
40 |   return std::string("(.).{") + (n - 1) + "}";
41 | }
42 | 
43 | //' Get the regular expression to match positive indices in a string
44 | std::string get_pos_indices_regex(const std::vector<int32_t>& indices) {
45 |   R_xlen_t i = 0, n = indices.size();
46 |   while (i != n && indices[i] < 0) ++i;
47 |   std::string out;
48 |   if (i == n) return out;
49 |   out.reserve(7 * (n - i));
50 |   out += "^";
51 |   if (indices[i] > 1) out += nth_next_char_regex(indices[i]);
52 |   while(i != n - 1) {
53 |     out += nth_next_char_regex(indices[i + 1] - indices[i]);
54 |     ++i;
55 |   }
56 | }
57 | 
58 | 


--------------------------------------------------------------------------------
/junk/extract-non-nums.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | #include <vector>
 5 | #include <string>
 6 | 
 7 | #include "substrs.h"
 8 | 
 9 | 
10 | //' Negate `str_locate()`.
11 | //'
12 | //' `str_locate()` tells you where bits of a string matching your pattern are.
13 | //' `str_unlocate()` takes the output of `str_locate()` and gives you the
14 | //' locations of the substrings _not_ located.
15 | //'
16 | //' @param x A two-column integer matrix: the output of str_locate.
17 | //' @param string_length The length of the searched string.
18 | //'
19 | //' @return An even-length integer vector. Each pair of elements defines a
20 | //' substring by the position of the first character and the substring size
21 | //' (this is ideal for feeding into C++'s `string::substr()`).
22 | //'
23 | //' @noRd
24 | std::vector<std::size_t> str_unlocate(IntegerMatrix x,
25 |                                       std::size_t string_length) {
26 |   if (x.length() == 0) return {0, string_length};
27 |   std::vector<std::size_t> locs;
28 |   if (x(0, 0) != 1) {
29 |     locs.push_back(0);
30 |     locs.push_back(x(0, 0) - 1);
31 |   }
32 |   std::size_t nr = x.nrow();
33 |   if (nr > 1) {
34 |     for (std::size_t r = 1; r != nr; ++r) {
35 |       if (x(r, 0) - x(r - 1, 1) > 1) {
36 |         locs.push_back(x(r - 1, 1));
37 |         locs.push_back(x(r, 0) - x(r - 1, 1) - 1);
38 |       }
39 |     }
40 |   }
41 |   if (x(nr - 1, 1) < string_length) {
42 |     locs.push_back(x(nr - 1, 1));
43 |     locs.push_back(string_length - x(nr - 1, 1));
44 |   }
45 |   return locs;
46 | }
47 | 
48 | //' Get the unlocated parts of strings.
49 | //'
50 | //' Given a character vector and the output from `str_locate_all()`, get the
51 | //' unlocated substrings (not their locations, the actual strings).
52 | //'
53 | //' @param strings A character vector.
54 | //' @param other_locations The output of `str_locate_all()`.
55 | //'
56 | //' @return A list of character vectors, as output by e.g. `str_extract_all()`.
57 | //'
58 | //' @noRd
59 | // [[Rcpp::export]]
60 | List unlocated_substrs(CharacterVector strings, List other_locations) {
61 |   std::size_t n = strings.length();
62 |   List out(n);
63 |   for (int i = 0; i != n; ++i) {
64 |     std::vector<std::size_t> locs = str_unlocate(other_locations[i],
65 |                                                  strings[i].size());
66 |     out[i] = substrs(as<std::string>(strings[i]), locs);
67 |   }
68 |   return out;
69 | }
70 | 


--------------------------------------------------------------------------------
/junk/junk.R:
--------------------------------------------------------------------------------
 1 | #' Group together close adjacent elements of a vector.
 2 | #'
 3 | #' Given a strictly increasing vector (each element is bigger than the last),
 4 | #' group together stretches of the vector where *adjacent* elements are
 5 | #' separated by at most some specified distance. Hence, each element in a
 6 | #' group of two or more has another element in that group *close* to it. See
 7 | #' the examples.
 8 | #' @param x A strictly increasing numeric vector.
 9 | #' @param max_gap The biggest allowable gap between adjacent elements for them
10 | #'   to be considered part of the same *group*.
11 | #' @param check Check inputs for validity? Can be turned off for speed if you're
12 | #'   sure your inputs are valid.
13 | #' @return A list where each element is one group, as a numeric vector.
14 | #' @examples
15 | #' group_close(1:10, 1)
16 | #' group_close(1:10, 0.5)
17 | #' group_close(c(1, 2, 4, 10, 11, 14, 20, 25, 27), 3)
18 | #' @noRd
19 | group_close <- function(x, max_gap = 1, check = TRUE) {
20 |   if (check) checkmate::assert_numeric(x, min.len = 1) # validate before diff()
21 |   dva <- diff(x)
22 |   if (check) {
23 |     test <- dva > 0
24 |     if (anyNA(test) || (!all(test))) {
25 |       bad_index <- match(FALSE, test)
26 |       custom_stop(
27 |         "`x` must be strictly increasing.",
28 |         "
29 |                   Indices {bad_index} and {bad_index + 1} of `x`
30 |                   are respectively {x[bad_index]} and
31 |                   {x[bad_index + 1]}, therefore `x`
32 |                   is not strictly increasing.
33 |                   "
34 |       )
35 |     }
36 |   }
37 |   lva <- length(x)
38 |   if (lva == 1) return(list(x))
39 |   big_gaps <- which(dva > max_gap)
40 |   nbgaps <- length(big_gaps) # number of big gaps
41 |   if (!nbgaps) return(list(x))
42 |   # Group sizes: the elements up to the first big gap, then those between
43 |   # successive big gaps, then those after the last big gap.
44 |   group_sizes <- c(big_gaps[1], diff(c(big_gaps, lva)))
45 |   split(x, rep(seq_len(nbgaps + 1), times = group_sizes))
46 | }
47 | 
48 | test_that("group_close works", {
49 |   grouped <- function(...) unname(group_close(...)) # compare groups, not names
50 |   expect_equal(grouped(1:10, 1), list(1:10))
51 |   expect_equal(grouped(1:10, 0.5), as.list(1:10))
52 |   irregular <- c(1, 2, 4, 10, 11, 14, 20, 25, 27)
53 |   expected_groups <- list(c(1, 2, 4), c(10, 11, 14), 20, c(25, 27))
54 |   expect_equal(grouped(irregular, 3), expected_groups)
55 |   expect_equal(grouped(0), list(0))
56 |   expect_equal(grouped(c(0, 2)), list(0, 2))
57 |   expect_error(group_close(integer(0))) # empty input is rejected
58 |   expect_error(group_close(rep(1, 2))) # not strictly increasing
59 | })
60 | 
61 | #' Locate the braces in a string.
62 | #'
63 | #' Find where `(`, `)`, `[`, `]`, `\{` and `\}` occur within each string.
64 | #'
65 | #' @param string A character vector.
66 | #'
67 | #' @return A list of data frames, one for each element of `string`. Each data
68 | #'   frame has a "position" and a "brace" column giving the positions and
69 | #'   types of the braces in the corresponding string.
70 | #'
71 | #' @examples
72 | #' str_locate_braces(c("a{](kkj)})", "ab(]c{}"))
73 | #' @export
74 | str_locate_braces <- function(string) {
75 |   brace_regex <- "[\\(\\)\\[\\]\\{\\}]"
76 |   brace_positions <- int_lst_first_col(str_locate_all(string, brace_regex))
77 |   brace_chars <- str_elems(string, brace_positions)
78 |   lst_df_pos_brace(brace_positions, brace_chars)
79 | }
80 | 
81 | get_os <- function() {
82 |   # Name of the operating system, lower-cased, with macOS reported as "mac".
83 |   # Both branches yield an unnamed length-one character vector.
84 |   sysinf <- Sys.info()
85 |   if (!is.null(sysinf)) {
86 |     # `[[` drops the "sysname" name that `[` would carry through `tolower()`.
87 |     os <- sysinf[["sysname"]]
88 |     if (os == "Darwin") {
89 |       os <- "mac"
90 |     }
91 |   } else { ## mystery machine
92 |     # `Sys.info()` gave nothing, so fall back on what the R build reports.
93 |     os <- .Platform$OS.type
94 |     if (grepl("^darwin", R.version$os)) os <- "mac"
95 |     if (grepl("linux-gnu", R.version$os)) os <- "linux"
96 |   }
97 |   tolower(os)
98 | }
99 | 


--------------------------------------------------------------------------------
/junk/junk.cpp:
--------------------------------------------------------------------------------
 1 | CharacterVector interleave_correctly_vec(std::string orig,
 2 |                                          CharacterVector strings1,
 3 |                                          CharacterVector strings2) {
 4 |   // Returns whichever interleaving of the two vectors pastes back to `orig`.
 5 |   // If one vector is empty the other is returned as is; if neither ordering
 6 |   // reproduces `orig` the result is a single `NA_STRING`.
 7 |   if (strings1.size() == 0) return strings2;
 8 |   if (strings2.size() == 0) return strings1;
 9 |   CharacterVector first_then_second = interleave_strings(strings1, strings2);
10 |   if (paste_collapse(first_then_second, "") == orig) {
11 |     return first_then_second;
12 |   }
13 |   CharacterVector second_then_first = interleave_strings(strings2, strings1);
14 |   if (paste_collapse(second_then_first, "") == orig) {
15 |     return second_then_first;
16 |   }
17 |   // Neither ordering reconstructs the original string.
18 |   CharacterVector no_match = NA_STRING;
19 |   return no_match;
20 | }
21 | 
22 | // [[Rcpp::export]]
23 | List interleave_correctly(CharacterVector orig, List strings1, List strings2) {
24 |   // One result per element of `orig`; if either list's length disagrees with
25 |   // that of `orig`, every result is a lone `NA_STRING`.
26 |   const int n = orig.size();
27 |   const bool lengths_agree = strings1.size() == n && strings2.size() == n;
28 |   List result(n);
29 |   for (int k = 0; k < n; ++k) {
30 |     if (lengths_agree) {
31 |       result[k] = interleave_correctly_vec(as<std::string>(orig[k]),
32 |                                            strings1[k], strings2[k]);
33 |     } else {
34 |       result[k] = CharacterVector::create(NA_STRING);
35 |     }
36 |   }
37 |   return result;
38 | }
39 | 
40 | //' Cut pieces out of a string at given locations with `string::substr()`.
41 | //'
42 | //' @param s The `std::string` to cut pieces out of.
43 | //' @param locs A two-column integer matrix of 1-indexed start and end
44 | //'   positions, as output by `stringr::str_locate()`.
45 | //'
46 | //' @return A character vector with one substring per row of `locs`.
47 | //' @noRd
48 | // [[Rcpp::export]]
49 | CharacterVector substrs2(const std::string& s, const IntegerMatrix locs) {
50 |   const std::size_t n_rows = locs.nrow();
51 |   CharacterVector pieces(n_rows);
52 |   for (std::size_t row = 0; row != n_rows; ++row) {
53 |     const int start = locs(row, 0), end = locs(row, 1);  // end is inclusive
54 |     pieces[row] = s.substr(start - 1, end - start + 1);
55 |   }
56 |   return pieces;
57 | }
58 | 
59 | 
60 | //' Drop the first element from every character vector in a list.
61 | //'
62 | //' BE CAREFUL! THIS FUNCTION MODIFIES ITS INPUT.
63 | //'
64 | //' @param x A list of character vectors.
65 | //'
66 | //' @return The list `x`, with each non-empty vector shortened by one.
67 | //'
68 | // [[Rcpp::export]]
69 | List str_list_remove_first_elems(List x) {
70 |   const R_xlen_t n_elems = x.length();
71 |   for (R_xlen_t k = 0; k < n_elems; ++k) {
72 |     CharacterVector elem = x[k];
73 |     if (elem.length() == 0) continue;  // empty vectors are left alone
74 |     // Drop position 0, then store the shortened vector back into `x`.
75 |     elem.erase(0);
76 |     x[k] = elem;
77 |   }
78 |   return x;
79 | }
80 | 


--------------------------------------------------------------------------------
/junk/pasting.cpp:
--------------------------------------------------------------------------------
 1 | #include <Rcpp.h>
 2 | using namespace Rcpp;
 3 | 
 4 | //' Paste a vector of strings into a single string.
 5 | //'
 6 | //' Paste a vector of strings together with a specified separator.
 7 | //'
 8 | //' @param strings A character vector of strings.
 9 | //' @param collapse The separator placed between consecutive strings.
10 | //'
11 | //' @return A string; the empty string if `strings` has no elements.
12 | //'
13 | //' @examples
14 | //' paste_collapse(c("abc", "def"), collapse = "_")
15 | //'
16 | //' @noRd
17 | // [[Rcpp::export]]
18 | std::string paste_collapse(CharacterVector strings, std::string collapse) {
19 |   if (strings.size() == 0) return "";  // never read strings[0] of an empty vector
20 |   std::string out = as<std::string>(strings[0]);
21 |   for (R_xlen_t i = 1; i < strings.size(); i++) {
22 |     out += collapse + as<std::string>(strings[i]);
23 |   }
24 |   return out;
25 | }
26 | 
27 | //' Collapse each element of a list into a single string.
28 | //'
29 | //' Intended as a faster equivalent of
30 | //' `sapply(char_list, paste, collapse = collapse)`.
31 | //'
32 | //' @param char_list A list of character vectors.
33 | //' @param collapse A string.
34 | //'
35 | //' @return A character vector with one string per element of `char_list`.
36 | //'
37 | //' @examples
38 | //' paste_collapse_list_elems(list(1:3, c("a", 5, "rory")), collapse = "R")
39 | //'
40 | //' @noRd
41 | // [[Rcpp::export]]
42 | CharacterVector paste_collapse_list_elems(List char_list,
43 |                                           std::string collapse = "") {
44 |   const int n_elems = char_list.size();
45 |   CharacterVector out(n_elems);
46 |   for (int k = 0; k != n_elems; ++k) {
47 |     // Each element is coerced to a character vector before being joined.
48 |     out[k] = paste_collapse(as<CharacterVector>(char_list[k]), collapse);
49 |   }
50 |   return out;
51 | }
52 | 


--------------------------------------------------------------------------------
/junk/pasting.h:
--------------------------------------------------------------------------------
 1 | #ifndef STREX_PASTING_
 2 | #define STREX_PASTING_
 3 | 
 4 | 
 5 | #include <Rcpp.h>
 6 | using namespace Rcpp;
 7 | // Paste the elements of a character vector into one string.
 8 | std::string paste_collapse(CharacterVector, std::string);
 9 | // Apply `paste_collapse()` to each element of a list.
10 | CharacterVector paste_collapse_list_elems(List, std::string);
11 | 
12 | 
13 | #endif  // STREX_PASTING_
14 | 


--------------------------------------------------------------------------------
/junk/strings.cpp:
--------------------------------------------------------------------------------
 1 | #include <stdexcept>
 2 | #include <string>
 3 | 
 4 | #include <Rcpp.h>
 5 | 
 6 | using namespace Rcpp;
 7 | 
 8 | 
 9 | // [[Rcpp::export]]
10 | List str_elems(StringVector strings, List locations) {
11 |   // For each string, pick out the single characters at the given 1-indexed
12 |   // positions; a position outside the string (or NA) gives the empty string.
13 |   const std::size_t n = strings.size();
14 |   if (static_cast<std::size_t>(locations.size()) != n) {
15 |     throw std::invalid_argument("`strings` and `locations` must have the "
16 |                                 "same length.");
17 |   }
18 |   List out(n);
19 |   for (std::size_t i = 0; i != n; ++i) {
20 |     const std::string string_i = as<std::string>(strings[i]);
21 |     IntegerVector locations_i = locations[i];
22 |     const std::size_t m = locations_i.size();
23 |     CharacterVector out_i(m);
24 |     for (std::size_t j = 0; j != m; ++j) {
25 |       const int loc = locations_i[j];  // NA_INTEGER is negative, so rejected
26 |       const bool ok = loc >= 1 && loc <= static_cast<int>(string_i.size());
27 |       out_i[j] = ok ? std::string(1, string_i[loc - 1]) : std::string();
28 |     }
29 |     out[i] = out_i;
30 |   }
31 |   return out;
32 | }


--------------------------------------------------------------------------------
/man/before-and-after.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/after.R, R/before.R
 3 | \name{before-and-after}
 4 | \alias{before-and-after}
 5 | \alias{str_after_nth}
 6 | \alias{str_after_first}
 7 | \alias{str_after_last}
 8 | \alias{str_before_nth}
 9 | \alias{str_before_first}
10 | \alias{str_before_last}
11 | \title{Extract text before or after \code{n}th occurrence of pattern.}
12 | \usage{
13 | str_after_nth(string, pattern, n)
14 | 
15 | str_after_first(string, pattern)
16 | 
17 | str_after_last(string, pattern)
18 | 
19 | str_before_nth(string, pattern, n)
20 | 
21 | str_before_first(string, pattern)
22 | 
23 | str_before_last(string, pattern)
24 | }
25 | \arguments{
26 | \item{string}{A character vector.}
27 | 
28 | \item{pattern}{The pattern to look for.
29 | 
30 | The default interpretation is a regular expression, as described in
31 | \link[stringi:about_search_regex]{stringi::about_search_regex}.
32 | 
33 | To match without a regular expression (i.e. as a human would), use
34 | \link[stringr:modifiers]{coll()}. For details see \code{\link[stringr:modifiers]{stringr::regex()}}.}
35 | 
36 | \item{n}{A vector of integerish values. Must be either length 1 or
37 | have length equal to the length of \code{string}. Negative indices count from
38 | the back: while \code{n = 1} and \code{n = 2} correspond to first and second, \code{n = -1} and \code{n = -2} correspond to last and second-last. \code{n = 0} will return
39 | \code{NA}.}
40 | }
41 | \value{
42 | A character vector.
43 | }
44 | \description{
45 | Extract the part of a string which is before or after the \code{n}th occurrence of
46 | a specified pattern, vectorized over the string.
47 | }
48 | \details{
49 | \itemize{ \item \code{str_after_first(...)} is just \code{str_after_nth(..., n = 1)}.
50 | \item \code{str_after_last(...)} is just \code{str_after_nth(..., n = -1)}. \item
51 | \code{str_before_first(...)} is just \code{str_before_nth(..., n = 1)}. \item
52 | \code{str_before_last(...)} is just \code{str_before_nth(..., n = -1)}. }
53 | }
54 | \examples{
55 | string <- "abxxcdxxdexxfgxxh"
56 | str_after_nth(string, "xx", 3)
57 | str_before_nth(string, "e", 1:2)
58 | str_before_nth(string, "xx", -3)
59 | str_before_nth(string, ".", -3)
60 | str_before_nth(rep(string, 2), "..x", -3)
61 | str_before_first(string, "d")
62 | str_before_last(string, "x")
63 | string <- c("abc", "xyz.zyx")
64 | str_after_first(string, ".") # using regex
65 | str_after_first(string, coll(".")) # using human matching
66 | str_after_last(c("xy", "xz"), "x")
67 | }
68 | \seealso{
69 | Other bisectors: 
70 | \code{\link{str_before_last_dot}()}
71 | }
72 | \concept{bisectors}
73 | 


--------------------------------------------------------------------------------
/man/currency.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/currency.R
 3 | \name{currency}
 4 | \alias{currency}
 5 | \alias{str_extract_currencies}
 6 | \alias{str_nth_currency}
 7 | \alias{str_first_currency}
 8 | \alias{str_last_currency}
 9 | \title{Extract currency amounts from a string.}
10 | \usage{
11 | str_extract_currencies(string)
12 | 
13 | str_nth_currency(string, n)
14 | 
15 | str_first_currency(string)
16 | 
17 | str_last_currency(string)
18 | }
19 | \arguments{
20 | \item{string}{A character vector.}
21 | 
22 | \item{n}{A vector of integerish values. Must be either length 1 or
23 | have length equal to the length of \code{string}. Negative indices count from
24 | the back: while \code{n = 1} and \code{n = 2} correspond to first and second, \code{n = -1} and \code{n = -2} correspond to last and second-last. \code{n = 0} will return
25 | \code{NA}.}
26 | }
27 | \value{
28 | A data frame with 4 columns: \code{string_num}, \code{string}, \code{curr_sym} and
29 | \code{amount}. Every extracted currency amount gets its own row in the data
30 | frame detailing the string number and string that it was extracted from,
31 | the currency symbol and the amount.
32 | }
33 | \description{
34 | The currency of a number is defined as the character coming before the number
35 | in the string. If nothing comes before (i.e. if the number is the first thing
36 | in the string), the currency is the empty string, similarly the currency can
37 | be a space, comma or any manner of thing.
38 | }
39 | \details{
40 | These functions are vectorized over \code{string} and \code{n}.
41 | 
42 | \code{\link[=str_extract_currencies]{str_extract_currencies()}} extracts all currency amounts.
43 | 
44 | \code{str_nth_currency()} just gets the \code{n}th currency amount from each string.
45 | \code{str_first_currency(string)} and \code{str_last_currency(string)} are just
46 | wrappers for \code{str_nth_currency(string, n = 1)} and \code{str_nth_currency(string, n = -1)}.
47 | 
48 | "-$2.00" and "$-2.00" are interpreted as negative two dollars.
49 | 
50 | If you request e.g. the 5th currency amount but there are only 3 currency
51 | amounts, you get an amount and currency symbol of \code{NA}.
52 | }
53 | \examples{
54 | string <- c("ab3 13", "$1", "35.00 $1.14", "abc5 $3.8", "stuff")
55 | str_extract_currencies(string)
56 | str_nth_currency(string, n = 2)
57 | str_nth_currency(string, n = -2)
58 | str_nth_currency(string, c(1, -2, 1, 2, -1))
59 | str_first_currency(string)
60 | str_last_currency(string)
61 | }
62 | \concept{currency extractors}
63 | 


--------------------------------------------------------------------------------
/man/figures/README-pressure-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rorynolan/strex/84401bc145dbc63670c27c3c91376546117724ed/man/figures/README-pressure-1.png


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rorynolan/strex/84401bc145dbc63670c27c3c91376546117724ed/man/figures/logo.png


--------------------------------------------------------------------------------
/man/str_alphord_nums.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/alphord.R
 3 | \name{str_alphord_nums}
 4 | \alias{str_alphord_nums}
 5 | \title{Make string numbers comply with alphabetical order.}
 6 | \usage{
 7 | str_alphord_nums(string)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | }
12 | \value{
13 | A character vector.
14 | }
15 | \description{
16 | If strings are numbered, their numbers may not \emph{comply} with alphabetical
17 | order, e.g. "abc2" comes after \code{"abc10"} in alphabetical order. We might (for
18 | whatever reason) wish to change them such that they come in the order \emph{that
19 | we would like}. This function alters the strings such that they comply with
20 | alphabetical order, so here \code{"abc2"} would be renamed to "abc02". It works on
21 | file names with more than one number in them e.g. \code{"abc01def3"} (a string
22 | with 2 numbers). All the strings in the character vector \code{string} must have
23 | the same number of numbers, and the non-number bits must be the same.
24 | }
25 | \examples{
26 | string <- paste0("abc", 1:12)
27 | print(string)
28 | str_alphord_nums(string)
29 | str_alphord_nums(c("abc9def55", "abc10def7"))
30 | str_alphord_nums(c("01abc9def55", "5abc10def777", "99abc4def4"))
31 | str_alphord_nums(1:10)
32 | \dontrun{
33 | str_alphord_nums(c("abc9def55", "abc10xyz7")) # error
34 | }
35 | 
36 | }
37 | \concept{alphorderers}
38 | 


--------------------------------------------------------------------------------
/man/str_before_last_dot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/before.R
 3 | \name{str_before_last_dot}
 4 | \alias{str_before_last_dot}
 5 | \title{Extract the part of a string before the last period.}
 6 | \usage{
 7 | str_before_last_dot(string)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | }
12 | \value{
13 | A character vector.
14 | }
15 | \description{
16 | This is usually used to get the part of a file name that doesn't include the
17 | file extension. It is vectorized over \code{string}. If there is no period in
18 | \code{string}, the input is returned.
19 | }
20 | \examples{
21 | str_before_last_dot(c("spreadsheet1.csv", "doc2.doc", ".R"))
22 | }
23 | \seealso{
24 | Other bisectors: 
25 | \code{\link{before-and-after}}
26 | }
27 | \concept{bisectors}
28 | 


--------------------------------------------------------------------------------
/man/str_can_be_numeric.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/can-be-num.R
 3 | \name{str_can_be_numeric}
 4 | \alias{str_can_be_numeric}
 5 | \title{Check if a string could be considered as numeric.}
 6 | \usage{
 7 | str_can_be_numeric(string)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | }
12 | \value{
13 | A logical vector.
14 | }
15 | \description{
16 | After padding is removed, could the input string be considered to be numeric,
17 | i.e. could it be coerced to numeric. This function is vectorized over its one
18 | argument.
19 | }
20 | \examples{
21 | str_can_be_numeric("3")
22 | str_can_be_numeric("5 ")
23 | str_can_be_numeric(c("1a", "abc"))
24 | }
25 | \concept{type converters}
26 | 


--------------------------------------------------------------------------------
/man/str_detect_all.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/detect.R
 3 | \name{str_detect_all}
 4 | \alias{str_detect_all}
 5 | \alias{str_detect_any}
 6 | \title{Detect any or all patterns.}
 7 | \usage{
 8 | str_detect_all(string, pattern, negate = FALSE)
 9 | 
10 | str_detect_any(string, pattern, negate = FALSE)
11 | }
12 | \arguments{
13 | \item{string}{A character vector.}
14 | 
15 | \item{pattern}{A character vector. The patterns to look for. Default is
16 | \code{stringi}-style regular expression. \code{\link[stringr:modifiers]{stringr::coll()}} and
17 | \code{\link[stringr:modifiers]{stringr::fixed()}} are also permissible.}
18 | 
19 | \item{negate}{A flag. If \code{TRUE}, inverts the result.}
20 | }
21 | \value{
22 | A logical vector.
23 | }
24 | \description{
25 | Vectorized over \code{string}.
26 | }
27 | \examples{
28 | str_detect_all("quick brown fox", c("x", "y", "z"))
29 | str_detect_all(c(".", "-"), ".")
30 | str_detect_all(c(".", "-"), coll("."))
31 | str_detect_all(c(".", "-"), coll("."), negate = TRUE)
32 | str_detect_all(c(".", "-"), c(".", ":"))
33 | str_detect_all(c(".", "-"), coll(c(".", ":")))
34 | str_detect_all("xyzabc", c("a", "c", "z"))
35 | str_detect_all(c("xyzabc", "abcxyz"), c(".b", "^x"))
36 | 
37 | str_detect_any("quick brown fox", c("x", "y", "z"))
38 | str_detect_any(c(".", "-"), ".")
39 | str_detect_any(c(".", "-"), coll("."))
40 | str_detect_any(c(".", "-"), coll("."), negate = TRUE)
41 | str_detect_any(c(".", "-"), c(".", ":"))
42 | str_detect_any(c(".", "-"), coll(c(".", ":")))
43 | str_detect_any(c("xyzabc", "abcxyz"), c(".b", "^x"))
44 | 
45 | }
46 | 


--------------------------------------------------------------------------------
/man/str_elem.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/elem.R
 3 | \name{str_elem}
 4 | \alias{str_elem}
 5 | \title{Extract a single character from a string, using its index.}
 6 | \usage{
 7 | str_elem(string, index)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | 
12 | \item{index}{An integer. Negative indexing is allowed as in
13 | \code{\link[stringr:str_sub]{stringr::str_sub()}}.}
14 | }
15 | \value{
16 | A one-character string.
17 | }
18 | \description{
19 | If the element does not exist, this function returns the empty string. This
20 | is consistent with \code{\link[stringr:str_sub]{stringr::str_sub()}}. This function is vectorised over
21 | both arguments.
22 | }
23 | \examples{
24 | str_elem(c("abcd", "xyz"), 3)
25 | str_elem("abcd", -2)
26 | }
27 | \seealso{
28 | Other single element extractors: 
29 | \code{\link{str_elems}()},
30 | \code{\link{str_paste_elems}()}
31 | }
32 | \concept{single element extractors}
33 | 


--------------------------------------------------------------------------------
/man/str_elems.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/elem.R
 3 | \name{str_elems}
 4 | \alias{str_elems}
 5 | \title{Extract several single elements from a string.}
 6 | \usage{
 7 | str_elems(string, indices, byrow = TRUE)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | 
12 | \item{indices}{A vector of integerish values. Negative indexing is allowed as
13 | in \code{\link[stringr:str_sub]{stringr::str_sub()}}.}
14 | 
15 | \item{byrow}{Should the elements be organised in the matrix with one row per
16 | string (\code{byrow = TRUE}, the default) or one column per string (\code{byrow = FALSE}). See examples if you don't understand.}
17 | }
18 | \value{
19 | A character matrix.
20 | }
21 | \description{
22 | Efficiently extract several elements from a string. See \code{\link[=str_elem]{str_elem()}} for
23 | extracting single elements. This function is vectorized over the first
24 | argument.
25 | }
26 | \examples{
27 | string <- c("abc", "def", "ghi", "vwxyz")
28 | str_elems(string, 1:2)
29 | str_elems(string, 1:2, byrow = FALSE)
30 | str_elems(string, c(1, 2, 3, 4, -1))
31 | }
32 | \seealso{
33 | Other single element extractors: 
34 | \code{\link{str_elem}()},
35 | \code{\link{str_paste_elems}()}
36 | }
37 | \concept{single element extractors}
38 | 


--------------------------------------------------------------------------------
/man/str_extract_non_numerics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/extract-non-nums.R
 3 | \name{str_extract_non_numerics}
 4 | \alias{str_extract_non_numerics}
 5 | \title{Extract non-numbers from a string.}
 6 | \usage{
 7 | str_extract_non_numerics(
 8 |   string,
 9 |   decimals = FALSE,
10 |   leading_decimals = decimals,
11 |   negs = FALSE,
12 |   sci = FALSE,
13 |   big_mark = "",
14 |   commas = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{string}{A string.}
19 | 
20 | \item{decimals}{Do you want to include the possibility of decimal numbers
21 | (\code{TRUE}) or not (\code{FALSE}, the default).}
22 | 
23 | \item{leading_decimals}{Do you want to allow a leading decimal point to be
24 | the start of a number?}
25 | 
26 | \item{negs}{Do you want to allow negative numbers? Note that double negatives
27 | are not handled here (see the examples).}
28 | 
29 | \item{sci}{Make the search aware of scientific notation e.g. 2e3 is the same
30 | as 2000.}
31 | 
32 | \item{big_mark}{A character. Allow this character to be used as a thousands
33 | separator. This character will be removed from between digits before they
34 | are converted to numeric. You may specify many at once by pasting them
35 | together e.g. \code{big_mark = ",_"} will allow both commas and underscores.
36 | Internally, this will be used inside a \verb{[]} regex block so e.g. \code{"a-z"}
37 | will behave differently to \code{"az-"}. Most common separators (commas, spaces,
38 | underscores) should work fine.}
39 | 
40 | \item{commas}{Deprecated. Use \code{big_mark} instead.}
41 | }
42 | \description{
43 | Extract the non-numeric bits of a string where numbers are optionally defined
44 | with decimals, scientific notation and thousand separators.
45 | }
46 | \details{
47 | \itemize{ \item \code{str_first_non_numeric(...)} is just
48 | \code{str_nth_non_numeric(..., n = 1)}. \item \code{str_last_non_numeric(...)} is just
49 | \code{str_nth_non_numeric(..., n = -1)}. }
50 | }
51 | \examples{
52 | strings <- c(
53 |   "abc123def456", "abc-0.12def.345", "abc.12e4def34.5e9",
54 |   "abc1,100def1,230.5", "abc1,100e3,215def4e1,000"
55 | )
56 | str_extract_non_numerics(strings)
57 | str_extract_non_numerics(strings, decimals = TRUE, leading_decimals = FALSE)
58 | str_extract_non_numerics(strings, decimals = TRUE)
59 | str_extract_non_numerics(strings, big_mark = ",")
60 | str_extract_non_numerics(strings,
61 |   decimals = TRUE, leading_decimals = TRUE,
62 |   sci = TRUE
63 | )
64 | str_extract_non_numerics(strings,
65 |   decimals = TRUE, leading_decimals = TRUE,
66 |   sci = TRUE, big_mark = ",", negs = TRUE
67 | )
68 | str_extract_non_numerics(c("22", "1.2.3"), decimals = TRUE)
69 | }
70 | \seealso{
71 | Other non-numeric extractors: 
72 | \code{\link{str_nth_non_numeric}()}
73 | }
74 | \concept{non-numeric extractors}
75 | 


--------------------------------------------------------------------------------
/man/str_extract_numbers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/extract-nums.R
 3 | \name{str_extract_numbers}
 4 | \alias{str_extract_numbers}
 5 | \title{Extract numbers from a string.}
 6 | \usage{
 7 | str_extract_numbers(
 8 |   string,
 9 |   decimals = FALSE,
10 |   leading_decimals = decimals,
11 |   negs = FALSE,
12 |   sci = FALSE,
13 |   big_mark = "",
14 |   leave_as_string = FALSE,
15 |   commas = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{string}{A string.}
20 | 
21 | \item{decimals}{Do you want to include the possibility of decimal numbers
22 | (\code{TRUE}) or not (\code{FALSE}, the default).}
23 | 
24 | \item{leading_decimals}{Do you want to allow a leading decimal point to be
25 | the start of a number?}
26 | 
27 | \item{negs}{Do you want to allow negative numbers? Note that double negatives
28 | are not handled here (see the examples).}
29 | 
30 | \item{sci}{Make the search aware of scientific notation e.g. 2e3 is the same
31 | as 2000.}
32 | 
33 | \item{big_mark}{A character. Allow this character to be used as a thousands
34 | separator. This character will be removed from between digits before they
35 | are converted to numeric. You may specify many at once by pasting them
36 | together e.g. \code{big_mark = ",_"} will allow both commas and underscores.
37 | Internally, this will be used inside a \verb{[]} regex block so e.g. \code{"a-z"}
38 | will behave differently to \code{"az-"}. Most common separators (commas, spaces,
39 | underscores) should work fine.}
40 | 
41 | \item{leave_as_string}{Do you want to return the number as a string (\code{TRUE})
42 | or as numeric (\code{FALSE}, the default)?}
43 | 
44 | \item{commas}{Deprecated. Use \code{big_mark} instead.}
45 | }
46 | \value{
47 | For \code{str_extract_numbers} and \code{str_extract_non_numerics}, a list of
48 | numeric or character vectors, one list element for each element of
49 | \code{string}. For \code{str_nth_number} and \code{str_nth_non_numeric}, a numeric or
50 | character vector the same length as the vector \code{string}.
51 | }
52 | \description{
53 | Extract the numbers from a string, where decimals, scientific notation and
54 | thousand separators are optionally allowed.
55 | }
56 | \details{
57 | If any part of a string contains an ambiguous number (e.g. \verb{1.2.3} would be
58 | ambiguous if \code{decimals = TRUE} (but not otherwise)), the value returned for
59 | that string will be \code{NA} and a \code{warning} will be issued.
60 | 
61 | With scientific notation, it is assumed that the exponent is not a decimal
62 | number e.g. \verb{2e2.4} is unacceptable. Thousand separators, however, are
63 | acceptable in the exponent.
64 | 
65 | Numbers outside the double precision floating point range (i.e. with absolute
66 | value greater than 1.797693e+308) are read as \code{Inf} (or \code{-Inf} if they begin
67 | with a minus sign). This is what \code{base::as.numeric()} does.
68 | }
69 | \examples{
70 | strings <- c(
71 |   "abc123def456", "abc-0.12def.345", "abc.12e4def34.5e9",
72 |   "abc1,100def1,230.5", "abc1,100e3,215def4e1,000"
73 | )
74 | str_extract_numbers(strings)
75 | str_extract_numbers(strings, decimals = TRUE)
76 | str_extract_numbers(strings, decimals = TRUE, leading_decimals = TRUE)
77 | str_extract_numbers(strings, big_mark = ",")
78 | str_extract_numbers(strings,
79 |   decimals = TRUE, leading_decimals = TRUE,
80 |   sci = TRUE
81 | )
82 | str_extract_numbers(strings,
83 |   decimals = TRUE, leading_decimals = TRUE,
84 |   sci = TRUE, big_mark = ",", negs = TRUE
85 | )
86 | str_extract_numbers(strings,
87 |   decimals = TRUE, leading_decimals = FALSE,
88 |   sci = FALSE, big_mark = ",", leave_as_string = TRUE
89 | )
90 | str_extract_numbers(c("22", "1.2.3"), decimals = TRUE)
91 | }
92 | \seealso{
93 | Other numeric extractors: 
94 | \code{\link{str_nth_number}()},
95 | \code{\link{str_nth_number_after_mth}()},
96 | \code{\link{str_nth_number_before_mth}()}
97 | }
98 | \concept{numeric extractors}
99 | 


--------------------------------------------------------------------------------
/man/str_give_ext.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/give-ext.R
 3 | \name{str_give_ext}
 4 | \alias{str_give_ext}
 5 | \title{Ensure a file name has the intended extension.}
 6 | \usage{
 7 | str_give_ext(string, ext, replace = FALSE)
 8 | }
 9 | \arguments{
10 | \item{string}{The intended file name.}
11 | 
12 | \item{ext}{The intended file extension (with or without the ".").}
13 | 
14 | \item{replace}{If the file has an extension already, replace it (or append
15 | the new extension name)?}
16 | }
17 | \value{
18 | A string: the file name in your intended form.
19 | }
20 | \description{
21 | Say you want to ensure a name is fit to be the name of a csv file. Then, if
22 | the input doesn't end with ".csv", this function will tack ".csv" onto the
23 | end of it. This is vectorized over the first argument.
24 | }
25 | \examples{
26 | str_give_ext(c("abc", "abc.csv"), "csv")
27 | str_give_ext("abc.csv", "pdf")
28 | str_give_ext("abc.csv", "pdf", replace = TRUE)
29 | }
30 | \concept{appenders}
31 | 


--------------------------------------------------------------------------------
/man/str_locate_braces.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/locate.R
 3 | \name{str_locate_braces}
 4 | \alias{str_locate_braces}
 5 | \title{Locate the braces in a string.}
 6 | \usage{
 7 | str_locate_braces(string)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector}
11 | }
12 | \value{
13 | A data frame with 4 columns: \code{string_num}, \code{string}, \code{position} and
14 | \code{brace}. Every located brace gets its own row in the data frame
15 | detailing the string number and string that it was extracted from, the
16 | position in its string and the brace.
17 | }
18 | \description{
19 | Give the positions of \code{(}, \verb{)}, \code{[}, \verb{]}, \verb{\\\{}, \verb{\\\}} within a string.
20 | }
21 | \examples{
22 | str_locate_braces(c("a{](kkj)})", "ab(]c{}"))
23 | }
24 | \seealso{
25 | Other locators: 
26 | \code{\link{str_locate_nth}()}
27 | }
28 | \concept{locators}
29 | 


--------------------------------------------------------------------------------
/man/str_locate_nth.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/locate.R
 3 | \name{str_locate_nth}
 4 | \alias{str_locate_nth}
 5 | \alias{str_locate_first}
 6 | \alias{str_locate_last}
 7 | \title{Locate the indices of the \code{n}th instance of a pattern.}
 8 | \usage{
 9 | str_locate_nth(string, pattern, n)
10 | 
11 | str_locate_first(string, pattern)
12 | 
13 | str_locate_last(string, pattern)
14 | }
15 | \arguments{
16 | \item{string}{A character vector.}
17 | 
18 | \item{pattern}{The pattern to look for.
19 | 
20 | The default interpretation is a regular expression, as described in
21 | \link[stringi:about_search_regex]{stringi::about_search_regex}.
22 | 
23 | To match without a regular expression (i.e. as a human would), use
24 | \link[stringr:modifiers]{coll()}. For details see \code{\link[stringr:modifiers]{stringr::regex()}}.}
25 | 
26 | \item{n}{A vector of integerish values. Must be either length 1 or
27 | have length equal to the length of \code{string}. Negative indices count from
28 | the back: while \code{n = 1} and \code{n = 2} correspond to first and second, \code{n = -1} and \code{n = -2} correspond to last and second-last. \code{n = 0} will return
29 | \code{NA}.}
30 | }
31 | \value{
32 | A two-column matrix. The \eqn{i}th row of this matrix gives the start
33 | and end indices of the \eqn{n}th instance of \code{pattern} in the \eqn{i}th
34 | element of \code{string}.
35 | }
36 | \description{
37 | The \code{n}th instance of a pattern will cover a series of character
38 | indices. These functions tell you which indices those are. These functions
39 | are vectorised over all arguments.
40 | }
41 | \details{
42 | \itemize{ \item \code{str_locate_first(...)} is just \code{str_locate_nth(..., n = 1)}.
43 | \item \code{str_locate_last(...)} is just \code{str_locate_nth(..., n = -1)}. }
44 | }
45 | \examples{
46 | str_locate_nth(c("abcdabcxyz", "abcabc"), "abc", 2)
47 | str_locate_nth(
48 |   c("This old thing.", "That beautiful thing there."),
49 |   "\\\\w+", c(2, -2)
50 | )
51 | str_locate_nth("abc", "b", c(0, 1, 1, 2))
52 | str_locate_first("abcxyzabc", "abc")
53 | str_locate_last("abcxyzabc", "abc")
54 | }
55 | \seealso{
56 | Other locators: 
57 | \code{\link{str_locate_braces}()}
58 | }
59 | \concept{locators}
60 | 


--------------------------------------------------------------------------------
/man/str_match_arg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/arg-match.R
 3 | \name{str_match_arg}
 4 | \alias{str_match_arg}
 5 | \alias{match_arg}
 6 | \title{Argument Matching.}
 7 | \usage{
 8 | str_match_arg(
 9 |   arg,
10 |   choices = NULL,
11 |   index = FALSE,
12 |   several_ok = FALSE,
13 |   ignore_case = FALSE
14 | )
15 | 
16 | match_arg(
17 |   arg,
18 |   choices = NULL,
19 |   index = FALSE,
20 |   several_ok = FALSE,
21 |   ignore_case = FALSE
22 | )
23 | }
24 | \arguments{
25 | \item{arg}{A character vector (of length one unless \code{several_ok = TRUE}).}
26 | 
27 | \item{choices}{A character vector of candidate values.}
28 | 
29 | \item{index}{Return the index of the match rather than the match itself?}
30 | 
31 | \item{several_ok}{Allow \code{arg} to have length greater than one to match
32 | several arguments at once?}
33 | 
34 | \item{ignore_case}{Ignore case while matching. If this is \code{TRUE}, the
35 | returned value is the matched element of \code{choices} (with its original
36 | casing).}
37 | }
38 | \description{
39 | Match \code{arg} against a series of candidate \code{choices}. \code{arg} \emph{matches} an
40 | element of \code{choices} if \code{arg} is a prefix of that element.
41 | }
42 | \details{
43 | \code{ERROR}s are thrown when a match is not made and where the match is
44 | ambiguous. However, sometimes ambiguities are inevitable. Consider the case
45 | where \code{choices = c("ab", "abc")}, then there's no way to choose \code{"ab"}
46 | because \code{"ab"} is a prefix for \code{"ab"} and \code{"abc"}. If this is the case, you
47 | need to provide a full match, i.e. using \code{arg = "ab"} will get you \code{"ab"}
48 | without an error, however \code{arg = "a"} will throw an ambiguity error.
49 | 
50 | When \code{choices} is \code{NULL}, the \code{choices} are obtained from a default setting
51 | for the formal argument \code{arg} of the function from which \code{str_match_arg} was
52 | called. This is consistent with \code{base::match.arg()}. See the examples for
53 | details.
54 | 
55 | When \code{arg} and \code{choices} are identical and \code{several_ok = FALSE}, the first
56 | element of \code{choices} is returned. This is consistent with
57 | \code{base::match.arg()}.
58 | 
59 | This function is inspired by \code{RSAGA::match.arg.ext()}. Its behaviour is almost
60 | identical (the difference is that \code{RSAGA::match.arg.ext(..., ignore.case = TRUE)} always returns in all lower case; \code{strex::match_arg(..., ignore_case = TRUE)} ignores case while matching but returns the element of \code{choices} in
61 | its original case). \code{RSAGA} is a heavy package to depend upon so
62 | \code{strex::match_arg()} is handy for package developers.
63 | 
64 | This function is designed to be used inside of other functions. It's fine to
65 | use it for other purposes, but the error messages might be a bit weird.
66 | }
67 | \examples{
68 | choices <- c("Apples", "Pears", "Bananas", "Oranges")
69 | match_arg("A", choices)
70 | match_arg("B", choices, index = TRUE)
71 | match_arg(c("a", "b"), choices, several_ok = TRUE, ignore_case = TRUE)
72 | match_arg(c("b", "a"), choices,
73 |   ignore_case = TRUE, index = TRUE,
74 |   several_ok = TRUE
75 | )
76 | myword <- function(w = c("abacus", "baseball", "candy")) {
77 |   w <- match_arg(w)
78 |   w
79 | }
80 | myword("b")
81 | myword()
82 | myword <- function(w = c("abacus", "baseball", "candy")) {
83 |   w <- match_arg(w, several_ok = TRUE)
84 |   w
85 | }
86 | myword("c")
87 | myword()
88 | }
89 | \concept{argument matchers}
90 | 


--------------------------------------------------------------------------------
/man/str_nth_non_numeric.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/extract-non-nums.R
  3 | \name{str_nth_non_numeric}
  4 | \alias{str_nth_non_numeric}
  5 | \alias{str_first_non_numeric}
  6 | \alias{str_last_non_numeric}
  7 | \title{Extract the \code{n}th non-numeric substring from a string.}
  8 | \usage{
  9 | str_nth_non_numeric(
 10 |   string,
 11 |   n,
 12 |   decimals = FALSE,
 13 |   leading_decimals = decimals,
 14 |   negs = FALSE,
 15 |   sci = FALSE,
 16 |   big_mark = "",
 17 |   commas = FALSE
 18 | )
 19 | 
 20 | str_first_non_numeric(
 21 |   string,
 22 |   decimals = FALSE,
 23 |   leading_decimals = decimals,
 24 |   negs = FALSE,
 25 |   sci = FALSE,
 26 |   big_mark = "",
 27 |   commas = FALSE
 28 | )
 29 | 
 30 | str_last_non_numeric(
 31 |   string,
 32 |   decimals = FALSE,
 33 |   leading_decimals = decimals,
 34 |   negs = FALSE,
 35 |   sci = FALSE,
 36 |   big_mark = ""
 37 | )
 38 | }
 39 | \arguments{
 40 | \item{string}{A string.}
 41 | 
 42 | \item{n}{A vector of integerish values. Must be either length 1 or
 43 | have length equal to the length of \code{string}. Negative indices count from
 44 | the back: while \code{n = 1} and \code{n = 2} correspond to first and second, \code{n = -1} and \code{n = -2} correspond to last and second-last. \code{n = 0} will return
 45 | \code{NA}.}
 46 | 
 47 | \item{decimals}{Do you want to include the possibility of decimal numbers
 48 | (\code{TRUE}) or not (\code{FALSE}, the default).}
 49 | 
 50 | \item{leading_decimals}{Do you want to allow a leading decimal point to be
 51 | the start of a number?}
 52 | 
 53 | \item{negs}{Do you want to allow negative numbers? Note that double negatives
 54 | are not handled here (see the examples).}
 55 | 
 56 | \item{sci}{Make the search aware of scientific notation e.g. 2e3 is the same
 57 | as 2000.}
 58 | 
 59 | \item{big_mark}{A character. Allow this character to be used as a thousands
 60 | separator. This character will be removed from between digits before they
 61 | are converted to numeric. You may specify many at once by pasting them
 62 | together e.g. \code{big_mark = ",_"} will allow both commas and underscores.
 63 | Internally, this will be used inside a \verb{[]} regex block so e.g. \code{"a-z"}
 64 | will behave differently to \code{"az-"}. Most common separators (commas, spaces,
 65 | underscores) should work fine.}
 66 | 
 67 | \item{commas}{Deprecated. Use \code{big_mark} instead.}
 68 | }
 69 | \description{
 70 | Extract the \code{n}th non-numeric bit of a string where numbers are optionally
 71 | defined with decimals, scientific notation and thousand separators.
 72 | \itemize{ \item \code{str_first_non_numeric(...)} is just
 73 | \code{str_nth_non_numeric(..., n = 1)}. \item \code{str_last_non_numeric(...)} is
 74 | just \code{str_nth_non_numeric(..., n = -1)}. }
 75 | }
 76 | \examples{
 77 | strings <- c(
 78 |   "abc123def456", "abc-0.12def.345", "abc.12e4def34.5e9",
 79 |   "abc1,100def1,230.5", "abc1,100e3,215def4e1,000"
 80 | )
 81 | str_nth_non_numeric(strings, n = 2)
 82 | str_nth_non_numeric(strings, n = -2, decimals = TRUE)
 83 | str_first_non_numeric(strings, decimals = TRUE, leading_decimals = FALSE)
 84 | str_last_non_numeric(strings, big_mark = ",")
 85 | str_nth_non_numeric(strings,
 86 |   n = 1, decimals = TRUE, leading_decimals = TRUE,
 87 |   sci = TRUE
 88 | )
 89 | str_first_non_numeric(strings,
 90 |   decimals = TRUE, leading_decimals = TRUE,
 91 |   sci = TRUE, big_mark = ",", negs = TRUE
 92 | )
 93 | str_first_non_numeric(c("22", "1.2.3"), decimals = TRUE)
 94 | }
 95 | \seealso{
 96 | Other non-numeric extractors: 
 97 | \code{\link{str_extract_non_numerics}()}
 98 | }
 99 | \concept{non-numeric extractors}
100 | 


--------------------------------------------------------------------------------
/man/str_nth_number.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/extract-nums.R
  3 | \name{str_nth_number}
  4 | \alias{str_nth_number}
  5 | \alias{str_first_number}
  6 | \alias{str_last_number}
  7 | \title{Extract the \code{n}th number from a string.}
  8 | \usage{
  9 | str_nth_number(
 10 |   string,
 11 |   n,
 12 |   decimals = FALSE,
 13 |   leading_decimals = decimals,
 14 |   negs = FALSE,
 15 |   sci = FALSE,
 16 |   big_mark = "",
 17 |   leave_as_string = FALSE,
 18 |   commas = FALSE
 19 | )
 20 | 
 21 | str_first_number(
 22 |   string,
 23 |   decimals = FALSE,
 24 |   leading_decimals = decimals,
 25 |   negs = FALSE,
 26 |   sci = FALSE,
 27 |   big_mark = "",
 28 |   leave_as_string = FALSE,
 29 |   commas = FALSE
 30 | )
 31 | 
 32 | str_last_number(
 33 |   string,
 34 |   decimals = FALSE,
 35 |   leading_decimals = decimals,
 36 |   negs = FALSE,
 37 |   sci = FALSE,
 38 |   big_mark = "",
 39 |   leave_as_string = FALSE,
 40 |   commas = FALSE
 41 | )
 42 | }
 43 | \arguments{
 44 | \item{string}{A string.}
 45 | 
 46 | \item{n}{A vector of integerish values. Must be either length 1 or
 47 | have length equal to the length of \code{string}. Negative indices count from
 48 | the back: while \code{n = 1} and \code{n = 2} correspond to first and second, \code{n = -1} and \code{n = -2} correspond to last and second-last. \code{n = 0} will return
 49 | \code{NA}.}
 50 | 
 51 | \item{decimals}{Do you want to include the possibility of decimal numbers
 52 | (\code{TRUE}) or not (\code{FALSE}, the default).}
 53 | 
 54 | \item{leading_decimals}{Do you want to allow a leading decimal point to be
 55 | the start of a number?}
 56 | 
 57 | \item{negs}{Do you want to allow negative numbers? Note that double negatives
 58 | are not handled here (see the examples).}
 59 | 
 60 | \item{sci}{Make the search aware of scientific notation e.g. 2e3 is the same
 61 | as 2000.}
 62 | 
 63 | \item{big_mark}{A character. Allow this character to be used as a thousands
 64 | separator. This character will be removed from between digits before they
 65 | are converted to numeric. You may specify many at once by pasting them
 66 | together e.g. \code{big_mark = ",_"} will allow both commas and underscores.
 67 | Internally, this will be used inside a \verb{[]} regex block so e.g. \code{"a-z"}
 68 | will behave differently to \code{"az-"}. Most common separators (commas, spaces,
 69 | underscores) should work fine.}
 70 | 
 71 | \item{leave_as_string}{Do you want to return the number as a string (\code{TRUE})
 72 | or as numeric (\code{FALSE}, the default)?}
 73 | 
 74 | \item{commas}{Deprecated. Use \code{big_mark} instead.}
 75 | }
 76 | \value{
 77 | A numeric vector (or a character vector if \code{leave_as_string = TRUE}).
 78 | }
 79 | \description{
 80 | Extract the \code{n}th number from a string, where decimals, scientific notation
 81 | and thousand separators are optionally allowed.
 82 | }
 83 | \details{
 84 | \itemize{ \item \code{str_first_number(...)} is just \code{str_nth_number(..., n = 1)}.
 85 | \item \code{str_last_number(...)} is just \code{str_nth_number(..., n = -1)}. }
 86 | 
 87 | For a detailed explanation of the number extraction, see
 88 | \code{\link[=str_extract_numbers]{str_extract_numbers()}}.
 89 | }
 90 | \examples{
 91 | strings <- c(
 92 |   "abc123def456", "abc-0.12def.345", "abc.12e4def34.5e9",
 93 |   "abc1,100def1,230.5", "abc1,100e3,215def4e1,000"
 94 | )
 95 | str_nth_number(strings, n = 2)
 96 | str_nth_number(strings, n = -2, decimals = TRUE)
 97 | str_first_number(strings, decimals = TRUE, leading_decimals = TRUE)
 98 | str_last_number(strings, big_mark = ",")
 99 | str_nth_number(strings,
100 |   n = 1, decimals = TRUE, leading_decimals = TRUE,
101 |   sci = TRUE
102 | )
103 | str_first_number(strings,
104 |   decimals = TRUE, leading_decimals = TRUE,
105 |   sci = TRUE, big_mark = ",", negs = TRUE
106 | )
107 | str_last_number(strings,
108 |   decimals = TRUE, leading_decimals = FALSE,
109 |   sci = FALSE, big_mark = ",", negs = TRUE, leave_as_string = TRUE
110 | )
111 | str_first_number(c("22", "1.2.3"), decimals = TRUE)
112 | }
113 | \seealso{
114 | Other numeric extractors: 
115 | \code{\link{str_extract_numbers}()},
116 | \code{\link{str_nth_number_after_mth}()},
117 | \code{\link{str_nth_number_before_mth}()}
118 | }
119 | \concept{numeric extractors}
120 | 


--------------------------------------------------------------------------------
/man/str_nth_number_after_mth.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/num-after.R
  3 | \name{str_nth_number_after_mth}
  4 | \alias{str_nth_number_after_mth}
  5 | \alias{str_nth_number_after_first}
  6 | \alias{str_nth_number_after_last}
  7 | \alias{str_first_number_after_mth}
  8 | \alias{str_last_number_after_mth}
  9 | \alias{str_first_number_after_first}
 10 | \alias{str_first_number_after_last}
 11 | \alias{str_last_number_after_first}
 12 | \alias{str_last_number_after_last}
 13 | \title{Find the \code{n}th number after the \code{m}th occurrence of a pattern.}
 14 | \usage{
 15 | str_nth_number_after_mth(
 16 |   string,
 17 |   pattern,
 18 |   n,
 19 |   m,
 20 |   decimals = FALSE,
 21 |   leading_decimals = decimals,
 22 |   negs = FALSE,
 23 |   sci = FALSE,
 24 |   big_mark = "",
 25 |   leave_as_string = FALSE,
 26 |   commas = FALSE
 27 | )
 28 | 
 29 | str_nth_number_after_first(
 30 |   string,
 31 |   pattern,
 32 |   n,
 33 |   decimals = FALSE,
 34 |   leading_decimals = decimals,
 35 |   negs = FALSE,
 36 |   sci = FALSE,
 37 |   big_mark = "",
 38 |   leave_as_string = FALSE,
 39 |   commas = FALSE
 40 | )
 41 | 
 42 | str_nth_number_after_last(
 43 |   string,
 44 |   pattern,
 45 |   n,
 46 |   decimals = FALSE,
 47 |   leading_decimals = decimals,
 48 |   negs = FALSE,
 49 |   sci = FALSE,
 50 |   big_mark = "",
 51 |   leave_as_string = FALSE,
 52 |   commas = FALSE
 53 | )
 54 | 
 55 | str_first_number_after_mth(
 56 |   string,
 57 |   pattern,
 58 |   m,
 59 |   decimals = FALSE,
 60 |   leading_decimals = decimals,
 61 |   negs = FALSE,
 62 |   sci = FALSE,
 63 |   big_mark = "",
 64 |   leave_as_string = FALSE,
 65 |   commas = FALSE
 66 | )
 67 | 
 68 | str_last_number_after_mth(
 69 |   string,
 70 |   pattern,
 71 |   m,
 72 |   decimals = FALSE,
 73 |   leading_decimals = decimals,
 74 |   negs = FALSE,
 75 |   sci = FALSE,
 76 |   big_mark = "",
 77 |   leave_as_string = FALSE,
 78 |   commas = FALSE
 79 | )
 80 | 
 81 | str_first_number_after_first(
 82 |   string,
 83 |   pattern,
 84 |   decimals = FALSE,
 85 |   leading_decimals = decimals,
 86 |   negs = FALSE,
 87 |   sci = FALSE,
 88 |   big_mark = "",
 89 |   leave_as_string = FALSE,
 90 |   commas = FALSE
 91 | )
 92 | 
 93 | str_first_number_after_last(
 94 |   string,
 95 |   pattern,
 96 |   decimals = FALSE,
 97 |   leading_decimals = decimals,
 98 |   negs = FALSE,
 99 |   sci = FALSE,
100 |   big_mark = "",
101 |   leave_as_string = FALSE,
102 |   commas = FALSE
103 | )
104 | 
105 | str_last_number_after_first(
106 |   string,
107 |   pattern,
108 |   decimals = FALSE,
109 |   leading_decimals = decimals,
110 |   negs = FALSE,
111 |   sci = FALSE,
112 |   big_mark = "",
113 |   leave_as_string = FALSE,
114 |   commas = FALSE
115 | )
116 | 
117 | str_last_number_after_last(
118 |   string,
119 |   pattern,
120 |   decimals = FALSE,
121 |   leading_decimals = decimals,
122 |   negs = FALSE,
123 |   sci = FALSE,
124 |   big_mark = "",
125 |   leave_as_string = FALSE,
126 |   commas = FALSE
127 | )
128 | }
129 | \arguments{
130 | \item{string}{A character vector.}
131 | 
132 | \item{pattern}{The pattern to look for.
133 | 
134 | The default interpretation is a regular expression, as described in
135 | \link[stringi:about_search_regex]{stringi::about_search_regex}.
136 | 
137 | To match without a regular expression (i.e. as a human would), use
138 | \link[stringr:modifiers]{coll()}. For details see \code{\link[stringr:modifiers]{stringr::regex()}}.}
139 | 
140 | \item{n, m}{Vectors of integerish values. Must be either length 1 or have
141 | length equal to the length of \code{string}. Negative indices count from the
142 | back: while \code{1} and \code{2} correspond to first and second, \code{-1} and \code{-2}
143 | correspond to last and second-last. \code{0} will return \code{NA}.}
144 | 
145 | \item{decimals}{Do you want to include the possibility of decimal numbers
146 | (\code{TRUE}) or not (\code{FALSE}, the default).}
147 | 
148 | \item{leading_decimals}{Do you want to allow a leading decimal point to be
149 | the start of a number?}
150 | 
151 | \item{negs}{Do you want to allow negative numbers? Note that double negatives
152 | are not handled here (see the examples).}
153 | 
154 | \item{sci}{Make the search aware of scientific notation e.g. 2e3 is the same
155 | as 2000.}
156 | 
157 | \item{big_mark}{A character. Allow this character to be used as a thousands
158 | separator. This character will be removed from between digits before they
159 | are converted to numeric. You may specify many at once by pasting them
160 | together e.g. \code{big_mark = ",_"} will allow both commas and underscores.
161 | Internally, this will be used inside a \verb{[]} regex block so e.g. \code{"a-z"}
162 | will behave differently to \code{"az-"}. Most common separators (commas, spaces,
163 | underscores) should work fine.}
164 | 
165 | \item{leave_as_string}{Do you want to return the number as a string (\code{TRUE})
166 | or as numeric (\code{FALSE}, the default)?}
167 | 
168 | \item{commas}{Deprecated. Use \code{big_mark} instead.}
169 | }
170 | \value{
171 | A numeric or character vector.
172 | }
173 | \description{
174 | Given a string, a pattern and integers \code{n} and \code{m}, find the \code{n}th
175 | number after the \code{m}th occurrence of the pattern.
176 | }
177 | \examples{
178 | string <- c(
179 |   "abc1abc2abc3abc4abc5abc6abc7abc8abc9",
180 |   "abc1def2ghi3abc4def5ghi6abc7def8ghi9"
181 | )
182 | str_nth_number_after_mth(string, "abc", 1, 3)
183 | str_nth_number_after_mth(string, "abc", 2, 3)
184 | str_nth_number_after_first(string, "abc", 2)
185 | str_nth_number_after_last(string, "abc", -1)
186 | str_first_number_after_mth(string, "abc", 2)
187 | str_last_number_after_mth(string, "abc", 1)
188 | str_first_number_after_first(string, "abc")
189 | str_first_number_after_last(string, "abc")
190 | str_last_number_after_first(string, "abc")
191 | str_last_number_after_last(string, "abc")
192 | }
193 | \seealso{
194 | Other numeric extractors: 
195 | \code{\link{str_extract_numbers}()},
196 | \code{\link{str_nth_number}()},
197 | \code{\link{str_nth_number_before_mth}()}
198 | }
199 | \concept{numeric extractors}
200 | 


--------------------------------------------------------------------------------
/man/str_nth_number_before_mth.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/num-before.R
  3 | \name{str_nth_number_before_mth}
  4 | \alias{str_nth_number_before_mth}
  5 | \alias{str_nth_number_before_first}
  6 | \alias{str_nth_number_before_last}
  7 | \alias{str_first_number_before_mth}
  8 | \alias{str_last_number_before_mth}
  9 | \alias{str_first_number_before_first}
 10 | \alias{str_first_number_before_last}
 11 | \alias{str_last_number_before_first}
 12 | \alias{str_last_number_before_last}
 13 | \title{Find the \code{n}th number before the \code{m}th occurrence of a pattern.}
 14 | \usage{
 15 | str_nth_number_before_mth(
 16 |   string,
 17 |   pattern,
 18 |   n,
 19 |   m,
 20 |   decimals = FALSE,
 21 |   leading_decimals = decimals,
 22 |   negs = FALSE,
 23 |   sci = FALSE,
 24 |   big_mark = "",
 25 |   leave_as_string = FALSE,
 26 |   commas = FALSE
 27 | )
 28 | 
 29 | str_nth_number_before_first(
 30 |   string,
 31 |   pattern,
 32 |   n,
 33 |   decimals = FALSE,
 34 |   leading_decimals = decimals,
 35 |   negs = FALSE,
 36 |   sci = FALSE,
 37 |   big_mark = "",
 38 |   leave_as_string = FALSE,
 39 |   commas = FALSE
 40 | )
 41 | 
 42 | str_nth_number_before_last(
 43 |   string,
 44 |   pattern,
 45 |   n,
 46 |   decimals = FALSE,
 47 |   leading_decimals = decimals,
 48 |   negs = FALSE,
 49 |   sci = FALSE,
 50 |   big_mark = "",
 51 |   leave_as_string = FALSE,
 52 |   commas = FALSE
 53 | )
 54 | 
 55 | str_first_number_before_mth(
 56 |   string,
 57 |   pattern,
 58 |   m,
 59 |   decimals = FALSE,
 60 |   leading_decimals = decimals,
 61 |   negs = FALSE,
 62 |   sci = FALSE,
 63 |   big_mark = "",
 64 |   leave_as_string = FALSE,
 65 |   commas = FALSE
 66 | )
 67 | 
 68 | str_last_number_before_mth(
 69 |   string,
 70 |   pattern,
 71 |   m,
 72 |   decimals = FALSE,
 73 |   leading_decimals = decimals,
 74 |   negs = FALSE,
 75 |   sci = FALSE,
 76 |   big_mark = "",
 77 |   leave_as_string = FALSE,
 78 |   commas = FALSE
 79 | )
 80 | 
 81 | str_first_number_before_first(
 82 |   string,
 83 |   pattern,
 84 |   decimals = FALSE,
 85 |   leading_decimals = decimals,
 86 |   negs = FALSE,
 87 |   sci = FALSE,
 88 |   big_mark = "",
 89 |   leave_as_string = FALSE,
 90 |   commas = FALSE
 91 | )
 92 | 
 93 | str_first_number_before_last(
 94 |   string,
 95 |   pattern,
 96 |   decimals = FALSE,
 97 |   leading_decimals = decimals,
 98 |   negs = FALSE,
 99 |   sci = FALSE,
100 |   big_mark = "",
101 |   leave_as_string = FALSE,
102 |   commas = FALSE
103 | )
104 | 
105 | str_last_number_before_first(
106 |   string,
107 |   pattern,
108 |   decimals = FALSE,
109 |   leading_decimals = decimals,
110 |   negs = FALSE,
111 |   sci = FALSE,
112 |   big_mark = "",
113 |   leave_as_string = FALSE,
114 |   commas = FALSE
115 | )
116 | 
117 | str_last_number_before_last(
118 |   string,
119 |   pattern,
120 |   decimals = FALSE,
121 |   leading_decimals = decimals,
122 |   negs = FALSE,
123 |   sci = FALSE,
124 |   big_mark = "",
125 |   leave_as_string = FALSE,
126 |   commas = FALSE
127 | )
128 | }
129 | \arguments{
130 | \item{string}{A character vector.}
131 | 
132 | \item{pattern}{The pattern to look for.
133 | 
134 | The default interpretation is a regular expression, as described in
135 | \link[stringi:about_search_regex]{stringi::about_search_regex}.
136 | 
137 | To match without a regular expression (i.e. as a human would), use
138 | \link[stringr:modifiers]{coll()}. For details see \code{\link[stringr:modifiers]{stringr::regex()}}.}
139 | 
140 | \item{n, m}{Vectors of integerish values. Must be either length 1 or have
141 | length equal to the length of \code{string}. Negative indices count from the
142 | back: while \code{1} and \code{2} correspond to first and second, \code{-1} and \code{-2}
143 | correspond to last and second-last. \code{0} will return \code{NA}.}
144 | 
145 | \item{decimals}{Do you want to include the possibility of decimal numbers
146 | (\code{TRUE}) or not (\code{FALSE}, the default).}
147 | 
148 | \item{leading_decimals}{Do you want to allow a leading decimal point to be
149 | the start of a number?}
150 | 
151 | \item{negs}{Do you want to allow negative numbers? Note that double negatives
152 | are not handled here (see the examples).}
153 | 
154 | \item{sci}{Make the search aware of scientific notation e.g. 2e3 is the same
155 | as 2000.}
156 | 
157 | \item{big_mark}{A character. Allow this character to be used as a thousands
158 | separator. This character will be removed from between digits before they
159 | are converted to numeric. You may specify many at once by pasting them
160 | together e.g. \code{big_mark = ",_"} will allow both commas and underscores.
161 | Internally, this will be used inside a \verb{[]} regex block so e.g. \code{"a-z"}
162 | will behave differently to \code{"az-"}. Most common separators (commas, spaces,
163 | underscores) should work fine.}
164 | 
165 | \item{leave_as_string}{Do you want to return the number as a string (\code{TRUE})
166 | or as numeric (\code{FALSE}, the default)?}
167 | 
168 | \item{commas}{Deprecated. Use \code{big_mark} instead.}
169 | }
170 | \value{
171 | A numeric or character vector.
172 | }
173 | \description{
174 | Given a string, a pattern and integers \code{n} and \code{m}, find the \code{n}th
175 | number that comes before the \code{m}th occurrence of the pattern.
176 | }
177 | \examples{
178 | string <- c(
179 |   "abc1abc2abc3abc4def5abc6abc7abc8abc9",
180 |   "abc1def2ghi3abc4def5ghi6abc7def8ghi9"
181 | )
182 | str_nth_number_before_mth(string, "def", 1, 1)
183 | str_nth_number_before_mth(string, "abc", 2, 3)
184 | str_nth_number_before_first(string, "def", 2)
185 | str_nth_number_before_last(string, "def", -1)
186 | str_first_number_before_mth(string, "abc", 2)
187 | str_last_number_before_mth(string, "def", 1)
188 | str_first_number_before_first(string, "def")
189 | str_first_number_before_last(string, "def")
190 | str_last_number_before_first(string, "def")
191 | str_last_number_before_last(string, "def")
192 | }
193 | \seealso{
194 | Other numeric extractors: 
195 | \code{\link{str_extract_numbers}()},
196 | \code{\link{str_nth_number}()},
197 | \code{\link{str_nth_number_after_mth}()}
198 | }
199 | \concept{numeric extractors}
200 | 


--------------------------------------------------------------------------------
/man/str_paste_elems.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/elem.R
 3 | \name{str_paste_elems}
 4 | \alias{str_paste_elems}
 5 | \title{Extract single elements of a string and paste them together.}
 6 | \usage{
 7 | str_paste_elems(string, indices, sep = "")
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | 
12 | \item{indices}{A vector of integerish values. Negative indexing is allowed as
13 | in \code{\link[stringr:str_sub]{stringr::str_sub()}}.}
14 | 
15 | \item{sep}{A string. The separator for pasting \code{string} elements together.}
16 | }
17 | \value{
18 | A character vector.
19 | }
20 | \description{
21 | This is a quick way around doing a call to \code{\link[=str_elems]{str_elems()}} followed by a call
22 | of \code{apply(..., paste)}.
23 | }
24 | \details{
25 | Elements that don't exist e.g. element 5 of \code{"abc"} are ignored.
26 | }
27 | \examples{
28 | string <- c("abc", "def", "ghi", "vwxyz")
29 | str_paste_elems(string, 1:2)
30 | str_paste_elems(string, c(1, 2, 3, 4, -1))
31 | str_paste_elems("abc", c(1, 5, 55, 43, 3))
32 | }
33 | \seealso{
34 | Other single element extractors: 
35 | \code{\link{str_elem}()},
36 | \code{\link{str_elems}()}
37 | }
38 | \concept{single element extractors}
39 | 


--------------------------------------------------------------------------------
/man/str_remove_quoted.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/remove.R
 3 | \name{str_remove_quoted}
 4 | \alias{str_remove_quoted}
 5 | \title{Remove the quoted parts of a string.}
 6 | \usage{
 7 | str_remove_quoted(string)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | }
12 | \value{
13 | A character vector.
14 | }
15 | \description{
16 | If any parts of a string are quoted (between quotation marks), remove those
17 | parts of the string, including the quotes. Run the examples and you'll know
18 | exactly how this function works.
19 | }
20 | \examples{
21 | string <- "\"abc\"67a\'dk\'f"
22 | cat(string)
23 | str_remove_quoted(string)
24 | }
25 | \seealso{
26 | Other removers: 
27 | \code{\link{str_singleize}()},
28 | \code{\link{str_trim_anything}()}
29 | }
30 | \concept{removers}
31 | 


--------------------------------------------------------------------------------
/man/str_singleize.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/singleize.R
 3 | \name{str_singleize}
 4 | \alias{str_singleize}
 5 | \title{Remove back-to-back duplicates of a pattern in a string.}
 6 | \usage{
 7 | str_singleize(string, pattern)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | 
12 | \item{pattern}{The pattern to look for.
13 | 
14 | The default interpretation is a regular expression, as described in
15 | \link[stringi:about_search_regex]{stringi::about_search_regex}.
16 | 
17 | To match without a regular expression (i.e. as a human would), use
18 | \link[stringr:modifiers]{coll()}. For details see \code{\link[stringr:modifiers]{stringr::regex()}}.}
19 | }
20 | \value{
21 | A character vector.
22 | }
23 | \description{
24 | If a string contains a given pattern duplicated back-to-back a number of
25 | times, remove that duplication, leaving the pattern appearing once in that
26 | position (works if the pattern is duplicated in different parts of a string,
27 | removing all instances of duplication). This is vectorized over string and
28 | pattern.
29 | }
30 | \examples{
31 | str_singleize("abc//def", "/")
32 | str_singleize("abababcabab", "ab")
33 | str_singleize(c("abab", "cdcd"), "cd")
34 | str_singleize(c("abab", "cdcd"), c("ab", "cd"))
35 | }
36 | \seealso{
37 | Other removers: 
38 | \code{\link{str_remove_quoted}()},
39 | \code{\link{str_trim_anything}()}
40 | }
41 | \concept{removers}
42 | 


--------------------------------------------------------------------------------
/man/str_split_by_numbers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/split-by-nums.R
 3 | \name{str_split_by_numbers}
 4 | \alias{str_split_by_numbers}
 5 | \title{Split a string by its numeric characters.}
 6 | \usage{
 7 | str_split_by_numbers(
 8 |   string,
 9 |   decimals = FALSE,
10 |   leading_decimals = FALSE,
11 |   negs = FALSE,
12 |   sci = FALSE,
13 |   big_mark = "",
14 |   commas = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{string}{A character vector.}
19 | 
20 | \item{decimals}{Do you want to include the possibility of decimal numbers
21 | (\code{TRUE}) or not (\code{FALSE}, the default).}
22 | 
23 | \item{leading_decimals}{Do you want to allow a leading decimal point to be
24 | the start of a number?}
25 | 
26 | \item{negs}{Do you want to allow negative numbers? Note that double negatives
27 | are not handled here (see the examples).}
28 | 
29 | \item{sci}{Make the search aware of scientific notation e.g. 2e3 is the same
30 | as 2000.}
31 | 
32 | \item{big_mark}{A character. Allow this character to be used as a thousands
33 | separator. This character will be removed from between digits before they
34 | are converted to numeric. You may specify many at once by pasting them
35 | together e.g. \code{big_mark = ",_"} will allow both commas and underscores.
36 | Internally, this will be used inside a \verb{[]} regex block so e.g. \code{"a-z"}
37 | will behave differently to \code{"az-"}. Most common separators (commas, spaces,
38 | underscores) should work fine.}
39 | 
40 | \item{commas}{Deprecated. Use \code{big_mark} instead.}
41 | }
42 | \value{
43 | A list of character vectors.
44 | }
45 | \description{
46 | Break a string wherever you go from a numeric character to a non-numeric or
47 | vice-versa. Keep the whole string, just split it up. Vectorised over
48 | \code{string}.
49 | }
50 | \examples{
51 | str_split_by_numbers(c("abc123def456.789gh", "a1b2c344"))
52 | str_split_by_numbers("abc123def456.789gh", decimals = TRUE)
53 | str_split_by_numbers(c("22", "1.2.3"), decimals = TRUE)
54 | }
55 | \seealso{
56 | Other splitters: 
57 | \code{\link{str_split_camel_case}()}
58 | }
59 | \concept{splitters}
60 | 


--------------------------------------------------------------------------------
/man/str_split_camel_case.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/camel-case.R
 3 | \name{str_split_camel_case}
 4 | \alias{str_split_camel_case}
 5 | \title{Split a string based on CamelCase.}
 6 | \usage{
 7 | str_split_camel_case(string, lower = FALSE)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | 
12 | \item{lower}{Do you want the output to be all lower case (or as is)?}
13 | }
14 | \value{
15 | A list of character vectors, one list element for each element of
16 | \code{string}.
17 | }
18 | \description{
19 | Vectorized over \code{string}.
20 | }
21 | \examples{
22 | str_split_camel_case(c("RoryNolan", "NaomiFlagg", "DepartmentOfSillyHats"))
23 | str_split_camel_case(c("RoryNolan", "NaomiFlagg", "DepartmentOfSillyHats"),
24 |   lower = TRUE
25 | )
26 | }
27 | \references{
28 | Adapted from Ramnath Vaidyanathan's answer at
29 | http://stackoverflow.com/questions/8406974/splitting-camelcase-in-r.
30 | }
31 | \seealso{
32 | Other splitters: 
33 | \code{\link{str_split_by_numbers}()}
34 | }
35 | \concept{splitters}
36 | 


--------------------------------------------------------------------------------
/man/str_to_vec.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/to-vec.R
 3 | \name{str_to_vec}
 4 | \alias{str_to_vec}
 5 | \title{Convert a string to a vector of characters}
 6 | \usage{
 7 | str_to_vec(string)
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | }
12 | \value{
13 | A character vector.
14 | }
15 | \description{
16 | Go from a string to a vector whose \eqn{i}th element is the \eqn{i}th
17 | character in the string.
18 | }
19 | \examples{
20 | str_to_vec("abcdef")
21 | }
22 | \concept{converters}
23 | 


--------------------------------------------------------------------------------
/man/str_trim_anything.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/trim.R
 3 | \name{str_trim_anything}
 4 | \alias{str_trim_anything}
 5 | \title{Trim something other than whitespace}
 6 | \usage{
 7 | str_trim_anything(string, pattern, side = "both")
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector.}
11 | 
12 | \item{pattern}{The pattern to look for.
13 | 
14 | The default interpretation is a regular expression, as described in
15 | \link[stringi:about_search_regex]{stringi::about_search_regex}.
16 | 
17 | To match without a regular expression (i.e. as a human would), use
18 | \link[stringr:modifiers]{coll()}. For details see \code{\link[stringr:modifiers]{stringr::regex()}}.}
19 | 
20 | \item{side}{Which side do you want to trim from? \code{"both"} is the
21 | default, but you can also have just either \code{"left"} or \code{"right"}
22 | (or optionally the shortened \code{"b"}, \code{"l"} and \code{"r"}).}
23 | }
24 | \value{
25 | A string.
26 | }
27 | \description{
28 | The \code{stringi} and \code{stringr} packages let you trim whitespace, but
29 | what if you want to trim something else from either (or both) side(s) of a
30 | string? This function lets you select which pattern to trim and from which
31 | side(s).
32 | }
33 | \examples{
34 | str_trim_anything("..abcd.", ".", "left")
35 | str_trim_anything("..abcd.", coll("."), "left")
36 | str_trim_anything("-ghi--", "-", "both")
37 | str_trim_anything("-ghi--", "-")
38 | str_trim_anything("-ghi--", "-", "right")
39 | str_trim_anything("-ghi--", "--")
40 | str_trim_anything("-ghi--", "i-+")
41 | }
42 | \seealso{
43 | Other removers: 
44 | \code{\link{str_remove_quoted}()},
45 | \code{\link{str_singleize}()}
46 | }
47 | \concept{removers}
48 | 


--------------------------------------------------------------------------------
/man/strex.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/strex-package.R
 3 | \docType{package}
 4 | \name{strex}
 5 | \alias{strex}
 6 | \alias{strex-package}
 7 | \title{\code{strex}: extra string manipulation functions}
 8 | \description{
 9 | There are some things that I wish were easier with the \code{stringr} or \code{stringi}
10 | packages. The foremost of these is the extraction of numbers from strings.
11 | \code{stringr} makes you figure out the regex for yourself; \code{strex} takes care of
12 | this for you. There are many more useful functionalities in \code{strex}. In
13 | particular, there's a \code{match_arg()} function which is more flexible than the
14 | base \code{match.arg()}. Contributions to this package are encouraged: it is
15 | intended as a miscellany of string manipulation functions which cannot be
16 | found in \code{stringi} or \code{stringr}.
17 | }
18 | \references{
19 | Rory Nolan and Sergi Padilla-Parra (2017). filesstrings: An R
20 | package for file and string manipulation. The Journal of Open Source
21 | Software, 2(14).  \doi{10.21105/joss.00260}.
22 | }
23 | \seealso{
24 | Useful links:
25 | \itemize{
26 |   \item \url{https://rorynolan.github.io/strex/}
27 |   \item \url{https://github.com/rorynolan/strex}
28 |   \item Report bugs at \url{https://github.com/rorynolan/strex/issues}
29 | }
30 | 
31 | }
32 | \author{
33 | \strong{Maintainer}: Rory Nolan \email{rorynoolan@gmail.com} (\href{https://orcid.org/0000-0002-5239-4043}{ORCID})
34 | 
35 | }
36 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rorynolan/strex/84401bc145dbc63670c27c3c91376546117724ed/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rorynolan/strex/84401bc145dbc63670c27c3c91376546117724ed/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rorynolan/strex/84401bc145dbc63670c27c3c91376546117724ed/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rorynolan/strex/84401bc145dbc63670c27c3c91376546117724ed/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rorynolan/strex/84401bc145dbc63670c27c3c91376546117724ed/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rorynolan/strex/84401bc145dbc63670c27c3c91376546117724ed/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rorynolan/strex/84401bc145dbc63670c27c3c91376546117724ed/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 | 


--------------------------------------------------------------------------------
/src/arg-match.c:
--------------------------------------------------------------------------------
 1 | #include <stdbool.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | 
 5 | #include <Rinternals.h>
 6 | 
 7 | 
 8 | bool C_is_prefix(const char *full, const char *pre) {
 9 |   return !strncmp(full, pre, strlen(pre));  // true iff `full` starts with `pre`
10 | }
11 | 
12 | // Count the true entries of `x` (length `x_len`). `*first_true` receives the
13 | // 0-based index of the first true entry, or -1 when there is none.
14 | R_xlen_t C_count_if(bool* x, R_xlen_t x_len, R_xlen_t* first_true) {
15 |   R_xlen_t n_true = 0;
16 |   *first_true = -1;
17 |   for (R_xlen_t pos = 0; pos != x_len; ++pos) {
18 |     if (!x[pos]) continue;
19 |     if (n_true++ == 0) *first_true = pos;  // only the first hit is recorded
20 |   }
21 |   return n_true;
22 | }
23 | 
24 | // Prefix-match `arg` against `choices`: returns the 1-based index of the only
25 | // choice starting with `arg`; with several such choices, the first one equal
26 | // to `arg`, else -2 (ambiguous); -1 if no choice starts with `arg`.
27 | // Single pass with no heap buffer, so nothing can leak if STRING_ELT() raises
28 | // an R error and there is no allocation result that needs checking.
29 | int C_match_arg_index1(const char *arg, SEXP choices) {
30 |   R_xlen_t n_choices = Rf_xlength(choices);
31 |   R_xlen_t n_matches = 0, first_match = -1, first_exact = -1;
32 |   for (R_xlen_t i = 0; i != n_choices; ++i) {
33 |     const char *choice = CHAR(STRING_ELT(choices, i));
34 |     if (!C_is_prefix(choice, arg)) continue;
35 |     if (n_matches++ == 0) first_match = i;
36 |     // Any choice equal to `arg` also has `arg` as a prefix, so only prefix
37 |     // matches need testing for exact equality.
38 |     if (first_exact < 0 && strcmp(arg, choice) == 0) first_exact = i;
39 |   }
40 |   if (n_matches == 0) return -1;  // no match
41 |   if (n_matches == 1) return (int) (first_match + 1);
42 |   if (first_exact >= 0) return (int) (first_exact + 1);  // exact match wins
43 |   return -2;  // ambiguity
44 | }
45 | 
46 | SEXP C_match_arg_index(SEXP arg, SEXP choices) {
47 |   // One C_match_arg_index1() result per element of `arg`, as an integer vector.
48 |   const R_xlen_t n_args = Rf_xlength(arg);
49 |   SEXP codes = PROTECT(Rf_allocVector(INTSXP, n_args));
50 |   for (R_xlen_t k = 0; k != n_args; ++k)
51 |     INTEGER(codes)[k] = C_match_arg_index1(CHAR(STRING_ELT(arg, k)), choices);
52 |   UNPROTECT(1);
53 |   return codes;
54 | }
55 | 


--------------------------------------------------------------------------------
/src/detect.c:
--------------------------------------------------------------------------------
 1 | #include "stringi-imports.h"
 2 | 
 3 | // Call stringi's C_stri_detect_coll (looked up once via R_GetCCallable, then
 4 | // cached) on `string` and `pattern`, passing FALSE, -1 and R_NilValue as the
 5 | // last three arguments (presumably negate, max_count, options -- confirm).
 6 | SEXP C_stringi_detect_coll(SEXP string, SEXP pattern) {
 7 |   typedef SEXP (*detect_fn)(SEXP, SEXP, SEXP, SEXP, SEXP);
 8 |   static detect_fn fun = NULL;
 9 |   if (fun == NULL)
10 |     fun = (detect_fn) R_GetCCallable("stringi", "C_stri_detect_coll");
11 |   SEXP falsesxp = PROTECT(Rf_ScalarLogical(FALSE));
12 |   SEXP minusonesxp = PROTECT(Rf_ScalarInteger(-1));  // Rf_ name: R_NO_REMAP-safe
13 |   SEXP out = PROTECT(fun(string, pattern, falsesxp, minusonesxp, R_NilValue));
14 |   UNPROTECT(3);
15 |   return out;
16 | }
17 | 
18 | // Call stringi's C_stri_detect_fixed (looked up once via R_GetCCallable, then
19 | // cached) on `string` and `pattern`, passing FALSE, -1 and R_NilValue as the
20 | // last three arguments (presumably negate, max_count, options -- confirm).
21 | SEXP C_stringi_detect_fixed(SEXP string, SEXP pattern) {
22 |   typedef SEXP (*detect_fn)(SEXP, SEXP, SEXP, SEXP, SEXP);
23 |   static detect_fn fun = NULL;
24 |   if (fun == NULL)
25 |     fun = (detect_fn) R_GetCCallable("stringi", "C_stri_detect_fixed");
26 |   SEXP falsesxp = PROTECT(Rf_ScalarLogical(FALSE));
27 |   SEXP minusonesxp = PROTECT(Rf_ScalarInteger(-1));  // Rf_ name: R_NO_REMAP-safe
28 |   SEXP out = PROTECT(fun(string, pattern, falsesxp, minusonesxp, R_NilValue));
29 |   UNPROTECT(3);
30 |   return out;
31 | }
32 | 
33 | // One C_stringi_detect_coll() result per element of `pattern`, as a list.
34 | SEXP C_str_detect_many_coll(SEXP string, SEXP pattern) {
35 |   const R_xlen_t n_patterns = Rf_xlength(pattern);
36 |   SEXP out = PROTECT(Rf_allocVector(VECSXP, n_patterns));
37 |   for (R_xlen_t i = 0; i != n_patterns; ++i) {
38 |     SEXP pattern_i = PROTECT(Rf_ScalarString(STRING_ELT(pattern, i)));
39 |     SET_VECTOR_ELT(out, i, C_stringi_detect_coll(string, pattern_i));
40 |     UNPROTECT(1);  // pattern_i; the result is now reachable from `out`
41 |   }
42 |   UNPROTECT(1);
43 |   return out;
44 | }
45 | 
46 | // One C_stringi_detect_fixed() result per element of `pattern`, as a list.
47 | SEXP C_str_detect_many_fixed(SEXP string, SEXP pattern) {
48 |   const R_xlen_t n_patterns = Rf_xlength(pattern);
49 |   SEXP out = PROTECT(Rf_allocVector(VECSXP, n_patterns));
50 |   for (R_xlen_t i = 0; i != n_patterns; ++i) {
51 |     SEXP pattern_i = PROTECT(Rf_ScalarString(STRING_ELT(pattern, i)));
52 |     SET_VECTOR_ELT(out, i, C_stringi_detect_fixed(string, pattern_i));
53 |     UNPROTECT(1);  // pattern_i; the result is now reachable from `out`
54 |   }
55 |   UNPROTECT(1);
56 |   return out;
57 | }
58 | 


--------------------------------------------------------------------------------
/src/fullocate.c:
--------------------------------------------------------------------------------
  1 | #include <Rinternals.h>
  2 | 
  3 | #include "list-utils.h"
  4 | #include "pairlist-utils.h"
  5 | 
  6 | // Build the length-2 integer vector (first, second).
  7 | SEXP C_make_len2_int_vec(int first, int second) {
  8 |   SEXP pair = PROTECT(Rf_allocVector(INTSXP, 2));
  9 |   INTEGER(pair)[0] = first;
 10 |   INTEGER(pair)[1] = second;
 11 |   UNPROTECT(1);
 12 |   return pair;
 13 | }
 14 | 
 15 | SEXP C_fullocate(SEXP int_mat, int start, int end) {  // add rows for the gaps before, between and after the rows of a 2-column integer matrix, then rbind everything
 16 |   int nr = Rf_nrows(int_mat), *int_mat_int = INTEGER(int_mat);  // NOTE(review): element 0 is read below, so nr >= 1 is assumed -- confirm callers
 17 |   int last, row_num;  // row_num will be 1-indexed
 18 |   SEXP prlst0car;
 19 |   if (start >= int_mat_int[0]) {  // no gap before the first row: it opens the output
 20 |     prlst0car = PROTECT(C_int_mat_nth_row_nrnc(int_mat_int, nr, 2, 1));  // presumably row 1 of the nr x 2 matrix -- helper not visible here
 21 |     last = int_mat_int[nr];  // column 2 of row 1 (column-major storage with nr rows)
 22 |     row_num = 2;
 23 |   } else {  // leading gap: open with (start, first row's start - 1)
 24 |     prlst0car = PROTECT(C_make_len2_int_vec(start, int_mat_int[0] - 1));
 25 |     last = int_mat_int[0] - 1;
 26 |     row_num = 1;
 27 |   }
 28 |   SEXP prlst = PROTECT(Rf_list1(prlst0car));  // pairlist of length-2 vectors, one per output row
 29 |   SEXP prlst_tail = prlst;
 30 |   int prlst_len = 1;
 31 |   while (row_num <= nr) {
 32 |     SEXP row = PROTECT(C_int_mat_nth_row_nrnc(int_mat_int, nr, 2, row_num));
 33 |     int *row_int = INTEGER(row);
 34 |     if (row_int[0] == last + 1) {  // row starts right after the previous end: append it as is
 35 |       SEXP next = PROTECT(Rf_list1(row));
 36 |       prlst_tail = SETCDR(prlst_tail, next);  // link the node and make it the new tail
 37 |       last = row_int[1];
 38 |       UNPROTECT(1);
 39 |       ++row_num;
 40 |     } else {  // gap: append (last + 1, row start - 1); row_num is not advanced, so this row is examined again on the next pass
 41 |       SEXP next_car = PROTECT(C_make_len2_int_vec(last + 1, row_int[0] - 1));
 42 |       SEXP next = PROTECT(Rf_list1(next_car));
 43 |       prlst_tail = SETCDR(prlst_tail, next);
 44 |       last = row_int[0] - 1;
 45 |       UNPROTECT(2);
 46 |     }
 47 |     UNPROTECT(1);  // row
 48 |     ++prlst_len;  // exactly one node is appended per pass
 49 |   }
 50 |   if (INTEGER(CAR(prlst_tail))[1] < end) {  // trailing gap: append (last + 1, end)
 51 |     SEXP next_car = PROTECT(C_make_len2_int_vec(last + 1, end));
 52 |     SEXP next = PROTECT(Rf_list1(next_car));
 53 |     SETCDR(prlst_tail, next);
 54 |     UNPROTECT(2);
 55 |     ++prlst_len;
 56 |   }
 57 |   SEXP out = PROTECT(C_int_prlst_rbind(prlst, prlst_len));
 58 |   UNPROTECT(3);  // prlst0car, prlst, out
 59 |   return out;
 60 | }
 61 | 
 62 | // Apply C_fullocate() to every matrix in `int_mat_lst`, returning the results
 63 | // as a list. A length-1 `start` or `end` is reused for every matrix; otherwise
 64 | // element i is paired with matrix i.
 65 | SEXP C_lst_fullocate(SEXP int_mat_lst, SEXP start, SEXP end) {
 66 |   const int *starts = INTEGER(start), *ends = INTEGER(end);
 67 |   const R_xlen_t n_mats = Rf_xlength(int_mat_lst);
 68 |   // A stride of 0 keeps re-reading element 0 of a length-1 vector.
 69 |   const R_xlen_t start_stride = (Rf_xlength(start) == 1) ? 0 : 1;
 70 |   const R_xlen_t end_stride = (Rf_xlength(end) == 1) ? 0 : 1;
 71 |   SEXP out = PROTECT(Rf_allocVector(VECSXP, n_mats));
 72 |   for (R_xlen_t i = 0; i != n_mats; ++i) {
 73 |     SEXP filled = PROTECT(C_fullocate(VECTOR_ELT(int_mat_lst, i),
 74 |                                       starts[i * start_stride],
 75 |                                       ends[i * end_stride]));
 76 |     SET_VECTOR_ELT(out, i, filled);
 77 |     UNPROTECT(1);
 78 |   }
 79 |   UNPROTECT(1);
 80 |   return out;
 81 | }
103 | 


--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
 1 | #include <R.h>
 2 | #include <Rinternals.h>
 3 | #include <stdlib.h> // for NULL
 4 | #include <R_ext/Rdynload.h>
 5 | 
 6 | extern SEXP C_chr_lst_nth_elems(SEXP, SEXP);
 7 | extern SEXP C_chr_lst_remove_empties(SEXP);
 8 | extern SEXP C_chr_vec_remove_empties(SEXP);
 9 | extern SEXP C_dbl_lst_nth_elems(SEXP, SEXP);
10 | extern SEXP C_int_lst_cbind(SEXP);
11 | extern SEXP C_int_lst_rbind(SEXP);
12 | extern SEXP C_int_mat_lst_cbind_nth_cols(SEXP, SEXP);
13 | extern SEXP C_int_mat_lst_cbind_nth_rows(SEXP, SEXP);
14 | extern SEXP C_int_mat_lst_nth_cols(SEXP, SEXP);
15 | extern SEXP C_int_mat_lst_nth_rows(SEXP, SEXP);
16 | extern SEXP C_int_mat_lst_rbind_nth_cols(SEXP, SEXP);
17 | extern SEXP C_int_mat_lst_rbind_nth_rows(SEXP, SEXP);
18 | extern SEXP C_int_mat_row_maxs(SEXP);
19 | extern SEXP C_int_vec_all_value(SEXP, SEXP);
20 | extern SEXP C_interleave_chr_lsts(SEXP, SEXP);
21 | extern SEXP C_interleave_chr_vecs(SEXP, SEXP);
22 | extern SEXP C_lst_chr_to_dbl(SEXP, SEXP);
23 | extern SEXP C_lst_elems_common_length(SEXP, SEXP);
24 | extern SEXP C_lst_fullocate(SEXP, SEXP, SEXP);
25 | extern SEXP C_match_arg_index(SEXP, SEXP);
26 | extern SEXP C_str_detect_many_coll(SEXP, SEXP);
27 | extern SEXP C_str_detect_many_fixed(SEXP, SEXP);
28 | extern SEXP C_SXP_int_prlst_cbind(SEXP, SEXP);
29 | extern SEXP C_SXP_int_prlst_rbind(SEXP, SEXP);
30 | 
31 | static const R_CallMethodDef CallEntries[] = {  // one {name, function pointer, argument count} entry per .Call routine
32 |     {"C_chr_lst_nth_elems",          (DL_FUNC) &C_chr_lst_nth_elems,          2},
33 |     {"C_chr_lst_remove_empties",     (DL_FUNC) &C_chr_lst_remove_empties,     1},
34 |     {"C_chr_vec_remove_empties",     (DL_FUNC) &C_chr_vec_remove_empties,     1},
35 |     {"C_dbl_lst_nth_elems",          (DL_FUNC) &C_dbl_lst_nth_elems,          2},
36 |     {"C_int_lst_cbind",              (DL_FUNC) &C_int_lst_cbind,              1},
37 |     {"C_int_lst_rbind",              (DL_FUNC) &C_int_lst_rbind,              1},
38 |     {"C_int_mat_lst_cbind_nth_cols", (DL_FUNC) &C_int_mat_lst_cbind_nth_cols, 2},
39 |     {"C_int_mat_lst_cbind_nth_rows", (DL_FUNC) &C_int_mat_lst_cbind_nth_rows, 2},
40 |     {"C_int_mat_lst_nth_cols",       (DL_FUNC) &C_int_mat_lst_nth_cols,       2},
41 |     {"C_int_mat_lst_nth_rows",       (DL_FUNC) &C_int_mat_lst_nth_rows,       2},
42 |     {"C_int_mat_lst_rbind_nth_cols", (DL_FUNC) &C_int_mat_lst_rbind_nth_cols, 2},
43 |     {"C_int_mat_lst_rbind_nth_rows", (DL_FUNC) &C_int_mat_lst_rbind_nth_rows, 2},
44 |     {"C_int_mat_row_maxs",           (DL_FUNC) &C_int_mat_row_maxs,           1},
45 |     {"C_int_vec_all_value",          (DL_FUNC) &C_int_vec_all_value,          2},
46 |     {"C_interleave_chr_lsts",        (DL_FUNC) &C_interleave_chr_lsts,        2},
47 |     {"C_interleave_chr_vecs",        (DL_FUNC) &C_interleave_chr_vecs,        2},
48 |     {"C_lst_chr_to_dbl",             (DL_FUNC) &C_lst_chr_to_dbl,             2},
49 |     {"C_lst_elems_common_length",    (DL_FUNC) &C_lst_elems_common_length,    2},
50 |     {"C_lst_fullocate",              (DL_FUNC) &C_lst_fullocate,              3},
51 |     {"C_match_arg_index",            (DL_FUNC) &C_match_arg_index,            2},
52 |     {"C_str_detect_many_coll",       (DL_FUNC) &C_str_detect_many_coll,       2},
53 |     {"C_str_detect_many_fixed",      (DL_FUNC) &C_str_detect_many_fixed,      2},
54 |     {"C_SXP_int_prlst_cbind",        (DL_FUNC) &C_SXP_int_prlst_cbind,        2},
55 |     {"C_SXP_int_prlst_rbind",        (DL_FUNC) &C_SXP_int_prlst_rbind,        2},
56 |     {NULL, NULL, 0}  // sentinel entry that terminates the table
57 | };
58 | 
59 | void R_init_strex(DllInfo *dll) {  // run by R when the strex shared library loads
60 |   // Only a .Call table is registered; the other routine tables are NULL.
61 |   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
62 |   R_useDynamicSymbols(dll, FALSE);  // resolve registered entry points only
63 | }
64 | 


--------------------------------------------------------------------------------
/src/interleave.c:
--------------------------------------------------------------------------------
 1 | #include <Rinternals.h>
 2 | 
 3 | 
 4 | //' Interleave two vectors of strings.
 5 | //'
 6 | //' Make a vector of strings where the first element is from `x`, the
 7 | //' second is from `y`, the third is from `x`, the fourth is from
 8 | //' `y`, and so on.
 9 | //'
10 | //' `x` and `y` must be the same length or differ in length only
11 | //' by 1. If `y` is longer, it goes first.
12 | //'
13 | //' @param x,y Character vectors.
14 | //'
15 | //' @return A character vector.
16 | //'
17 | //' @examples
18 | //' interleave_chr_vecs(c("a", "c", "e"), c("b", "d"))
19 | //'
20 | //' @noRd
21 | SEXP C_interleave_chr_vecs(SEXP x, SEXP y) {
22 |   const R_xlen_t x_len = Rf_xlength(x), y_len = Rf_xlength(y);
23 |   const R_xlen_t total = x_len + y_len;
24 |   // The vector that is at least as long (x on ties) supplies the even slots.
25 |   SEXP evens = (x_len >= y_len) ? x : y;
26 |   SEXP odds = (x_len >= y_len) ? y : x;
27 |   SEXP interleaved = PROTECT(Rf_allocVector(STRSXP, total));
28 |   for (R_xlen_t i = 0; i < total; ++i) {
29 |     // Output slot i takes element i / 2 of whichever input owns its parity.
30 |     SEXP src = (i % 2 == 0) ? evens : odds;
31 |     SET_STRING_ELT(interleaved, i, STRING_ELT(src, i / 2));
32 |   }
33 |   UNPROTECT(1);
34 |   return interleaved;
35 | }
51 | 
52 | // Interleave each x[[k]] with the matching y[[k]] using
53 | // C_interleave_chr_vecs(), returning the results as a list as long as `x`.
54 | // `y` is indexed in step with `x`; its length is not checked here.
55 | SEXP C_interleave_chr_lsts(SEXP x, SEXP y) {
56 |   const R_xlen_t n_pairs = Rf_xlength(x);
57 |   SEXP out = PROTECT(Rf_allocVector(VECSXP, n_pairs));
58 |   for (R_xlen_t k = 0; k != n_pairs; ++k) {
59 |     SEXP x_k = VECTOR_ELT(x, k), y_k = VECTOR_ELT(y, k);
60 |     SET_VECTOR_ELT(out, k, C_interleave_chr_vecs(x_k, y_k));
61 |   }
62 |   UNPROTECT(1);
63 |   return out;
64 | }
65 | 


--------------------------------------------------------------------------------
/src/list-utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef LIST_UTILS_H_
 2 | #define LIST_UTILS_H_
 3 | 
 4 | #include <Rinternals.h>
 5 | 
 6 | 
 7 | SEXP C_lst_elems_common_length(SEXP lst, SEXP l);
 8 | SEXP C_chr_to_dbl(SEXP x, int commas);
 9 | SEXP C_lst_chr_to_dbl(SEXP x, SEXP commas);
10 | SEXP C_chr_vec_remove_empties(SEXP x);
11 | SEXP C_chr_lst_nth_elems(SEXP chr_lst, SEXP n);
12 | SEXP C_dbl_lst_nth_elems(SEXP dbl_lst, SEXP n);
13 | SEXP C_int_mat_nth_col(SEXP int_mat, int n);
14 | SEXP C_int_mat_nth_col_nrnc(int *int_mat_int, int nr, int nc, int n);
15 | SEXP C_int_mat_lst_nth_cols(SEXP int_mat_lst, SEXP n);
16 | SEXP C_int_mat_nth_row(SEXP int_mat, int n);
17 | SEXP C_int_mat_nth_row_nrnc(int *int_mat_int, int nr, int nc, int n);
18 | SEXP C_int_mat_lst_nth_rows(SEXP int_mat_lst, SEXP n);
19 | SEXP C_int_lst_cbind(SEXP int_lst);
20 | SEXP C_int_lst_rbind(SEXP int_lst);
21 | SEXP C_int_prlst_cbind(SEXP int_prlst, int int_prlst_len);  // also declared in pairlist-utils.h; defined in pairlist-utils.c
22 | SEXP C_int_prlst_rbind(SEXP int_prlst, int int_prlst_len);  // also declared in pairlist-utils.h; defined in pairlist-utils.c
23 | SEXP C_int_mat_lst_cbind_nth_cols(SEXP int_mat_lst, SEXP n);
24 | SEXP C_int_mat_lst_cbind_nth_rows(SEXP int_mat_lst, SEXP n);
25 | SEXP C_int_mat_lst_rbind_nth_cols(SEXP int_mat_lst, SEXP n);
26 | SEXP C_int_mat_lst_rbind_nth_rows(SEXP int_mat_lst, SEXP n);
27 | 
28 | 
29 | #endif  // LIST_UTILS_H_
30 | 


--------------------------------------------------------------------------------
/src/matrix-utils.c:
--------------------------------------------------------------------------------
 1 | #include <Rinternals.h>
 2 | 
 3 | // Row-wise maxima of an integer matrix, returned as an integer vector with
 4 | // one entry per row. Cells are compared as plain ints.
 5 | SEXP C_int_mat_row_maxs(SEXP int_mat) {
 6 |   const int n_rows = Rf_nrows(int_mat), n_cols = Rf_ncols(int_mat);
 7 |   const int *cells = INTEGER(int_mat);
 8 |   SEXP maxs = PROTECT(Rf_allocVector(INTSXP, n_rows));
 9 |   for (int r = 0; r != n_rows; ++r) {
10 |     int best = cells[r];  // column 0 of row r (column-major storage)
11 |     for (int c = 1; c < n_cols; ++c)
12 |       if (cells[c * n_rows + r] > best) best = cells[c * n_rows + r];
13 |     INTEGER(maxs)[r] = best;
14 |   }
15 |   UNPROTECT(1);
16 |   return maxs;
17 | }
18 | 


--------------------------------------------------------------------------------
/src/pairlist-utils.c:
--------------------------------------------------------------------------------
 1 | #include <Rinternals.h>
 2 | 
 3 | // cbind() the first `int_prlst_len` integer vectors of pairlist `int_prlst`.
 4 | SEXP C_int_prlst_cbind(SEXP int_prlst, int int_prlst_len) {
 5 |   const int n_rows = Rf_xlength(CAR(int_prlst));  // rows = length of first vector
 6 |   SEXP mat = PROTECT(Rf_allocMatrix(INTSXP, n_rows, int_prlst_len));
 7 |   int *col = INTEGER(mat);  // start of the column currently being filled
 8 |   for (int k = 0; k != int_prlst_len; ++k, col += n_rows) {
 9 |     INTEGER_GET_REGION(CAR(int_prlst), 0, n_rows, col);
10 |     int_prlst = CDR(int_prlst);
11 |   }
12 |   UNPROTECT(1);
13 |   return mat;
14 | }
15 | SEXP C_SXP_int_prlst_cbind(SEXP int_prlst, SEXP prlst_len) {
16 |   // SEXP-argument wrapper: the length is element 0 of integer `prlst_len`.
17 |   return C_int_prlst_cbind(int_prlst, INTEGER(prlst_len)[0]);
18 | }
19 | 
20 | // rbind() the first `int_prlst_len` integer vectors held in pairlist
21 | // `int_prlst`; the column count is the length of the first vector.
22 | SEXP C_int_prlst_rbind(SEXP int_prlst, int int_prlst_len) {
23 |   const int n_cols = Rf_xlength(CAR(int_prlst));
24 |   SEXP mat = PROTECT(Rf_allocMatrix(INTSXP, int_prlst_len, n_cols));
25 |   int *mat_int = INTEGER(mat);
26 |   SEXP node = int_prlst;
27 |   for (int r = 0; r != int_prlst_len; ++r, node = CDR(node)) {
28 |     const int *vec = INTEGER(CAR(node));
29 |     // Column-major target: element c of row r sits at c * (row count) + r.
30 |     for (int c = 0; c != n_cols; ++c)
31 |       mat_int[c * int_prlst_len + r] = vec[c];
32 |   }
33 |   UNPROTECT(1);
34 |   return mat;
35 | }
36 | SEXP C_SXP_int_prlst_rbind(SEXP int_prlst, SEXP prlst_len) {
37 |   // SEXP-argument wrapper: the length is element 0 of integer `prlst_len`.
38 |   return C_int_prlst_rbind(int_prlst, INTEGER(prlst_len)[0]);
39 | }
40 | 


--------------------------------------------------------------------------------
/src/pairlist-utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef PAIRLIST_UTILS_H_
 2 | #define PAIRLIST_UTILS_H_
 3 | 
 4 | #include <Rinternals.h>
 5 | 
 6 | SEXP C_int_prlst_cbind(SEXP int_prlst, int int_prlst_len);
 7 | SEXP C_SXP_int_prlst_cbind(SEXP int_prlst, SEXP prlst_len);
 8 | SEXP C_int_prlst_rbind(SEXP int_prlst, int int_prlst_len);
 9 | SEXP C_SXP_int_prlst_rbind(SEXP int_prlst, SEXP prlst_len);
10 | 
11 | #endif  // PAIRLIST_UTILS_H_
12 | 


--------------------------------------------------------------------------------
/src/stringi-imports.h:
--------------------------------------------------------------------------------
 1 | #ifndef STRINGI_IMPORTS_H_
 2 | #define STRINGI_IMPORTS_H_
 3 | 
 4 | #include <Rinternals.h>
 5 | 
 6 | 
 7 | SEXP C_stringi_replace_all_regex(SEXP string, SEXP pattern, SEXP replacement);
 8 | SEXP C_stringi_detect_coll(SEXP string, SEXP pattern);
 9 | SEXP C_stringi_detect_fixed(SEXP string, SEXP pattern);
10 | 
11 | 
12 | #endif  // STRINGI_IMPORTS_H_
13 | 


--------------------------------------------------------------------------------
/src/vec-utils.c:
--------------------------------------------------------------------------------
 1 | #include <Rinternals.h>
 2 | 
 3 | // R logical scalar: TRUE when every element of `int_vec` equals the first
 4 | // element of `int_scalar`, FALSE otherwise. An empty `int_vec` gives TRUE.
 5 | SEXP C_int_vec_all_value(SEXP int_vec, SEXP int_scalar) {
 6 |   const R_xlen_t n = Rf_xlength(int_vec);
 7 |   const int *vals = INTEGER(int_vec);
 8 |   const int *target = INTEGER(int_scalar);  // only read inside the loop
 9 |   int all_equal = 1;
10 |   for (R_xlen_t i = 0; i != n && all_equal; ++i) {
11 |     all_equal = (vals[i] == *target);  // loop stops at the first mismatch
12 |   }
13 |   return Rf_ScalarLogical(all_equal);
14 | }
18 | 


--------------------------------------------------------------------------------
/strex.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/tests/spelling.R:
--------------------------------------------------------------------------------
1 | if (requireNamespace("spelling", quietly = TRUE)) {
2 |   # Only runs when the optional spelling package is installed.
3 |   spelling::spell_check_test(
4 |     vignettes = TRUE, error = FALSE, skip_on_cran = TRUE
5 |   )
6 | }
7 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(strex)
3 | # Run the testthat suite for the strex package.
4 | test_check("strex")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/after.md:
--------------------------------------------------------------------------------
1 | # str_after_nth works
2 | 
3 |     If `pattern` and `n` both have length greater than 1, their lengths must be equal.
4 |     x Your `pattern` has length 2.
5 |     x Your `n` has length 3.
6 | 
7 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/alphord.md:
--------------------------------------------------------------------------------
 1 | # str_alphord_nums works
 2 | 
 3 |     The non-number bits of every string must be the same.
 4 |     i The first pair of strings with different non-number bits are strings 1 and 3.
 5 |     x They are 'abc9def55' and 'abc10xyz7'.
 6 | 
 7 | ---
 8 | 
 9 |     It should either be the case that all strings start with numbers or that none of them do.
10 |     x  String number 1 '0abc9def55g' does start with a number whereas string number 2 'abc10def7g0' does not start with a number.
11 | 
12 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/arg-match.md:
--------------------------------------------------------------------------------
 1 | # match_arg() works
 2 | 
 3 |     `arg` must be a prefix of exactly one element of `choices`.
 4 |     x Your `arg`, 'A', is a prefix of two or more elements of `choices`.
 5 |     i The first two of these are 'Apples' and 'Avocados'.
 6 | 
 7 | ---
 8 | 
 9 |     `arg` must be a prefix of exactly one element of `choices`.
10 |     x Your `arg`, 'a', is a prefix of two or more elements of `choices`.
11 |     i The first two of these are 'Apples' and 'Avocados'.
12 | 
13 | ---
14 | 
15 |     `choices` must not have duplicate elements.
16 |     i Since you have set `ignore_case = TRUE`, elements 3 and 7 of your `choices`, ('Bananas' and 'bananas') are effectively duplicates.
17 | 
18 | ---
19 | 
20 |     `choices` must not have duplicate elements.
21 |     * Element 8 of your `choices`, 'Pears', is a duplicate.
22 | 
23 | ---
24 | 
25 |     `y` must be a prefix of exactly one element of `choices`.
26 |     i Your first 50 `choices` are 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50.
27 |     x Your `y`, 'a', is not a prefix of any of your `choices`.
28 | 
29 | ---
30 | 
31 |     You used `match_arg()` without specifying a `choices` argument.
32 |     i The only way to do this is from another function where the `arg` has a default setting. This is the same as `base::match.arg()`.
33 |     i See the man page for `match_arg()`.
34 |     i See the vignette on argument matching: enter `vignette('argument-matching', package = 'strex')` at the R console.
35 | 
36 | ---
37 | 
38 |     You used `match_arg()` without specifying a `choices` argument.
39 |     i The only way to do this is from another function where the `arg` has a default setting. This is the same as `base::match.arg()`.
40 |     i See the man page for `match_arg()`.
41 |     i See the vignette on argument matching: enter `vignette('argument-matching', package = 'strex')` at the R console.
42 | 
43 | ---
44 | 
45 |     You used `match_arg()` without specifying a `choices` argument.
46 |     i The only way to do this is from another function where the `arg` has a default setting. This is the same as `base::match.arg()`.
47 |     i See the man page for `match_arg()`.
48 |     i See the vignette on argument matching: enter `vignette('argument-matching', package = 'strex')` at the R console.
49 | 
50 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/currency.md:
--------------------------------------------------------------------------------
1 | # `str_nth_currency()` works
2 | 
3 |     When `string` has length greater than 1, `n` must either be length 1 or have the same length as `string`.
4 |     x Your `string` has length 3.
5 |     x Your `n` has length 7.
6 | 
7 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/elem.md:
--------------------------------------------------------------------------------
1 | # str_elem() works
2 | 
3 |     When `string` has length greater than 1, `index` must either be length 1 or have the same length as `string`.
4 |     x Your `string` has length 2.
5 |     x Your `index` has length 3.
6 | 
7 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/extract-non-numerics.md:
--------------------------------------------------------------------------------
1 | # str_extract_non_numerics() works
2 | 
3 |     To allow leading decimals, you need to first allow decimals.
4 |     i To allow decimals, use `decimals = TRUE`.
5 | 
6 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/extract-nums.md:
--------------------------------------------------------------------------------
 1 | # str_extract_numbers works
 2 | 
 3 |     `NA`s introduced by ambiguity.
 4 |     i The first such ambiguity is in string number 1 which is 'abc1.2.3'.
 5 |     x The offending part of that string is '.2.3'.
 6 | 
 7 | ---
 8 | 
 9 |     `NA`s introduced by ambiguity.
10 |     i The first such ambiguity is in string number 1 which is 'ab.1.2'.
11 |     x The offending part of that string is '.1.2'.
12 | 
13 | ---
14 | 
15 |     `NA`s introduced by ambiguity.
16 |     i The first such ambiguity is in string number 1 which is 'ab.1.2'.
17 |     x The offending part of that string is '.1.2'.
18 | 
19 | ---
20 | 
21 |     `NA`s introduced by ambiguity.
22 |     i The first such ambiguity is in string number 1 which is 'abc1.2.3'.
23 |     x The offending part of that string is '.2.3'.
24 | 
25 | ---
26 | 
27 |     `NA`s introduced by ambiguity.
28 |     i The first such ambiguity is in string number 1 which is 'ab.1.2'.
29 |     x The offending part of that string is '.1.2'.
30 | 
31 | ---
32 | 
33 |     To allow leading decimals, you need to first allow decimals.
34 |     i To allow decimals, use `decimals = TRUE`.
35 | 
36 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/locate.md:
--------------------------------------------------------------------------------
 1 | # `str_locate_nth()` works
 2 | 
 3 |     When `string` has length greater than 1, `pattern` must either be length 1 or have the same length as `string`.
 4 |     x Your `string` has length 2.
 5 |     x Your `pattern` has length 3.
 6 | 
 7 | ---
 8 | 
 9 |     When `string` has length greater than 1, `n` must either be length 1 or have the same length as `string`.
10 |     x Your `string` has length 2.
11 |     x Your `n` has length 5.
12 | 
13 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/num-after.md:
--------------------------------------------------------------------------------
1 | # `nth_number_after_mth()` works
2 | 
3 |     If `n` and `m` both have length greater than 1, their lengths must be equal.
4 |     x Your `n` has length 2.
5 |     x Your `m` has length 3.
6 | 
7 | 


--------------------------------------------------------------------------------
/tests/testthat/test-after.R:
--------------------------------------------------------------------------------
 1 | test_that("str_after_nth works", { # also holds one str_before_first() check
 2 |   string <- "ab..cd..de..fg..h"
 3 |   expect_equal(str_after_nth(string, "\\.\\.", 3), "fg..h",
 4 |     ignore_attr = TRUE
 5 |   )
 6 |   expect_equal(str_after_first(string, "\\.\\."), "cd..de..fg..h",
 7 |     ignore_attr = TRUE
 8 |   )
 9 |   expect_equal(str_after_last(string, "\\.\\."), "h",
10 |     ignore_attr = TRUE
11 |   )
12 |   expect_equal(str_before_first(string, "e"), "ab..cd..d",
13 |     ignore_attr = TRUE
14 |   )
15 |   string <- c("abc", "xyz.zyx") # bare dot: regex wildcard; coll(): literal
16 |   expect_equal(str_after_first(string, "."), str_sub(string, 2))
17 |   expect_equal(str_after_first(string, coll(".")), c(NA, "zyx"))
18 |   expect_equal(str_after_first(character(), 1:3), character())
19 |   expect_equal(str_after_nth("abc", "b", c(0, 1)), c(NA, "c"))
20 |   string <- "abxxcdxxdexxfgxxh" # holds a single e, so n = 2 yields NA
21 |   expect_equal(str_after_nth(string, "e", 1:2), c("xxfgxxh", NA))
22 |   expect_snapshot_error(str_after_nth(c("a"), c("a", "b"), 1:3)) # 2 vs 3
23 | })
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test-alphord.R:
--------------------------------------------------------------------------------
 1 | test_that("str_alphord_nums works", { # numbers get left-padded with zeros
 2 |   strings <- paste0("abc", 1:12)
 3 |   expect_equal(
 4 |     str_alphord_nums(strings),
 5 |     str_c("abc", c(paste0(0, 1:9), 10:12))
 6 |   )
 7 |   expect_equal(
 8 |     str_alphord_nums(c("01abc9def55", "5abc10def777", "99abc4def4")),
 9 |     c("01abc09def055", "05abc10def777", "99abc04def004")
10 |   )
11 |   expect_equal(
12 |     str_alphord_nums(c("abc9def55", "abc10def7")),
13 |     c("abc09def55", "abc10def07")
14 |   )
15 |   expect_equal(
16 |     str_alphord_nums(c("abc9def55", "abc10def777", "abc4def4")),
17 |     c("abc09def055", "abc10def777", "abc04def004")
18 |   )
19 |   expect_snapshot_error( # non-number parts differ (def vs xyz)
20 |     str_alphord_nums(c("abc9def55", "abc9def5", "abc10xyz7"))
21 |   )
22 |   expect_error( # two numbers in the first string, three in the second
23 |     str_alphord_nums(c("abc9def55", "9abc10def7")),
24 |     "The strings must all have the same number of numbers."
25 |   )
26 |   expect_snapshot_error(str_alphord_nums(c("0abc9def55g", "abc10def7g0")))
27 |   expect_error(
28 |     str_alphord_nums("abc"),
29 |     "Some of the input strings have no numbers in them."
30 |   )
31 |   expect_equal(str_alphord_nums(1:10), c(paste0(0, 1:9), 10)) # integer input
32 |   expect_equal(str_alphord_nums(character()), character())
33 | })
34 | 


--------------------------------------------------------------------------------
/tests/testthat/test-arg-match.R:
--------------------------------------------------------------------------------
 1 | test_that("match_arg() works", { # snapshot calls are matched in order
 2 |   expect_equal(match_arg("ab", c("abcdef", "defgh")), "abcdef")
 3 |   expect_error(match_arg("abcdefg", c("Abcdef", "defg")), "not a prefix of any")
 4 |   expect_equal(
 5 |     match_arg("ab", c("Abcdef", "defgh"), ignore_case = TRUE),
 6 |     "Abcdef"
 7 |   )
 8 |   expect_equal(match_arg("ab", c("xyz", "Abcdef", "defgh"),
 9 |     ignore_case = TRUE, index = TRUE
10 |   ), 2)
11 |   choices <- c("Apples", "Pears", "Bananas", "Oranges")
12 |   expect_equal(match_arg("A", choices), "Apples")
13 |   expect_equal(match_arg("B", choices, index = TRUE), 3)
14 |   expect_equal(
15 |     match_arg(c("b", "a"), choices,
16 |       several_ok = TRUE,
17 |       ignore_case = TRUE
18 |     ),
19 |     c("Bananas", "Apples")
20 |   )
21 |   expect_equal(
22 |     match_arg(c("b", "a"), choices,
23 |       ignore_case = TRUE, index = TRUE,
24 |       several_ok = TRUE
25 |     ),
26 |     c(3, 1)
27 |   )
28 |   choices <- c(choices, "Avocados", "Apricots") # makes the prefix A ambiguous
29 |   expect_snapshot_error(match_arg("A", choices, ignore_case = FALSE))
30 |   x <- "a"
31 |   expect_snapshot_error(match_arg(x, choices, ignore_case = TRUE))
32 |   expect_error(
33 |     match_arg(c("A", "a"), choices),
34 |     str_c(
35 |       "`arg` must have length 1.+",
36 |       ". Your `arg` has length 2.+",
37 |       ". To use an `arg` with length greater than one, use.+",
38 |       "`several_ok = TRUE`."
39 |     )
40 |   )
41 |   choices <- c(choices, "bananas") # duplicate of Bananas when case is ignored
42 |   expect_snapshot_error(match_arg("p", choices, ignore_case = TRUE))
43 |   choices <- c(choices, "Pears") # exact duplicate of element 2
44 |   expect_snapshot_error(match_arg("p", choices, ignore_case = TRUE))
45 |   expect_equal(match_arg("ab", c("ab", "abc")), "ab") # exact match wins
46 |   y <- "a" # the recorded snapshot message refers to this variable as `y`
47 |   expect_snapshot_error(match_arg(y, as.character(1:51)))
48 |   word <- function(w = c("abacus", "baseball", "candy")) {
49 |     match_arg(w)
50 |   }
51 |   expect_equal(word("b"), "baseball")
52 |   expect_equal(word(), "abacus") # no argument: first default choice
53 |   word <- function(w = c("abacus", "baseball", "candy")) {
54 |     match_arg(w, several_ok = TRUE)
55 |   }
56 |   expect_equal(word("c"), "candy")
57 |   expect_equal(word(), c("abacus", "baseball", "candy"))
58 |   word <- function(w = c("abacus", "baseball", "candy")) {
59 |     match_arg(as.character(w), several_ok = TRUE)
60 |   }
61 |   expect_snapshot_error(word()) # the three calls below record the same error
62 |   word <- function(w = 1:3) {
63 |     match_arg(w, several_ok = TRUE)
64 |   }
65 |   expect_snapshot_error(word())
66 |   word <- function(w = c("abacus", "baseball", "candy")) {
67 |     x <- "a"
68 |     match_arg(x, several_ok = TRUE)
69 |   }
70 |   expect_snapshot_error(word())
71 | })
72 | 


--------------------------------------------------------------------------------
/tests/testthat/test-before.R:
--------------------------------------------------------------------------------
 1 | test_that("str_before_last_dot works", { # drops the final dot and what follows
 2 |   expect_equal(str_before_last_dot(c("spreadsheet1.csv", "doc2.doc")),
 3 |     c("spreadsheet1", "doc2"),
 4 |     ignore_attr = TRUE
 5 |   )
 6 | })
 7 | 
 8 | test_that("`str_before_nth()` works", { # negative n counts from the end
 9 |   string <- "ab..cd..de..fg..h"
10 |   expect_equal(str_before_nth(string, "\\.", -3), "ab..cd..de.",
11 |     ignore_attr = TRUE
12 |   )
13 |   expect_equal(str_before_nth(string, ".", -3), "ab..cd..de..fg",
14 |     ignore_attr = TRUE # unescaped dot matches any character
15 |   )
16 |   expect_equal(str_before_nth(rep(string, 2), fixed("."), -3),
17 |     rep("ab..cd..de.", 2),
18 |     ignore_attr = TRUE
19 |   )
20 |   expect_equal(str_before_last(rep(string, 2), fixed(".")),
21 |     rep("ab..cd..de..fg.", 2),
22 |     ignore_attr = TRUE
23 |   )
24 |   expect_equal(str_before_last(character(), 1:3), character())
25 |   string <- "abxxcdxxdexxfgxxh" # holds a single e, so n = 2 yields NA
26 |   expect_equal(str_before_nth(string, "e", 1:2), c("abxxcdxxd", NA))
27 | })
28 | 


--------------------------------------------------------------------------------
/tests/testthat/test-camel-case.R:
--------------------------------------------------------------------------------
 1 | test_that("`str_split_camel_case()` works", {
 2 |   expect_equal(
 3 |     str_split_camel_case(c(
 4 |       "RoryNolan", "NaomiFlagg",
 5 |       "DepartmentOfSillyHats"
 6 |     )),
 7 |     list( # one character vector of words per input string
 8 |       c("Rory", "Nolan"), c("Naomi", "Flagg"),
 9 |       c("Department", "Of", "Silly", "Hats")
10 |     )
11 |   )
12 |   expect_equal(
13 |     str_split_camel_case(
14 |       c(
15 |         "RoryNolan", "NaomiFlagg",
16 |         "DepartmentOfSillyHats"
17 |       ),
18 |       lower = TRUE
19 |     ),
20 |     list( # same words as above, lower-cased to match `lower = TRUE`
21 |       c("Rory", "Nolan"), c("Naomi", "Flagg"),
22 |       c("Department", "Of", "Silly", "Hats")
23 |     ) %>%
24 |       lapply(str_to_lower)
25 |   )
26 |   expect_equal(str_split_camel_case(character()), list()) # empty input
27 | })
28 | 


--------------------------------------------------------------------------------
/tests/testthat/test-can-be-num.R:
--------------------------------------------------------------------------------
1 | test_that("`str_can_be_numeric()` works", {
2 |   expect_true(str_can_be_numeric("3"))
3 |   expect_true(str_can_be_numeric("5 ")) # trailing space is tolerated
4 |   expect_equal(str_can_be_numeric(c("1a", "abc")), rep(FALSE, 2))
5 |   expect_equal(str_can_be_numeric(character()), logical())
6 |   expect_equal(str_can_be_numeric(numeric()), logical()) # empty numeric input
7 | })
8 | 


--------------------------------------------------------------------------------
/tests/testthat/test-currency.R:
--------------------------------------------------------------------------------
  1 | test_that("`str_extract_currencies()` works", { # one row per amount found
  2 |   string <- "35.00 $1.14 abc5 $3.8 77"
  3 |   expect_equal(
  4 |     str_extract_currencies(string),
  5 |     data.frame(
  6 |       string_num = 1, string = string,
  7 |       curr_sym = c("", "$", "c", "$", " "), # character just before the amount
  8 |       amount = c(35, 1.14, 5, 3.8, 77),
  9 |       stringsAsFactors = FALSE
 10 |     )
 11 |   )
 12 |   string <- c(
 13 |     "35.00 $1.14", "abc5 $3.8 77", "-$1.5e6",
 14 |     "over £1,000"
 15 |   )
 16 |   reps <- c(2, 3, 1, 1) # number of amounts expected from each string
 17 |   expect_equal(
 18 |     str_extract_currencies(string),
 19 |     data.frame(
 20 |       string_num = rep(seq_along(string), reps),
 21 |       string = rep(string, reps),
 22 |       curr_sym = c("", "$", "c", "$", " ", "$", "£"),
 23 |       amount = c(35, 1.14, 5, 3.8, 77, -1.5e6, 1000),
 24 |       stringsAsFactors = FALSE
 25 |     )
 26 |   )
 27 |   expect_equal( # empty input gives a zero-row frame with the same columns
 28 |     as.data.frame(str_extract_currencies(character())),
 29 |     data.frame(
 30 |       string_num = integer(), string = character(),
 31 |       curr_sym = character(), amount = numeric(),
 32 |       stringsAsFactors = FALSE
 33 |     )
 34 |   )
 35 | })
 36 | test_that("`str_nth_currency()` works", { # one row per input string
 37 |   string <- c("ab3 13", "$1")
 38 |   expect_equal(
 39 |     str_nth_currency(string, n = 2),
 40 |     data.frame(
 41 |       string_num = seq_along(string), string = string,
 42 |       curr_sym = c(" ", NA), amount = c(13, NA), # no 2nd amount in "$1"
 43 |       stringsAsFactors = FALSE
 44 |     )
 45 |   )
 46 |   string <- c("35.00 $1.14", "abc5 $3.8", "stuff")
 47 |   expect_equal(str_nth_currency(string, c(
 48 |     1,
 49 |     2, 1
 50 |   )), data.frame(
 51 |     string_num = seq_along(string), string = string,
 52 |     curr_sym = c("", "$", NA), amount = c(35, 3.8, NA),
 53 |     stringsAsFactors = FALSE
 54 |   ))
 55 |   string <- c("ab3 13", "$1", "35.00 $1.14", "abc5 $3.8", "stuff")
 56 |   expect_equal(
 57 |     str_nth_currency(string, n = 2),
 58 |     data.frame(
 59 |       string_num = 1:5,
 60 |       string = c("ab3 13", "$1", "35.00 $1.14", "abc5 $3.8", "stuff"),
 61 |       curr_sym = c(" ", NA, "$", "$", NA),
 62 |       amount = c(13, NA, 1.14, 3.8, NA),
 63 |       stringsAsFactors = FALSE
 64 |     )
 65 |   )
 66 |   expect_equal(
 67 |     str_nth_currency(string, c(1, 2, 1, 2, 1)),
 68 |     data.frame(
 69 |       string_num = 1:5,
 70 |       string = c("ab3 13", "$1", "35.00 $1.14", "abc5 $3.8", "stuff"),
 71 |       curr_sym = c("b", NA, "", "$", NA),
 72 |       amount = c(3, NA, 35, 3.8, NA),
 73 |       stringsAsFactors = FALSE
 74 |     )
 75 |   )
 76 |   expect_equal(
 77 |     str_first_currency(string),
 78 |     data.frame(
 79 |       string_num = 1:5,
 80 |       string = c(
 81 |         "ab3 13", "$1", "35.00 $1.14",
 82 |         "abc5 $3.8", "stuff"
 83 |       ),
 84 |       curr_sym = c("b", "$", "", "c", NA),
 85 |       amount = c(
 86 |         3,
 87 |         1, 35, 5, NA
 88 |       ), stringsAsFactors = FALSE
 89 |     )
 90 |   )
 91 |   expect_equal(
 92 |     str_last_currency(string),
 93 |     data.frame(
 94 |       string_num = 1:5, string = c(
 95 |         "ab3 13", "$1", "35.00 $1.14",
 96 |         "abc5 $3.8", "stuff"
 97 |       ), curr_sym = c(" ", "$", "$", "$", NA),
 98 |       amount = c(13, 1, 1.14, 3.8, NA),
 99 |       stringsAsFactors = FALSE
100 |     )
101 |   )
102 |   expect_snapshot_error(str_nth_currency(as.character(1:3), 1:7)) # 3 vs 7
103 |   expect_equal(as.data.frame(str_nth_currency(string, n = -2)),
104 |     data.frame( # negative n counts amounts from the end of each string
105 |       string_num = seq_along(string), string,
106 |       curr_sym = c("b", NA, "", "c", NA),
107 |       amount = c(3, NA, 35, 5, NA),
108 |       stringsAsFactors = FALSE
109 |     ),
110 |     ignore_attr = TRUE
111 |   )
112 |   expect_equal(
113 |     as.data.frame(str_nth_currency(string, c(1, -2, 1, 2, -1))),
114 |     data.frame(
115 |       string_num = seq_along(string), string,
116 |       curr_sym = c("b", NA, "", "$", NA),
117 |       amount = c(3, NA, 35, 3.8, NA),
118 |       stringsAsFactors = FALSE
119 |     )
120 |   )
121 |   expect_equal( # empty input gives a zero-row frame with the same columns
122 |     as.data.frame(str_nth_currency(character(), 1)),
123 |     data.frame(
124 |       string_num = integer(), string = character(),
125 |       curr_sym = character(), amount = numeric(),
126 |       stringsAsFactors = FALSE
127 |     )
128 |   )
129 | })
130 | 


--------------------------------------------------------------------------------
/tests/testthat/test-detect.R:
--------------------------------------------------------------------------------
 1 | test_that("`str_detect_all()` works", { # also covers str_detect_any()
 2 |   expect_equal(str_detect_all("quick brown fox", c("x", "y", "z")), FALSE)
 3 |   expect_equal(str_detect_all(c(".", "-"), "."), c(TRUE, TRUE))
 4 |   expect_equal(str_detect_all(c(".", "-"), coll(".")), c(TRUE, FALSE))
 5 |   expect_equal(
 6 |     str_detect_all(c(".", "-"), coll("."), negate = TRUE),
 7 |     c(FALSE, TRUE)
 8 |   )
 9 |   expect_equal(
10 |     str_detect_all(c(".", "-"), fixed("."), negate = TRUE),
11 |     c(FALSE, TRUE)
12 |   )
13 |   expect_equal(str_detect_all(c(".", "-"), c(".", ":")), c(FALSE, FALSE))
14 |   expect_equal(str_detect_all(c(".", "-"), coll(c(".", ":"))), c(FALSE, FALSE))
15 |   expect_equal(str_detect_any("quick brown fox", c("x", "y", "z")), TRUE)
16 |   expect_equal(str_detect_any(c(".", "-"), "."), c(TRUE, TRUE))
17 |   expect_equal(str_detect_any(c(".", "-"), coll(".")), c(TRUE, FALSE))
18 |   expect_equal(str_detect_any(c(".", "-"), fixed(".")), c(TRUE, FALSE))
19 |   expect_equal(
20 |     str_detect_any(c(".", "-"), coll("."), negate = TRUE),
21 |     c(FALSE, TRUE)
22 |   )
23 |   expect_equal(str_detect_any(c(".", "-"), c(".", ":")), c(TRUE, TRUE))
24 |   expect_equal(str_detect_any(c(".", "-"), coll(c(".", ":"))), c(TRUE, FALSE))
25 |   expect_error( # boundary() patterns are rejected by both functions
26 |     str_detect_all("quick brown fox", boundary()),
27 |     "cannot handle.+pattern.+of type.+boundary"
28 |   )
29 |   expect_error(
30 |     str_detect_any("quick brown fox", boundary()),
31 |     "cannot handle.+pattern.+of type.+boundary"
32 |   )
33 |   expect_equal( # every pattern is tried against every string
34 |     str_detect_any(c("xyzabc", "abcxyz"), c(".b", "^x")),
35 |     c(TRUE, TRUE)
36 |   )
37 |   expect_equal(
38 |     str_detect_all(c("xyzabc", "abcxyz"), c(".b", "^x")),
39 |     c(TRUE, FALSE)
40 |   )
41 |   expect_equal(str_detect_all("xyzabc", c("a", "c", "z")), TRUE)
42 | })
43 | 


--------------------------------------------------------------------------------
/tests/testthat/test-elem.R:
--------------------------------------------------------------------------------
 1 | test_that("str_elem() works", { # extracts the character at `index`
 2 |   expect_equal(str_elem(c("abcd", "xyz"), 3), c("c", "z"))
 3 |   expect_equal(str_elem("abcd", -2), "c") # negative index counts from the end
 4 |   expect_equal(str_elem("abcd", 3), "c")
 5 |   expect_snapshot_error(str_elem(c("a", "b"), 1:3)) # lengths 2 vs 3
 6 | })
 7 | 
 8 | test_that("str_elems() works", { # by default: one row per string
 9 |   string <- c("abc", "def", "ghi", "vwxyz")
10 |   ans <- matrix(
11 |     c(
12 |       "a", "b",
13 |       "d", "e",
14 |       "g", "h",
15 |       "v", "w"
16 |     ),
17 |     ncol = 2, byrow = TRUE
18 |   )
19 |   expect_equal(str_elems(string, 1:2), ans)
20 |   expect_equal(str_elems(string, 1:2, byrow = FALSE), t(ans))
21 |   expect_equal( # index 4 is out of range for the 3-letter strings: ""
22 |     str_elems(string, c(1, 2, 3, 4, -1)),
23 |     matrix(
24 |       c(
25 |         "a", "b", "c", "", "c",
26 |         "d", "e", "f", "", "f",
27 |         "g", "h", "i", "", "i",
28 |         "v", "w", "x", "y", "z"
29 |       ),
30 |       nrow = length(string), byrow = TRUE
31 |     )
32 |   )
33 |   expect_equal(str_elems(character(), 1:3), matrix(character(), ncol = 3))
34 |   expect_equal(
35 |     str_elems(character(), 1:3, byrow = FALSE),
36 |     t(matrix(character(), ncol = 3))
37 |   )
38 | })
39 | 
40 | test_that("str_paste_elems() works", { # selected characters pasted together
41 |   string <- c("abc", "def", "ghi", "vwxyz")
42 |   expect_equal(str_paste_elems(string, 1:2), c("ab", "de", "gh", "vw"))
43 |   expect_equal(
44 |     str_paste_elems(string, c(1, 2, 3, 4, -1)),
45 |     c("abcc", "deff", "ghii", "vwxyz")
46 |   )
47 |   expect_equal(str_paste_elems("abc", c(1, 5, 55, 43, 3)), "ac") # 5, 55, 43 add nothing
48 |   expect_equal(str_paste_elems(character(), c(1, 5, 55, 43, 3)), character())
49 | })
50 | 


--------------------------------------------------------------------------------
/tests/testthat/test-extract-non-numerics.R:
--------------------------------------------------------------------------------
  1 | test_that("str_extract_non_numerics() works", {
  2 |   expect_equal(
  3 |     str_extract_non_numerics("--123abc456", negs = TRUE),
  4 |     list(c("-", "abc"))
  5 |   )
  6 |   expect_equal(str_first_non_numeric("--123abc456"), "--")
  7 |   expect_equal(str_last_non_numeric("--123abc456"), "abc")
  8 |   expect_equal(str_nth_non_numeric("--123abc456", -2), "--")
  9 |   expect_snapshot_error(str_extract_non_numerics("a.23", leading_decimals = TRUE))
 10 |   expect_equal(str_first_non_numeric("1"), NA_character_)
 11 |   expect_equal(str_last_non_numeric(c("abc", "def")), c("abc", "def"))
 12 |   expect_equal(
 13 |     str_nth_non_numeric(c("ab12bd23", "wx56yz89"), c(3, -1)),
 14 |     c(NA, "yz")
 15 |   )
 16 |   strings <- c( # shared fixture for the option combinations below
 17 |     "abc123def456", "abc-0.12def.345", "abc.12e4def34.5e9",
 18 |     "abc1,100def1,230.5", "abc1,100e3,215def4e1,000"
 19 |   )
 20 |   expect_equal(
 21 |     str_extract_non_numerics(strings),
 22 |     list(c("abc", "def"), c("abc-", ".", "def."), c(
 23 |       "abc.", "e",
 24 |       "def", ".", "e"
 25 |     ), c("abc", ",", "def", ",", "."), c(
 26 |       "abc", ",",
 27 |       "e", ",", "def", "e", ","
 28 |     ))
 29 |   )
 30 |   expect_equal(
 31 |     str_extract_non_numerics(strings,
 32 |       decimals = TRUE, leading_decimals = FALSE
 33 |     ),
 34 |     list(c("abc", "def"), c("abc-", "def."), c(
 35 |       "abc.", "e", "def",
 36 |       "e"
 37 |     ), c("abc", ",", "def", ","), c(
 38 |       "abc", ",", "e", ",", "def",
 39 |       "e", ","
 40 |     ))
 41 |   )
 42 |   expect_equal(
 43 |     str_extract_non_numerics(strings, decimals = TRUE),
 44 |     list(c("abc", "def"), c("abc-", "def"), c(
 45 |       "abc", "e", "def",
 46 |       "e"
 47 |     ), c("abc", ",", "def", ","), c(
 48 |       "abc", ",", "e", ",", "def",
 49 |       "e", ","
 50 |     ))
 51 |   )
 52 |   expect_equal(
 53 |     str_extract_non_numerics(strings, big_mark = ","),
 54 |     list(c("abc", "def"), c("abc-", ".", "def."), c(
 55 |       "abc.", "e",
 56 |       "def", ".", "e"
 57 |     ), c("abc", "def", "."), c(
 58 |       "abc", "e", "def",
 59 |       "e"
 60 |     ))
 61 |   )
 62 |   expect_equal(str_extract_non_numerics(strings,
 63 |     decimals = TRUE, leading_decimals = TRUE,
 64 |     sci = TRUE
 65 |   ), list(c("abc", "def"), c("abc-", "def"), c("abc", "def"), c(
 66 |     "abc",
 67 |     ",", "def", ","
 68 |   ), c("abc", ",", ",", "def", ",")))
 69 |   expect_equal(str_extract_non_numerics(strings,
 70 |     decimals = TRUE, leading_decimals = TRUE,
 71 |     sci = TRUE, big_mark = ",", negs = TRUE
 72 |   ), list(c("abc", "def"), c("abc", "def"), c("abc", "def"), c(
 73 |     "abc",
 74 |     "def"
 75 |   ), c("abc", "def")))
 76 |   expect_equal(
 77 |     str_nth_non_numeric(strings, n = 2),
 78 |     c("def", ".", "e", ",", ",")
 79 |   )
 80 |   expect_equal(
 81 |     str_nth_non_numeric(strings, n = -2, decimals = TRUE),
 82 |     c("abc", "abc-", "def", "def", "e")
 83 |   )
 84 |   expect_equal(str_first_non_numeric(strings,
 85 |     decimals = TRUE,
 86 |     leading_decimals = FALSE
 87 |   ), c("abc", "abc-", "abc.", "abc", "abc"))
 88 |   expect_equal(
 89 |     str_last_non_numeric(strings, big_mark = ","),
 90 |     c("def", "def.", "e", ".", "e")
 91 |   )
 92 |   expect_equal(str_nth_non_numeric(strings,
 93 |     n = 1, decimals = TRUE, leading_decimals = TRUE,
 94 |     sci = TRUE
 95 |   ), c("abc", "abc-", "abc", "abc", "abc"))
 96 |   expect_equal(str_first_non_numeric(strings,
 97 |     decimals = TRUE, leading_decimals = TRUE,
 98 |     sci = TRUE, big_mark = ",", negs = TRUE
 99 |   ), c("abc", "abc", "abc", "abc", "abc"))
100 |   expect_equal( # NOTE(review): presumably silences an ambiguity warning
101 |     suppressWarnings(str_extract_non_numerics("abc25.25.25def",
102 |       decimals = TRUE
103 |     )),
104 |     list(NA_character_)
105 |   )
106 |   expect_equal(
107 |     suppressWarnings(str_last_non_numeric("abc25.25.25def",
108 |       decimals = TRUE
109 |     )),
110 |     NA_character_
111 |   )
112 |   expect_equal(str_extract_non_numerics(character()), list())
113 |   expect_equal(str_last_non_numeric(character()), character())
114 | })
115 | 


--------------------------------------------------------------------------------
/tests/testthat/test-give-ext.R:
--------------------------------------------------------------------------------
1 | test_that("`str_give_ext()` works", {
2 |   expect_equal(str_give_ext("abc.csv", "csv"), "abc.csv") # already has it
3 |   expect_equal(str_give_ext("abc", "csv"), "abc.csv")
4 |   expect_equal(str_give_ext("abc.csv", "pdf"), "abc.csv.pdf") # appended
5 |   expect_equal(str_give_ext("abc.csv", "pdf", replace = TRUE), "abc.pdf")
6 |   expect_equal(str_give_ext(character(), "pdf"), character())
7 | })
8 | 


--------------------------------------------------------------------------------
/tests/testthat/test-locate.R:
--------------------------------------------------------------------------------
 1 | test_that("`str_locate_nth()` works", { # expects start/end integer matrices
 2 |   expect_equal(
 3 |     str_locate_first(c("abcdabcxyz", "abcabc"), "abc"),
 4 |     matrix(c(1, 3), nrow = 2, ncol = 2, byrow = TRUE) %>%
 5 |       magrittr::set_colnames(c("start", "end"))
 6 |   )
 7 |   expect_equal(
 8 |     str_locate_last(c("abcdabcxyz", "abcabc"), "abc"),
 9 |     matrix(c(5, 7, 4, 6), nrow = 2, ncol = 2, byrow = TRUE) %>%
10 |       magrittr::set_colnames(c("start", "end"))
11 |   )
12 |   expect_equal( # only two matches exist, so the third is NA
13 |     str_locate_nth("abc1def2abc", "abc", 3),
14 |     matrix(NA_integer_, ncol = 2, nrow = 1) %>%
15 |       magrittr::set_colnames(c("start", "end"))
16 |   )
17 |   expect_equal(
18 |     str_locate_nth(
19 |       c(
20 |         "This old thing.",
21 |         "That beautiful thing there."
22 |       ),
23 |       "\\w+", c(2, -2)
24 |     ),
25 |     matrix(c(
26 |       6, 8,
27 |       16, 20
28 |     ), ncol = 2, byrow = TRUE) %>%
29 |       magrittr::set_colnames(c("start", "end"))
30 |   )
31 |   expect_snapshot_error(str_locate_first(c("a", "b"), c("c", "d", "e")))
32 |   expect_snapshot_error(str_locate_nth(c("a", "b"), c("a", "b"), 1:5))
33 |   expect_equal( # n = 0 and n past the last match both give NA rows
34 |     str_locate_nth("abc", "b", c(0, 1, 1, 2)),
35 |     matrix(c(rep(NA, 2), rep(2, 4), rep(NA, 2)),
36 |       ncol = 2, byrow = TRUE
37 |     ) %>%
38 |       magrittr::set_colnames(c("start", "end"))
39 |   )
40 |   expect_equal(
41 |     str_locate_nth(character(0), "b", 4),
42 |     matrix(character(0), ncol = 2) %>%
43 |       magrittr::set_colnames(c("start", "end"))
44 |   )
45 | })
46 | 
47 | test_that("str_locate_braces() works", {
48 |   string <- c("a{](kkj)})", "ab(]c{}")
49 |   out <- str_locate_braces(string)
50 |   n_braces <- c(6L, 4L) # number of braces listed below for each input string
51 |   expected <- data.frame(
52 |     string_num = rep(1:2, n_braces),
53 |     string = rep(string, n_braces),
54 |     position = c(
55 |       2L, 3L, 4L, 8L, 9L, 10L,
56 |       3L, 4L, 6L, 7L
57 |     ),
58 |     brace = c(
59 |       "{", "]", "(", ")", "}", ")",
60 |       "(", "]", "{", "}"
61 |     ),
62 |     stringsAsFactors = FALSE
63 |   )
64 |   expect_equal(as.data.frame(out), expected)
65 |   # Empty input is compared against a zero-row slice of the non-empty result.
66 |   expect_equal(
67 |     as.data.frame(str_locate_braces(character())),
68 |     as.data.frame(out[0, ])
69 |   )
70 | })
71 | 


--------------------------------------------------------------------------------
/tests/testthat/test-num-after.R:
--------------------------------------------------------------------------------
 1 | test_that("`nth_number_after_mth()` works", {
 2 |   string <- c( # "abc" occurs 9 times in the first string and 3 times in the second
 3 |     "abc1abc2abc3abc4abc5abc6abc7abc8abc9",
 4 |     "abc1def2ghi3abc4def5ghi6abc7def8ghi9"
 5 |   )
 6 |   expect_equal(str_nth_number_after_mth(string, "abc", 1, 3), c(3, 7)) # n = 1, m = 3
 7 |   expect_equal(str_nth_number_after_mth(string, "abc", 2, 3), c(4, 8)) # n = 2, m = 3
 8 |   expect_equal(str_nth_number_after_first(string, "abc", 2), c(2, 2))
 9 |   expect_equal(str_nth_number_after_last(string, "abc", -1), c(9, 9))
10 |   expect_equal(str_first_number_after_mth(string, "abc", 2), c(2, 4))
11 |   expect_equal(str_last_number_after_mth(string, "abc", 1), c(9, 9))
12 |   expect_equal(str_first_number_after_first(string, "abc"), c(1, 1))
13 |   expect_equal(str_first_number_after_last(string, "abc"), c(9, 7))
14 |   expect_equal(str_last_number_after_first(string, "abc"), c(9, 9))
15 |   expect_equal(str_last_number_after_last(string, "abc"), c(9, 9))
16 |   expect_equal(str_last_number_after_last(character(), "abc"), numeric()) # empty input
17 |   expect_equal( # with leave_as_string = TRUE the empty result is character
18 |     str_last_number_after_last(character(), "abc",
19 |       leave_as_string = TRUE
20 |     ),
21 |     character()
22 |   )
23 |   expect_snapshot_error(str_nth_number_after_mth("abc", "123", 1:2, 1:3)) # n and m lengths differ
24 | })
25 | 


--------------------------------------------------------------------------------
/tests/testthat/test-num-before.R:
--------------------------------------------------------------------------------
 1 | test_that("`nth_number_before_mth()` works", {
 2 |   string <- c( # "def" occurs once in the first string and three times in the second
 3 |     "abc1abc2abc3abc4def5abc6abc7abc8abc9",
 4 |     "abc1def2ghi3abc4def5ghi6abc7def8ghi9"
 5 |   )
 6 |   expect_equal(str_nth_number_before_mth(string, "def", 1, 1), c(1, 1))
 7 |   expect_equal(str_nth_number_before_mth(string, "abc", 2, 3), c(2, 2))
 8 |   expect_equal(str_nth_number_before_first(string, "def", 2), c(2, NA)) # string 2 has one number before "def"
 9 |   expect_equal(str_nth_number_before_last(string, "def", -1), c(4, 7))
10 |   expect_equal(str_first_number_before_mth(string, "abc", 2), c(1, 1))
11 |   expect_equal(str_last_number_before_mth(string, "def", 1), c(4, 1))
12 |   expect_equal(str_first_number_before_first(string, "def"), c(1, 1))
13 |   expect_equal(str_first_number_before_last(string, "def"), c(1, 1))
14 |   expect_equal(str_last_number_before_first(string, "def"), c(4, 1))
15 |   expect_equal(str_last_number_before_last(string, "def"), c(4, 7))
16 |   expect_equal(str_first_number_before_last(character(), "def"), numeric()) # empty input
17 | })
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test-remove.R:
--------------------------------------------------------------------------------
1 | test_that("`str_remove_quoted()` works", {
2 |   string <- "\"abc\"67a\'dk\'f" # one double-quoted and one single-quoted section
3 |   expect_equal(str_remove_quoted(string), "67af")
4 |   expect_equal(str_remove_quoted(character()), character()) # empty in, empty out
5 | })
6 | 


--------------------------------------------------------------------------------
/tests/testthat/test-singleize.R:
--------------------------------------------------------------------------------
 1 | test_that("`str_singleize()` works", {
 2 |   expect_equal(str_singleize("abc//def", "/"), "abc/def")
 3 |   expect_equal(str_singleize("abababcabab", "ab"), "abcab") # each run is collapsed separately
 4 |   expect_equal(str_singleize(c("abab", "cdcd"), "cd"), c("abab", "cd")) # no "cd" in first string
 5 |   expect_equal( # one pattern per string
 6 |     str_singleize(c("abab", "cdcd"), c("ab", "cd")),
 7 |     c("ab", "cd")
 8 |   )
 9 |   expect_equal(str_singleize(character(), "abc"), character()) # empty in, empty out
10 | })
11 | 


--------------------------------------------------------------------------------
/tests/testthat/test-split-by-nums.R:
--------------------------------------------------------------------------------
 1 | test_that("str_split_by_numbers works", {
 2 |   # Without `decimals = TRUE`, the "." is expected as its own non-numeric piece.
 3 |   strings <- c("abc123def456.789gh", "a1b2c344")
 4 |   pieces <- list(
 5 |     c("abc", "123", "def", "456", ".", "789", "gh"),
 6 |     c("a", "1", "b", "2", "c", "344")
 7 |   )
 8 |   expect_equal(str_split_by_numbers(strings), pieces)
 9 |   expect_equal(
10 |     str_split_by_numbers(strings[1], decimals = TRUE),
11 |     list(c("abc", "123", "def", "456.789", "gh"))
12 |   )
13 |   expect_equal(str_split_by_numbers("22"), list("22"))
14 |   ambiguous <- suppressWarnings(
15 |     str_split_by_numbers("abc25.25.25def", decimals = TRUE)
16 |   )
17 |   expect_equal(ambiguous, list(NA_character_))
18 |   expect_equal(str_split_by_numbers(character()), list())
19 | })
20 | 


--------------------------------------------------------------------------------
/tests/testthat/test-to-vec.R:
--------------------------------------------------------------------------------
1 | test_that("str_to_vec works", {
2 |   expect_equal(str_to_vec("abcdef"), c("a", "b", "c", "d", "e", "f")) # one element per character
3 |   expect_equal(str_to_vec(character()), character()) # empty in, empty out
4 | })
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-trim.R:
--------------------------------------------------------------------------------
 1 | test_that("str_trim_anything works", {
 2 |   expect_equal(str_trim_anything("..abcd.", coll("."), "left"), "abcd.")
 3 |   expect_equal( # coll() and fixed() patterns should give the same result
 4 |     str_trim_anything("..abcd.", coll("."), "left"),
 5 |     str_trim_anything("..abcd.", fixed("."), "left")
 6 |   )
 7 |   expect_equal( # `side` may be abbreviated and is matched ignoring case
 8 |     str_trim_anything("..abcd.", coll("."), "Ri"),
 9 |     str_trim_anything("..abcd.", coll("."), "r")
10 |   )
11 |   expect_equal(
12 |     str_trim_anything(c("abcx", "abcy"), c("x", "y")),
13 |     rep("abc", 2)
14 |   )
15 |   expect_equal(
16 |     str_trim_anything(c("abcx", "abcy"), coll(c("x", "y"))),
17 |     rep("abc", 2)
18 |   )
19 |   expect_equal(
20 |     str_trim_anything(c("abcx", "abcy"), fixed(c("x", "y"))),
21 |     rep("abc", 2)
22 |   )
23 |   expect_equal(str_trim_anything("..abcd.", ".", "left"), "") # regex "." matches any character
24 |   expect_equal(str_trim_anything("-ghi--", "-"), "ghi")
25 |   expect_equal(str_trim_anything("-ghi--", "--"), "-ghi")
26 |   expect_equal(str_trim_anything("-ghi--", "--", "right"), "-ghi")
27 |   expect_equal(str_trim_anything(character(), "a"), character())
28 |   expect_equal(str_trim_anything("-ghi--", "i-+"), "-gh")
29 |   expect_equal(str_trim_anything("-ghi--", "-", "left"), "ghi--")
30 |   expect_equal(str_trim_anything(c("-ghi--", "xx"), "-"), c("ghi", "xx"))
31 |   expect_equal(str_trim_anything(c("-ghi--", "xx"), "(-)+"), c("ghi", "xx"))
32 |   expect_equal(
33 |     str_trim_anything(c("tttattt", "ttatt", "tat"), "t"),
34 |     rep("a", 3)
35 |   )
36 |   expect_error(
37 |     str_trim_anything("x", boundary("word")),
38 |     "Function cannot handle a `pattern` of type 'boundary'.",
39 |     fixed = TRUE
40 |   )
41 |   expect_error( # a pattern anchored with "^" is rejected
42 |     str_trim_anything(c("a", "b"), c("a", "^a")),
43 |     "don.+start.+reg.+ex.+with.+\\^.+Element 2.+\\^a"
44 |   )
45 |   expect_error( # a pattern anchored with "$" is rejected
46 |     str_trim_anything(c("a", "b"), c("a", "a$")),
47 |     "don.+end.+reg.+ex.+with.+\\$.+Element 2.+a\\$"
48 |   )
49 | })
50 | 


--------------------------------------------------------------------------------
/tests/testthat/test-utils.R:
--------------------------------------------------------------------------------
 1 | test_that("`*_list_nth_elems()` error correctly", {
 2 |   expect_error(
 3 |     chr_lst_nth_elems(list("a", "b"), 1:3), # list of length 2, n of length 3
 4 |     str_c( # the pieces are joined into one regex for the error message
 5 |       "If both.+chr_lst.+n.+lengths greater than 1.+",
 6 |       "then.+their lengths must be equal.+",
 7 |       "chr_lst.+length 2.+n.+length 3."
 8 |     )
 9 |   )
10 |   expect_error(
11 |     dbl_lst_nth_elems(list(1, 2), 1:3), # list of length 2, n of length 3
12 |     str_c( # the pieces are joined into one regex for the error message
13 |       "If both.+dbl_lst.+n.+lengths greater than 1.+",
14 |       "then.+lengths must be equal.+",
15 |       "dbl_lst.+length 2.+n.+length 3."
16 |     )
17 |   )
18 | })
19 | 
20 | test_that("assert_lst_elems_common_length() works", {
21 |   lst <- list(1) # a single element trivially has a common length
22 |   expect_true(assert_lst_elems_common_length(lst))
23 |   lst <- list(1, 1:2) # element lengths 1 and 2 differ
24 |   expect_error(
25 |     assert_lst_elems_common_length(lst),
26 |     "Elements.+do not have a common length"
27 |   )
28 | })
29 | 
30 | test_that("verify_string_pattern() edge cases are OK", {
31 |   expect_true(verify_string_pattern("a", boundary())) # a boundary() pattern passes verification
32 | })
33 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/alphordering-numbers.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Alphordering Numbers"
 3 | date: "`r Sys.Date()`"
 4 | output: rmarkdown::html_vignette
 5 | vignette: >
 6 |   %\VignetteIndexEntry{Alphordering Numbers}
 7 |   %\VignetteEngine{knitr::rmarkdown}
 8 |   %\VignetteEncoding{UTF-8}
 9 | ---
10 | 
11 | ```{r setup, include = FALSE}
12 | knitr::opts_chunk$set(
13 |   collapse = TRUE,
14 |   comment = "#>"
15 | )
16 | set.seed(1)
17 | library(magrittr)
18 | poorly_ordered <- expand.grid(98:103, 9:11) %>%
19 |   apply(1, function(x) paste0("patient", x[1], "-day", x[2], ".png")) %>%
20 |   sample(size = length(.))
21 | ```
22 | 
23 | ## Numbers don't comply with alphabetical order
24 | 
25 | `poorly_ordered` is a vector of file names of images of patient samples from different days.
26 | 
27 | ```{r poorly-ordered}
28 | poorly_ordered
29 | ```
30 | 
31 | How do we get this vector into order? Well, alphabetical order doesn't work:
32 | 
33 | ```{r sort-attempt}
34 | sort(poorly_ordered)
35 | ```
36 | 
37 | Patient 100 comes before patient 99. This is because 1 comes before 9 in alphabetical order.
38 | 
39 | 
40 | ## Alphordering numbers
41 | 
42 | It's possible to _alphord_ the numbers by prefixing them with zeroes:
43 | 
44 | ```{r alphordering}
45 | strex::str_alphord_nums(poorly_ordered)
46 | ```
47 | 
48 | Having done this, the alphabetical order is the one we want:
49 | 
50 | ```{r good-sort}
51 | sort(strex::str_alphord_nums(poorly_ordered))
52 | ```
53 | 


--------------------------------------------------------------------------------
/vignettes/argument-matching.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Argument Matching"
  3 | date: "`r Sys.Date()`"
  4 | output: rmarkdown::html_vignette
  5 | vignette: >
  6 |   %\VignetteIndexEntry{Argument Matching}
  7 |   %\VignetteEngine{knitr::rmarkdown}
  8 |   %\VignetteEncoding{UTF-8}
  9 | ---
 10 | 
 11 | ```{r setup, include = FALSE}
 12 | knitr::opts_chunk$set(
 13 |   collapse = TRUE,
 14 |   comment = "#>"
 15 | )
 16 | ```
 17 | 
 18 | 
 19 | # Ignoring case
 20 | 
 21 | 
 22 | ## `base`
 23 | 
 24 | The base function `match.arg()` is good, but it doesn't offer the possibility to ignore case during argument matching. Sometimes it's good to ignore case; for example, if you're matching the arguments `c("yes", "no")`, there's no need to worry about case.
 25 | 
 26 | ```{r base-ignore-case, error=TRUE}
 27 | base::match.arg("Y", c("yes", "no"))
 28 | ```
 29 | 
 30 | 
 31 | ## `strex`
 32 | 
 33 | The default behaviour of `strex::match_arg()` is to observe case, but case ignorance can be turned on with `ignore_case = TRUE`.
 34 |     
 35 | ```{r strex-ignore-case, error=TRUE}
 36 | strex::match_arg("Y", c("yes", "no"))
 37 | strex::match_arg("Y", c("yes", "no"), ignore_case = TRUE)
 38 | ```
 39 | 
 40 | 
 41 | # Error Messages
 42 | 
 43 | You can begin to see above that the error messages from `strex::match_arg()` are more informative and nicely formatted. Here are a few more examples.
 44 | 
 45 | 
 46 | ## No matches 
 47 | 
 48 | ```{r no-matches, error=TRUE}
 49 | choices <- c("Apples", "Pears", "Bananas", "Oranges", "Avocados", "Apricots")
 50 | match.arg("Q", choices)
 51 | strex::match_arg("Q", choices)
 52 | ```
 53 | 
 54 | 
 55 | ## Multiple matches
 56 | 
 57 | ```{r multiple-matches, error=TRUE}
 58 | match.arg("A", choices)
 59 | strex::match_arg("A", choices)
 60 | ```
 61 | 
 62 | 
 63 | ## Wrong `arg` length
 64 | 
 65 | ```{r arg-too-long, error=TRUE}
 66 | match.arg(c("A", "a"), choices)
 67 | strex::match_arg(c("A", "a"), choices)
 68 | ```
 69 | 
 70 | 
 71 | ## Duplicate elements in `choices`
 72 | ```{r choices-duplicate, error=TRUE}
 73 | choices <- c(choices, "Pears")
 74 | match.arg("P", choices)
 75 | strex::match_arg("P", choices)
 76 | ```
 77 | 
 78 | 
 79 | ## Not specifying `choices`
 80 | 
 81 | It's OK not to specify choices in one circumstance: when `arg` is passed as a default argument of another function.
 82 | 
 83 | ```{r NULL-choices}
 84 | myword <- function(w = c("abacus", "baseball", "candy")) {
 85 |   w <- strex::match_arg(w)
 86 |   w
 87 | }
 88 | myword()
 89 | myword("b")
 90 | myword("c")
 91 | ```
 92 | 
 93 | This is very strict, though: only the _symbol_ for the default argument can be passed, not any variant of it, not even something which evaluates to the same thing.
 94 | 
 95 | ```{r NULL-choices-errors, error=TRUE}
 96 | myword <- function(w = c("abacus", "baseball", "candy")) {
 97 |   w <- strex::match_arg(identity(w))
 98 |   w
 99 | }
100 | myword("b")
101 | myword <- function(w = c("abacus", "baseball", "candy")) {
102 |   w <- strex::match_arg(as.character(w))
103 |   w
104 | }
105 | myword("b")
106 | ```
107 | 


--------------------------------------------------------------------------------
/vignettes/before-and-after.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Before and After"
 3 | date: "`r Sys.Date()`"
 4 | output: rmarkdown::html_vignette
 5 | vignette: >
 6 |   %\VignetteIndexEntry{Before and After}
 7 |   %\VignetteEngine{knitr::rmarkdown}
 8 |   %\VignetteEncoding{UTF-8}
 9 | ---
10 | 
11 | ```{r setup, include = FALSE}
12 | knitr::opts_chunk$set(
13 |   collapse = TRUE,
14 |   comment = "#>"
15 | )
16 | ```
17 | 
18 | Often, we want the part of a string that comes before or after a given pattern. 
19 | 
20 | ```{r load}
21 | library(strex)
22 | ```
23 | 
24 | 
25 | ## Before
26 | 
27 | `str_before_nth()` gives you the part of a string before the `n`^th^ appearance of a pattern. It has the friends `str_before_first()` and `str_before_last()`.
28 | 
29 | ```{r before}
30 | string <- "ab..cd..de..fg..h"
31 | str_before_first(string, "e")
32 | str_before_nth(string, "\\.", 3)
33 | str_before_last(string, "\\.")
34 | str_before_nth(string, ".", -3)
35 | str_before_nth(rep(string, 2), fixed("."), -3)
36 | ```
37 | 
38 | 
39 | ## After
40 | 
41 | `str_after_nth()` gives you the part of a string after the `n`^th^ appearance of a pattern. It has the friends `str_after_first()` and `str_after_last()`.
42 | 
43 | ```{r after}
44 | string <- "ab..cd..de..fg..h"
45 | str_after_first(string, "e")
46 | str_after_nth(string, "\\.", 3)
47 | str_after_last(string, "\\.")
48 | str_after_nth(string, ".", -3)
49 | str_after_nth(rep(string, 2), fixed("."), -3)
50 | ```
51 | 
52 | 
53 | ## A more concrete example
54 | 
55 | ```{r james-harry}
56 | string <- "James did the cooking, Harry did the cleaning."
57 | ```
58 | 
59 | Let's write a function to figure out which task each of the lads did.
60 | 
61 | ```{r get-task}
62 | library(magrittr)
63 | get_task <- function(string, name) {
64 |   str_c(name, " did the ") %>%
65 |     str_after_first(string, .) %>%
66 |     str_before_first("[\\.,]")
67 | }
68 | get_task(string, "James")
69 | get_task(string, "Harry")
70 | ```
71 | 
72 | `get_task()` would have been more difficult to write without `str_after_first()` and `str_before_first()`.
73 | 


--------------------------------------------------------------------------------
/vignettes/detection.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "And/Or Detection"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{And/Or Detection}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | ---
 9 | 
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 |   collapse = TRUE,
13 |   comment = "#>"
14 | )
15 | ```
16 | 
17 | ```{r setup}
18 | library(strex)
19 | ```
20 | 
21 | ## How it works
22 | 
23 | `strex` offers easy and/or versions of `stringr::str_detect()` via `str_detect_all()` and `str_detect_any()`. These are vectorized over `string` but not `pattern`. `stringr::fixed()` and `stringr::coll()` are handled correctly. Otherwise, `stringr` regular expressions are used. For `str_detect_all()`, a pattern argument `c("x", "y")` is converted to `"(?=.*x)(?=.*y)"`. For `str_detect_any()`, a pattern argument `c("x", "y")` is converted to `"x|y"`.
24 | 
25 | ## Examples
26 | 
27 | ```{r examples}
28 | str_detect_all("quick brown fox", c("x", "y", "z"))
29 | str_detect_all(c(".", "-"), ".")
30 | str_detect_all(c(".", "-"), coll("."))
31 | str_detect_all(c(".", "-"), coll("."), negate = TRUE)
32 | str_detect_all(c(".", "-"), c(".", ":"))
33 | str_detect_all(c(".", "-"), coll(c(".", ":")))
34 | str_detect_all("xyzabc", c("a", "c", "z"))
35 | str_detect_all(c("xyzabc", "abcxyz"), c(".b", "^x"))
36 | str_detect_any("quick brown fox", c("x", "y", "z"))
37 | str_detect_any(c(".", "-"), ".")
38 | str_detect_any(c(".", "-"), coll("."))
39 | str_detect_any(c(".", "-"), coll("."), negate = TRUE)
40 | str_detect_any(c(".", "-"), c(".", ":"))
41 | str_detect_any(c(".", "-"), coll(c(".", ":")))
42 | str_detect_any(c("xyzabc", "abcxyz"), c(".b", "^x"))
43 | ```
44 | 
45 | ## Performance
46 | 
47 | Unless you're doing a huge amount of computation, it won't matter, but FWIW, it's faster to convert to regex using `str_escape()` rather than using `coll()`.
48 | 
49 | ```{r performance}
50 | bench::mark(
51 |   str_detect_all(rep("*", 1000), rep(str_escape("*"), 555)),
52 |   str_detect_all(rep("*", 1000), coll(rep("*", 555))),
53 |   min_iterations = 100
54 | )
55 | ```
56 | 


--------------------------------------------------------------------------------
/vignettes/important-miscellany.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Important Miscellany"
  3 | date: "`r Sys.Date()`"
  4 | output: rmarkdown::html_vignette
  5 | vignette: >
  6 |   %\VignetteIndexEntry{Important Miscellany}
  7 |   %\VignetteEngine{knitr::rmarkdown}
  8 |   %\VignetteEncoding{UTF-8}
  9 | ---
 10 | 
 11 | ```{r setup, include = FALSE}
 12 | knitr::opts_chunk$set(
 13 |   collapse = TRUE,
 14 |   comment = "#>"
 15 | )
 16 | ```
 17 | 
 18 | 
 19 | ## The Importance of this miscellany
 20 | 
 21 | The features of `strex` that were deemed the _most_ interesting have been given their own vignettes. However, the package was intended as a miscellany of useful functions, so the functions demonstrated here encapsulate the spirit of this package, i.e. functions that save R string manipulators time.
 22 | 
 23 | ```{r load}
 24 | library(strex)
 25 | ```
 26 | 
 27 | 
 28 | ## Could this be numeric?
 29 | 
 30 | Sometimes you don't want to know whether something is numeric, just whether or not it could be. Now you can find out with `str_can_be_numeric()`.
 31 | 
 32 | ```{r can-be-numeric}
 33 | str_can_be_numeric(c("1a", "abc", "5", "2e7", "seven"))
 34 | ```
 35 | 
 36 | 
 37 | ## Currency
 38 | 
 39 | To get currencies and amounts mentioned in strings, there are `str_extract_currencies()` and `str_nth_currency()`, `str_first_currency()` and `str_last_currency()`. `str_first_currency()` just returns the first currency amount. `str_last_currency()` returns the last. `str_nth_currency()` allows you to get the second, third and so on. `str_extract_currencies()` returns all currency amounts mentioned in a string. 
 40 | 
 41 | ```{r, error=TRUE}
 42 | string <- c("Alan paid £5", "Joe paid $7")
 43 | str_first_currency(string)
 44 | string <- c("€1 is $1.17", "£1 is $1.29")
 45 | str_nth_currency(string, n = c(1, 2))
 46 | str_last_currency(string) # only gets the last mentioned
 47 | str_extract_currencies(string)
 48 | ```
 49 | 
 50 | 
 51 | ## Extract a single element of a string
 52 | 
 53 | This is a simple wrapper around `stringr::str_sub()`.
 54 | 
 55 | ```{r str-elem}
 56 | string <- "abcdefg"
 57 | str_sub(string, 3, 3)
 58 | str_elem(string, 3) # simpler and more expressive
 59 | ```
 60 | 
 61 | 
 62 | ## Extract numbers and non-numeric elements
 63 | 
 64 | ```{r extract-num-non-num}
 65 | string <- c("aa1bbb2ccc3", "xyz7ayc8jzk99elephant")
 66 | str_extract_numbers(string)
 67 | str_extract_non_numerics(string)
 68 | ```
 69 | 
 70 | 
 71 | ## Split a string by its numbers
 72 | 
 73 | ```{r split-by-numbers}
 74 | string <- c("aa1bbb2ccc3", "xyz7ayc8jzk99elephant")
 75 | str_split_by_numbers(string)
 76 | ```
 77 | 
 78 | 
 79 | ## Force a file name to have an extension
 80 | 
 81 | We can give files a given extension, leaving them alone if they already have it.
 82 | ```{r giv-ext}
 83 | string <- c("spreadsheet1.csv", "spreadsheet2")
 84 | str_give_ext(string, "csv")
 85 | ```
 86 | 
 87 | If the file already has an extension, we can append one or replace it.
 88 | 
 89 | ```{r give-ext-replace}
 90 | str_give_ext(string, "xls") # append
 91 | str_give_ext(string, "csv", replace = TRUE) # replace
 92 | ```
 93 | 
 94 | 
 95 | ## Strip away a file extension
 96 | 
 97 | ```{r before-last-dot}
 98 | string <- c("spreadsheet1.csv", "spreadsheet2")
 99 | str_before_last_dot(string)
100 | ```
101 | 
102 | 
103 | ## Remove quoted bits from a string
104 | 
105 | ```{r str-remove-quoted}
106 | string <- "I hate having these \"quotes\" in the middle of my strings."
107 | cat(string)
108 | str_remove_quoted(string)
109 | ```
110 | 
111 | 
112 | ## Split camel case
113 | 
114 | I'm not mad on CamelCase, I often want to deconstruct it.
115 | 
116 | ```{r camel}
117 | string <- c("CamelVar1", "CamelVar2")
118 | str_split_camel_case(string)
119 | ```
120 | 
121 | 
122 | ## Convert a string to a vector
123 | 
124 | This is something I did a lot to avoid using regular expressions. Don't do it for that purpose. Learn regex. https://regexone.com/ is a very good start.
125 | 
126 | ```{r to-vec}
127 | string <- "R is good."
128 | str_to_vec(string)
129 | ```
130 | 
131 | 
132 | ## Trim anything, not just whitespace
133 | 
134 | What if something is needlessly surrounded by parentheses and we want to get rid of them?
135 | 
136 | ```{r trim-anything}
137 | string <- "(((Why all the parentheses?)))"
138 | string %>%
139 |   str_trim_anything(coll("("), side = "left") %>%
140 |   str_trim_anything(coll(")"), side = "r")
141 | ```
142 | 
143 | ## Remove duplicated bits of strings
144 | 
145 | ```{r singleize}
146 | string <- c("I often write the word *my* twice in a row in my my sentences.")
147 | str_singleize(string, " my")
148 | ```
149 | 
150 | 


--------------------------------------------------------------------------------
/vignettes/numbers-in-strings.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Numbers Within Strings"
  3 | date: "`r Sys.Date()`"
  4 | output: rmarkdown::html_vignette
  5 | vignette: >
  6 |   %\VignetteIndexEntry{Numbers Within Strings}
  7 |   %\VignetteEngine{knitr::rmarkdown}
  8 |   %\VignetteEncoding{UTF-8}
  9 | ---
 10 | 
 11 | ```{r setup, include = FALSE}
 12 | knitr::opts_chunk$set(
 13 |   collapse = TRUE,
 14 |   comment = "#>",
 15 |   tidy = ifelse(utils::packageVersion("knitr") >= "1.20.15", "styler", TRUE)
 16 | )
 17 | library(stringr)
 18 | ```
 19 | 
 20 | 
 21 | # A common way to encode numerical data
 22 | 
 23 | It's common for a lot of numerical information to be encoded in strings, particularly in file names. Consider a series of microscope images of cells from different patients detailing the patient number, the cell number and the number of hours after biopsy that the image was taken. They might be named like:
 24 | 
 25 | ```{r make-img_names, echo=FALSE}
 26 | img_names <- expand.grid(1:2, 1:3, c(0, 2.5)) %>%
 27 |   apply(1, function(x) {
 28 |     str_c("patient", x[1], "-cell", x[2], "-", x[3], "hours-after-biopsy.tif")
 29 |   }) %>%
 30 |   sort()
 31 | ```
 32 | ```{r print-img_names}
 33 | img_names
 34 | ```
 35 | 
 36 | 
 37 | # All of the numbers
 38 | 
 39 | For some crude reason, you might just want all of the numbers:
 40 | 
 41 | ```{r extract-all-numbers}
 42 | library(strex)
 43 | str_extract_numbers(img_names)
 44 | ```
 45 | 
 46 | It seems to have missed the fact that 2.5 is a number and not two numbers 2 and 5. This is because the default is `decimals = FALSE`. To recognise decimals, set `decimals = TRUE`. Also, note that there is an option to recognise scientific notation. More on that below.
 47 | 
 48 | ```{r extract-all-numbers-use-decimals}
 49 | str_extract_numbers(img_names, decimals = TRUE)
 50 | ```
 51 | 
 52 | It's also possible to extract the non-numeric parts of the strings:
 53 | 
 54 | ```{r extract-non-numbers}
 55 | str_extract_non_numerics(img_names, decimals = TRUE)
 56 | ```
 57 | 
 58 | 
 59 | # Extract specific numbers
 60 | 
 61 | What if we just want the cell number from each image?
 62 | 
 63 | ## The `n`^th^ number
 64 | 
 65 | We know the cell number is always the second number, so we can use the `str_nth_number()` function with `n = 2`.
 66 | 
 67 | ```{r nth-number-n2}
 68 | str_nth_number(img_names, n = 2)
 69 | ```
 70 | 
 71 | 
 72 | ## Numbers after patterns
 73 | 
 74 | To be more specific, you could say the cell number is the first number after the first instance of the word "cell". To go this route, `strex` provides `str_nth_number_after_mth()` which gives the `n`^th^ number after the `m`^th^ appearance of a given pattern:
 75 | 
 76 | ```{r nth-number-after-mth}
 77 | str_nth_number_after_mth(img_names, "cell", n = 1, m = 1)
 78 | ```
 79 | 
 80 | There's also a convenient wrapper for getting the first number after the first appearance of a pattern:
 81 | 
 82 | ```{r first-number-after-first}
 83 | str_first_number_after_first(img_names, "cell")
 84 | ```
 85 | 
 86 | 
 87 | ## Numbers before patterns
 88 | 
 89 | Now what if we want the number of hours after biopsy for each image? Looking at the image file names, we'd need the last number _before_ the first occurrence of the word "biopsy".
 90 | 
 91 | ```{r las-number-before-first}
 92 | str_last_number_before_first(img_names, "biopsy", decimals = TRUE)
 93 | ```
 94 | 
 95 | 
 96 | ## Tidy number extraction
 97 | 
 98 | To extract all of this information tidily, use a data frame:
 99 | 
100 | ```{r dataframe}
101 | data.frame(img_names,
102 |   patient = str_first_number_after_first(img_names, "patient"),
103 |   cell = str_first_number_after_first(img_names, "cell"),
104 |   hrs_after_biop = str_last_number_before_first(img_names, "biop",
105 |     decimals = TRUE
106 |   )
107 | )
108 | ```
109 | 
110 | 
111 | ## Other number formats
112 | 
113 | `strex` can also deal with numbers in scientific and comma notation.
114 | 
115 | ```{r scicom}
116 | string <- c("$1,000", "$1e6")
117 | str_first_number(string, big_mark = ",", sci = TRUE)
118 | ```
119 | 
120 | It can even do underscore notation or space notation, or both at once:
121 | 
122 | ```{r underscore-or-space}
123 | string <- c("1_000", "1 000", "1_000 000", "1 000_000")
124 | str_first_number(string, big_mark = "_ ")
125 | ```
126 | 
127 | # All of the number functions
128 | 
129 | There are a whole host of functions for extracting numbers from strings in the `strex` package:
130 | 
131 | ```{r all-number-functions}
132 | str_subset(ls("package:strex"), "number")
133 | ```
134 | 
135 | 
136 | # Regular expression
137 | 
138 | Of course, all of the above is possible with regular expressions using `stringr`; it's just more difficult and less expressive:
139 | 
140 | ```{r regex}
141 | data.frame(img_names,
142 |   patient = str_match(img_names, "patient(\\d+)")[, 2],
143 |   cell = str_match(img_names, "cell(\\d+)")[, 2],
144 |   hrs_after_biop = str_match(img_names, "(\\d*\\.*\\d+)hour")[, 2]
145 | )
146 | ```
147 | 


--------------------------------------------------------------------------------