370 | #' Using [rex](http://cran.r-project.org/web/packages/rex/index.html) may make this type of task a little simpler.
371 | library("rvest")
372 | library("stringr")
373 |
374 | minimal <- read_html("<html><body>blah foo</body></html>")
375 |
376 | bodytext <- minimal %>%
377 | html_node("body") %>%
378 | html_text()
379 |
380 | re_substitutes(bodytext, rex(spaces), "", global = TRUE)
381 |
382 | #' ###
383 | #+ message=FALSE
384 | string <- "this\\(system) {is} [full]."
385 | library(Hmisc)
386 | gsub("\\\\(.)", "\\1", escapeRegex(string))
387 |
388 | #' Alternatively [rex](http://cran.r-project.org/web/packages/rex/index.html) may make this type of task a little simpler.
389 | library(rex)
390 | re_substitutes(escape(string), rex("\\", capture(any)), "\\1", global = TRUE)
391 |
392 | #' ###
393 | #' [rex](http://cran.r-project.org/web/packages/rex/) has a [vignette for parsing server logs](http://cran.r-project.org/web/packages/rex/vignettes/log_parsing.html). While the format is not exactly the same as your log, you should be able to adapt it to your case fairly easily.
394 | #' As for reading the log in: assuming the file fits in memory, your best bet is to read the whole file first with `readLines()`, and then the following will put each field into a `data.frame` column.
395 | x <- "Feb 6 12:14:14 localhost haproxy[14389]: 10.0.1.2:33317 [06/Feb/2009:12:14:14.655] http-in static/srv1 10/0/30/69/109 200 2750 - - ---- 1/1/1/1/0 0/0 {1wt.eu} {} \"GET /index.html HTTP/1.1\""
396 | library(rex)
397 | re <- rex(
398 |
399 | capture(name = "process_name", alpha),
400 | "[",
401 | capture(name = "pid", digits),
402 | "]:",
403 | spaces,
404 | capture(name = "client_ip", any_of(digit, ".")),
405 | ":",
406 | capture(name = "client_port", digits),
407 | spaces,
408 | "[",
409 | capture(name = "accept_date", except_some_of("]")),
410 | "]",
411 | spaces,
412 | capture(name = "frontend_name", non_spaces),
413 | spaces,
414 | capture(name = "backend_name", except_some_of("/")),
415 | "/",
416 | capture(name = "server_name", non_spaces),
417 | spaces,
418 | capture(name = "Tq", some_of("-", digit)),
419 | "/",
420 | capture(name = "Tw", some_of("-", digit)),
421 | "/",
422 | capture(name = "Tc", some_of("-", digit)),
423 | "/",
424 | capture(name = "Tr", some_of("-", digit)),
425 | "/",
426 | capture(name = "Tt", some_of("+", digit)),
427 | spaces,
428 | capture(name = "status_code", digits),
429 | spaces,
430 | capture(name = "bytes_read", some_of("+", digit)),
431 | spaces,
432 | capture(name = "captured_request_cookie", non_spaces),
433 | spaces,
434 | capture(name = "captured_response_cookie", non_spaces),
435 | spaces,
436 | capture(name = "termination_state", non_spaces),
437 | spaces,
438 | capture(name = "actconn", digits),
439 | "/",
440 | capture(name = "feconn", digits),
441 | "/",
442 | capture(name = "beconn", digits),
443 | "/",
444 | capture(name = "srv_conn", digits),
445 | "/",
446 | capture(name = "retries", some_of("+", digit)),
447 | spaces,
448 | capture(name = "srv_queue", digits),
449 | "/",
450 | capture(name = "backend_queue", digits),
451 | spaces,
452 | "{",
453 | capture(name = "captured_request_headers", except_any_of("}")),
454 | "}",
455 | spaces,
456 | "{",
457 | capture(name = "captured_response_headers", except_any_of("}")),
458 | "}",
459 | spaces,
460 | double_quote,
461 | capture(name = "http_request", non_quotes),
462 | double_quote)
463 |
464 | re_matches(x, re)
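
#' To parse a whole log file, read it in with `readLines()` and pass the
#' resulting character vector to `re_matches()`, which returns one row per
#' log line (a sketch only; "haproxy.log" is a hypothetical file name).
#+ eval=FALSE
log_lines <- readLines("haproxy.log")
log_fields <- re_matches(log_lines, re)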
465 |
466 | #' ###
467 | #' Using [rex](http://cran.r-project.org/web/packages/rex/index.html) may make this type of task a little simpler.
468 |
469 | my.data <- read.table(text = "
470 | my.string state
471 | ......... A
472 | 1........ B
473 | 112...... C
474 | 11111.... D
475 | 1111113.. E
476 | 111111111 F
477 | 111111111 G
478 | ", header = TRUE, stringsAsFactors = FALSE)
479 |
480 | library(rex)
481 |
482 | re_matches(my.data$my.string,
483 | rex(capture(except(".")), "."))$"1"
484 |
485 | #' ###
486 | #' Using [rex](http://cran.r-project.org/web/packages/rex/index.html) may make this type of task a little simpler.
487 | string <- "Shakira - Wolf - 02.Hips don't lie.mp3"
488 |
489 | library(rex)
490 | re_matches(string,
491 | rex(capture(zero_or_more(any, type="lazy")), spaces, "-"))$"1"
492 |
493 | #' ###
494 | #' Using [rex](http://cran.r-project.org/web/packages/rex/index.html) may make this type of task a little simpler. The lookahead below deletes a space only when it precedes a stranded single character, that is, one non-space followed by another space and a word of at least two characters, or one non-space at the end of the string.
495 |
496 | string <- "I t is tim e to g o"
497 | library(rex)
498 | re_substitutes(string, rex(
499 | space %if_next_is%
500 | list(
501 | list(non_space, space, at_least(non_space, 2)) %or%
502 | list(non_space, end)
503 | )
504 | ), "", global = TRUE)
505 |
506 | #' ###
507 | #' Using [rex](http://cran.r-project.org/web/packages/rex/index.html) may make this type of task a little simpler.
508 |
509 | string <- "01:04:43.064 [12439] <2> xyz
510 | 01:04:43.067 [12439] <2> a lmn
511 | 01:04:43.068 [12439] <4> j klm
512 | x_times_wait to <3000>
513 | 01:04:43.068 [12439] <4> j klm
514 | enter_object <5000> main k"
515 |
516 | library(rex)
517 |
518 | timestamp <- rex(n(digit, 2), ":", n(digit, 2), ":", n(digit, 2), ".", n(digit, 3))
519 |
520 | re <- rex(timestamp, space,
521 | "[", digits, "]", space,
522 | "<", digits, ">", space,
523 | capture(anything))
524 |
525 | re_matches(string, re, global = TRUE)
526 |
--------------------------------------------------------------------------------
/vignettes/url_parsing.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "URL Validation"
3 | author: "Jim Hester"
4 | date: "`r Sys.Date()`"
5 | output: rmarkdown::html_vignette
6 | vignette: >
7 | %\VignetteIndexEntry{URL Validation}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | \usepackage[utf8]{inputenc}
10 | ---
11 |
12 | Consider the task of correctly [validating a URL](https://mathiasbynens.be/demo/url-regex).
13 | From that page, two conclusions can be drawn.
14 |
15 | 1. Validating URLs requires complex regular expressions.
16 | 2. Creating a correct regular expression is hard! (Only 1 of the 13 regexes tested was valid for all cases.)
17 |
18 | Because of this, you may be tempted to simply copy the best regex you can find ([gist](https://gist.github.com/dperini/729294)).
19 |
20 | The problem is that while you can copy it now, what happens later when you find a case it does not handle correctly? Can you correctly interpret and modify this regex?
21 | ```{r url_parsing_stock, eval=F}
22 | "^(?:(?:http(?:s)?|ftp)://)(?:\\S+(?::(?:\\S)*)?@)?(?:(?:[a-z0-9\u00a1-\uffff](?:-)*)*(?:[a-z0-9\u00a1-\uffff])+)(?:\\.(?:[a-z0-9\u00a1-\uffff](?:-)*)*(?:[a-z0-9\u00a1-\uffff])+)*(?:\\.(?:[a-z0-9\u00a1-\uffff]){2,})(?::(?:\\d){2,5})?(?:/(?:\\S)*)?$"
23 | ```
24 |
25 | However, if you re-create the regex with `rex`, it is much easier to understand and modify later if needed.
26 | ```{r url_parsing_url}
27 | library(rex)
28 | library(magrittr)
29 |
30 | valid_chars <- rex(except_some_of(".", "/", " ", "-"))
31 |
32 | re <- rex(
33 | start,
34 |
35 | # protocol identifier (optional) + //
36 | group(list("http", maybe("s")) %or% "ftp", "://"),
37 |
38 | # user:pass authentication (optional)
39 | maybe(non_spaces,
40 | maybe(":", zero_or_more(non_space)),
41 | "@"),
42 |
43 | # host name
44 | group(zero_or_more(valid_chars, zero_or_more("-")), one_or_more(valid_chars)),
45 |
46 | # domain name
47 | zero_or_more(".", zero_or_more(valid_chars, zero_or_more("-")), one_or_more(valid_chars)),
48 |
49 | # TLD identifier
50 | group(".", valid_chars %>% at_least(2)),
51 |
52 | # server port number (optional)
53 | maybe(":", digit %>% between(2, 5)),
54 |
55 | # resource path (optional)
56 | maybe("/", non_space %>% zero_or_more()),
57 |
58 | end
59 | )
60 | ```
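
For example, say you later needed to accept `sftp` URLs as well. With `rex` that is a local, readable edit (a hypothetical sketch, not run as part of this vignette, and not part of the validation below); only the protocol line inside `re` changes:

```{r url_parsing_modify, eval=FALSE}
# Hypothetical modification: extend the protocol alternation.
# Every other component of the regular expression stays as written above.
group(list("http", maybe("s")) %or% "ftp" %or% "sftp", "://"),
```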
61 |
62 | We can then validate that it correctly identifies both good and bad URLs. (_IP address validation removed_)
63 |
64 | ```{r url_parsing_validate}
65 | good <- c("http://foo.com/blah_blah",
66 | "http://foo.com/blah_blah/",
67 | "http://foo.com/blah_blah_(wikipedia)",
68 | "http://foo.com/blah_blah_(wikipedia)_(again)",
69 | "http://www.example.com/wpstyle/?p=364",
70 | "https://www.example.com/foo/?bar=baz&inga=42&quux",
71 | "http://✪df.ws/123",
72 | "http://userid:password@example.com:8080",
73 | "http://userid:password@example.com:8080/",
74 | "http://userid@example.com",
75 | "http://userid@example.com/",
76 | "http://userid@example.com:8080",
77 | "http://userid@example.com:8080/",
78 | "http://userid:password@example.com",
79 | "http://userid:password@example.com/",
80 | "http://➡.ws/䨹",
81 | "http://⌘.ws",
82 | "http://⌘.ws/",
83 | "http://foo.com/blah_(wikipedia)#cite-1",
84 | "http://foo.com/blah_(wikipedia)_blah#cite-1",
85 | "http://foo.com/unicode_(✪)_in_parens",
86 | "http://foo.com/(something)?after=parens",
87 | "http://☺.damowmow.com/",
88 | "http://code.google.com/events/#&product=browser",
89 | "http://j.mp",
90 | "ftp://foo.bar/baz",
91 | "http://foo.bar/?q=Test%20URL-encoded%20stuff",
92 | "http://مثال.إختبار",
93 | "http://例子.测试",
94 | "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com",
95 | "http://1337.net",
96 | "http://a.b-c.de",
97 | "http://223.255.255.254")
98 |
99 | bad <- c(
100 | "http://",
101 | "http://.",
102 | "http://..",
103 | "http://../",
104 | "http://?",
105 | "http://??",
106 | "http://??/",
107 | "http://#",
108 | "http://##",
109 | "http://##/",
110 | "http://foo.bar?q=Spaces should be encoded",
111 | "//",
112 | "//a",
113 | "///a",
114 | "///",
115 | "http:///a",
116 | "foo.com",
117 | "rdar://1234",
118 | "h://test",
119 | "http:// shouldfail.com",
120 | ":// should fail",
121 | "http://foo.bar/foo(bar)baz quux",
122 | "ftps://foo.bar/",
123 | "http://-error-.invalid/",
124 | "http://-a.b.co",
125 | "http://a.b-.co",
126 | "http://0.0.0.0",
127 | "http://3628126748",
128 | "http://.www.foo.bar/",
129 | "http://www.foo.bar./",
130 | "http://.www.foo.bar./")
131 |
132 | all(grepl(re, good) == TRUE)
133 |
134 | all(grepl(re, bad) == FALSE)
135 | ```
136 |
137 | You can now see the power and expressiveness of building regular expressions with `rex`!
138 |
--------------------------------------------------------------------------------