├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── NEWS
├── NEWS.md
├── R
├── add_comments.R
├── as.regexr.R
├── comments.R
├── construct.R
├── regex-class.R
├── regexr-class.R
├── regexr-package.R
├── set_names.R
├── subs.R
├── test.R
├── unglue.R
└── utils.R
├── README.Rmd
├── README.md
├── inst
├── CITATION
├── build.R
├── extra_statdoc
│ └── readme.R
├── functions_table
│ └── functions.R
├── maintenance.R
├── regexr_logo
│ ├── Thumbs.db
│ └── r_regexr.png
├── staticdocs
│ └── index.R
└── web
│ ├── add_comments.html
│ ├── as.regexr.character.html
│ ├── as.regexr.default.html
│ ├── as.regexr.html
│ ├── comments-set-.regexr.html
│ ├── comments.html
│ ├── comments.regexr.html
│ ├── construct.html
│ ├── css
│ ├── bootstrap-responsive.css
│ ├── bootstrap-responsive.min.css
│ ├── bootstrap.css
│ ├── bootstrap.min.css
│ ├── highlight.css
│ └── staticdocs.css
│ ├── get_construct.html
│ ├── get_construct.reverse_construct.html
│ ├── img
│ ├── glyphicons-halflings-white.png
│ └── glyphicons-halflings.png
│ ├── index.html
│ ├── js
│ ├── bootstrap.js
│ └── bootstrap.min.js
│ ├── names-set-.regexr.html
│ ├── names.regexr.html
│ ├── print.regex.html
│ ├── print.regexr.html
│ ├── print.reverse_construct.html
│ ├── print.summary_regexr.html
│ ├── print.unglued.html
│ ├── regex-set-.regexr.html
│ ├── regex.html
│ ├── regex.regexr.html
│ ├── regexr.html
│ ├── set_names.html
│ ├── subs-set-.regexr.html
│ ├── subs.html
│ ├── subs.regexr.html
│ ├── summary.regexr.html
│ ├── test.html
│ ├── test.regexr.html
│ ├── unglue.html
│ └── unglue.regexr.html
├── man
├── add_comments.Rd
├── as.regexr.Rd
├── as.regexr.character.Rd
├── as.regexr.default.Rd
├── comments-set-.regexr.Rd
├── comments.Rd
├── comments.regexr.Rd
├── construct.Rd
├── get_construct.Rd
├── get_construct.reverse_construct.Rd
├── names-set-.regexr.Rd
├── names.regexr.Rd
├── print.regexr.Rd
├── print.reverse_construct.Rd
├── print.subcom.Rd
├── print.summary_regexr.Rd
├── print.unglued.Rd
├── regexr.Rd
├── set_names.Rd
├── subs-set-.regexr.Rd
├── subs.Rd
├── subs.regexr.Rd
├── summary.regexr.Rd
├── test.Rd
├── test.regexr.Rd
├── unglue.Rd
└── unglue.regexr.Rd
├── regexr.Rproj
└── tests
├── testthat.R
└── testthat
├── test-as.regexr.R
├── test-comment_binary_operator.R
├── test-comments.R
├── test-construct.R
├── test-get_construct.R
├── test-names.R
├── test-print.subcom.R
├── test-subs.R
├── test-summary.R
├── test-test.R
└── test-unglue.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.gitignore
4 | NEWS.md
5 | FAQ.md
6 | NEWS.html
7 | FAQ.html
8 | ^\.travis\.yml$
9 | inst/staticdocs
10 | inst/maintenance.R
11 | inst/extra_statdoc
12 | travis-tool.sh
13 | inst/web
14 | contributors.geojson
15 | inst/build.R
16 | inst/r_qdapRegex.pptx
17 | ^.*\.Rprofile$
18 | README.Rmd
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 |
4 | # Example code in package build process
5 | *-Ex.R
6 |
7 | .Rprofile
8 | .Rproj.user
9 | qdapRegex.Rproj
10 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 |
3 | sudo: required
4 | before_install:
5 | - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh
6 | - chmod 755 ./travis-tool.sh
7 | - ./travis-tool.sh bootstrap
8 | install:
9 | - sh -e /etc/init.d/xvfb start
10 | - ./travis-tool.sh aptget_install r-cran-xml
11 | - ./travis-tool.sh install_github hadley/devtools
12 | - ./travis-tool.sh install_github trinker/qdapRegex
13 | - ./travis-tool.sh install_deps
14 | - ./travis-tool.sh github_package jimhester/covr
15 | script: ./travis-tool.sh run_tests
16 | after_success:
17 | - Rscript -e 'library(covr);coveralls()'
18 | notifications:
19 | email:
20 | on_success: change
21 | on_failure: change
22 | env:
23 | global:
24 | - R_BUILD_ARGS="--resave-data=best"
25 | - R_CHECK_ARGS="--as-cran"
26 | - DISPLAY=:99.0
27 | - BOOTSTRAP_LATEX=1
28 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: regexr
2 | Type: Package
3 | Title: Readable Regular Expressions
4 | Version: 1.1.0
5 | Date: 2015-07-03
6 | Authors@R: c(person("Tyler", "Rinker", email = "tyler.rinker@gmail.com", role = c("aut",
7 | "cre")))
8 | Maintainer: Tyler Rinker <tyler.rinker@gmail.com>
9 | Depends: R (>= 3.1.0)
10 | Suggests: testthat, qdapRegex (>= 0.2.0)
11 | LazyData: TRUE
12 | Description: An R framework for constructing and managing human readable regular
13 | expressions. It aims to provide tools that enable the user to write regular
14 | expressions in a way that is similar to the ways R code is written. The tools
15 | allow the user to (1) write in smaller, modular, named, sub-expressions, (2)
16 | write top to bottom, rather than a single string (3) comment individual chunks,
17 | (4) indent expressions to clearly present regular expression groups, (5) add
18 | vertical line spaces and R comments (i.e., #), and (6) test the validity of the
19 | concatenated expression and the modular sub-expressions.
20 | License: GPL-2
21 | URL: http://trinker.github.com/regexr/
22 | BugReports: http://github.com/trinker/regexr/issues
23 | Roxygen: list(wrap = FALSE)
24 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2 (4.1.1): do not edit by hand
2 |
3 | S3method("comments<-",regexr)
4 | S3method("names<-",regexr)
5 | S3method("subs<-",regexr)
6 | S3method(as.regexr,character)
7 | S3method(as.regexr,default)
8 | S3method(comments,regexr)
9 | S3method(get_construct,reverse_construct)
10 | S3method(names,regexr)
11 | S3method(print,regexr)
12 | S3method(print,reverse_construct)
13 | S3method(print,subcom)
14 | S3method(print,summary_regexr)
15 | S3method(print,unglued)
16 | S3method(subs,regexr)
17 | S3method(summary,regexr)
18 | S3method(test,regexr)
19 | S3method(unglue,regexr)
20 | export("%:)%")
21 | export("%comment%")
22 | export("comments<-")
23 | export("subs<-")
24 | export(as.regexr)
25 | export(comments)
26 | export(construct)
27 | export(get_construct)
28 | export(set_comments)
29 | export(set_names)
30 | export(set_subs)
31 | export(subs)
32 | export(test)
33 | export(unglue)
34 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | NEWS
2 | ====
3 |
4 | Versioning
5 | ----------
6 |
7 | Releases will be numbered with the following semantic versioning format:
8 |
9 | <major>.<minor>.<patch>
10 |
11 | And constructed with the following guidelines:
12 |
13 | * Breaking backward compatibility bumps the major (and resets the minor
14 | and patch)
15 | * New additions without breaking backward compatibility bumps the minor
16 | (and resets the patch)
17 | * Bug fixes and misc changes bumps the patch
18 |
19 | regexr 1.1.0
20 | ----------------------------------------------------------------
21 |
22 | BUG FIXES
23 |
24 | NEW FEATURES
25 |
26 | MINOR FEATURES
27 |
28 | IMPROVEMENTS
29 |
30 | CHANGES
31 |
32 | regexr 1.0.0 - 1.0.2
33 | ----------------------------------------------------------------
34 |
35 | The `regex` function has been removed (not `Deprecated`) as **regexr** is in beta
36 | mode. This move breaks backward compatibility and thus a bump in major release
37 | to version 1.0.0.
38 |
39 | CHANGES
40 |
41 | * `regex` function replaced with the `subs` function. This move is both
42 | semantic and pragmatic. The term "regular expression chunk" has been
43 | replaced with "sub-expressions". The `subs` function reflects the semantic
44 | change. Additionally, Richie Cotton's **regex** package
45 | (https://github.com/richierocks/regex), which is complementary to **regexr**,
46 | contains a `regex` function.
47 |
48 | * The `regex` class (created from adding a comment to a string) has been
49 | replaced with the `subcom` (sub-expression commented) class. This name is
50 | more informative and avoids conflicts with other regex packages that may
51 | contain a `regex` class.
52 |
53 |
54 |
55 | regexr 0.0.3 - 0.0.4
56 | ----------------------------------------------------------------
57 |
58 | NEW FEATURES
59 |
60 | * `as.regexr` added to coerce a regular expression to a `regexr` class using
61 | the regular expression breakdown of http://rick.measham.id.au/paste/explain.pl.
62 | The `get_construct` function extracts a script to create a `construct` regex
63 | from `regexr` objects that were coerced using `as.regexr`.
64 |
65 | MINOR FEATURES
66 |
67 | * `set_comments`, `set_regex`, and `set_names` added as convenience functions
68 | to set elements of a `regexr` object and return the object.
69 |
70 | IMPROVEMENTS
71 |
72 | * `print.summary_regexr` used `message` to print to the console which had the
73 | potential to not return the entire input. `print.summary_regexr` now uses
74 | `cat` rather than `message`.
75 |
76 | CHANGES
77 |
78 | * The recommended structure for the `construct` code is no longer a separate
79 | line for each regular expression chunk's *name*, *regex*, and *comment* but
80 | *name*, *regex*, and *comment* all go on the same line. This makes the code
81 | less cluttered and easier to navigate, particularly if indentation is used to
82 | indicate nested grouping structures. Users may choose whatever configuration
83 | that suits their preference, however, examples will generally be shown via the
84 | new recommended, single line, structure.
85 |
86 |
87 |
88 | regexr 0.0.1 - 0.0.2
89 | ----------------------------------------------------------------
90 |
91 | **regexr** is an R framework for constructing human readable regular expressions.
92 | It aims to provide tools that enable the user to write regular expressions in a
93 | way that is similar to the ways R code is written. The tools allow the user to
94 | (1) write in smaller, modular, named, regular expression chunks, (2) write top
95 | to bottom, rather than a single string (3) comment individual chunks, (4) indent
96 | expressions to represent regular expression groups, and (5) test the validity of
97 | the concatenated expression and the modular chunks.
98 |
99 | This framework harnesses the power and flexibility of regular expressions but
100 | provides a structural frame that is more consistent with both coding writing and
101 | natural language conventions.
102 |
103 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | NEWS
2 | ====
3 |
4 | Versioning
5 | ----------
6 |
7 | Releases will be numbered with the following semantic versioning format:
8 |
9 | <major>.<minor>.<patch>
10 |
11 | And constructed with the following guidelines:
12 |
13 | * Breaking backward compatibility bumps the major (and resets the minor
14 | and patch)
15 | * New additions without breaking backward compatibility bumps the minor
16 | (and resets the patch)
17 | * Bug fixes and misc changes bumps the patch
18 |
19 | regexr 1.1.0
20 | ----------------------------------------------------------------
21 |
22 | **BUG FIXES**
23 |
24 | **NEW FEATURES**
25 |
26 | **MINOR FEATURES**
27 |
28 | **IMPROVEMENTS**
29 |
30 | **CHANGES**
31 |
32 | regexr 1.0.0 - 1.0.2
33 | ----------------------------------------------------------------
34 |
35 | The `regex` function has been removed (not `Deprecated`) as regexr is in beta
36 | mode. This move breaks backward compatibility and thus a bump in major release
37 | to version 1.0.0.
38 |
39 | **CHANGES**
40 |
41 | * `regex` function replaced with the `subs` function. This move is both
42 | semantic and pragmatic. The term "regular expression chunk" has been
43 | replaced with "sub-expressions". The `subs` function reflects the semantic
44 | change. Additionally, Richie Cotton's **regex** package
45 | (https://github.com/richierocks/regex), which is complementary to regexr,
46 | contains a `regex` function.
47 |
48 | * The `regex` class (created from adding a comment to a string) has been
49 | replaced with the `subcom` (sub-expression commented) class. This name is
50 | more informative and avoids conflicts with other regex packages that may
51 | contain a `regex` class.
52 |
53 |
54 |
55 | regexr 0.0.3 - 0.0.4
56 | ----------------------------------------------------------------
57 |
58 | **NEW FEATURES**
59 |
60 | * `as.regexr` added to coerce a regular expression to a `regexr` class using
61 | the regular expression breakdown of http://rick.measham.id.au/paste/explain.pl.
62 | The `get_construct` function extracts a script to create a `construct` regex
63 | from `regexr` objects that were coerced using `as.regexr`.
64 |
65 | **MINOR FEATURES**
66 |
67 | * `set_comments`, `set_regex`, and `set_names` added as convenience functions
68 | to set elements of a `regexr` object and return the object.
69 |
70 | **IMPROVEMENTS**
71 |
72 | * `print.summary_regexr` used `message` to print to the console which had the
73 | potential to not return the entire input. `print.summary_regexr` now uses
74 | `cat` rather than `message`.
75 |
76 | **CHANGES**
77 |
78 | * The recommended structure for the `construct` code is no longer a separate
79 | line for each regular expression chunk's *name*, *regex*, and *comment* but
80 | *name*, *regex*, and *comment* all go on the same line. This makes the code
81 | less cluttered and easier to navigate, particularly if indentation is used to
82 | indicate nested grouping structures. Users may choose whatever configuration
83 | that suits their preference, however, examples will generally be shown via the
84 | new recommended, single line, structure.
85 |
86 |
87 |
88 | regexr 0.0.1 - 0.0.2
89 | ----------------------------------------------------------------
90 |
91 | regexr is an R framework for constructing human readable regular expressions.
92 | It aims to provide tools that enable the user to write regular expressions in a
93 | way that is similar to the ways R code is written. The tools allow the user to
94 | (1) write in smaller, modular, named, regular expression chunks, (2) write top
95 | to bottom, rather than a single string (3) comment individual chunks, (4) indent
96 | expressions to represent regular expression groups, and (5) test the validity of
97 | the concatenated expression and the modular chunks.
98 |
99 | This framework harnesses the power and flexibility of regular expressions but
100 | provides a structural frame that is more consistent with both coding writing and
101 | natural language conventions.
102 |
--------------------------------------------------------------------------------
/R/add_comments.R:
--------------------------------------------------------------------------------
1 | #' Add Comments to Character Strings.
2 | #'
3 | #' This operator allows you to add comments to character strings.
4 | #'
5 | #' @param x A character string that is to be commented.
6 | #' @param y A character string (the comment).
7 | #' @return Returns a character string of the class \code{subcom} with a comment
8 | #' added as a \code{"comment"} attribute.
9 | #' @keywords comment
10 | #' @export
11 | #' @note The operator, \code{\%:)\%}, is a simple smiley face emoticon because
12 | #' commented code is happy code.
13 | #' @seealso \code{\link[base]{comment}}
14 | #' @rdname add_comments
15 | #' @examples
16 | #' a <- "The character string"
17 | #' b <- "The comment"
18 | #'
19 | #' (out <- a %:)% b)
20 | #' attributes(out)
21 | #' comment(out)
22 | #'
23 | #' minimal <- construct("a", "b", "c" %:)% "A love note to your future self")
24 | #' minimal
25 | #' comments(minimal)
`%:)%` <- function(x, y) {
  # Tag the string as a commented sub-expression ("subcom") and attach `y`
  # to it as the "comment" attribute; the tagged copy is returned.
  commented <- x
  class(commented) <- c("subcom", "character")
  attr(commented, "comment") <- y
  commented
}

#' @export
#' @rdname add_comments
`%comment%` <- `%:)%`
35 |
--------------------------------------------------------------------------------
/R/as.regexr.R:
--------------------------------------------------------------------------------
1 | #' Generic Method to Coerce to regexr
2 | #'
3 | #' Coerce an object to \code{regexr} class.
4 | #'
5 | #' @param x An object to coerce to a \code{regexr} object.
6 | #' @param names logical. Should names be included in the \code{construct}
7 | #' script?
8 | #' @param comments logical. Should comments be included in the \code{construct}
9 | #' script?
10 | #' @param names.above logical. Should names be included above the regex
11 | #' in the \code{construct} script? If \code{FALSE} names are placed in front of
12 | #' the sub-expressions.
13 | #' @param comments.below logical. Should comments be included below the
14 | #' sub-expressions in the \code{construct} script? If \code{FALSE} comments
15 | #' are placed behind the sub-expressions.
16 | #' @param \ldots Other arguments passed to \code{as.regexr} methods.
17 | #' @return Returns a dual \code{regexr} and \code{reverse_construct} object.
18 | #' @export
19 | #' @note \code{as.regexr.character} utilizes \url{http://rick.measham.id.au/paste/explain}
20 | #' to break the regular expression into sub-expressions.
21 | #' @examples
22 | #' library("qdapRegex")
23 | #' (myregex <- grab("@@rm_time2"))
24 | #' out <- as.regexr(myregex)
25 | #'
26 | #' out
27 | #' summary(out)
28 | #' comments(out)
29 | #' subs(out)
30 | #' test(out)
31 | #' get_construct(out)
32 | #'
33 | #' \dontrun{
34 | #' ## On Windows copy to clipboard
35 | #' get_construct(out, file="clipboard")
36 | #' }
37 | #'
38 | #' ## No names & comments behind sub-expressions
39 | #' myregex2 <- "(\\s*[a-z]+)([^)]+\\))"
40 | #' get_construct(as.regexr(myregex2, names=FALSE))
41 | #' get_construct(as.regexr(myregex2, names=FALSE, names.above = TRUE,
42 | #' comments.below = TRUE))
as.regexr <- function(x, names = TRUE, comments = TRUE, names.above = FALSE,
  comments.below = FALSE, ...) {
  # S3 generic: dispatch on the class of `x`; the method receives the same
  # arguments this call was given.
  UseMethod("as.regexr")
}
47 |
48 |
49 | #' Coerce character to regexr
50 | #'
51 | #' Convert a regular expression to a commented \code{regexr} object.
52 | #'
53 | #' character Method for as.regexr
54 | #' @param x The \code{character} object.
55 | #' @param names logical. Should names be included in the \code{construct}
56 | #' script?
57 | #' @param comments logical. Should comments be included in the \code{construct}
58 | #' script?
59 | #' @param names.above logical. Should names be included above the sub-expressions
60 | #' in the \code{construct} script? If \code{FALSE} names are placed in front of
61 | #' the sub-expressions.
62 | #' @param comments.below logical. Should comments be included below the
63 | #' sub-expressions in the \code{construct} script? If \code{FALSE} comments are
64 | #' placed behind the sub-expressions.
65 | #' @param \ldots Ignored.
66 | #' @export
67 | #' @method as.regexr character
68 | as.regexr.character <- function(x, names = TRUE, comments = TRUE,
69 | names.above = FALSE, comments.below = FALSE, ...){
70 | # Explain `x` via regex_break_down(), then store the pieces as "subs"/"comments" attributes and a construct() script as the "reverse_construct" attribute.
71 | out <- regex_break_down(x)
72 |
73 | loc <- gregexpr("EXPLANATION", out[1]) # position of "EXPLANATION" in the first line; used below as the split column between regex text and comment text
74 | out <- out[-c(1:2)] # drop the first two lines (presumably the header and its rule -- TODO confirm)
75 |
76 | breaks <- grepl("^-{10,}$", out) # lines made only of 10+ dashes act as separators
77 | inds <- !breaks
78 |
79 | out <- split(out[inds], cumsum(breaks)[inds]) # group the non-separator lines by the number of separators seen so far
80 | names(out) <- as.numeric(names(out)) + 1 # shift the numeric group labels up by one
81 |
82 | pieces <- lapply(out, function(x){ # one list(regex, comment) per group
83 | y1 <- gsub("\\s+$", "", substring(x, 1, loc)) # characters 1..loc of each line, trailing whitespace removed
84 | y1[-1] <- gsub("^\\s+", "", y1[-1]) # every line but the first also loses its leading whitespace
85 | y2 <- gsub("^\\s+|\\s+$", "", substring(x, loc)) # characters from loc onward, trimmed on both sides
86 | lets <- c("n", "r", "t", "f", "a")
87 | for (i in seq_len(length(lets))){
88 | y2 <- gsub(paste0("\\\\", lets[i]), paste0("\\", lets[i]), y2, fixed=TRUE) # literal replace: two backslashes + letter -> one backslash + letter
89 | }
90 | y1 <- paste(y1, collapse="") # regex lines are joined with nothing in between
91 | y2 <- paste(y2, collapse=" ") # comment lines are joined with single spaces
92 |
93 | list(regex = y1, comment = y2)
94 | })
95 |
96 | pieces4regexr <- lapply(pieces, function(x){ # variant of `pieces` used for the object's attributes
97 | x[[1]] <- gsub("\\\\", "\\", gsub("^\\s+", "", x[[1]]), fixed=TRUE) # strip leading whitespace, then literally replace doubled backslashes with single ones
98 | x
99 | })
100 |
101 | out <- x # the result is the input itself with classes and attributes added
102 |
103 | class(out) <- c("regexr", "reverse_construct", class(out))
104 | attributes(out)[["subs"]] <- stats::setNames(sapply(pieces4regexr, "[", 1), # first element (regex) of every piece
105 | names(pieces4regexr))
106 | attributes(out)[["comments"]] <- stats::setNames(sapply(pieces4regexr, "[", 2), # second element (comment) of every piece
107 | names(pieces4regexr))
108 |
109 | if (!comments.below) { # the width is only needed when comments sit behind the sub-expressions
110 | max.nchar.regex <- max(sapply(pieces, function(x) nchar(x[[1]])))
111 | }
112 |
113 | pieces4construct <- sapply(seq_along(pieces), function(i){ # one script line (or line pair) per piece
114 | x <- pieces[[i]]
115 | x[[1]] <- gsub("\"", "\\\\\"", x[[1]]) # put a backslash in front of every double quote
116 | x[[2]] <- gsub("\"", "\\\\\"", x[[2]])
117 | x[[1]] <- gsub(" ", " ", x[[1]]) # NOTE(review): pattern and replacement look identical here; whitespace inside these literals may have been collapsed in this listing -- confirm against the repository
118 | indent <- (nchar(x[[1]]) - nchar(gsub("^\\s+", "", x[[1]]))) # number of leading whitespace characters
119 | x[[1]] <- paste0(" ", x[[1]])
120 |
121 | if (isTRUE(names)) { # name part: `label` = , followed by a newline when names.above is TRUE
122 | if (isTRUE(names.above)) {
123 | thenames <- paste0(paste(rep(" ", indent), collapse=""), "`",
124 | names(pieces)[i], "` = \n")
125 | } else {
126 | thenames <- paste0(paste(rep(" ", indent), collapse=""), "`",
127 | names(pieces)[i], "` = ")
128 | }
129 | } else {
130 | thenames <- ""
131 | }
132 |
133 | if (isTRUE(names)) { # regex part: add the opening double quote
134 | if (isTRUE(names.above)) {
135 | theregexes <- gsub("(^\\s+)", "\\1\"", x[[1]]) # keep the leading whitespace and put the quote after it
136 | } else {
137 | theregexes <- gsub("^\\s+", "\\1\"", x[[1]]) # no capture group in the pattern, so \\1 presumably expands to nothing and the leading whitespace is replaced by the quote
138 | }
139 | } else {
140 | theregexes <- gsub("^\\s{4}", "", gsub("(^\\s+)", "\\1\"", x[[1]])) # quote after the leading whitespace, then remove four leading whitespace characters
141 | }
142 |
143 | if (isTRUE(comments)) { # comment part: closing quote, padding, then the %:)% operator and the quoted comment
144 | if (isTRUE(comments.below)) {
145 | thecomments <- paste0("\"\n", paste(rep(" ", indent + 8), # comment goes on its own line, padded with indent + 8 pad strings
146 | collapse=""), "%:)%\"", x[[2]], "\"")
147 | } else {
148 | thecomments <- paste0("\"",
149 | paste(rep(" ", indent + (max.nchar.regex - nchar(theregexes)) + 10), # padding grows as this regex gets shorter than the widest one
150 | collapse=""), "%:)% \"", x[[2]], "\"")
151 | }
152 | } else {
153 | thecomments <- ""
154 | }
155 | paste0(thenames, theregexes, thecomments)
156 | })
157 |
158 | reverse_construct <- paste0("construct(\n", # wrap the comma-separated pieces in a construct( ... ) call
159 | paste(pieces4construct, collapse=",\n"), "\n)\n")
160 | class(reverse_construct) <- c("reverse_construct", class(reverse_construct))
161 | attributes(out)[["reverse_construct"]] <- reverse_construct # the script travels with the returned object
162 |
163 | out
164 | }
165 |
166 |
167 | #' Prints a reverse_construct object
168 | #'
169 | #' Prints a reverse_construct object.
170 | #'
171 | #' @param x A \code{reverse_construct} object.
172 | #' @param file A connection, or a character string naming the file to print to.
173 | #' If "" (the default), \code{\link[base]{cat}} prints to the console unless
174 | #' redirected by \code{\link[base]{sink}}. Windows users may use
175 | #' \code{file = "clipboard"} to copy the content to the clipboard.
176 | #' @param \ldots Other arguments passed to \code{\link[base]{cat}}.
177 | #' @export
178 | #' @method print reverse_construct
print.reverse_construct <- function(x, file = "", ...) {
  # Emit the script text as-is through cat(); `file` and any further
  # arguments are handed straight to cat().
  cat(
    x,
    file = file,
    ...
  )
}
182 |
183 |
184 |
185 | #' Coerce default to regexr
186 | #'
187 | #' Convert a regular expression to a commented \code{regexr} object.
188 | #'
189 | #' default Method for as.regexr
190 | #' @param x The object to be coerced to \code{regexr}.
191 | #' @param names logical. Should names be included in the \code{construct}
192 | #' script?
193 | #' @param comments logical. Should comments be included in the \code{construct}
194 | #' script?
195 | #' @param names.above logical. Should names be included above the sub-expressions
196 | #' in the \code{construct} script? If \code{FALSE} names are placed in front of
197 | #' the sub-expressions.
198 | #' @param comments.below logical. Should comments be included below the
199 | #' sub-expressions in the \code{construct} script? If \code{FALSE} comments are
200 | #' placed behind the sub-expressions.
201 | #' @param \ldots Ignored.
202 | #' @export
203 | #' @method as.regexr default
204 | as.regexr.default <- as.regexr.character # the default method is the very same function as the character method
205 |
206 |
207 | #' Extract Script from \code{reverse_construct} to \code{construct} a
208 | #' \code{regexr} Object.
209 | #'
210 | #' Pulls the \code{reverse_construct} attribute from a \code{reverse_construct}.
211 | #' This script can be assigned to an object and run in the console to create a
212 | #' commented, named \code{regexr} object.
213 | #'
214 | #' @param x A \code{reverse_construct} object.
215 | #' @param file A connection, or a character string naming the file to print to.
216 | #' If "" (the default), \code{\link[base]{cat}} prints to the console unless
217 | #' redirected by \code{\link[base]{sink}}. Windows users may use
218 | #' \code{file = "clipboard"} to copy the content to the clipboard.
219 | #' @param \ldots Other arguments passed to \code{\link[regexr]{print.reverse_construct}}.
220 | #' @return Returns an auto-commented script used to \code{construct} a
221 | #' \code{regexr} object.
222 | #' @export
223 | #' @examples
224 | #' library("qdapRegex")
225 | #' (myregex <- grab("@@rm_time2"))
226 | #' out <- as.regexr(myregex)
227 | #'
228 | #' out
229 | #' summary(out)
230 | #' comments(out)
231 | #' subs(out)
232 | #' test(out)
233 | #' get_construct(out)
234 | #'
235 | #' \dontrun{
236 | #' ## On Windows copy to clipboard
237 | #' get_construct(out, file="clipboard")
238 | #' }
get_construct <- function(x, file = "", ...) {
  # S3 generic: dispatch on the class of `x`.
  UseMethod("get_construct")
}
242 |
243 | #' Extract Script from \code{reverse_construct} to \code{construct} a
244 | #' \code{regexr} Object.
245 | #'
246 | #' Pulls the \code{reverse_construct} attribute from a \code{reverse_construct}.
247 | #' This script can be assigned to an object and run in the console to create a
248 | #' commented, named \code{regexr} object.
249 | #'
250 | #' reverse_construct Method for get_construct
251 | #' @param x A \code{reverse_construct} object.
252 | #' @param file A connection, or a character string naming the file to print to.
253 | #' If "" (the default), \code{\link[base]{cat}} prints to the console unless
254 | #' redirected by \code{\link[base]{sink}}. Windows users may use
255 | #' \code{file = "clipboard"} to copy the content to the clipboard. To print
256 | #' as \code{character} use \code{file = NULL}.
257 | #' @param \ldots Other arguments passed to \code{\link[regexr]{print.reverse_construct}}.
258 | #' @return Returns an auto-commented script used to \code{construct} a
259 | #' \code{regexr} object.
260 | #' @export
261 | #' @method get_construct reverse_construct
get_construct.reverse_construct <- function(x, file = "", ...) {
  # exact = TRUE: only an attribute named exactly "reverse_construct" counts
  script <- attr(x, "reverse_construct", exact = TRUE)

  # file = NULL asks for the script as a plain character value, not printed
  if (is.null(file)) {
    return(as.character(script))
  }

  print(script, file = file, ...)
}
272 |
273 |
274 |
275 |
regex_break_down <- function(pattern){

  # Fetch a plain-text explanation of `pattern` from the explain.pl web
  # service and return its lines with HTML entities decoded and backslashes
  # doubled (except in front of n, r, t, f and a).
  #
  # pattern: the regular expression to explain, as a character string.
  # Returns a character vector that starts at the first line containing "NODE".
  # Stops with an error when the page cannot be read or has no "NODE" line.

  URL2 <- paste0("http://rick.measham.id.au/paste/explain.pl?regex=",
    utils::URLencode(pattern))

  ## URLencode() leaves reserved characters alone by default, so the ones
  ## that would break the query string are percent-encoded by hand
  chars <- c(";", "+", "&")
  reps <- c("%3B", "%2B", "%26")

  for (i in seq_along(reps)){
    URL2 <- gsub(chars[i], reps[i], URL2, fixed=TRUE)
  }

  lns <- try(suppressWarnings(readLines(URL2)), TRUE)

  ## test the class of the failure rather than the wording of its message
  if (inherits(lns, "try-error")) {
    stop("Could not parse `pattern`. Check your Internet connection.")
  }

  start <- grep("NODE", lns)
  if (length(start) == 0L) {
    stop("Could not parse `pattern`. The response contained no explanation table.")
  }

  ## keep everything from the first "NODE" line on, minus the last two lines
  lns <- lns[start[1L]:(length(lns) - 2)]

  ## decode the HTML entities for the double quote, "<" and ">"; the entity
  ## names are assembled from parts so that HTML-aware tooling rendering this
  ## source cannot turn them into the characters they stand for
  amp <- "&"
  entities <- c(quot = "\"", lt = "<", gt = ">")
  for (nm in names(entities)) {
    lns <- gsub(paste0(amp, nm, ";"), entities[[nm]], lns, fixed=TRUE)
  }

  ## double every backslash, then undo the doubling in front of n, r, t, f, a
  lns <- gsub("\\", "\\\\", lns, fixed=TRUE)
  lets <- c("n", "r", "t", "f", "a")
  for (i in seq_along(lets)){
    lns <- gsub(paste0("\\\\", lets[i]), paste0("\\", lets[i]), lns, fixed=TRUE)
  }

  ## NOTE(review): as written this replaces the empty match at end-of-string
  ## with nothing, i.e. it changes nothing; the intended pattern may have been
  ## lost -- confirm against the repository
  lns[length(lns)] <- gsub("$", "", lns[length(lns)])

  lns
}
306 |
--------------------------------------------------------------------------------
/R/comments.R:
--------------------------------------------------------------------------------
1 | #' Get/Set Comments From a regexr Object
2 | #'
3 | #' \code{comments} - Get the \code{comments} from a \code{regexr} object.
4 | #'
5 | #' @param x A regexr object.
6 | #' @param value The comment(s) to assign.
7 | #' @param \ldots Ignored.
8 | #' @rdname comments
9 | #' @export
10 | #' @return \code{comments} - Returns a list of comments.
11 | #' @examples
12 | #' minimal <- construct("a", "b", "c" %:)% "Comment #3")
13 | #' minimal
14 | #' comments(minimal)
15 | #' comments(minimal)[2] <- "A comment"
16 | #' comments(minimal)
17 | #'
18 | #' minimal <- construct("a", "b", "c")
19 | #' out <- set_comments(minimal, paste("comment", 1:3))
20 | #' comments(out)
comments <- function(x, ...) {
  # S3 generic: dispatch on the class of `x`.
  UseMethod("comments")
}
24 |
25 | #' Comments of a regexr Object
26 | #'
27 | #' \code{comments<-} - Set the \code{comments} of a \code{regexr} object.
28 | #'
29 | #' @rdname comments
30 | #' @export
`comments<-` <- function(x, value) {
  # S3 replacement generic: dispatch on the class of `x`.
  UseMethod("comments<-")
}
34 |
35 |
36 | #' Set the Comments in a \code{regexr} Object
37 | #'
38 | #' \code{set_comments} - This is a convenience function that sets the
39 | #' \code{\link[regexr]{comments}} on a \code{regexr} object and returns the
40 | #' object.
41 | #'
42 | #' @param y The comments to assign.
43 | #' @return \code{set_comments} - Returns a \code{regexr} object.
44 | #' @export
45 | #' @rdname comments
set_comments <- function(x, y) {
  # Functional form of `comments(x) <- y`: call the replacement function
  # directly and return the updated object it produces.
  `comments<-`(x, value = y)
}
50 |
51 |
--------------------------------------------------------------------------------
/R/construct.R:
--------------------------------------------------------------------------------
1 | #' Construct Human Readable Regular Expressions
2 | #'
3 | #' This function is used to construct human readable regular expressions from
4 | #' sub-expressions. The user may provide additional meta information about each
5 | #' sub-expression. This meta information is an optional name and comment for the
6 | #' sub-expressions. This allows one to write regular expressions in a fashion
7 | #' similar to writing code, that is the regular expression is written top to
8 | #' bottom, the syntax is broken up into manageable chunks, the sub-expressions
9 | #' can be indented to give structural insight such as nested groups. Finally,
10 | #' sub-expressions can be commented to provide linguistic grounding for more
11 | #' complex sub-expressions.
12 | #'
13 | #' @param \ldots A series of comma separated character strings (sub-expressions)
14 | #' that may optionally be named, commented (see \code{?`\%:)\%`}, and indented.
15 | #' @return Returns a character vector of the class \code{regexr}. The attributes
16 | #' of the returned object retain the original name and comment properties.
17 | #' @keywords regex
18 | #' @export
19 | #' @examples
20 | #' ## Minimal Example
21 | #' minimal <- construct("a", "b", "c")
22 | #' minimal
23 | #' unglue(minimal)
24 | #' comments(minimal)
25 | #' subs(minimal)
26 | #' test(minimal)
27 | #' summary(minimal)
28 | #'
29 | #' ## Example 1
30 | #' m <- construct(
31 | #' space = "\\s+" %:)% "I see",
32 | #' simp = "(?<=(foo))",
33 | #' or = "(;|:)\\s*" %:)% "comment on what this does",
34 | #' is_then = "[ia]s th[ae]n"
35 | #' )
36 | #'
37 | #' m
38 | #' unglue(m)
39 | #' summary(m)
40 | #' subs(m)
41 | #' comments(m)
42 | #' subs(m)[4] <- "(FO{2})|(BAR)"
43 | #' summary(m)
44 | #' test(m)
45 | #' \dontrun{
46 | #' subs(m)[5:7] <- c("(", "([A-Z]|(\\d{5})", ")")
47 | #' test(m)
48 | #' }
49 | #'
50 | #' library(qdapRegex)
51 | #' explain(m)
52 | #'
53 | #' ## Example 2 (Twitter Handle 2 ways)
54 | #' ## Bigger Sub-expressions
55 | #' twitter <- construct(
56 | #' no_at_wrd = "(? 0){
72 | null <- structure(list(NULL), .Names = "")
73 | attributes(x)[["subs"]] <- unlist(list(attributes(x)[["subs"]],
74 | rep(null, dif)), recursive=FALSE)
75 | }
76 | x
77 | }
78 |
79 | #' Get Sub-expressions From a regexr Object
80 | #'
81 | #' \code{subs} - Get the sub-expressions from a \code{regexr} object.
82 | #'
83 | #' regexr Method for subs
84 | #' @param x The \code{regexr} object.
85 | #' @param \ldots Ignored.
86 | #' @export
87 | #' @method subs regexr
subs.regexr <- function(x, ...){
    # The sub-expressions are stored on the object as its "subs" attribute;
    # exact matching avoids picking up a similarly named attribute.
    attr(x, "subs", exact = TRUE)
}
91 |
#' Set Regex Sub-expressions From a regexr Object
#'
#' \code{subs<-} - Set the sub-expressions of a \code{regexr} object.
#'
#' regexr Method for subs<-
#'
#' The concatenated expression held by the object is rebuilt from the new
#' sub-expressions.  When more sub-expressions than comments are present the
#' comments are padded with empty (\code{NULL}) entries.
#' @param x The \code{regexr} object.
#' @param value The sub-expression(s) to assign.
#' @export
#' @method subs<- regexr
`subs<-.regexr` <- function(x, value){

    attr(x, "subs") <- value

    # Number of sub-expressions that do not yet have a comment slot
    n_subs <- length(attr(x, "subs", exact = TRUE))
    n_missing <- n_subs - length(attr(x, "comments", exact = TRUE))

    if (n_missing > 0){
        # Pad the comments with unnamed NULL entries, one per extra
        # sub-expression, so both attributes keep the same length
        null <- structure(list(NULL), .Names = "")
        attr(x, "comments") <- unlist(
            list(attr(x, "comments", exact = TRUE), rep(null, n_missing)),
            recursive = FALSE
        )
    }

    # Rebuild the concatenated regular expression from its parts
    x[[1]] <- paste(unlist(attr(x, "subs", exact = TRUE)), collapse = "")
    x
}
114 |
115 |
116 |
117 |
118 | #' Get Names of Sub-Expressions of a regexr Object
119 | #'
120 | #' Get names of a \code{regexr} object.
121 | #'
122 | #' @param x The \code{regexr} object.
123 | #' @param \ldots Ignored.
124 | #' @export
125 | #' @method names regexr
names.regexr <- function(x, ...){

    # A regexr object's names are the names of its stored sub-expressions
    pieces <- attr(x, "subs", exact = TRUE)
    names(pieces)

}
131 |
#' Set Names of the Sub-expressions of a regexr Object
#'
#' Set names of a \code{regexr} object's sub-expressions.  The same names are
#' applied to the object's comments so the two stay aligned.
#'
#' @param x The \code{regexr} object.
#' @param value The name(s) to assign.
#' @export
#' @method names<- regexr
`names<-.regexr` <- function(x, value){

    # The sub-expressions and the comments receive identical treatment
    for (field in c("subs", "comments")) {

        item <- attr(x, field, exact = TRUE)
        was_unnamed <- is.null(names(item))

        names(item) <- value

        # A previously unnamed element that received too few names is padded
        # with NA by `names<-`; turn those into empty names instead
        if (was_unnamed) {
            names(item)[is.na(names(item))] <- ""
        }

        attr(x, field) <- item
    }

    x

}
157 |
158 | #' Summarize a regexr Object
159 | #'
160 | #' Summarize a \code{regexr} object.
161 | #'
162 | #' @param object The \code{regexr} object
163 | #' @param \ldots Ignored.
164 | #' @method summary regexr
165 | #' @export
summary.regexr <- function(object, ...){

    remarks <- attr(object, "comments", exact = TRUE)
    pieces <- attr(object, "subs", exact = TRUE)

    if (length(remarks) != length(pieces)) {
        warning("Mismatch in number of subs and comments; items recycled\n",
            "Consider using `comments` and/or `subs` to update the regexr object")
    }

    # Pair each comment with its sub-expression; recycling warnings are
    # silenced because the mismatch has already been reported above
    pair_up <- function(x, y) list(comment = x, subs = y)
    paired <- suppressWarnings(Map(pair_up, remarks, pieces))

    # The concatenated expression travels along as the "subs" attribute
    structure(paired, class = "summary_regexr", subs = as.character(object))
}
181 |
182 |
183 | #' Prints a summary_regexr object
184 | #'
185 | #' Prints a summary_regexr object.
186 | #'
187 | #' @param x The summary_regexr object.
188 | #' @param \ldots Ignored.
189 | #' @export
190 | #' @method print summary_regexr
print.summary_regexr <- function(x, ...){

    # Keep the untouched input so it can be returned (invisibly), as is
    # conventional for print methods
    original <- x

    # Work on a plain list from here on
    class(x) <- "list"

    # Header: the value of the "subs" attribute underlined with "="
    reg <- attributes(x)[["subs"]]
    cat("\n", reg, "\n",
        paste(rep("=", nchar(reg)), collapse=""), "\n"
    )

    # Make sure a names vector exists so names(x)[i] is defined below
    x <- namer(x)

    for (i in seq_along(x)) {
        element <- sprintf("SUB-EXPR %s: ", i)
        len <- nchar(element)
        message(element, x[[i]][["subs"]])
        # The padding lines the colons up with the SUB-EXPR label
        message(sprintf("NAME%s: ", paste(rep(" ", len - 6), collapse="")), names(x)[i])
        # A NULL comment formats to character(0), so nothing follows the colon
        message(sprintf("COMMENT%s: ", paste(rep(" ", len - 9), collapse="")),
            sprintf("\"%s\"", x[[i]][["comment"]]), "\n")
    }

    invisible(original)
}
211 |
#' Test Regular Expression Validity
#'
#' Test regular expression validity of a \code{regexr} object.
#'
#' regexr Method for test
#' @param x The \code{regexr} object.
#' @param quiet logical.  If \code{TRUE} the warnings about an invalid
#' concatenated expression and invalid individual sub-expressions are
#' suppressed.
#' @param \ldots Ignored.
#' @export
#' @method test regexr
test.regexr <- function(x, quiet = FALSE, ...){

    verbose <- !isTRUE(quiet)

    # Validity of the concatenated expression
    out1 <- is.regex(x)
    if (verbose && !out1){
        warning("The concatenated regex is not valid\n\n", as.character(x), "\n")
    }

    # Validity of each sub-expression on its own.  vapply always yields a
    # logical vector, so an object without sub-expressions gives logical(0)
    # rather than an empty list that cannot be negated below.
    out2 <- vapply(subs(x), is.regex, logical(1))
    if (verbose && any(!out2)){
        warning("The following regex sub-expressions are not valid in isolation:\n\n",
            paste(paste0("(", seq_len(sum(!out2)), ") ",
                as.character(unlist(subs(x)))[!out2]), collapse="\n")
        )
    }

    list(regex = out1, subexpressions = out2)
}
238 |
239 |
240 |
--------------------------------------------------------------------------------
/R/regexr-package.R:
--------------------------------------------------------------------------------
1 | #' regexr: Tools for Human Readable Regular Expressions
2 | #'
3 | #' \pkg{regexr} is an R framework for constructing and managing human readable
4 | #' regular expressions. It aims to provide tools that enable the user to write
5 | #' regular expressions in a way that is similar to the ways R code is written.
6 | #' The tools allow the user to:
7 | #' \enumerate{
8 | #' \item Write in smaller, modular, named, \emph{sub-expressions}
9 | #' \item Write top to bottom, rather than a single string
10 | #' \item Comment individual \emph{sub-expressions}
11 | #' \item Indent expressions to represent regular expression groups
12 | #' \item Add vertical line spaces and R comments (i.e., \code{#})
13 | #' \item Test the validity of the \emph{concatenated expression} and the modular sub-expressions
14 | #' }
15 | #' This framework harnesses the power and flexibility of regular expressions
16 | #' but provides a structural frame that is more consistent with both code
17 | #' writing and natural language conventions.
18 | #'
19 | #' @docType package
20 | #' @name regexr
21 | #' @aliases regexr package-regexr
22 | NULL
23 |
24 |
--------------------------------------------------------------------------------
/R/set_names.R:
--------------------------------------------------------------------------------
1 | #' Set the Names in a \code{regexr} Object
2 | #'
3 | #' This is a convenience function that sets the names on a \code{regexr} object
4 | #' and returns the object. This function works the same as
5 | #' \code{\link[stats]{setNames}} but provides a naming which is consistent with
6 | #' \code{set_regex} and \code{set_comments}.
7 | #'
8 | #' @param x The \code{regexr} object.
9 | #' @param y The names to assign.
10 | #' @return Returns a \code{regexr} object.
11 | #' @export
12 | #' @seealso \code{\link[stats]{setNames}}
13 | #' @examples
14 | #' minimal <- construct("a", "b", "c")
15 | #' out <- set_names(minimal, 1:3)
16 | #' names(out)
set_names <- function(x, y){
    # Rename a copy and hand it back so the call can be used inline
    renamed <- x
    names(renamed) <- y
    renamed
}
21 |
--------------------------------------------------------------------------------
/R/subs.R:
--------------------------------------------------------------------------------
#' Get/Set Regex Sub-expressions From a regexr Object
#'
#' \code{subs} - Get the sub-expressions from a \code{regexr} object.
#'
#' @param x A regexr object.
#' @param value The sub-expression(s) to assign.
#' @param \ldots Ignored.
#' @rdname subs
#' @export
#' @return \code{subs} - Returns a list of sub-expressions.
#' @examples
#' minimal <- construct("a", "b", "c")
#' minimal
#' subs(minimal)
#' subs(minimal)[2] <- "\\s+[A-Z]|[0-9]"
#' subs(minimal)
#'
#' minimal <- construct("a", "b", "c")
#' out <- set_subs(minimal, c("(", "\\s??", ")"))
#' subs(out)
subs <- function (x, ...){
    # S3 generic: the work is done by the method for the class of `x`
    UseMethod("subs")
}
24 |
25 | #' Set Regex Sub-expressions From a regexr Object
26 | #'
27 | #' \code{subs<-} - Set the sub-expression(s) of a \code{regexr} object.
28 | #'
29 | #' @rdname subs
30 | #' @export
`subs<-` <- function(x, value){
    # S3 replacement generic; dispatch happens on the class of `x`
    UseMethod("subs<-", x)
}
34 |
35 |
36 | #' Set the Sub-expressions in a \code{regexr} Object
37 | #'
38 | #' \code{set_subs} - This is a convenience function that sets the
39 | #' \code{\link[regexr]{subs}} on a \code{regexr} object and returns the object.
40 | #'
41 | #' @param y The sub-expressions to assign.
42 | #' @return \code{set_subs} - Returns a \code{regexr} object.
43 | #' @export
44 | #' @rdname subs
set_subs <- function (x, y) {
    # Call the replacement generic directly and return the updated object
    `subs<-`(x, value = y)
}
49 |
50 |
--------------------------------------------------------------------------------
/R/test.R:
--------------------------------------------------------------------------------
1 | #' Test Regular Expression Validity
2 | #'
3 | #' Test regular expression validity of a \code{regexr} object.
4 | #'
5 | #' @param x A \code{regexr} object.
6 | #' @param quiet logical. If \code{TRUE}, \code{test} suppresses the warnings about the
7 | #' concatenated expression and individual sub-expressions.
8 | #' @param \ldots Ignored.
9 | #' @export
10 | #' @return Returns a list of two logical vectors. The first vector is a test of
11 | #' the concatenated expression. The second vector is a logical test of the
12 | #' validity of each sub-expression that makes up the concatenated
13 | #' expression.
14 | #' @examples
15 | #' m <- construct(
16 | #' space =
17 | #' "\\s+"
18 | #' %:)%"I see",
19 | #'
20 | #' simp =
21 | #' "(?<=(foo))",
22 | #'
23 | #' or =
24 | #' "(;|:)\\s*"
25 | #' %:)%"comment on what this does",
26 | #'
27 | #' "[a]s th[atey]"
28 | #' )
29 | #'
30 | #'
31 | #' test(m)
32 | #' \dontrun{
33 | #' subs(m)[5:7] <- c("(", "([A-Z]|(\\d{5})", ")")
34 | #' test(m)
35 | #' }
test <- function(x, quiet, ...){
    # S3 generic; the method is selected by the class of `x`
    UseMethod("test", x)
}
39 |
40 |
--------------------------------------------------------------------------------
/R/unglue.R:
--------------------------------------------------------------------------------
1 | #' Get Sub-expressions from \code{regexr} Object
2 | #'
3 | #' Get sub-expressions from \code{regexr} object.
4 | #'
5 | #' @param x A \code{regexr} object.
6 | #' @param \ldots Ignored.
7 | #' @export
8 | #' @return Returns a list of regular expression chunks.
9 | #' @examples
10 | #' minimal <- construct("a", "b", "c")
11 | #' minimal
12 | #' unglue(minimal)
unglue <- function (x, ...){
    # S3 generic; the method is selected by the class of `x`
    UseMethod("unglue", x)
}
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
# Ensure `x` carries a names vector: an object without names gets one empty
# name per element, an already named object is returned untouched.
namer <- function(x, ...){
    if (!is.null(names(x))) return(x)
    names(x) <- character(length(x))
    x
}
5 |
6 |
# Return the value of the "comment" attribute of `x` (NULL when absent).
get_comment <- function(x, ...) {
    attr(x, "comment", exact = TRUE)
}
10 |
11 |
# Report whether `pattern` can be used as a (perl-style) regular expression:
# the pattern is tried in a throw-away substitution and counts as valid
# unless that attempt raises an error.
is.regex <- function (pattern) {
    attempt <- suppressWarnings(
        try(gsub(pattern, "", "hello", perl = TRUE), silent = TRUE)
    )
    !inherits(attempt, "try-error")
}
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | # regexr
2 |
3 | ```{r, echo=FALSE}
4 | desc <- suppressWarnings(readLines("DESCRIPTION"))
5 | regex <- "(^Version:\\s+)(\\d+\\.\\d+\\.\\d+)"
6 | loc <- grep(regex, desc)
7 | ver <- gsub(regex, "\\2", desc[loc])
8 | verbad <- sprintf('
', ver, ver)
9 | ````
10 | [](https://travis-ci.org/trinker/regexr)
11 | [](https://coveralls.io/r/trinker/regexr)
12 | [](http://dx.doi.org/10.5281/zenodo.13496)
13 | `r verbad`
14 |
15 |
16 |
17 | > One of the most powerful tools in writing maintainable code is break large methods into well-named smaller methods - a technique Kent Beck refers to as the Composed Method pattern.
18 |
19 | [-Martin Fowler-](http://martinfowler.com/bliki/ComposedRegex.html)
20 |
21 | [regexr](http://trinker.github.com/regexr_dev) is an R framework for constructing and managing human readable regular expressions. It aims to provide tools that enable the user to write regular expressions in a way that is similar to the ways R code is written. The tools allow the user to:
22 |
23 | 1. Write in smaller, modular, named, *sub-expressions*
24 | 2. Write top to bottom, rather than a single string
25 | 3. Comment individual chunks
26 | 4. Indent expressions to represent regular expression groups
27 | 5. Add vertical line spaces and R comments (i.e., `#`)
28 | 6. Test the validity of the *concatenated expression* and the modular *sub-expressions*
29 |
30 | This framework harnesses the power and flexibility of regular expressions but provides a structural frame that is more consistent with both code writing and natural language conventions. The user decides how to break, indent, name, and comment the sub-expressions in a way that is human readable, meaningful, and modular.
31 |
32 |
33 | ## Installation
34 |
35 | To download the development version of regexr:
36 |
37 | Download the [zip ball](https://github.com/trinker/regexr/zipball/master) or [tar ball](https://github.com/trinker/regexr/tarball/master), decompress and run `R CMD INSTALL` on it, or use the **pacman** package to install the development version:
38 |
39 | ```r
40 | if (!require("pacman")) install.packages("pacman")
41 | pacman::p_load_gh("trinker/regexr")
42 | ```
43 |
44 | ## Help
45 |
46 | - [Web Page](http://trinker.github.com/regexr/)
47 | - [Package PDF Help Manual](https://dl.dropboxusercontent.com/u/61803503/regexr.pdf)
48 |
49 | ## Contact
50 |
51 | You are welcome to:
52 | * submit suggestions and bug-reports at:
53 | * send a pull request on:
54 | * compose a friendly e-mail to:
55 |
56 | ## Functions
57 |
58 | ```{r, results='asis', warning=FALSE, echo=FALSE}
59 | library(regexr)
60 | thefuns <- readLines("inst/functions_table/functions.R")
61 | cat(paste(thefuns, collapse="\n"))
62 | ```
63 |
64 | ## Examples
65 |
66 | ```{r, echo=FALSE}
67 | library(regexr)
68 | ```
69 |
70 | ### Constructing a Regular Expression
71 |
72 | The `construct` function creates an object of the class `regexr`. This is a character string with meta expression information (i.e., sub-expressions with corresponding names and comments) contained in the object's attributes.
73 |
74 |
75 | The `%:)%` binary operator allows the user to optionally add comments to the sub-expressions. The `%:)%`, containing a smiley face emoticon, is used here because commented code/sub-expressions is happy code☺.
76 |
77 |
78 | ```{r}
79 | m <- construct(
80 | space = "\\s+" %:)% "I see",
81 | simp = "(?<=(foo))",
82 | or = "(;|:)\\s*" %:)% "comment on what this does",
83 | is_then = "[ia]s th[ae]n"
84 | )
85 | m
86 | ```
87 |
88 | To see a larger script of a regular expression managed by **regexr** for the **qdapRegex** package [CLICK HERE](https://raw.githubusercontent.com/trinker/qdapRegex/master/inst/regex_scripts/rm_citation2.R).
89 |
90 | ### Viewing the `regexr` Object
91 |
92 | The generic `summary` function provides an integrated view of the sub-expressions with corresponding comments and names which make up the *concatenated expression*.
93 |
94 | ```{r}
95 | summary(m)
96 | ```
97 |
98 | ### Split `regexr` Object
99 |
100 | The `unglue` function splits the concatenated `regexr` expression into sub-expressions.
101 |
102 | ```{r}
103 | unglue(m)
104 | ```
105 |
106 | ### Get/Set Sub-Expressions, Comments, and Names of the Sub-Expressions of a `regexr` Object.
107 |
108 | The `subs`, `comments`, and `names` functions allow the user to view and alter the sub-expressions, comments, and names of the sub-expressions from a `regexr` object.
109 |
110 |
111 | ```{r}
112 | subs(m)
113 | comments(m)
114 | names(m)
115 | subs(m)[4] <- "(FO{2})|(BAR)"
116 | comments(m)[4] <- "Look for FOO or BAR"
117 | names(m)[4] <- "foo_bar"
118 | summary(m)
119 | ```
120 |
121 | ### Testing Regular Expressions
122 |
123 | The `test` function allows the user to check if the concatenated `regexr` expression and sub-expressions are valid regular expressions.
124 |
125 | ```{r}
126 | test(m)
127 | subs(m)[5:7] <- c("(", "([A-Z]|(\\d{5})", ")")
128 | test(m)
129 | ```
130 |
131 | ### Existing Regular Expression to `regexr`: Reverse Construction
132 |
133 | `as.regexr` allows the user to construct `regexr` objects from a regular expression and in the process generate an auto-commented & named sub-expressions `construct` script.
134 |
135 | ```{r}
136 | library("qdapRegex")
137 | (myregex <- grab("@rm_time"))
138 | out <- as.regexr(myregex)
139 | summary(out)
140 | ```
141 |
142 |
143 | We can use `get_construct` to extract an auto-commented & named `construct` script that can be optionally altered and used to `construct` a `regexr` object.
144 |
145 | ```{r, comment=NA}
146 | get_construct(out)
147 | ```
148 |
149 | Some may prefer that the `construct` script contains no names and/or comments. The user may also wish to place comments indented below the *sub-expressions* or names outdented and above the *sub-expressions*.
150 |
151 | ```{r, comment=NA}
152 | myregex2 <- "(\\s*[a-z]+)([^)]+\\))"
153 | get_construct(as.regexr(myregex2, comments.below=TRUE, names.above = TRUE))
154 | get_construct(as.regexr(myregex2, names = FALSE))
155 | ```
156 |
157 | ## Using regexr With the rebus Package
158 |
159 | Richard Cotton maintains the [`rebus`](https://github.com/richierocks/rebus) package to provide natural language based functions and constants that can be used to generate regular expressions. His work can be utilized within the **regexr** framework to maintain manageable commented and named *sub-expressions*.
160 |
161 | ```r
162 | install.packages("richierocks/rebus")
163 | library(rebus)
164 |
165 | out <- construct(
166 | year = YEAR %:)% "a year",
167 | or = "|" %:)% "or",
168 | min = ":" %c% MINUTE %:)% "colon followed by valid minutes"
169 | )
170 | ```
171 |
172 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # regexr
2 |
3 |
4 | [](https://travis-ci.org/trinker/regexr)
5 | [](https://coveralls.io/r/trinker/regexr)
6 | [](http://dx.doi.org/10.5281/zenodo.13496)
7 |
8 |
9 |
10 |
11 | > One of the most powerful tools in writing maintainable code is break large methods into well-named smaller methods - a technique Kent Beck refers to as the Composed Method pattern.
12 |
13 | [-Martin Fowler-](http://martinfowler.com/bliki/ComposedRegex.html)
14 |
15 | [regexr](http://trinker.github.com/regexr_dev) is an R framework for constructing and managing human readable regular expressions. It aims to provide tools that enable the user to write regular expressions in a way that is similar to the ways R code is written. The tools allow the user to:
16 |
17 | 1. Write in smaller, modular, named, *sub-expressions*
18 | 2. Write top to bottom, rather than a single string
19 | 3. Comment individual chunks
20 | 4. Indent expressions to represent regular expression groups
21 | 5. Add vertical line spaces and R comments (i.e., `#`)
22 | 6. Test the validity of the *concatenated expression* and the modular *sub-expressions*
23 |
24 | This framework harnesses the power and flexibility of regular expressions but provides a structural frame that is more consistent with both code writing and natural language conventions. The user decides how to break, indent, name, and comment the sub-expressions in a way that is human readable, meaningful, and modular.
25 |
26 |
27 | ## Installation
28 |
29 | To download the development version of regexr:
30 |
31 | Download the [zip ball](https://github.com/trinker/regexr/zipball/master) or [tar ball](https://github.com/trinker/regexr/tarball/master), decompress and run `R CMD INSTALL` on it, or use the **pacman** package to install the development version:
32 |
33 | ```r
34 | if (!require("pacman")) install.packages("pacman")
35 | pacman::p_load_gh("trinker/regexr")
36 | ```
37 |
38 | ## Help
39 |
40 | - [Web Page](http://trinker.github.com/regexr/)
41 | - [Package PDF Help Manual](https://dl.dropboxusercontent.com/u/61803503/regexr.pdf)
42 |
43 | ## Contact
44 |
45 | You are welcome to:
46 | * submit suggestions and bug-reports at:
47 | * send a pull request on:
48 | * compose a friendly e-mail to:
49 |
50 | ## Functions
51 |
52 | | Function | Description |
53 | |----------------------|----------------------|
54 | | `construct` | Write Human Readable *Concatenated Regex* |
55 | | `%:)%` | Add Comments to a Sub-expressions Within `construct` |
56 | | `unglue` | Break *Concatenated Regex* Into *Sub-expressions* |
57 | | `test` | Test Validity of *Concatenated Regex* & *Sub-expressions* |
58 | | `subs` | Get/Set Sub-expressions in a `regexr` Object |
59 | | `comments` | Get/Set Comments of Sub-expressions in a `regexr` Object |
60 | | `names` | Get/Set Names of Sub-expressions in a `regexr` Object |
61 | | `as.regexr`| Coerce Existing Regular Expressions to `regexr` Object |
62 |
63 | ## Examples
64 |
65 |
66 |
67 | ### Constructing a Regular Expression
68 |
69 | The `construct` function creates an object of the class `regexr`. This is a character string with meta expression information (i.e., sub-expressions with corresponding names and comments) contained in the object's attributes.
70 |
71 |
72 | The `%:)%` binary operator allows the user to optionally add comments to the sub-expressions. The `%:)%`, containing a smiley face emoticon, is used here because commented code/sub-expressions is happy code☺.
73 |
74 |
75 |
76 | ```r
77 | m <- construct(
78 | space = "\\s+" %:)% "I see",
79 | simp = "(?<=(foo))",
80 | or = "(;|:)\\s*" %:)% "comment on what this does",
81 | is_then = "[ia]s th[ae]n"
82 | )
83 | m
84 | ```
85 |
86 | ```
87 | ## [1] "\\s+(?<=(foo))(;|:)\\s*[ia]s th[ae]n"
88 | ```
89 |
90 | To see a larger script of a regular expression managed by **regexr** for the **qdapRegex** package [CLICK HERE](https://raw.githubusercontent.com/trinker/qdapRegex/master/inst/regex_scripts/rm_citation2.R).
91 |
92 | ### Viewing the `regexr` Object
93 |
94 | The generic `summary` function provides an integrated view of the sub-expressions with corresponding comments and names which make up the *concatenated expression*.
95 |
96 |
97 | ```r
98 | summary(m)
99 | ```
100 |
101 | ```
102 | ##
103 | ## \s+(?<=(foo))(;|:)\s*[ia]s th[ae]n
104 | ## ==================================
105 | ```
106 |
107 | ```
108 | ## SUB-EXPR 1: \s+
109 | ## NAME : space
110 | ## COMMENT : "I see"
111 | ##
112 | ## SUB-EXPR 2: (?<=(foo))
113 | ## NAME : simp
114 | ## COMMENT :
115 | ##
116 | ## SUB-EXPR 3: (;|:)\s*
117 | ## NAME : or
118 | ## COMMENT : "comment on what this does"
119 | ##
120 | ## SUB-EXPR 4: [ia]s th[ae]n
121 | ## NAME : is_then
122 | ## COMMENT :
123 | ```
124 |
125 | ### Split `regexr` Object
126 |
127 | The `unglue` function splits the concatenated `regexr` expression into sub-expressions.
128 |
129 |
130 | ```r
131 | unglue(m)
132 | ```
133 |
134 | ```
135 | ## $space
136 | ## [1] "\\s+"
137 | ##
138 | ## $simp
139 | ## [1] "(?<=(foo))"
140 | ##
141 | ## $or
142 | ## [1] "(;|:)\\s*"
143 | ##
144 | ## $is_then
145 | ## [1] "[ia]s th[ae]n"
146 | ```
147 |
148 | ### Get/Set Sub-Expressions, Comments, and Names of the Sub-Expressions of a `regexr` Object.
149 |
150 | The `subs`, `comments`, and `names` functions allow the user to view and alter the sub-expressions, comments, and names of the sub-expressions from a `regexr` object.
151 |
152 |
153 |
154 | ```r
155 | subs(m)
156 | ```
157 |
158 | ```
159 | ## $space
160 | ## [1] "\\s+"
161 | ##
162 | ## $simp
163 | ## [1] "(?<=(foo))"
164 | ##
165 | ## $or
166 | ## [1] "(;|:)\\s*"
167 | ##
168 | ## $is_then
169 | ## [1] "[ia]s th[ae]n"
170 | ```
171 |
172 | ```r
173 | comments(m)
174 | ```
175 |
176 | ```
177 | ## $space
178 | ## [1] "I see"
179 | ##
180 | ## $simp
181 | ## NULL
182 | ##
183 | ## $or
184 | ## [1] "comment on what this does"
185 | ##
186 | ## $is_then
187 | ## NULL
188 | ```
189 |
190 | ```r
191 | names(m)
192 | ```
193 |
194 | ```
195 | ## [1] "space" "simp" "or" "is_then"
196 | ```
197 |
198 | ```r
199 | subs(m)[4] <- "(FO{2})|(BAR)"
200 | comments(m)[4] <- "Look for FOO or BAR"
201 | names(m)[4] <- "foo_bar"
202 | summary(m)
203 | ```
204 |
205 | ```
206 | ##
207 | ## \s+(?<=(foo))(;|:)\s*(FO{2})|(BAR)
208 | ## ==================================
209 | ```
210 |
211 | ```
212 | ## SUB-EXPR 1: \s+
213 | ## NAME : space
214 | ## COMMENT : "I see"
215 | ##
216 | ## SUB-EXPR 2: (?<=(foo))
217 | ## NAME : simp
218 | ## COMMENT :
219 | ##
220 | ## SUB-EXPR 3: (;|:)\s*
221 | ## NAME : or
222 | ## COMMENT : "comment on what this does"
223 | ##
224 | ## SUB-EXPR 4: (FO{2})|(BAR)
225 | ## NAME : foo_bar
226 | ## COMMENT : "Look for FOO or BAR"
227 | ```
228 |
229 | ### Testing Regular Expressions
230 |
231 | The `test` function allows the user to check if the concatenated `regexr` expression and sub-expressions are valid regular expressions.
232 |
233 |
234 | ```r
235 | test(m)
236 | ```
237 |
238 | ```
239 | ## $regex
240 | ## [1] TRUE
241 | ##
242 | ## $subexpressions
243 | ## space simp or foo_bar
244 | ## TRUE TRUE TRUE TRUE
245 | ```
246 |
247 | ```r
248 | subs(m)[5:7] <- c("(", "([A-Z]|(\\d{5})", ")")
249 | test(m)
250 | ```
251 |
252 | ```
253 | ## Warning in test.regexr(m): The concatenated regex is not valid
254 | ##
255 | ## \s+(?<=(foo))(;|:)\s*(FO{2})|(BAR)(([A-Z]|(\d{5}))
256 | ```
257 |
258 | ```
259 | ## Warning in test.regexr(m): The following regex sub-expressions are not valid in isolation:
260 | ##
261 | ## (1) (
262 | ## (2) ([A-Z]|(\d{5})
263 | ## (3) )
264 | ```
265 |
266 | ```
267 | ## $regex
268 | ## [1] FALSE
269 | ##
270 | ## $subexpressions
271 | ## space simp or foo_bar
272 | ## TRUE TRUE TRUE TRUE FALSE FALSE FALSE
273 | ```
274 |
275 | ### Existing Regular Expression to `regexr`: Reverse Construction
276 |
277 | `as.regexr` allows the user to construct `regexr` objects from a regular expression and in the process generate an auto-commented & named sub-expressions `construct` script.
278 |
279 |
280 | ```r
281 | library("qdapRegex")
282 | (myregex <- grab("@rm_time"))
283 | ```
284 |
285 | ```
286 | ## [1] "\\d{0,2}:\\d{2}(?:[:.]\\d+)?"
287 | ```
288 |
289 | ```r
290 | out <- as.regexr(myregex)
291 | summary(out)
292 | ```
293 |
294 | ```
295 | ##
296 | ## \d{0,2}:\d{2}(?:[:.]\d+)?
297 | ## =========================
298 | ```
299 |
300 | ```
301 | ## SUB-EXPR 1: \d{0,2}
302 | ## NAME : 1
303 | ## COMMENT : "digits (0-9) (between 0 and 2 times (matching the most amount possible))"
304 | ##
305 | ## SUB-EXPR 2: :
306 | ## NAME : 2
307 | ## COMMENT : "':'"
308 | ##
309 | ## SUB-EXPR 3: \d{2}
310 | ## NAME : 3
311 | ## COMMENT : "digits (0-9) (2 times)"
312 | ##
313 | ## SUB-EXPR 4: (?:
314 | ## NAME : 4
315 | ## COMMENT : "group, but do not capture (optional (matching the most amount possible)):"
316 | ##
317 | ## SUB-EXPR 5: [:.]
318 | ## NAME : 5
319 | ## COMMENT : "any character of: ':', '.'"
320 | ##
321 | ## SUB-EXPR 6: \d+
322 | ## NAME : 6
323 | ## COMMENT : "digits (0-9) (1 or more times (matching the most amount possible))"
324 | ##
325 | ## SUB-EXPR 7: )?
326 | ## NAME : 7
327 | ## COMMENT : "end of grouping"
328 | ```
329 |
330 |
331 | We can use `get_construct` to extract an auto-commented & named `construct` script that can be optionally altered and used to `construct` a `regexr` object.
332 |
333 |
334 | ```r
335 | get_construct(out)
336 | ```
337 |
338 | ```
339 | construct(
340 | `1` = "\\d{0,2}" %:)% "digits (0-9) (between 0 and 2 times (matching the most amount possible))",
341 | `2` = ":" %:)% "':'",
342 | `3` = "\\d{2}" %:)% "digits (0-9) (2 times)",
343 | `4` = "(?:" %:)% "group, but do not capture (optional (matching the most amount possible)):",
344 | `5` = "[:.]" %:)% "any character of: ':', '.'",
345 | `6` = "\\d+" %:)% "digits (0-9) (1 or more times (matching the most amount possible))",
346 | `7` = ")?" %:)% "end of grouping"
347 | )
348 | ```
349 |
350 | Some may prefer that the `construct` script contains no names and/or comments. The user may also wish to place comments indented below the *sub-expressions* or names outdented and above the *sub-expressions*.
351 |
352 |
353 | ```r
354 | myregex2 <- "(\\s*[a-z]+)([^)]+\\))"
355 | get_construct(as.regexr(myregex2, comments.below=TRUE, names.above = TRUE))
356 | ```
357 |
358 | ```
359 | construct(
360 | `1` =
361 | "("
362 | %:)%"group and capture to \\1:",
363 | `2` =
364 | "\\s*"
365 | %:)%"whitespace (\n, \r, \t, \f, and \" \") (0 or more times (matching the most amount possible))",
366 | `3` =
367 | "[a-z]+"
368 | %:)%"any character of: 'a' to 'z' (1 or more times (matching the most amount possible))",
369 | `4` =
370 | ")"
371 | %:)%"end of \\1",
372 | `5` =
373 | "("
374 | %:)%"group and capture to \\2:",
375 | `6` =
376 | "[^)]+"
377 | %:)%"any character except: ')' (1 or more times (matching the most amount possible))",
378 | `7` =
379 | "\\)"
380 | %:)%"')'",
381 | `8` =
382 | ")"
383 | %:)%"end of \\2"
384 | )
385 | ```
386 |
387 | ```r
388 | get_construct(as.regexr(myregex2, names = FALSE))
389 | ```
390 |
391 | ```
392 | construct(
393 | "(" %:)% "group and capture to \\1:",
394 | "\\s*" %:)% "whitespace (\n, \r, \t, \f, and \" \") (0 or more times (matching the most amount possible))",
395 | "[a-z]+" %:)% "any character of: 'a' to 'z' (1 or more times (matching the most amount possible))",
396 | ")" %:)% "end of \\1",
397 | "(" %:)% "group and capture to \\2:",
398 | "[^)]+" %:)% "any character except: ')' (1 or more times (matching the most amount possible))",
399 | "\\)" %:)% "')'",
400 | ")" %:)% "end of \\2"
401 | )
402 | ```
403 |
404 | ## Using regexr With the rebus Package
405 |
406 | Richard Cotton maintains the [`rebus`](https://github.com/richierocks/rebus) package to provide natural language based functions and constants that can be used to generate regular expressions. His work can be utilized within the **regexr** framework to maintain manageable commented and named *sub-expressions*.
407 |
408 | ```r
409 | install.packages("richierocks/rebus")
410 | library(rebus)
411 |
412 | out <- construct(
413 | year = YEAR %:)% "a year",
414 | or = "|" %:)% "or",
415 | min = ":" %c% MINUTE %:)% "colon followed by valid minutes"
416 | )
417 | ```
418 |
419 |
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
# Header line shown above the citation entry
citHeader("To cite regexr in publications, please use:")

# Address of the package repository, used in both the entry and its text form
pkg_url <- "http://github.com/trinker/regexr"

# Plain-text rendering of the citation
plain_text <- paste(
    c("Rinker, T. W. (2014).",
      "regexr: Readable Regular Expressions.",
      "version 1.1.0. University at Buffalo. Buffalo, New York.",
      pkg_url),
    collapse = " "
)

citEntry(
    entry        = "manual",
    title        = "{regexr}: Readable Regular Expressions",
    author       = "Tyler W. Rinker",
    organization = "University at Buffalo/SUNY",
    address      = "Buffalo, New York",
    note         = "version 1.1.0",
    doi          = "10.5281/zenodo.13496",
    year         = "2014",
    url          = pkg_url,
    textVersion  = plain_text
)
--------------------------------------------------------------------------------
/inst/build.R:
--------------------------------------------------------------------------------
# Home directory of the machine running this build script.  An unrecognised
# user stops the script here; continuing with an empty `root` would only
# fail later, at setwd(), with an unrelated error message.
root <- switch(Sys.info()[["user"]],
    Tyler = "C:/Users/Tyler",
    trinker = "C:/Users/trinker",
    stop("Computer name not found")
)

# The package/repository name is the name of the directory the script is
# started from
repo <- pack <- basename(getwd())

# Remember the starting directory, then work from the Desktop
curd <- getwd()
loc <- file.path(root, "Desktop")
setwd(loc)

# Parent directory of the local git checkouts
base.git <- file.path(root, "GitHub")
14 |
15 |
# Copy the manual `<x>.pdf` from `dir` into `db`, overwriting an existing copy.
#
# x   - base name of the pdf (without extension)
# db  - destination directory
# dir - directory that holds the freshly built pdf
#
# Stops when the source pdf is missing or when the copy does not succeed;
# success is reported with a message.
qman <- function(x = repo, db = file.path(root, "/Dropbox/Public"), dir=loc) {
    path <- file.path(dir, paste0(x, ".pdf"))
    if (!file.exists(path)) stop(paste(x, "does not exist..."))
    opath <- file.path(db, paste0(x, ".pdf"))
    # file.copy signals failure through its return value only, so check it
    # before announcing success
    copied <- file.copy(path, opath, overwrite = TRUE)
    if (!isTRUE(copied)) stop(paste("could not copy", path, "to", opath))
    message("manual copied!\n")
}
23 |
24 |
25 | quick <- TRUE
26 | library(devtools)
27 | # Build steps run inside tryCatch so the original working directory is
28 | # restored even when documenting, installing, or copying the manual fails.
29 | tryCatch({
30 |     unlink(paste0(pack, ".pdf"), recursive = TRUE, force = TRUE)  # remove any stale manual first
31 |     x <- file.path(base.git, pack)
32 |     document(x)
33 |     install(x, quick = quick, build_vignettes = FALSE, dependencies = TRUE)
34 |     path <- find.package(pack)
35 |     system(paste(shQuote(file.path(R.home("bin"), "R")),
36 |         "CMD", "Rd2pdf", shQuote(path)))
37 |     qman(repo, dir=loc)
38 | }, finally = setwd(curd))
39 | message("Done!")
40 |
--------------------------------------------------------------------------------
/inst/extra_statdoc/readme.R:
--------------------------------------------------------------------------------
1 |
2 |
regexr is an R framework for constructing human readable regular expressions. It aims to provide tools that enable the user to write regular expressions in a way that is similar to the ways R code is written. The tools allow the user to (1) write in smaller, modular, named, regular expression chunks, (2) write top to bottom, rather than a single string, (3) comment individual chunks, (4) indent expressions to represent regular expression groups, and (5) test the validity of the concatenated expression and the modular chunks.
3 |
4 |
This framework harnesses the power and flexibility of regular expressions but provides a structural frame that is more consistent with both code writing and natural language conventions. The user decides how to break, indent, name, and comment the regular expressions in a way that is human readable, meaningful, and modular.
5 |
Download the development version of regexr here
--------------------------------------------------------------------------------
/inst/functions_table/functions.R:
--------------------------------------------------------------------------------
1 | | Function | Description |
2 | |----------------------|----------------------|
3 | | `construct` | Write Human Readable *Concatenated Regex* |
4 | | `%:)%` | Add Comments to Sub-expressions Within `construct` |
5 | | `unglue` | Break *Concatenated Regex* Into *Sub-expressions* |
6 | | `test` | Test Validity of *Concatenated Regex* & *Sub-expressions* |
7 | | `subs` | Get/Set Sub-expressions in a `regexr` Object |
8 | | `comments` | Get/Set Comments of Sub-expressions in a `regexr` Object |
9 | | `names` | Get/Set Names of Sub-expressions in a `regexr` Object |
10 | | `as.regexr`| Coerce Existing Regular Expressions to `regexr` Object |
--------------------------------------------------------------------------------
/inst/maintenance.R:
--------------------------------------------------------------------------------
1 | #========
2 | # BUILD
3 | #========
4 | source("inst/build.R")
5 |
6 | #==========================
7 | # Run unit tests
8 | #==========================
9 | devtools::test()
10 |
11 | #==========================
12 | # knit README.md
13 | #==========================
14 | knitr::knit2html("README.Rmd", output ='README.md'); reports::delete("README.html")
15 |
16 | #==========================
17 | # UPDATE NEWS
18 | #==========================
19 | update_news()
20 |
21 | #==========================
22 | # UPDATE VERSION
23 | #==========================
24 | update_version()
25 |
26 | #========================
27 | #staticdocs dev version
28 | #========================
29 | #packages
30 | # library(devtools); install_github("staticdocs", "hadley")
31 | # install_github("acc.roxygen2", "trinker");install.packages("rstudioapi")
32 | library(highlight); library(staticdocs);
33 |
34 | #STEP 1: create static doc
35 | #right now examples are FALSE in the future this will be true
36 | #in the future qdap2 will be the go to source
37 | R_USER <- switch(Sys.info()[["user"]],  # home directory of a known maintainer account
38 |     Tyler = "C:/Users/Tyler",
39 |     trinker = "C:/Users/trinker",
40 |     stop("Computer name not found", call. = FALSE)  # fail fast: message() would leave R_USER NULL and break the paths built from it
41 | )
42 | build_site(pkg=file.path(R_USER, "GitHub/regexr"), launch = FALSE)
43 |
44 | #STEP 2: reshape index
45 | path <- "inst/web"
46 | path2 <- file.path(path, "/index.html")
47 | rdme <- file.path(R_USER, "GitHub/regexr/inst/extra_statdoc/readme.R")
48 | library(acc.roxygen2); library(qdap);
49 | extras <- qcv("%comment%", "subs<-", "comments<-")
50 | ## drops <- qcv()
51 | expand_statdoc(path2, to.icon = extras, readme = rdme)
52 |
53 | x <- readLines(path2)
54 | x[grepl("
Authors
", x)] <- paste(c("
Author
"
55 | #rep("
Contributor
", 1)
56 | ),
57 | c("Tyler W. Rinker"))
58 |
59 | cat(paste(x, collapse="\n"), file=path2)
60 |
61 |
62 | #STEP 3: move to trinker.github
63 | library(reports)
64 | file <- file.path(R_USER, "/GitHub/trinker.github.com")
65 | incoming <- file.path(file, "regexr_dev")
66 | delete(incoming)
67 | file.copy(path, file, TRUE, TRUE)
68 | file.rename(file.path(file, "web"), incoming)
69 | ## delete(path)
70 | #==========================
71 | #staticdocs current version
72 | #==========================
73 | #packages
74 | # library(devtools); install_github("staticdocs", "hadley")
75 | # install_github("acc.roxygen2", "trinker"); install.packages("rstudioapi")
76 | library(highlight);library(staticdocs);
77 |
78 | #STEP 1: create static doc
79 | #right now examples are FALSE in the future this will be true
80 | #in the future qdap2 will be the go to source
81 | library(highlight); library(staticdocs);
82 |
83 | #STEP 1: create static doc
84 | #right now examples are FALSE in the future this will be true
85 | #in the future qdap2 will be the go to source
86 | R_USER <- switch(Sys.info()[["user"]],  # home directory of a known maintainer account
87 |     Tyler = "C:/Users/Tyler",
88 |     trinker = "C:/Users/trinker",
89 |     stop("Computer name not found", call. = FALSE)  # fail fast: message() would leave R_USER NULL and break the paths built from it
90 | )
91 | build_site(pkg=file.path(R_USER, "GitHub/regexr"), launch = FALSE)
92 |
93 | #STEP 2: reshape index
94 | path <- "inst/web"
95 | path2 <- file.path(path, "/index.html")
96 | rdme <- file.path(R_USER, "GitHub/regexr/inst/extra_statdoc/readme.R")
97 | library(acc.roxygen2); library(qdap);
98 | extras <- qcv("%comment%", "regex<-", "comments<-")
99 | ## drops <- qcv()
100 | expand_statdoc(path2, to.icon = extras, readme = rdme)
101 |
102 | x <- readLines(path2)
103 | x[grepl("
logical. Should names be included in the construct
51 | script?
52 |
comments
53 |
logical. Should comments be included in the construct
54 | script?
55 |
names.above
56 |
logical. Should names be included above the sub-expressions
57 | in the construct script? If FALSE names are placed in front of
58 | the sub-expressions.
59 |
comments.below
60 |
logical. Should comments be included below the
61 | sub-expressions in the construct script? If FALSE comments are
62 | placed behind the sub-expressions.
63 |
...
64 |
Ignored.
65 |
66 |
67 |
68 |
Coerce character to regexr
69 |
70 |
71 |
72 |
73 |
Description
74 |
75 |
Convert a regular expression to a commented regexr object.
logical. Should names be included in the construct
51 | script?
52 |
comments
53 |
logical. Should comments be included in the construct
54 | script?
55 |
names.above
56 |
logical. Should names be included above the sub-expressions
57 | in the construct script? If FALSE names are placed in front of
58 | the sub-expressions.
59 |
comments.below
60 |
logical. Should comments be included below the
61 | sub-expressions in the construct script? If FALSE comments are
62 | placed behind the sub-expressions.
63 |
...
64 |
Ignored.
65 |
66 |
67 |
68 |
Coerce default to regexr
69 |
70 |
71 |
72 |
73 |
Description
74 |
75 |
Convert a regular expression to a commented regexr object.
Extract Script from reverse_construct to construct a
39 | regexr Object.
40 |
41 |
42 |
43 |
Usage
44 |
"get_construct"(x, file = "", ...)
45 |
46 |
Arguments
47 |
48 |
x
49 |
A reverse_construct object.
50 |
file
51 |
A connection, or a character string naming the file to print to.
52 | If "" (the default), cat prints to the console unless
53 | redirected by sink. Windows users may use
54 | file = "clipboard" to copy the content to the clipboard. To print
55 | as character use file = NULL.
56 |
...
57 |
Other arguments passed to print.reverse_construct.
58 |
59 |
60 |
62 |
Extract Script from <code>reverse_construct</code> to <code>construct</code> a
63 | <code>regexr</code> Object.
64 |
65 |
66 |
67 |
68 |
Value
69 |
70 |
Returns an auto-commented script used to construct a
71 | regexr object.
72 |
73 |
74 |
75 |
76 |
Description
77 |
78 |
Pulls the reverse_construct attribute from a reverse_construct.
79 | This script can be assigned to an object and run in the console to create a
80 | commented, named regexr object.
regexr is an R framework for constructing human readable regular expressions. It aims to provide tools that enable the user to write regular expressions in a way that is similar to the ways R code is written. The tools allow the user to (1) write in smaller, modular, named, regular expression chunks, (2) write top to bottom, rather than a single string, (3) comment individual chunks, (4) indent expressions to represent regular expression groups, and (5) test the validity of the concatenated expression and the modular chunks.
44 |
45 |
This framework harnesses the power and flexibility of regular expressions but provides a structural frame that is more consistent with both code writing and natural language conventions. The user decides how to break, indent, name, and comment the regular expressions in a way that is human readable, meaningful, and modular.
46 |
Download the development version of regexr here
47 |
48 |
Help topics
49 |
50 |
Constructing & Testing Regular Expressions
51 |
Function for constructing and testing regular expressions.
52 |
53 |
54 |
55 |
56 |
57 | construct Construct Human Readable Regular Expressions
58 |
59 |
60 | %:)%%comment% Add Comments to Character Strings.
A connection, or a character string naming the file to print to.
51 | If "" (the default), cat prints to the console unless
52 | redirected by sink. Windows users may use
53 | file = "clipboard" to copy the content to the clipboard.
regexr: Tools for Human Readable Regular Expressions
39 |
40 |
41 |
42 |
43 |
44 |
regexr: Tools for Human Readable Regular Expressions
45 |
46 |
47 |
48 |
49 |
Description
50 |
51 |
regexr is an R framework for constructing and managing human readable
52 | regular expressions. It aims to provide tools that enable the user to write
53 | regular expressions in a way that is similar to the ways R code is written.
54 | The tools allow the user to:
55 |
56 |
Write in smaller, modular, named, sub-expressions
57 |
58 |
Write top to bottom, rather than a single string
59 |
60 |
Comment individual sub-expressions
61 |
62 |
Indent expressions to represent regular expression groups
63 |
64 |
Add vertical line spaces and R comments (i.e., #)
65 |
66 |
Test the validity of the concatenated expression and the modular sub-expressions
67 |
68 |
69 |
70 |
This framework harnesses the power and flexibility of regular expressions
71 | but provides a structural frame that is more consistent with both code
72 | writing and natural language conventions.
This is a convenience function that sets the names on a regexr object
69 | and returns the object. This function works the same as
70 | setNames but provides a naming which is consistent with
71 | set_regex and set_comments.
logical. Should test print warnings about the
51 | concatenated expression and individual sub-expressions?
52 |
...
53 |
Ignored.
54 |
55 |
56 |
57 |
Test Regular Expression Validity
58 |
59 |
60 |
61 |
62 |
Value
63 |
64 |
Returns a list of two logical vectors. The first vector is a test of
65 | the concatenated expression. The second vector is a logical test of the
66 | validity of each sub-expression that makes up the concatenated
67 | expression.
68 |
69 |
70 |
71 |
72 |
Description
73 |
74 |
Test regular expression validity of a regexr object.
75 |
76 |
77 |
78 |
Examples
79 |
m <- construct(
80 | space =
81 | "\\s+"
82 | %:)%"I see",
83 |
84 | simp =
85 | "(?<=(foo))",
86 |
87 | or =
88 | "(;|:)\\s*"
89 | %:)%"comment on what this does",
90 |
91 | "[a]s th[atey]"
92 | )
93 |
94 |
95 | test(m)
96 |